https://github.com/Acim-Maravic updated https://github.com/llvm/llvm-project/pull/69596
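For context, the three rounding intrinsics in question are llvm.rint, llvm.nearbyint, and llvm.roundeven; in the default round-to-nearest-even FP environment they all produce the same value (e.g. 2.5 rounds to 2.0), differing only in whether they may raise FP exceptions. A minimal IR sketch (mine, not part of the patch) that should select to the single round-to-nearest-even machine instruction (v_rndne_* on AMDGPU) after this change:

  define float @round_three_ways(float %x) {
    %a = call float @llvm.rint.f32(float %x)       ; may raise an inexact exception
    %b = call float @llvm.nearbyint.f32(float %x)  ; same result, no exception
    %c = call float @llvm.roundeven.f32(float %x)  ; rounds halfway cases to even
    %ab = fadd float %a, %b
    %r = fadd float %ab, %c
    ret float %r
  }
  declare float @llvm.rint.f32(float)
  declare float @llvm.nearbyint.f32(float)
  declare float @llvm.roundeven.f32(float)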
>From af211dc797ead570ce28ccdb1248f588b15d07bd Mon Sep 17 00:00:00 2001 From: Acim Maravic <acim.mara...@syrmia.com> Date: Mon, 13 Nov 2023 13:56:10 +0100 Subject: [PATCH] [AMDGPU] Generic lowering for rint and nearbyint There are three different rounding intrinsics that are lowered to the same instruction. --- .../CodeGen/GlobalISel/MachineIRBuilder.h | 10 +- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 4 +- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1 + llvm/lib/CodeGen/TargetLoweringBase.cpp | 7 +- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 1 + llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 40 ++-- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 49 ++-- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 4 +- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 +- llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 2 +- llvm/lib/Target/AMDGPU/R600Instructions.td | 2 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 +- llvm/lib/Target/AMDGPU/SOPInstructions.td | 4 +- llvm/lib/Target/AMDGPU/VOP1Instructions.td | 6 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 5 + llvm/test/Analysis/CostModel/AMDGPU/fround.ll | 116 +++++++-- .../AMDGPU/GlobalISel/inst-select-frint.mir | 110 --------- .../GlobalISel/inst-select-frint.s16.mir | 97 -------- .../inst-select-scalar-float-sop1.mir | 4 +- .../AMDGPU/GlobalISel/legalize-fexp.mir | 126 +++++----- .../AMDGPU/GlobalISel/legalize-frint.mir | 220 ------------------ .../AMDGPU/GlobalISel/regbankselect-frint.mir | 36 --- 22 files changed, 236 insertions(+), 615 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-frint.mir delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index bf41c19cd6cc726..634eba76f91210c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1882,10 +1882,12 @@ class MachineIRBuilder { return buildInstr(TargetOpcode::G_FPTOSI, {Dst}, {Src0}); } - /// Build and insert \p Dst = G_FRINT \p Src0, \p Src1 - MachineInstrBuilder buildFRint(const DstOp &Dst, const SrcOp &Src0, - std::optional<unsigned> Flags = std::nullopt) { - return buildInstr(TargetOpcode::G_FRINT, {Dst}, {Src0}, Flags); + /// Build and insert \p Dst = G_INTRINSIC_ROUNDEVEN \p Src0 + MachineInstrBuilder + buildIntrinsicRoundeven(const DstOp &Dst, const SrcOp &Src0, + std::optional<unsigned> Flags = std::nullopt) { + return buildInstr(TargetOpcode::G_INTRINSIC_ROUNDEVEN, {Dst}, {Src0}, + Flags); } /// Build and insert \p Res = G_SMIN \p Op0, \p Op1 diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 108768494ccbb28..8c0c18691a368cb 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3570,10 +3570,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerFFloor(MI); case TargetOpcode::G_INTRINSIC_ROUND: return lowerIntrinsicRound(MI); - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { + case TargetOpcode::G_FRINT: { // Since round even is the assumed rounding mode for unconstrained FP // operations, rint and roundeven are the same operation.
- changeOpcode(MI, TargetOpcode::G_FRINT); + changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN); return Legalized; } case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2dfdddad3cc389f..646c8ed2a06e41e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17631,6 +17631,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { case ISD::FRINT: case ISD::FTRUNC: case ISD::FNEARBYINT: + case ISD::FROUNDEVEN: case ISD::FFLOOR: case ISD::FCEIL: return N0; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 3e4bff5ddce1264..f5f698aafcd17e8 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -867,9 +867,8 @@ void TargetLoweringBase::initActions() { setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand); // These library functions default to expand. - setOperationAction( - {ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP}, - VT, Expand); + setOperationAction({ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP}, VT, + Expand); // These operations default to expand for vector types. if (VT.isVector()) @@ -928,7 +927,7 @@ void TargetLoweringBase::initActions() { setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND, - ISD::LLROUND, ISD::LRINT, ISD::LLRINT}, + ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN}, {MVT::f32, MVT::f64, MVT::f128}, Expand); // Default ISD::TRAP to expand (which turns it into abort). diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index b5ceaaa14b4fd5e..ba47d3eabea0e58 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -163,6 +163,7 @@ bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const { case ISD::FTRUNC: case ISD::FRINT: case ISD::FNEARBYINT: + case ISD::FROUNDEVEN: case ISD::FROUND: case ISD::FFLOOR: case ISD::FMINNUM: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 607d59db7bcf709..cc16a7d73dfff3b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -329,8 +329,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, // Library functions. These default to Expand, but we have instructions // for them. 
- setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR, ISD::FRINT, - ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM}, + setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR, + ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM}, MVT::f32, Legal); setOperationAction(ISD::FLOG2, MVT::f32, Custom); @@ -341,7 +341,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom); - setOperationAction(ISD::FROUNDEVEN, {MVT::f16, MVT::f32, MVT::f64}, Custom); + setOperationAction(ISD::FRINT, {MVT::f16, MVT::f32, MVT::f64}, Custom); setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom); @@ -457,14 +457,14 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, for (MVT VT : FloatVectorTypes) { setOperationAction( - {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, - ISD::FCEIL, ISD::FCOS, ISD::FDIV, ISD::FEXP2, - ISD::FEXP, ISD::FLOG2, ISD::FREM, ISD::FLOG, - ISD::FLOG10, ISD::FPOW, ISD::FFLOOR, ISD::FTRUNC, - ISD::FMUL, ISD::FMA, ISD::FRINT, ISD::FNEARBYINT, - ISD::FSQRT, ISD::FSIN, ISD::FSUB, ISD::FNEG, - ISD::VSELECT, ISD::SELECT_CC, ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE, - ISD::SETCC, ISD::FCANONICALIZE}, + {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, + ISD::FCEIL, ISD::FCOS, ISD::FDIV, ISD::FEXP2, + ISD::FEXP, ISD::FLOG2, ISD::FREM, ISD::FLOG, + ISD::FLOG10, ISD::FPOW, ISD::FFLOOR, ISD::FTRUNC, + ISD::FMUL, ISD::FMA, ISD::FRINT, ISD::FNEARBYINT, + ISD::FSQRT, ISD::FSIN, ISD::FSUB, ISD::FNEG, + ISD::VSELECT, ISD::SELECT_CC, ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE, + ISD::SETCC, ISD::FCANONICALIZE, ISD::FROUNDEVEN}, VT, Expand); } @@ -585,6 +585,7 @@ static bool fnegFoldsIntoOpcode(unsigned Opc) { case ISD::FTRUNC: case ISD::FRINT: case ISD::FNEARBYINT: + case ISD::FROUNDEVEN: case ISD::FCANONICALIZE: case AMDGPUISD::RCP: case AMDGPUISD::RCP_LEGACY: @@ -2368,7 +2369,8 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2); } -SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { +SDValue AMDGPUTargetLowering::LowerFROUNDEVEN(SDValue Op, + SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); @@ -2395,18 +2397,19 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2); } -SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const { +SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, + SelectionDAG &DAG) const { // FNEARBYINT and FRINT are the same, except in their handling of FP // exceptions. Those aren't really meaningful for us, and OpenCL only has // rint, so just treat them as equivalent. - return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0)); + return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), Op.getValueType(), + Op.getOperand(0)); } -SDValue AMDGPUTargetLowering::LowerFROUNDEVEN(SDValue Op, - SelectionDAG &DAG) const { +SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { auto VT = Op.getValueType(); auto Arg = Op.getOperand(0u); - return DAG.getNode(ISD::FRINT, SDLoc(Op), VT, Arg); + return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), VT, Arg); } // XXX - May require not supporting f32 denormals? 
@@ -2936,7 +2939,7 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { PL = getMad(DAG, SL, VT, XH, CL, Mad0, Flags); } - SDValue E = DAG.getNode(ISD::FRINT, SL, VT, PH, Flags); + SDValue E = DAG.getNode(ISD::FROUNDEVEN, SL, VT, PH, Flags); // It is unsafe to contract this fsub into the PH multiply. SDValue PHSubE = DAG.getNode(ISD::FSUB, SL, VT, PH, E, FlagsNoContract); @@ -4680,6 +4683,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, case ISD::FTRUNC: case ISD::FRINT: case ISD::FNEARBYINT: // XXX - Should fround be handled? + case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FCANONICALIZE: case AMDGPUISD::RCP: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 3d70ed150df12f8..f0bb83b21a3f87a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1076,27 +1076,30 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0) .lower(); - // Lower roundeven into G_FRINT - getActionDefinitionsBuilder({G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN}) - .scalarize(0) - .lower(); + // Lower G_FNEARBYINT and G_FRINT into G_INTRINSIC_ROUNDEVEN + getActionDefinitionsBuilder({G_INTRINSIC_ROUND, G_FRINT, G_FNEARBYINT}) + .scalarize(0) + .lower(); if (ST.has16BitInsts()) { - getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT}) - .legalFor({S16, S32, S64}) - .clampScalar(0, S16, S64) - .scalarize(0); + getActionDefinitionsBuilder( + {G_INTRINSIC_TRUNC, G_FCEIL, G_INTRINSIC_ROUNDEVEN}) + .legalFor({S16, S32, S64}) + .clampScalar(0, S16, S64) + .scalarize(0); } else if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) { - getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT}) - .legalFor({S32, S64}) - .clampScalar(0, S32, S64) - .scalarize(0); + getActionDefinitionsBuilder( + {G_INTRINSIC_TRUNC, G_FCEIL, G_INTRINSIC_ROUNDEVEN}) + .legalFor({S32, S64}) + .clampScalar(0, S32, S64) + .scalarize(0); } else { - getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT}) - .legalFor({S32}) - .customFor({S64}) - .clampScalar(0, S32, S64) - .scalarize(0); + getActionDefinitionsBuilder( + {G_INTRINSIC_TRUNC, G_FCEIL, G_INTRINSIC_ROUNDEVEN}) + .legalFor({S32}) + .customFor({S64}) + .clampScalar(0, S32, S64) + .scalarize(0); } getActionDefinitionsBuilder(G_PTR_ADD) @@ -1980,8 +1983,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper, switch (MI.getOpcode()) { case TargetOpcode::G_ADDRSPACE_CAST: return legalizeAddrSpaceCast(MI, MRI, B); - case TargetOpcode::G_FRINT: - return legalizeFrint(MI, MRI, B); + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + return legalizeFroundeven(MI, MRI, B); case TargetOpcode::G_FCEIL: return legalizeFceil(MI, MRI, B); case TargetOpcode::G_FREM: @@ -2286,9 +2289,9 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( return true; } -bool AMDGPULegalizerInfo::legalizeFrint( - MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &B) const { +bool AMDGPULegalizerInfo::legalizeFroundeven(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { Register Src = MI.getOperand(1).getReg(); LLT Ty = MRI.getType(Src); assert(Ty.isScalar() && Ty.getSizeInBits() == 64); @@ -3567,7 +3570,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, PL = getMad(B, Ty, XH.getReg(0), CL.getReg(0), Mad0, Flags); } - auto E = B.buildFRint(Ty, PH, Flags); + auto E = B.buildIntrinsicRoundeven(Ty, PH, Flags); // It is unsafe to contract this fsub 
into the PH multiply. auto PHSubE = B.buildFSub(Ty, PH, E, FlagsNoContract); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index b90fb76a4ccda1a..855fa0ddc214fe8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -43,8 +43,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { bool legalizeAddrSpaceCast(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; - bool legalizeFrint(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &B) const; + bool legalizeFroundeven(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; bool legalizeFceil(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; bool legalizeFrem(MachineInstr &MI, MachineRegisterInfo &MRI, diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 5b056bd9e5dba2c..2f7b42d87a165a2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3718,7 +3718,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_FMA: case AMDGPU::G_FFLOOR: case AMDGPU::G_FCEIL: - case AMDGPU::G_FRINT: + case AMDGPU::G_INTRINSIC_ROUNDEVEN: case AMDGPU::G_FMINNUM: case AMDGPU::G_FMAXNUM: case AMDGPU::G_INTRINSIC_TRUNC: diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index ad072cfe23b17d9..c1ba9c514874eb7 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -101,7 +101,7 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSUB, MVT::f32, Expand); - setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FFLOOR}, + setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FFLOOR}, MVT::f64, Custom); setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom); diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index f4dfbe8adc75d25..f82bd55beccc0ce 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -782,7 +782,7 @@ def SETNE_DX10 : R600_2OP < def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; def TRUNC : R600_1OP_Helper <0x11, "TRUNC", ftrunc>; def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>; -def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>; +def RNDNE : R600_1OP_Helper <0x13, "RNDNE", froundeven>; def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>; def MOV : R600_1OP <0x19, "MOV", []>; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f8f1e6d6c9097cc..b6648249caa166f 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -479,9 +479,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, {MVT::f32, MVT::f64}, Legal); if (Subtarget->haveRoundOpsF64()) - setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FRINT}, MVT::f64, Legal); + setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FROUNDEVEN}, MVT::f64, + Legal); else - setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FFLOOR}, + setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FFLOOR}, MVT::f64, Custom); setOperationAction(ISD::FFLOOR, MVT::f64, Legal); diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td 
b/llvm/lib/Target/AMDGPU/SOPInstructions.td index f3309049e7a7fd9..375cabc0249c31d 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -419,7 +419,7 @@ let SubtargetPredicate = HasSALUFloatInsts, Uses = [MODE], def S_CEIL_F32 : SOP1_F32_Inst<"s_ceil_f32", fceil>; def S_FLOOR_F32 : SOP1_F32_Inst<"s_floor_f32", ffloor>; def S_TRUNC_F32 : SOP1_F32_Inst<"s_trunc_f32", ftrunc>; - def S_RNDNE_F32 : SOP1_F32_Inst<"s_rndne_f32", frint>; + def S_RNDNE_F32 : SOP1_F32_Inst<"s_rndne_f32", froundeven>; let FPDPRounding = 1 in def S_CVT_F16_F32 : SOP1_F32_Inst<"s_cvt_f16_f32", fpround, f16, f32>; @@ -427,7 +427,7 @@ let SubtargetPredicate = HasSALUFloatInsts, Uses = [MODE], def S_CEIL_F16 : SOP1_F32_Inst<"s_ceil_f16", fceil, f16>; def S_FLOOR_F16 : SOP1_F32_Inst<"s_floor_f16", ffloor, f16>; def S_TRUNC_F16 : SOP1_F32_Inst<"s_trunc_f16", ftrunc, f16>; - def S_RNDNE_F16 : SOP1_F32_Inst<"s_rndne_f16", frint, f16>; + def S_RNDNE_F16 : SOP1_F32_Inst<"s_rndne_f16", froundeven, f16>; } // End mayRaiseFPException = 1 } // End SubtargetPredicate = HasSALUFloatInsts, Uses = [MODE] // SchedRW = [WriteSFPU], isReMaterializable = 1 diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 734db326fb77ddd..317d636c886d0e3 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -317,7 +317,7 @@ defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f3 defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>; defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>; defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>; -defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>; +defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, froundeven>; defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>; let TRANS = 1, SchedRW = [WriteTrans32] in { @@ -458,7 +458,7 @@ let SubtargetPredicate = isGFX7Plus in { let SchedRW = [WriteDoubleAdd] in { defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>; defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>; - defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, frint>; + defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, froundeven>; defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>; } // End SchedRW = [WriteDoubleAdd] } // End SubtargetPredicate = isGFX7Plus @@ -502,7 +502,7 @@ defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_ defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>; defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>; defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>; -defm V_RNDNE_F16 : VOP1Inst_t16 <"v_rndne_f16", VOP_F16_F16, frint>; +defm V_RNDNE_F16 : VOP1Inst_t16 <"v_rndne_f16", VOP_F16_F16, froundeven>; let FPDPRounding = 1 in { defm V_FRACT_F16 : VOP1Inst_t16 <"v_fract_f16", VOP_F16_F16, AMDGPUfract>; } // End FPDPRounding = 1 diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 20e0210bcec5b6a..882ff380140a5f6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -796,6 +796,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FTRUNC, MVT::f80, Expand); setOperationAction(ISD::FRINT, MVT::f80, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand); + 
setOperationAction(ISD::FROUNDEVEN, MVT::f80, Expand); setOperationAction(ISD::FMA, MVT::f80, Expand); setOperationAction(ISD::LROUND, MVT::f80, Expand); setOperationAction(ISD::LLROUND, MVT::f80, Expand); @@ -929,6 +930,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FNEARBYINT, VT, Expand); + setOperationAction(ISD::FROUNDEVEN, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); @@ -2111,6 +2113,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FRINT, VT, Legal); setOperationAction(ISD::FNEARBYINT, VT, Legal); setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); + setOperationAction(ISD::FROUNDEVEN, VT, Legal); + setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); setOperationAction(ISD::FROUND, VT, Custom); @@ -43796,6 +43800,7 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG, case ISD::FCEIL: case ISD::FTRUNC: case ISD::FNEARBYINT: + case ISD::FROUNDEVEN: case ISD::FROUND: case ISD::FFLOOR: case X86ISD::FRCP: diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fround.ll b/llvm/test/Analysis/CostModel/AMDGPU/fround.ll index c4dd524efdd8447..88adabda0bfbb6e 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fround.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fround.ll @@ -142,21 +142,21 @@ define i32 @nearbyint(i32 %arg) { define i32 @rint(i32 %arg) { ; FAST-LABEL: 'rint' -; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.rint.f32(float undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> 
@llvm.rint.v4f64(<4 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'rint' -; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.rint.f32(float undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) @@ -164,21 +164,21 @@ define i32 @rint(i32 %arg) { ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; FAST-SIZE-LABEL: 'rint' -; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef) -; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) -; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) -; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) -; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef) -; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) -; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) -; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.rint.f32(float undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) +; FAST-SIZE-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) ; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLOW-SIZE-LABEL: 'rint' -; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef) -; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) -; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) -; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.rint.f32(float undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) ; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef) ; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) ; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) @@ -198,6 +198,64 @@ define i32 @rint(i32 %arg) { ret i32 undef } +define i32 @roundeven(i32 %arg) { +; FAST-LABEL: 'roundeven' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.roundeven.f32(float undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.roundeven.f64(double undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'roundeven' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = 
call float @llvm.roundeven.f32(float undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.roundeven.f64(double undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'roundeven' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.roundeven.f32(float undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.roundeven.f64(double undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'roundeven' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.roundeven.f32(float undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.roundeven.f64(double undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef) +; SLOW-SIZE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.roundeven.f32(float undef) + %V4F32 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef) + + %F64 = call double @llvm.roundeven.f64(double undef) + %V2F64 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef) + + ret i32 undef +} + define i32 @trunc(i32 %arg) { ; FAST-LABEL: 'trunc' ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef) @@ -296,6 +354,16 @@ declare <2 x double> @llvm.rint.v2f64(<2 x double>) declare <4 x double> @llvm.rint.v4f64(<4 x double>) declare <8 x double> @llvm.rint.v8f64(<8 x double>) +declare float @llvm.roundeven.f32(float) +declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) +declare <8 x float> @llvm.roundeven.v8f32(<8 x float>) +declare <16 x float> @llvm.roundeven.v16f32(<16 x float>) + +declare double @llvm.roundeven.f64(double) +declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) +declare <4 x double> @llvm.roundeven.v4f64(<4 x double>) +declare <8 x double> @llvm.roundeven.v8f64(<8 x double>) + declare float @llvm.trunc.f32(float) declare <4 x float> @llvm.trunc.v4f32(<4 x float>) declare <8 x float> @llvm.trunc.v8f32(<8 x float>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir deleted file mode 100644 index aaed64f95b08c9d..000000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir +++ /dev/null @@ -1,110 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=bonaire -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s - ---- -name: frint_s32_vv -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $vgpr0 - - ; GCN-LABEL: name: frint_s32_vv - ; GCN: liveins: $vgpr0 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_RNDNE_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %1 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FRINT %0 - $vgpr0 = COPY %1 -... - ---- -name: frint_s32_vs -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $sgpr0 - - ; GCN-LABEL: name: frint_s32_vs - ; GCN: liveins: $sgpr0 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_RNDNE_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %1 - %0:sgpr(s32) = COPY $sgpr0 - %1:vgpr(s32) = G_FRINT %0 - $vgpr0 = COPY %1 -... 
- ---- -name: frint_fneg_s32_vv -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $vgpr0 - - ; GCN-LABEL: name: frint_fneg_s32_vv - ; GCN: liveins: $vgpr0 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_RNDNE_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = G_FNEG %0 - %2:vgpr(s32) = G_FRINT %1 - $vgpr0 = COPY %2 -... - ---- -name: frint_s64_vv -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $vgpr0_vgpr1 - - ; GCN-LABEL: name: frint_s64_vv - ; GCN: liveins: $vgpr0_vgpr1 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: %1:vreg_64 = nofpexcept V_RNDNE_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %1 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FRINT %0 - $vgpr0_vgpr1 = COPY %1 -... - ---- -name: frint_s64_fneg_vv -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $vgpr0_vgpr1 - - ; GCN-LABEL: name: frint_s64_fneg_vv - ; GCN: liveins: $vgpr0_vgpr1 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: %2:vreg_64 = nofpexcept V_RNDNE_F64_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0_vgpr1 = COPY %2 - %0:vgpr(s64) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_FNEG %0 - %2:vgpr(s64) = G_FRINT %1 - $vgpr0_vgpr1 = COPY %2 -... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir deleted file mode 100644 index 0e6a3ccacd16810..000000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir +++ /dev/null @@ -1,97 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s - ---- -name: frint_s16_ss -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $sgpr0 - - ; GCN-LABEL: name: frint_s16_ss - ; GCN: liveins: $sgpr0 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GCN-NEXT: [[FRINT:%[0-9]+]]:sreg_32(s16) = G_FRINT [[TRUNC]] - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FRINT]](s16) - ; GCN-NEXT: $sgpr0 = COPY [[COPY1]](s32) - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:sgpr(s16) = G_FRINT %1 - %3:sgpr(s32) = G_ANYEXT %2 - $sgpr0 = COPY %3 -... - ---- -name: frint_s16_vv -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $vgpr0 - - ; GCN-LABEL: name: frint_s16_vv - ; GCN: liveins: $vgpr0 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FRINT %1 - %3:vgpr(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 -... 
- ---- -name: frint_s16_vs -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $sgpr0 - - ; GCN-LABEL: name: frint_s16_vs - ; GCN: liveins: $sgpr0 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %2 - %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FRINT %1 - %3:vgpr(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 -... - ---- -name: frint_fneg_s16_vv -legalized: true -regBankSelected: true -tracksRegLiveness: true - -body: | - bb.0: - liveins: $vgpr0 - - ; GCN-LABEL: name: frint_fneg_s16_vv - ; GCN: liveins: $vgpr0 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_RNDNE_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %3 - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s16) = G_TRUNC %0 - %2:vgpr(s16) = G_FNEG %1 - %3:vgpr(s16) = G_FRINT %2 - %4:vgpr(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 -... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir index bb86413964098ba..4e2f33b9505d076 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-scalar-float-sop1.mir @@ -212,7 +212,7 @@ body: | ; GFX1150-NEXT: %1:sreg_32 = nofpexcept S_RNDNE_F32 [[COPY]], implicit $mode ; GFX1150-NEXT: $sgpr0 = COPY %1 %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_FRINT %0 + %1:sgpr(s32) = G_INTRINSIC_ROUNDEVEN %0 $sgpr0 = COPY %1(s32) ... @@ -295,7 +295,7 @@ body: | ; GFX1150-NEXT: $sgpr0 = COPY %2 %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0(s32) - %2:sgpr(s16) = G_FRINT %1 + %2:sgpr(s16) = G_INTRINSIC_ROUNDEVEN %1 %3:sgpr(s32) = G_ANYEXT %2(s16) $sgpr0 = COPY %3(s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir index 7bcd4e9d9cf67b6..1b675a83307e866 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir @@ -21,10 +21,10 @@ body: | ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C]], [[FNEG]] ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 ; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C1]], [[FMA]] - ; GFX6-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] - ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 @@ -52,10 +52,10 @@ body: | ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL1]] ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C2]] ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FADD]] - ; GFX8-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] - ; GFX8-NEXT: 
[[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FADD1]] - ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 @@ -78,10 +78,10 @@ body: | ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C]], [[FNEG]] ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[C1]], [[FMA]] - ; GFX9-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] - ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) ; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 @@ -115,10 +115,10 @@ body: | ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C]], [[FNEG]] ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 ; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C1]], [[FMA]] - ; GFX6-NEXT: [[FRINT:%[0-9]+]]:_(s32) = nnan G_FRINT [[FMUL]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[FRINT]] + ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = nnan G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FSUB]], [[FMA1]] - ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = nnan G_FLDEXP [[INT]], [[FPTOSI]](s32) ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 @@ -146,10 +146,10 @@ body: | ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL2]], [[FMUL1]] ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = nnan G_FMUL [[AND]], [[C2]] ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL3]], [[FADD]] - ; GFX8-NEXT: [[FRINT:%[0-9]+]]:_(s32) = nnan G_FRINT [[FMUL]] - ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[FRINT]] + ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = nnan G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = nnan G_FADD [[FSUB1]], [[FADD1]] - ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan 
G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = nnan G_FLDEXP [[INT]], [[FPTOSI]](s32) ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 @@ -172,10 +172,10 @@ body: | ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C]], [[FNEG]] ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[COPY]], [[C1]], [[FMA]] - ; GFX9-NEXT: [[FRINT:%[0-9]+]]:_(s32) = nnan G_FRINT [[FMUL]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[FRINT]] + ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = nnan G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nnan G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FSUB]], [[FMA1]] - ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) ; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = nnan G_FLDEXP [[INT]], [[FPTOSI]](s32) ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 @@ -210,10 +210,10 @@ body: | ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]] ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 ; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]] - ; GFX6-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] - ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 @@ -229,10 +229,10 @@ body: | ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] ; GFX6-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C]], [[FNEG1]] ; GFX6-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]] - ; GFX6-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL1]] - ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[FRINT1]] + ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]] + ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]] ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]] - ; GFX6-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32) + ; GFX6-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32) ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) ; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]] @@ -258,10 +258,10 @@ body: | ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL1]] ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C2]] ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FADD]] - ; GFX8-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] - ; 
GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FADD1]] - ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 @@ -281,10 +281,10 @@ body: | ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL5]] ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[AND1]], [[C2]] ; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FADD3]] - ; GFX8-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL4]] - ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(s32) = G_FSUB [[FMUL4]], [[FRINT1]] + ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL4]] + ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(s32) = G_FSUB [[FMUL4]], [[INTRINSIC_ROUNDEVEN1]] ; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FSUB3]], [[FADD4]] - ; GFX8-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32) + ; GFX8-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32) ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD5]](s32) ; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C3]] @@ -305,10 +305,10 @@ body: | ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]] ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]] - ; GFX9-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] - ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) ; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 @@ -324,10 +324,10 @@ body: | ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C]], [[FNEG1]] ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]] - ; GFX9-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL1]] - ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[FRINT1]] + ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]] ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]] - ; GFX9-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32) + ; GFX9-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32) ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) ; 
GFX9-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]] @@ -359,10 +359,10 @@ body: | ; GFX6-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]] ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 ; GFX6-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]] - ; GFX6-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] - ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] - ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX6-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) ; GFX6-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 @@ -378,10 +378,10 @@ body: | ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] ; GFX6-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C]], [[FNEG1]] ; GFX6-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]] - ; GFX6-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL1]] - ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[FRINT1]] + ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]] + ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]] ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]] - ; GFX6-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32) + ; GFX6-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32) ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) ; GFX6-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]] @@ -393,10 +393,10 @@ body: | ; GFX6-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FMUL2]] ; GFX6-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C]], [[FNEG2]] ; GFX6-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C1]], [[FMA4]] - ; GFX6-NEXT: [[FRINT2:%[0-9]+]]:_(s32) = G_FRINT [[FMUL2]] - ; GFX6-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[FMUL2]], [[FRINT2]] + ; GFX6-NEXT: [[INTRINSIC_ROUNDEVEN2:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL2]] + ; GFX6-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[FMUL2]], [[INTRINSIC_ROUNDEVEN2]] ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB2]], [[FMA5]] - ; GFX6-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT2]](s32) + ; GFX6-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN2]](s32) ; GFX6-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) ; GFX6-NEXT: [[FLDEXP2:%[0-9]+]]:_(s32) = G_FLDEXP [[INT2]], [[FPTOSI2]](s32) ; GFX6-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C2]] @@ -422,10 +422,10 @@ body: | ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[FMUL1]] ; GFX8-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[AND]], [[C2]] ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FADD]] - ; GFX8-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] - ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX8-NEXT: 
[[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FADD1]] - ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX8-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) ; GFX8-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], [[FPTOSI]](s32) ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 @@ -445,10 +445,10 @@ body: | ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL6]], [[FMUL5]] ; GFX8-NEXT: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[AND1]], [[C2]] ; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FADD3]] - ; GFX8-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL4]] - ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(s32) = G_FSUB [[FMUL4]], [[FRINT1]] + ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL4]] + ; GFX8-NEXT: [[FSUB3:%[0-9]+]]:_(s32) = G_FSUB [[FMUL4]], [[INTRINSIC_ROUNDEVEN1]] ; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FSUB3]], [[FADD4]] - ; GFX8-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32) + ; GFX8-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32) ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD5]](s32) ; GFX8-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C3]] @@ -464,10 +464,10 @@ body: | ; GFX8-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL10]], [[FMUL9]] ; GFX8-NEXT: [[FMUL11:%[0-9]+]]:_(s32) = G_FMUL [[AND2]], [[C2]] ; GFX8-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[FMUL11]], [[FADD6]] - ; GFX8-NEXT: [[FRINT2:%[0-9]+]]:_(s32) = G_FRINT [[FMUL8]] - ; GFX8-NEXT: [[FSUB5:%[0-9]+]]:_(s32) = G_FSUB [[FMUL8]], [[FRINT2]] + ; GFX8-NEXT: [[INTRINSIC_ROUNDEVEN2:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL8]] + ; GFX8-NEXT: [[FSUB5:%[0-9]+]]:_(s32) = G_FSUB [[FMUL8]], [[INTRINSIC_ROUNDEVEN2]] ; GFX8-NEXT: [[FADD8:%[0-9]+]]:_(s32) = G_FADD [[FSUB5]], [[FADD7]] - ; GFX8-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT2]](s32) + ; GFX8-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN2]](s32) ; GFX8-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD8]](s32) ; GFX8-NEXT: [[FLDEXP2:%[0-9]+]]:_(s32) = G_FLDEXP [[INT2]], [[FPTOSI2]](s32) ; GFX8-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C3]] @@ -488,10 +488,10 @@ body: | ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C]], [[FNEG]] ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3E54AE0BE0000000 ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[C1]], [[FMA]] - ; GFX9-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FMUL]] - ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[FRINT]] + ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FMUL]], [[INTRINSIC_ROUNDEVEN]] ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FSUB]], [[FMA1]] - ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT]](s32) + ; GFX9-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN]](s32) ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD]](s32) ; GFX9-NEXT: [[FLDEXP:%[0-9]+]]:_(s32) = G_FLDEXP [[INT]], 
[[FPTOSI]](s32) ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC059D1DA00000000 @@ -507,10 +507,10 @@ body: | ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FMUL1]] ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C]], [[FNEG1]] ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[C1]], [[FMA2]] - ; GFX9-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FMUL1]] - ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[FRINT1]] + ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL1]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[FMUL1]], [[INTRINSIC_ROUNDEVEN1]] ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FSUB1]], [[FMA3]] - ; GFX9-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT1]](s32) + ; GFX9-NEXT: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN1]](s32) ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD1]](s32) ; GFX9-NEXT: [[FLDEXP1:%[0-9]+]]:_(s32) = G_FLDEXP [[INT1]], [[FPTOSI1]](s32) ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV1]](s32), [[C2]] @@ -522,10 +522,10 @@ body: | ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FMUL2]] ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C]], [[FNEG2]] ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[C1]], [[FMA4]] - ; GFX9-NEXT: [[FRINT2:%[0-9]+]]:_(s32) = G_FRINT [[FMUL2]] - ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[FMUL2]], [[FRINT2]] + ; GFX9-NEXT: [[INTRINSIC_ROUNDEVEN2:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FMUL2]] + ; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s32) = G_FSUB [[FMUL2]], [[INTRINSIC_ROUNDEVEN2]] ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FSUB2]], [[FMA5]] - ; GFX9-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[FRINT2]](s32) + ; GFX9-NEXT: [[FPTOSI2:%[0-9]+]]:_(s32) = G_FPTOSI [[INTRINSIC_ROUNDEVEN2]](s32) ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.exp2), [[FADD2]](s32) ; GFX9-NEXT: [[FLDEXP2:%[0-9]+]]:_(s32) = G_FLDEXP [[INT2]], [[FPTOSI2]](s32) ; GFX9-NEXT: [[FCMP4:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[UV2]](s32), [[C2]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-frint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-frint.mir deleted file mode 100644 index b208c1283f34b49..000000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-frint.mir +++ /dev/null @@ -1,220 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -run-pass=legalizer -o - %s | FileCheck -check-prefix=CI %s - ---- -name: test_frint_s16 -body: | - bb.0: - liveins: $vgpr0 - - ; SI-LABEL: name: test_frint_s16 - ; SI: liveins: $vgpr0 - ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CI-LABEL: name: test_frint_s16 - ; CI: liveins: $vgpr0 - ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; CI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = 
G_FRINT [[FPEXT]] - ; CI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32) - ; CI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; CI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s16) = G_TRUNC %0 - %2:_(s16) = G_FRINT %1 - %3:_(s32) = G_ANYEXT %2 - $vgpr0 = COPY %3 -... - ---- -name: test_frint_s32 -body: | - bb.0: - liveins: $vgpr0 - - ; SI-LABEL: name: test_frint_s32 - ; SI: liveins: $vgpr0 - ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[COPY]] - ; SI-NEXT: $vgpr0 = COPY [[FRINT]](s32) - ; CI-LABEL: name: test_frint_s32 - ; CI: liveins: $vgpr0 - ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[COPY]] - ; CI-NEXT: $vgpr0 = COPY [[FRINT]](s32) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FRINT %0 - $vgpr0 = COPY %1 -... - ---- -name: test_frint_s64 -body: | - bb.0: - liveins: $vgpr0_vgpr1 - - ; SI-LABEL: name: test_frint_s64 - ; SI: liveins: $vgpr0_vgpr1 - ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4841369599423283200 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C1]], [[AND]] - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[OR]] - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[OR]] - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FADD]], [[FNEG]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x432FFFFFFFFFFFFF - ; SI-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[COPY]] - ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS]](s64), [[C2]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[COPY]], [[FADD1]] - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) - ; CI-LABEL: name: test_frint_s64 - ; CI: liveins: $vgpr0_vgpr1 - ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[FRINT:%[0-9]+]]:_(s64) = G_FRINT [[COPY]] - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[FRINT]](s64) - %0:_(s64) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_FRINT %0 - $vgpr0_vgpr1 = COPY %1 -... 
- ---- -name: test_frint_v2s16 -body: | - bb.0: - liveins: $vgpr0 - - ; SI-LABEL: name: test_frint_v2s16 - ; SI: liveins: $vgpr0 - ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT]] - ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32) - ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT1]] - ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT1]](s32) - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; CI-LABEL: name: test_frint_v2s16 - ; CI: liveins: $vgpr0 - ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; CI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT]] - ; CI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32) - ; CI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; CI-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT1]] - ; CI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT1]](s32) - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = G_FRINT %0 - $vgpr0 = COPY %1 -... 
- ---- -name: test_frint_v2s32 -body: | - bb.0: - liveins: $vgpr0_vgpr1 - - ; SI-LABEL: name: test_frint_v2s32 - ; SI: liveins: $vgpr0_vgpr1 - ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[UV]] - ; SI-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[UV1]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FRINT]](s32), [[FRINT1]](s32) - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CI-LABEL: name: test_frint_v2s32 - ; CI: liveins: $vgpr0_vgpr1 - ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CI-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[UV]] - ; CI-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[UV1]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FRINT]](s32), [[FRINT1]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_FRINT %0 - $vgpr0_vgpr1 = COPY %1 -... - ---- -name: test_frint_v2s64 -body: | - bb.0: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3 - - ; SI-LABEL: name: test_frint_v2s64 - ; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: {{ $}} - ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4841369599423283200 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C]] - ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C1]], [[AND]] - ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[OR]] - ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[OR]] - ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FADD]], [[FNEG]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x432FFFFFFFFFFFFF - ; SI-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[UV]] - ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS]](s64), [[C2]] - ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[UV]], [[FADD1]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C]] - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[C1]], [[AND1]] - ; SI-NEXT: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[OR1]] - ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[OR1]] - ; SI-NEXT: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[FADD2]], [[FNEG1]] - ; SI-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[UV1]] - ; SI-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS1]](s64), [[C2]] - ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[UV1]], [[FADD3]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) - ; CI-LABEL: name: test_frint_v2s64 - ; CI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 - ; CI-NEXT: {{ $}} - ; CI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CI-NEXT: [[FRINT:%[0-9]+]]:_(s64) = G_FRINT [[UV]] - ; CI-NEXT: [[FRINT1:%[0-9]+]]:_(s64) = G_FRINT [[UV1]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FRINT]](s64), [[FRINT1]](s64) - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BUILD_VECTOR]](<2 x s64>) - %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s64>) = G_FRINT %0 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 -... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir deleted file mode 100644 index 9f4383bf30c2a33..000000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frint.mir +++ /dev/null @@ -1,36 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s - ---- -name: frint_s -legalized: true - -body: | - bb.0: - liveins: $sgpr0 - ; CHECK-LABEL: name: frint_s - ; CHECK: liveins: $sgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[FRINT:%[0-9]+]]:vgpr(s32) = G_FRINT [[COPY1]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_FRINT %0 -... - ---- -name: frint_v -legalized: true - -body: | - bb.0: - liveins: $vgpr0 - ; CHECK-LABEL: name: frint_v - ; CHECK: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[FRINT:%[0-9]+]]:vgpr(s32) = G_FRINT [[COPY]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_FRINT %0 -...
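
For readers skimming the test churn above, a minimal before/after sketch of the rewrite the updated checks reflect. This is illustrative MIR written by analogy with the tests in this patch, not copied llc output:

  ; input MIR: rint of an s32 value
  %0:_(s32) = COPY $vgpr0
  %1:_(s32) = G_FRINT %0
  $vgpr0 = COPY %1

  ; after legalization the legalizer emits the round-even form, because
  ; rint, nearbyint and roundeven all agree under the default
  ; round-to-nearest-even FP environment:
  %0:_(s32) = COPY $vgpr0
  %1:_(s32) = G_INTRINSIC_ROUNDEVEN %0
  $vgpr0 = COPY %1

All three rounding intrinsics should then select to the same round-to-nearest-even machine instruction (v_rndne_f32, and v_rndne_f64 where the subtarget has it; the deleted s64 checks above show the magic-constant expansion used on targets without a 64-bit rndne).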