https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/86783
>From b886dcf2da25417d9f8cd75ff4aa58686e35139d Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Wed, 27 Mar 2024 17:11:04 +0800 Subject: [PATCH 1/4] [PowerPC] Implement 32-bit expansion for rldimi rldimi is a 64-bit instruction; for backward compatibility, it needs to be expanded into a series of rlwimi instructions in 32-bit environments. In the future, we may improve the bit permutation selector and remove such direct codegen. --- clang/lib/Sema/SemaChecking.cpp | 1 - llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 109 ++++-- llvm/test/CodeGen/PowerPC/rldimi.ll | 366 ++++++++++++++++++++ 3 files changed, 454 insertions(+), 22 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 11401b6f56c0ea..d2cbe5417d682d 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -5236,7 +5236,6 @@ static bool isPPC_64Builtin(unsigned BuiltinID) { case PPC::BI__builtin_ppc_fetch_and_andlp: case PPC::BI__builtin_ppc_fetch_and_orlp: case PPC::BI__builtin_ppc_fetch_and_swaplp: - case PPC::BI__builtin_ppc_rldimi: return true; } return false; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 7436b202fba0d9..3281a0dfd08729 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -643,6 +643,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // We want to custom lower some of our intrinsics. 
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom); @@ -10757,6 +10758,88 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, return true; } +static SDValue getRotateInsert32(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, + SDValue Src, unsigned SH, unsigned MB, + unsigned ME) { + assert(SH < 32 && MB < 32 && ME < 32 && + "Invalid argument for rotate insert!"); + return SDValue( + DAG.getMachineNode(PPC::RLWIMI, Loc, MVT::i32, + {Dst, Src, DAG.getTargetConstant(SH, Loc, MVT::i32), + DAG.getTargetConstant(MB, Loc, MVT::i32), + DAG.getTargetConstant(ME, Loc, MVT::i32)}), + 0); +} + +static SDValue getRotateInsert64(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, + SDValue Src, unsigned SH, unsigned MB, + unsigned ME, bool IsPPC64) { + assert(SH < 64 && MB < 64 && ME < 64 && + "Invalid argument for rotate insert!"); + if (IsPPC64) { + // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi. + if (ME < 63 - SH) { + Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, + DAG.getConstant(ME + SH + 1, Loc, MVT::i32)); + } else if (ME > 63 - SH) { + Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, + DAG.getConstant(ME + SH - 63, Loc, MVT::i32)); + } + return SDValue(DAG.getMachineNode( + PPC::RLDIMI, Loc, MVT::i64, + {Dst, Src, DAG.getTargetConstant(63 - ME, Loc, MVT::i32), + DAG.getTargetConstant(MB, Loc, MVT::i32)}), + 0); + } + + // To implement rldimi(Dst, Src) on 32-bit target, four parts are needed. SH + // is adjusted to simplify cases. Invalid ranges will be skipped. 
+ // - SrcHi inserted into DstHi with [0, 32-SH) + // - SrcLo inserted into DstHi with [32-SH, 32) + // - SrcHi inserted into DstLo with [32, 64-SH) + // - SrcLo inserted into DstLo with [64-SH, 64) + auto [SrcLo, SrcHi] = DAG.SplitScalar(Src, Loc, MVT::i32, MVT::i32); + auto [DstLo, DstHi] = DAG.SplitScalar(Dst, Loc, MVT::i32, MVT::i32); + if (SH >= 32) { + SH -= 32; + std::swap(SrcLo, SrcHi); + } + auto GetSubInsert = [&DAG, &Loc, SH](unsigned Left, unsigned Right, + SDValue Src, SDValue Dst, unsigned MB, + unsigned ME) { + if (Left > Right) + return Dst; + + if (MB <= ME) { + if (MB <= Right && ME >= Left) + return getRotateInsert32(DAG, Loc, Dst, Src, SH, + std::max(MB, Left) % 32, + std::min(ME, Right) % 32); + } else { + if (MB < Left || ME > Right) + return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, Right % 32); + + if (MB <= Right && ME < Left) + return getRotateInsert32(DAG, Loc, Dst, Src, SH, MB % 32, Right % 32); + + if (MB <= Right && ME <= Right) + return getRotateInsert32( + DAG, Loc, + getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, ME % 32), Src, + SH, MB % 32, Right % 32); + + if (MB > Right && ME >= Left && ME <= Right) + return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, ME % 32); + } + return Dst; + }; + DstHi = GetSubInsert(0, 31 - SH, SrcHi, DstHi, MB, ME); + DstHi = GetSubInsert(32 - SH, 31, SrcLo, DstHi, MB, ME); + DstLo = GetSubInsert(32, 63 - SH, SrcLo, DstLo, MB, ME); + DstLo = GetSubInsert(64 - SH, 63, SrcHi, DstLo, MB, ME); + return DAG.getNode(ISD::BUILD_PAIR, Loc, MVT::i64, DstLo, DstHi); +} + /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom /// lower, do it, otherwise return null. 
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, @@ -10773,7 +10856,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getRegister(PPC::R2, MVT::i32); case Intrinsic::ppc_rldimi: { - assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!"); SDValue Src = Op.getOperand(1); APInt Mask = Op.getConstantOperandAPInt(4); if (Mask.isZero()) @@ -10784,20 +10866,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, unsigned MB = 0, ME = 0; if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME)) report_fatal_error("invalid rldimi mask!"); - // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi. - if (ME < 63 - SH) { - Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, - DAG.getConstant(ME + SH + 1, dl, MVT::i32)); - } else if (ME > 63 - SH) { - Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, - DAG.getConstant(ME + SH - 63, dl, MVT::i32)); - } - return SDValue( - DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64, - {Op.getOperand(2), Src, - DAG.getTargetConstant(63 - ME, dl, MVT::i32), - DAG.getTargetConstant(MB, dl, MVT::i32)}), - 0); + return getRotateInsert64(DAG, dl, Op.getOperand(2), Op.getOperand(1), SH, + MB, ME, Subtarget.isPPC64()); } case Intrinsic::ppc_rlwimi: { @@ -10810,12 +10880,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, unsigned MB = 0, ME = 0; if (!isRunOfOnes(Mask.getZExtValue(), MB, ME)) report_fatal_error("invalid rlwimi mask!"); - return SDValue(DAG.getMachineNode( - PPC::RLWIMI, dl, MVT::i32, - {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3), - DAG.getTargetConstant(MB, dl, MVT::i32), - DAG.getTargetConstant(ME, dl, MVT::i32)}), - 0); + return getRotateInsert32(DAG, dl, Op.getOperand(2), Op.getOperand(1), + Op.getConstantOperandVal(3), MB, ME); } case Intrinsic::ppc_rlwnm: { @@ -11833,6 +11899,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, case Intrinsic::ppc_maxfe: case Intrinsic::ppc_minfe: case Intrinsic::ppc_fnmsub: + case 
Intrinsic::ppc_rldimi: case Intrinsic::ppc_convert_f128_to_ppcf128: Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG)); break; diff --git a/llvm/test/CodeGen/PowerPC/rldimi.ll b/llvm/test/CodeGen/PowerPC/rldimi.ll index 78ea9aa862f2c2..7495c5d66dbbb3 100644 --- a/llvm/test/CodeGen/PowerPC/rldimi.ll +++ b/llvm/test/CodeGen/PowerPC/rldimi.ll @@ -1,12 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix -mcpu=pwr8 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-ibm-aix -mcpu=pwr8 | FileCheck %s --check-prefix=32BIT define i64 @rldimi1(i64 %a) { ; CHECK-LABEL: rldimi1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: rldimi 3, 3, 8, 0 ; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi1: +; 32BIT: # %bb.0: # %entry +; 32BIT-NEXT: rotlwi 5, 4, 8 +; 32BIT-NEXT: rlwimi 4, 4, 8, 0, 23 +; 32BIT-NEXT: rlwimi 5, 3, 8, 0, 23 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: blr entry: %x0 = shl i64 %a, 8 %x1 = and i64 %a, 255 @@ -23,6 +32,18 @@ define i64 @rldimi2(i64 %a) { ; CHECK-NEXT: rldimi 4, 3, 24, 0 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi2: +; 32BIT: # %bb.0: # %entry +; 32BIT-NEXT: mr 6, 4 +; 32BIT-NEXT: rotlwi 5, 4, 24 +; 32BIT-NEXT: rlwimi 6, 4, 8, 16, 23 +; 32BIT-NEXT: rlwimi 5, 3, 24, 0, 7 +; 32BIT-NEXT: rlwimi 6, 4, 16, 8, 15 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: rlwimi 6, 4, 24, 0, 7 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: blr entry: %x0 = shl i64 %a, 8 %x1 = and i64 %a, 255 @@ -46,6 +67,15 @@ define i64 @rldimi3(i64 %a) { ; CHECK-NEXT: rldimi 4, 3, 56, 0 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi3: +; 32BIT: # %bb.0: # %entry +; 32BIT-NEXT: mr 3, 4 +; 32BIT-NEXT: rlwimi 3, 4, 8, 16, 23 +; 32BIT-NEXT: rlwimi 3, 4, 16, 8, 15 +; 32BIT-NEXT: rlwimi 3, 4, 24, 0, 7 +; 32BIT-NEXT: mr 4, 3 +; 
32BIT-NEXT: blr entry: %0 = shl i64 %a, 8 %1 = and i64 %a, 255 @@ -66,6 +96,17 @@ define i64 @rldimi4(i64 %a) { ; CHECK-NEXT: rldimi 3, 3, 16, 0 ; CHECK-NEXT: rldimi 3, 3, 32, 0 ; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi4: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rlwimi 3, 3, 8, 0, 23 +; 32BIT-NEXT: rlwimi 3, 4, 8, 24, 31 +; 32BIT-NEXT: rlwimi 4, 4, 8, 0, 23 +; 32BIT-NEXT: rlwimi 3, 3, 16, 0, 15 +; 32BIT-NEXT: rlwimi 3, 4, 16, 16, 31 +; 32BIT-NEXT: rlwimi 4, 4, 16, 0, 15 +; 32BIT-NEXT: rlwimi 3, 4, 0, 0, 31 +; 32BIT-NEXT: blr %r1 = call i64 @llvm.ppc.rldimi(i64 %a, i64 %a, i32 8, i64 -256) %r2 = call i64 @llvm.ppc.rldimi(i64 %r1, i64 %r1, i32 16, i64 -65536) %r3 = call i64 @llvm.ppc.rldimi(i64 %r2, i64 %r2, i32 32, i64 -4294967296) @@ -78,6 +119,13 @@ define i64 @rldimi5(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 8, 40 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi5: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rlwimi 6, 4, 8, 8, 23 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 16776960) ; 0xffff << 8 ret i64 %r } @@ -89,6 +137,14 @@ define i64 @rldimi6(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 7, 41 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi6: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rlwimi 6, 4, 8, 9, 23 +; 32BIT-NEXT: rlwimi 6, 3, 8, 24, 24 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 8388480) ; 0xffff << 7 ret i64 %r } @@ -100,6 +156,13 @@ define i64 @rldimi7(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 9, 39 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi7: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rlwimi 6, 4, 8, 7, 22 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 33553920) ; 0xffff << 9 ret i64 %r } @@ -109,6 +172,12 @@ define i64 @rldimi8(i64 %a, i64 %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: mr 3, 4 ; 
CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi8: +; 32BIT: # %bb.0: +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 0) ret i64 %r } @@ -118,6 +187,12 @@ define i64 @rldimi9(i64 %a, i64 %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi9: +; 32BIT: # %bb.0: +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 63, i64 0) ret i64 %r } @@ -126,6 +201,10 @@ define i64 @rldimi10(i64 %a, i64 %b) { ; CHECK-LABEL: rldimi10: ; CHECK: # %bb.0: ; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi10: +; 32BIT: # %bb.0: +; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 -1) ret i64 %r } @@ -135,8 +214,295 @@ define i64 @rldimi11(i64 %a, i64 %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: rotldi 3, 3, 8 ; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi11: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rotlwi 5, 4, 8 +; 32BIT-NEXT: rotlwi 6, 3, 8 +; 32BIT-NEXT: rlwimi 5, 3, 8, 0, 23 +; 32BIT-NEXT: rlwimi 6, 4, 8, 0, 23 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 -1) ret i64 %r } +define i64 @rldimi12(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi12: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 20 +; CHECK-NEXT: rldimi 4, 3, 44, 31 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi12: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rlwimi 6, 4, 0, 0, 31 +; 32BIT-NEXT: rlwimi 5, 3, 0, 0, 19 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: rlwimi 3, 5, 0, 0, 30 +; 32BIT-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 18446726490113441791) + ret i64 %r +} + +define i64 @rldimi13(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi13: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 62 +; CHECK-NEXT: rldimi 4, 3, 32, 2 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi13: +; 32BIT: # %bb.0: +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: rlwimi 3, 
4, 30, 2, 31 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 30, i64 4611686014132420608) + ret i64 %r +} + +define i64 @rldimi14(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi14: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 23 +; CHECK-NEXT: rldimi 4, 3, 53, 0 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi14: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 10 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437736874454810624) ; mb=0, me=10 + ret i64 %r +} + +define i64 @rldimi15(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi15: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 36 +; CHECK-NEXT: rldimi 4, 3, 40, 10 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi15: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rlwimi 5, 3, 12, 10, 19 +; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 23 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18013298997854208) ; mb=10, me=23 + ret i64 %r +} + +define i64 @rldimi16(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi16: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 57 +; CHECK-NEXT: rldimi 4, 3, 19, 10 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi16: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rlwimi 5, 3, 12, 10, 19 +; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 12 +; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 31 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18014398508957696) ; mb=10, me=44 + ret i64 %r +} + +define i64 @rldimi17(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi17: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 43 +; CHECK-NEXT: rldimi 4, 3, 33, 25 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi17: +; 32BIT: # %bb.0: +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: rlwimi 3, 4, 12, 25, 30 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: blr + %r = 
call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 541165879296) ; mb=25, me=30 + ret i64 %r +} + +define i64 @rldimi18(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi18: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 57 +; CHECK-NEXT: rldimi 4, 3, 19, 25 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi18: +; 32BIT: # %bb.0: +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 12 +; 32BIT-NEXT: rlwimi 3, 4, 12, 25, 31 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 549755289600) ; mb=25, me=44 + ret i64 %r +} + +define i64 @rldimi19(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi19: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 57 +; CHECK-NEXT: rldimi 4, 3, 19, 33 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi19: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rlwimi 6, 4, 12, 1, 12 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 2146959360) ; mb=33, me=44 + ret i64 %r +} + +define i64 @rldimi20(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi20: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 23 +; CHECK-NEXT: rldimi 4, 3, 53, 15 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi20: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 10 +; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 19 +; 32BIT-NEXT: rlwimi 5, 3, 12, 15, 19 +; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31 +; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 31 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18438299824408231935) ; mb=15, me=10 + ret i64 %r +} + +define i64 @rldimi21(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi21: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 23 +; CHECK-NEXT: rldimi 4, 3, 53, 25 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi21: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 10 +; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 19 +; 32BIT-NEXT: rlwimi 5, 
4, 12, 25, 31 +; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437737424210624511) ; mb=25, me=10 + ret i64 %r +} + +define i64 @rldimi22(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi22: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 34 +; CHECK-NEXT: rldimi 4, 3, 42, 25 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi22: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 19 +; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 19 +; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 21 +; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31 +; 32BIT-NEXT: rlwimi 5, 4, 12, 25, 31 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446740225418854399) ; mb=25, me=21 + ret i64 %r +} + +define i64 @rldimi23(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi23: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 23 +; CHECK-NEXT: rldimi 4, 3, 53, 44 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi23: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rlwimi 6, 4, 12, 12, 19 +; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 10 +; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437736874455859199) ; mb=44, me=10 + ret i64 %r +} + +define i64 @rldimi24(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi24: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 38 +; CHECK-NEXT: rldimi 4, 3, 38, 44 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi24: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 19 +; 32BIT-NEXT: rlwimi 6, 4, 12, 12, 19 +; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 25 +; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446743798832693247) ; mb=44, me=25 + ret i64 %r +} + +define i64 @rldimi25(i64 %a, i64 
%b) { +; CHECK-LABEL: rldimi25: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 48 +; CHECK-NEXT: rldimi 4, 3, 28, 44 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr +; +; 32BIT-LABEL: rldimi25: +; 32BIT: # %bb.0: +; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 3 +; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 19 +; 32BIT-NEXT: rlwimi 6, 4, 12, 12, 19 +; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 31 +; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31 +; 32BIT-NEXT: mr 3, 5 +; 32BIT-NEXT: mr 4, 6 +; 32BIT-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446744073442164735) ; mb=44, me=35 + ret i64 %r +} + declare i64 @llvm.ppc.rldimi(i64, i64, i32 immarg, i64 immarg) >From 518eaadaef88245be88ffd5c6563081eeeb73fda Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Tue, 2 Apr 2024 12:01:02 +0800 Subject: [PATCH 2/4] Fallback 32-bit to expansion --- clang/lib/CodeGen/CGBuiltin.cpp | 8 + .../PowerPC/builtins-ppc-xlcompat-error.c | 6 - llvm/include/llvm/IR/IntrinsicsPowerPC.td | 7 +- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 111 +++------ llvm/test/CodeGen/PowerPC/rldimi.ll | 212 ------------------ 5 files changed, 36 insertions(+), 308 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index bb007231c0b783..601a8f67d6dbd2 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17297,6 +17297,14 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); Value *Op3 = EmitScalarExpr(E->getArg(3)); + if (BuiltinID == PPC::BI__builtin_ppc_rldimi && + !getTarget().getTriple().isPPC64()) { + Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType()); + Op2 = Builder.CreateZExt(Op2, Int64Ty); + Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2}); + return Builder.CreateOr(Builder.CreateAnd(Shift, Op3), + Builder.CreateAnd(Op1, Builder.CreateNot(Op3))); + } return Builder.CreateCall( CGM.getIntrinsic(BuiltinID == 
PPC::BI__builtin_ppc_rldimi ? Intrinsic::ppc_rldimi diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c index 272e0222dc9e41..f7f357df62af16 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c @@ -24,7 +24,6 @@ void test_trap(void) { __tw(ia, ib, 0); //expected-error {{argument value 0 is outside the valid range [1, 31]}} } -#ifdef __PPC64__ void test_builtin_ppc_rldimi() { unsigned int shift; unsigned long long mask; @@ -33,7 +32,6 @@ void test_builtin_ppc_rldimi() { res = __builtin_ppc_rldimi(ull, ull, 63, 0xFFFF000000000F00); // expected-error {{argument 3 value should represent a contiguous bit field}} res = __builtin_ppc_rldimi(ull, ull, 64, 0xFFFF000000000000); // expected-error {{argument value 64 is outside the valid range [0, 63]}} } -#endif void test_builtin_ppc_rlwimi() { unsigned int shift; @@ -86,10 +84,6 @@ void testalignx(const void *pointer, unsigned int alignment) { } #ifndef __PPC64__ -unsigned long long testrldimi32() { - return __rldimi(ull, ui, 3, 0x7ffff8ULL); //expected-error {{this builtin is only available on 64-bit targets}} -} - long long testbpermd(long long bit_selector, long long source) { return __bpermd(bit_selector, source); //expected-error {{this builtin is only available on 64-bit targets}} } diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index ee9a04241ac2ec..aff1fc7f085c43 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -182,10 +182,6 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
def int_ppc_fctuwz : ClangBuiltin<"__builtin_ppc_fctuwz">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; - def int_ppc_rldimi - : ClangBuiltin<"__builtin_ppc_rldimi">, - DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], - [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>; def int_ppc_rlwimi : ClangBuiltin<"__builtin_ppc_rlwimi">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], @@ -194,6 +190,9 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". : ClangBuiltin<"__builtin_ppc_rlwnm">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; + def int_ppc_rldimi + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>; // XL compatible select functions // TODO: Add llvm_f128_ty support. diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 3281a0dfd08729..36d892e3c96365 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10758,88 +10758,6 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, return true; } -static SDValue getRotateInsert32(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, - SDValue Src, unsigned SH, unsigned MB, - unsigned ME) { - assert(SH < 32 && MB < 32 && ME < 32 && - "Invalid argument for rotate insert!"); - return SDValue( - DAG.getMachineNode(PPC::RLWIMI, Loc, MVT::i32, - {Dst, Src, DAG.getTargetConstant(SH, Loc, MVT::i32), - DAG.getTargetConstant(MB, Loc, MVT::i32), - DAG.getTargetConstant(ME, Loc, MVT::i32)}), - 0); -} - -static SDValue getRotateInsert64(SelectionDAG &DAG, SDLoc Loc, SDValue Dst, - SDValue Src, unsigned SH, unsigned MB, - unsigned ME, bool IsPPC64) { - assert(SH < 64 && MB < 64 && ME < 64 && - "Invalid argument for rotate 
insert!"); - if (IsPPC64) { - // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi. - if (ME < 63 - SH) { - Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, - DAG.getConstant(ME + SH + 1, Loc, MVT::i32)); - } else if (ME > 63 - SH) { - Src = DAG.getNode(ISD::ROTL, Loc, MVT::i64, Src, - DAG.getConstant(ME + SH - 63, Loc, MVT::i32)); - } - return SDValue(DAG.getMachineNode( - PPC::RLDIMI, Loc, MVT::i64, - {Dst, Src, DAG.getTargetConstant(63 - ME, Loc, MVT::i32), - DAG.getTargetConstant(MB, Loc, MVT::i32)}), - 0); - } - - // To implement rldimi(Dst, Src) on 32-bit target, four parts are needed. SH - // is adjusted to simplify cases. Invalid ranges will be skipped. - // - SrcHi inserted into DstHi with [0, 32-SH) - // - SrcLo inserted into DstHi with [32-SH, 32) - // - SrcHi inserted into DstLo with [32, 64-SH) - // - SrcLo inserted into DstLo with [64-SH, 64) - auto [SrcLo, SrcHi] = DAG.SplitScalar(Src, Loc, MVT::i32, MVT::i32); - auto [DstLo, DstHi] = DAG.SplitScalar(Dst, Loc, MVT::i32, MVT::i32); - if (SH >= 32) { - SH -= 32; - std::swap(SrcLo, SrcHi); - } - auto GetSubInsert = [&DAG, &Loc, SH](unsigned Left, unsigned Right, - SDValue Src, SDValue Dst, unsigned MB, - unsigned ME) { - if (Left > Right) - return Dst; - - if (MB <= ME) { - if (MB <= Right && ME >= Left) - return getRotateInsert32(DAG, Loc, Dst, Src, SH, - std::max(MB, Left) % 32, - std::min(ME, Right) % 32); - } else { - if (MB < Left || ME > Right) - return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, Right % 32); - - if (MB <= Right && ME < Left) - return getRotateInsert32(DAG, Loc, Dst, Src, SH, MB % 32, Right % 32); - - if (MB <= Right && ME <= Right) - return getRotateInsert32( - DAG, Loc, - getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, ME % 32), Src, - SH, MB % 32, Right % 32); - - if (MB > Right && ME >= Left && ME <= Right) - return getRotateInsert32(DAG, Loc, Dst, Src, SH, Left % 32, ME % 32); - } - return Dst; - }; - DstHi = GetSubInsert(0, 31 - SH, SrcHi, 
DstHi, MB, ME); - DstHi = GetSubInsert(32 - SH, 31, SrcLo, DstHi, MB, ME); - DstLo = GetSubInsert(32, 63 - SH, SrcLo, DstLo, MB, ME); - DstLo = GetSubInsert(64 - SH, 63, SrcHi, DstLo, MB, ME); - return DAG.getNode(ISD::BUILD_PAIR, Loc, MVT::i64, DstLo, DstHi); -} - /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom /// lower, do it, otherwise return null. SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, @@ -10856,18 +10774,33 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getRegister(PPC::R2, MVT::i32); case Intrinsic::ppc_rldimi: { + assert(Subtarget.isPPC64() && "rldimi intrinsic is 64-bit only!"); SDValue Src = Op.getOperand(1); APInt Mask = Op.getConstantOperandAPInt(4); if (Mask.isZero()) return Op.getOperand(2); if (Mask.isAllOnes()) return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3)); + uint64_t SH = Op.getConstantOperandVal(3); unsigned MB = 0, ME = 0; if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME)) report_fatal_error("invalid rldimi mask!"); - return getRotateInsert64(DAG, dl, Op.getOperand(2), Op.getOperand(1), SH, - MB, ME, Subtarget.isPPC64()); + + // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi. 
+ if (ME < 63 - SH) { + Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, + DAG.getConstant(ME + SH + 1, dl, MVT::i32)); + } else if (ME > 63 - SH) { + Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, + DAG.getConstant(ME + SH - 63, dl, MVT::i32)); + } + return SDValue( + DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64, + {Op.getOperand(2), Src, + DAG.getTargetConstant(63 - ME, dl, MVT::i32), + DAG.getTargetConstant(MB, dl, MVT::i32)}), + 0); } case Intrinsic::ppc_rlwimi: { @@ -10880,8 +10813,14 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, unsigned MB = 0, ME = 0; if (!isRunOfOnes(Mask.getZExtValue(), MB, ME)) report_fatal_error("invalid rlwimi mask!"); - return getRotateInsert32(DAG, dl, Op.getOperand(2), Op.getOperand(1), - Op.getConstantOperandVal(3), MB, ME); + return SDValue( + DAG.getMachineNode( + PPC::RLWIMI, dl, MVT::i32, + {Op.getOperand(2), Op.getOperand(1), + DAG.getTargetConstant(Op.getConstantOperandVal(3), dl, MVT::i32), + DAG.getTargetConstant(MB, dl, MVT::i32), + DAG.getTargetConstant(ME, dl, MVT::i32)}), + 0); } case Intrinsic::ppc_rlwnm: { diff --git a/llvm/test/CodeGen/PowerPC/rldimi.ll b/llvm/test/CodeGen/PowerPC/rldimi.ll index 7495c5d66dbbb3..4ce015849d9ea3 100644 --- a/llvm/test/CodeGen/PowerPC/rldimi.ll +++ b/llvm/test/CodeGen/PowerPC/rldimi.ll @@ -1,21 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix -mcpu=pwr8 | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-ibm-aix -mcpu=pwr8 | FileCheck %s --check-prefix=32BIT define i64 @rldimi1(i64 %a) { ; CHECK-LABEL: rldimi1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: rldimi 3, 3, 8, 0 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi1: -; 32BIT: # %bb.0: # %entry -; 32BIT-NEXT: rotlwi 5, 4, 8 -; 32BIT-NEXT: rlwimi 4, 4, 8, 0, 23 -; 32BIT-NEXT: 
rlwimi 5, 3, 8, 0, 23 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: blr entry: %x0 = shl i64 %a, 8 %x1 = and i64 %a, 255 @@ -32,18 +23,6 @@ define i64 @rldimi2(i64 %a) { ; CHECK-NEXT: rldimi 4, 3, 24, 0 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi2: -; 32BIT: # %bb.0: # %entry -; 32BIT-NEXT: mr 6, 4 -; 32BIT-NEXT: rotlwi 5, 4, 24 -; 32BIT-NEXT: rlwimi 6, 4, 8, 16, 23 -; 32BIT-NEXT: rlwimi 5, 3, 24, 0, 7 -; 32BIT-NEXT: rlwimi 6, 4, 16, 8, 15 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: rlwimi 6, 4, 24, 0, 7 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: blr entry: %x0 = shl i64 %a, 8 %x1 = and i64 %a, 255 @@ -67,15 +46,6 @@ define i64 @rldimi3(i64 %a) { ; CHECK-NEXT: rldimi 4, 3, 56, 0 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi3: -; 32BIT: # %bb.0: # %entry -; 32BIT-NEXT: mr 3, 4 -; 32BIT-NEXT: rlwimi 3, 4, 8, 16, 23 -; 32BIT-NEXT: rlwimi 3, 4, 16, 8, 15 -; 32BIT-NEXT: rlwimi 3, 4, 24, 0, 7 -; 32BIT-NEXT: mr 4, 3 -; 32BIT-NEXT: blr entry: %0 = shl i64 %a, 8 %1 = and i64 %a, 255 @@ -96,17 +66,6 @@ define i64 @rldimi4(i64 %a) { ; CHECK-NEXT: rldimi 3, 3, 16, 0 ; CHECK-NEXT: rldimi 3, 3, 32, 0 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi4: -; 32BIT: # %bb.0: -; 32BIT-NEXT: rlwimi 3, 3, 8, 0, 23 -; 32BIT-NEXT: rlwimi 3, 4, 8, 24, 31 -; 32BIT-NEXT: rlwimi 4, 4, 8, 0, 23 -; 32BIT-NEXT: rlwimi 3, 3, 16, 0, 15 -; 32BIT-NEXT: rlwimi 3, 4, 16, 16, 31 -; 32BIT-NEXT: rlwimi 4, 4, 16, 0, 15 -; 32BIT-NEXT: rlwimi 3, 4, 0, 0, 31 -; 32BIT-NEXT: blr %r1 = call i64 @llvm.ppc.rldimi(i64 %a, i64 %a, i32 8, i64 -256) %r2 = call i64 @llvm.ppc.rldimi(i64 %r1, i64 %r1, i32 16, i64 -65536) %r3 = call i64 @llvm.ppc.rldimi(i64 %r2, i64 %r2, i32 32, i64 -4294967296) @@ -119,13 +78,6 @@ define i64 @rldimi5(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 8, 40 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi5: -; 32BIT: # %bb.0: -; 32BIT-NEXT: rlwimi 6, 4, 8, 8, 23 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 
%a, i64 %b, i32 8, i64 16776960) ; 0xffff << 8 ret i64 %r } @@ -137,14 +89,6 @@ define i64 @rldimi6(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 7, 41 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi6: -; 32BIT: # %bb.0: -; 32BIT-NEXT: rlwimi 6, 4, 8, 9, 23 -; 32BIT-NEXT: rlwimi 6, 3, 8, 24, 24 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 8388480) ; 0xffff << 7 ret i64 %r } @@ -156,13 +100,6 @@ define i64 @rldimi7(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 9, 39 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi7: -; 32BIT: # %bb.0: -; 32BIT-NEXT: rlwimi 6, 4, 8, 7, 22 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 33553920) ; 0xffff << 9 ret i64 %r } @@ -172,12 +109,6 @@ define i64 @rldimi8(i64 %a, i64 %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi8: -; 32BIT: # %bb.0: -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 0) ret i64 %r } @@ -187,12 +118,6 @@ define i64 @rldimi9(i64 %a, i64 %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi9: -; 32BIT: # %bb.0: -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 63, i64 0) ret i64 %r } @@ -201,10 +126,6 @@ define i64 @rldimi10(i64 %a, i64 %b) { ; CHECK-LABEL: rldimi10: ; CHECK: # %bb.0: ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi10: -; 32BIT: # %bb.0: -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 -1) ret i64 %r } @@ -214,16 +135,6 @@ define i64 @rldimi11(i64 %a, i64 %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: rotldi 3, 3, 8 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi11: -; 32BIT: # %bb.0: -; 32BIT-NEXT: rotlwi 5, 4, 8 -; 32BIT-NEXT: rotlwi 6, 3, 8 -; 32BIT-NEXT: rlwimi 5, 3, 8, 0, 23 -; 
32BIT-NEXT: rlwimi 6, 4, 8, 0, 23 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 8, i64 -1) ret i64 %r } @@ -235,14 +146,6 @@ define i64 @rldimi12(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 44, 31 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi12: -; 32BIT: # %bb.0: -; 32BIT-NEXT: rlwimi 6, 4, 0, 0, 31 -; 32BIT-NEXT: rlwimi 5, 3, 0, 0, 19 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: rlwimi 3, 5, 0, 0, 30 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 18446726490113441791) ret i64 %r } @@ -254,13 +157,6 @@ define i64 @rldimi13(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 32, 2 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi13: -; 32BIT: # %bb.0: -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: rlwimi 3, 4, 30, 2, 31 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 30, i64 4611686014132420608) ret i64 %r } @@ -272,13 +168,6 @@ define i64 @rldimi14(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 53, 0 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi14: -; 32BIT: # %bb.0: -; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 10 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437736874454810624) ; mb=0, me=10 ret i64 %r } @@ -290,14 +179,6 @@ define i64 @rldimi15(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 40, 10 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi15: -; 32BIT: # %bb.0: -; 32BIT-NEXT: rlwimi 5, 3, 12, 10, 19 -; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 23 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18013298997854208) ; mb=10, me=23 ret i64 %r } @@ -309,15 +190,6 @@ define i64 @rldimi16(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 19, 10 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi16: -; 32BIT: # %bb.0: -; 32BIT-NEXT: 
rlwimi 5, 3, 12, 10, 19 -; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 12 -; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 31 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18014398508957696) ; mb=10, me=44 ret i64 %r } @@ -329,13 +201,6 @@ define i64 @rldimi17(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 33, 25 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi17: -; 32BIT: # %bb.0: -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: rlwimi 3, 4, 12, 25, 30 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 541165879296) ; mb=25, me=30 ret i64 %r } @@ -347,14 +212,6 @@ define i64 @rldimi18(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 19, 25 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi18: -; 32BIT: # %bb.0: -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 12 -; 32BIT-NEXT: rlwimi 3, 4, 12, 25, 31 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 549755289600) ; mb=25, me=44 ret i64 %r } @@ -366,13 +223,6 @@ define i64 @rldimi19(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 19, 33 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi19: -; 32BIT: # %bb.0: -; 32BIT-NEXT: rlwimi 6, 4, 12, 1, 12 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 2146959360) ; mb=33, me=44 ret i64 %r } @@ -384,17 +234,6 @@ define i64 @rldimi20(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 53, 15 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi20: -; 32BIT: # %bb.0: -; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 10 -; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 19 -; 32BIT-NEXT: rlwimi 5, 3, 12, 15, 19 -; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31 -; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 31 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18438299824408231935) ; mb=15, 
me=10 ret i64 %r } @@ -406,16 +245,6 @@ define i64 @rldimi21(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 53, 25 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi21: -; 32BIT: # %bb.0: -; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 10 -; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 19 -; 32BIT-NEXT: rlwimi 5, 4, 12, 25, 31 -; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437737424210624511) ; mb=25, me=10 ret i64 %r } @@ -427,17 +256,6 @@ define i64 @rldimi22(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 42, 25 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi22: -; 32BIT: # %bb.0: -; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 19 -; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 19 -; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 21 -; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31 -; 32BIT-NEXT: rlwimi 5, 4, 12, 25, 31 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446740225418854399) ; mb=25, me=21 ret i64 %r } @@ -449,15 +267,6 @@ define i64 @rldimi23(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 53, 44 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi23: -; 32BIT: # %bb.0: -; 32BIT-NEXT: rlwimi 6, 4, 12, 12, 19 -; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 10 -; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437736874455859199) ; mb=44, me=10 ret i64 %r } @@ -469,16 +278,6 @@ define i64 @rldimi24(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 38, 44 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi24: -; 32BIT: # %bb.0: -; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 19 -; 32BIT-NEXT: rlwimi 6, 4, 12, 12, 19 -; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 25 -; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, 
i32 12, i64 18446743798832693247) ; mb=44, me=25 ret i64 %r } @@ -490,17 +289,6 @@ define i64 @rldimi25(i64 %a, i64 %b) { ; CHECK-NEXT: rldimi 4, 3, 28, 44 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr -; -; 32BIT-LABEL: rldimi25: -; 32BIT: # %bb.0: -; 32BIT-NEXT: rlwimi 6, 4, 12, 0, 3 -; 32BIT-NEXT: rlwimi 5, 3, 12, 0, 19 -; 32BIT-NEXT: rlwimi 6, 4, 12, 12, 19 -; 32BIT-NEXT: rlwimi 5, 4, 12, 20, 31 -; 32BIT-NEXT: rlwimi 6, 3, 12, 20, 31 -; 32BIT-NEXT: mr 3, 5 -; 32BIT-NEXT: mr 4, 6 -; 32BIT-NEXT: blr %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446744073442164735) ; mb=44, me=35 ret i64 %r } >From 343005c2539729291497ce07f8a0b8bbb122439e Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Tue, 2 Apr 2024 12:03:38 +0800 Subject: [PATCH 3/4] Fix format --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 36d892e3c96365..24af768596cf77 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10781,12 +10781,10 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return Op.getOperand(2); if (Mask.isAllOnes()) return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3)); - uint64_t SH = Op.getConstantOperandVal(3); unsigned MB = 0, ME = 0; if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME)) report_fatal_error("invalid rldimi mask!"); - // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi. 
if (ME < 63 - SH) { Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, @@ -10813,14 +10811,12 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, unsigned MB = 0, ME = 0; if (!isRunOfOnes(Mask.getZExtValue(), MB, ME)) report_fatal_error("invalid rlwimi mask!"); - return SDValue( - DAG.getMachineNode( - PPC::RLWIMI, dl, MVT::i32, - {Op.getOperand(2), Op.getOperand(1), - DAG.getTargetConstant(Op.getConstantOperandVal(3), dl, MVT::i32), - DAG.getTargetConstant(MB, dl, MVT::i32), - DAG.getTargetConstant(ME, dl, MVT::i32)}), - 0); + return SDValue(DAG.getMachineNode( + PPC::RLWIMI, dl, MVT::i32, + {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3), + DAG.getTargetConstant(MB, dl, MVT::i32), + DAG.getTargetConstant(ME, dl, MVT::i32)}), + 0); } case Intrinsic::ppc_rlwnm: { >From 324dc374bf477de71ad9626ad2659b7a4302c7ee Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Tue, 2 Apr 2024 13:17:11 +0800 Subject: [PATCH 4/4] Remove unnecessary change --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 24af768596cf77..7436b202fba0d9 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -643,7 +643,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // We want to custom lower some of our intrinsics. 
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom); @@ -10774,7 +10773,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getRegister(PPC::R2, MVT::i32); case Intrinsic::ppc_rldimi: { - assert(Subtarget.isPPC64() && "rldimi intrinsic is 64-bit only!"); + assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!"); SDValue Src = Op.getOperand(1); APInt Mask = Op.getConstantOperandAPInt(4); if (Mask.isZero()) @@ -11834,7 +11833,6 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, case Intrinsic::ppc_maxfe: case Intrinsic::ppc_minfe: case Intrinsic::ppc_fnmsub: - case Intrinsic::ppc_rldimi: case Intrinsic::ppc_convert_f128_to_ppcf128: Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG)); break; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits