https://github.com/maryammo created https://github.com/llvm/llvm-project/pull/178061
This commit adds support for lwat/ldat atomic operations with function code 16 (Compare and Swap Not Equal) via 4 clang builtins: __builtin_amo_lwat_csne for 32-bit unsigned operations __builtin_amo_ldat_csne for 64-bit unsigned operations __builtin_amo_lwat_csne_s for 32-bit signed operations __builtin_amo_ldat_csne_s for 64-bit signed operations >From c06a757218c74320010902c07d601373bc05373b Mon Sep 17 00:00:00 2001 From: Maryam Moghadas <[email protected]> Date: Mon, 26 Jan 2026 21:55:52 +0000 Subject: [PATCH] Add AMO load with Compare and Swap Not Equal This commit adds support for lwat/ldat atomic operations with function code 16 (Compare and Swap Not Equal) via 4 clang builtins: __builtin_amo_lwat_csne for 32-bit unsigned operations __builtin_amo_ldat_csne for 64-bit unsigned operations __builtin_amo_lwat_csne_s for 32-bit signed operations __builtin_amo_ldat_csne_s for 64-bit signed operations --- clang/include/clang/Basic/BuiltinsPPC.def | 4 + clang/lib/CodeGen/TargetBuiltins/PPC.cpp | 14 +++ clang/lib/Sema/SemaPPC.cpp | 4 + clang/test/CodeGen/PowerPC/builtins-amo-err.c | 16 ++++ clang/test/CodeGen/PowerPC/builtins-ppc-amo.c | 76 ++++++++++++++++ llvm/include/llvm/IR/IntrinsicsPowerPC.td | 8 ++ llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 21 +++++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 44 ++++++++- llvm/lib/Target/PowerPC/PPCISelLowering.h | 1 + llvm/test/CodeGen/PowerPC/amo-enable.ll | 90 +++++++++++++++++++ 10 files changed, 277 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index f518429136e3c..4fcf03307c63d 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -1014,6 +1014,10 @@ TARGET_BUILTIN(__builtin_amo_stwat, "vUi*UiIi", "", "isa-v30-instructions") TARGET_BUILTIN(__builtin_amo_stdat, "vULi*ULiIi", "", "isa-v30-instructions") TARGET_BUILTIN(__builtin_amo_stwat_s, "vSi*SiIi", "", "isa-v30-instructions") 
TARGET_BUILTIN(__builtin_amo_stdat_s, "vSLi*SLiIi", "", "isa-v30-instructions") +TARGET_BUILTIN(__builtin_amo_lwat_csne, "UiUi*UiUi", "", "isa-v30-instructions") +TARGET_BUILTIN(__builtin_amo_ldat_csne, "ULiULi*ULiULi", "", "isa-v30-instructions") +TARGET_BUILTIN(__builtin_amo_lwat_csne_s, "SiSi*SiSi", "", "isa-v30-instructions") +TARGET_BUILTIN(__builtin_amo_ldat_csne_s, "SLiSLi*SLiSLi", "", "isa-v30-instructions") // Set the floating point rounding mode diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp index 6568959351a5d..cca24b981679a 100644 --- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp @@ -1386,6 +1386,20 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_amo_ldat_cond), {Op0, Op1}); } + case PPC::BI__builtin_amo_lwat_csne_s: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_amo_lwat_csne), + {Op0, Op1, Op2}); + } + case PPC::BI__builtin_amo_ldat_csne_s: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_amo_ldat_csne), + {Op0, Op1, Op2}); + } case PPC::BI__builtin_amo_stwat_s: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp index 1bf806d996887..7cc8322b9ebaa 100644 --- a/clang/lib/Sema/SemaPPC.cpp +++ b/clang/lib/Sema/SemaPPC.cpp @@ -99,6 +99,10 @@ static bool isPPC_64Builtin(unsigned BuiltinID) { case PPC::BI__builtin_amo_stdat: case PPC::BI__builtin_amo_stwat_s: case PPC::BI__builtin_amo_stdat_s: + case PPC::BI__builtin_amo_lwat_csne: + case PPC::BI__builtin_amo_ldat_csne: + case 
PPC::BI__builtin_amo_lwat_csne_s: + case PPC::BI__builtin_amo_ldat_csne_s: return true; } return false; diff --git a/clang/test/CodeGen/PowerPC/builtins-amo-err.c b/clang/test/CodeGen/PowerPC/builtins-amo-err.c index f99efd0505818..02eec74b789f3 100644 --- a/clang/test/CodeGen/PowerPC/builtins-amo-err.c +++ b/clang/test/CodeGen/PowerPC/builtins-amo-err.c @@ -75,4 +75,20 @@ void test_amo() { __builtin_amo_stdat_s(ptr12, value12, 24); // FC-ERROR: error: argument value 6 is outside the valid range [0, 5, 7, 24] __builtin_amo_stdat_s(ptr12, value12, 6); + + unsigned int *ptr13, value13, value14; + // AIX32-ERROR: error: this builtin is only available on 64-bit targets + __builtin_amo_lwat_csne(ptr13, value13, value14); + + unsigned long int *ptr14, value15, value16; + // AIX32-ERROR: error: this builtin is only available on 64-bit targets + __builtin_amo_ldat_csne(ptr14, value15, value16); + + signed int *ptr15, value17, value18; + // AIX32-ERROR: error: this builtin is only available on 64-bit targets + __builtin_amo_lwat_csne_s(ptr15, value17, value18); + + signed long int *ptr16, value19, value20; + // AIX32-ERROR: error: this builtin is only available on 64-bit targets + __builtin_amo_ldat_csne_s(ptr16, value19, value20); } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c b/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c index ffbedd8c50202..32b25de059936 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c @@ -219,6 +219,82 @@ void test_signed_stwat(int *ptr, int value, int * resp) { void test_signed_stdat(long int *ptr, long int value, long int * resp) { __builtin_amo_stdat_s(ptr, value, 5); } + +// CHECK-LABEL: define dso_local void @test_unsigned_lwat_csne( +// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef zeroext [[VAL1:%.*]], i32 noundef zeroext [[VAL2:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] 
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ppc.amo.lwat.csne(ptr [[PTR]], i32 [[VAL1]], i32 [[VAL2]]) +// CHECK-NEXT: store i32 [[TMP0]], ptr [[RESP]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_unsigned_lwat_csne( +// AIX-SAME: ptr noundef [[PTR:%.*]], i32 noundef zeroext [[VAL1:%.*]], i32 noundef zeroext [[VAL2:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ppc.amo.lwat.csne(ptr [[PTR]], i32 [[VAL1]], i32 [[VAL2]]) +// AIX-NEXT: store i32 [[TMP0]], ptr [[RESP]], align 4, !tbaa [[INT_TBAA2]] +// AIX-NEXT: ret void +// +void test_unsigned_lwat_csne(unsigned int *ptr, unsigned int val1, unsigned int val2, unsigned int * resp) { + unsigned int res = __builtin_amo_lwat_csne(ptr, val1, val2); + *resp = res; +} + +// CHECK-LABEL: define dso_local void @test_unsigned_ldat_csne( +// CHECK-SAME: ptr noundef [[PTR:%.*]], i64 noundef [[VAL1:%.*]], i64 noundef [[VAL2:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ppc.amo.ldat.csne(ptr [[PTR]], i64 [[VAL1]], i64 [[VAL2]]) +// CHECK-NEXT: store i64 [[TMP0]], ptr [[RESP]], align 8, !tbaa [[LONG_TBAA6]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_unsigned_ldat_csne( +// AIX-SAME: ptr noundef [[PTR:%.*]], i64 noundef [[VAL1:%.*]], i64 noundef [[VAL2:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ppc.amo.ldat.csne(ptr [[PTR]], i64 [[VAL1]], i64 [[VAL2]]) +// AIX-NEXT: store i64 [[TMP0]], ptr [[RESP]], align 8, !tbaa [[LONG_TBAA6]] +// AIX-NEXT: ret void +// +void test_unsigned_ldat_csne(unsigned long int *ptr, unsigned long int val1, unsigned 
long int val2, unsigned long int * resp) { + unsigned long int res = __builtin_amo_ldat_csne(ptr, val1, val2); + *resp = res; +} + +// CHECK-LABEL: define dso_local void @test_unsigned_lwat_csne_s( +// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef signext [[VAL1:%.*]], i32 noundef signext [[VAL2:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ppc.amo.lwat.csne(ptr [[PTR]], i32 [[VAL1]], i32 [[VAL2]]) +// CHECK-NEXT: store i32 [[TMP0]], ptr [[RESP]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_unsigned_lwat_csne_s( +// AIX-SAME: ptr noundef [[PTR:%.*]], i32 noundef signext [[VAL1:%.*]], i32 noundef signext [[VAL2:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ppc.amo.lwat.csne(ptr [[PTR]], i32 [[VAL1]], i32 [[VAL2]]) +// AIX-NEXT: store i32 [[TMP0]], ptr [[RESP]], align 4, !tbaa [[INT_TBAA2]] +// AIX-NEXT: ret void +// +void test_unsigned_lwat_csne_s(signed int *ptr, signed int val1, signed int val2, signed int * resp) { + signed int res = __builtin_amo_lwat_csne_s(ptr, val1, val2); + *resp = res; +} + +// CHECK-LABEL: define dso_local void @test_unsigned_ldat_csne_s( +// CHECK-SAME: ptr noundef [[PTR:%.*]], i64 noundef [[VAL1:%.*]], i64 noundef [[VAL2:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ppc.amo.ldat.csne(ptr [[PTR]], i64 [[VAL1]], i64 [[VAL2]]) +// CHECK-NEXT: store i64 [[TMP0]], ptr [[RESP]], align 8, !tbaa [[LONG_TBAA6]] +// CHECK-NEXT: ret void +// +// AIX-LABEL: define void @test_unsigned_ldat_csne_s( +// AIX-SAME: ptr noundef [[PTR:%.*]], i64 noundef [[VAL1:%.*]], i64 noundef 
[[VAL2:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AIX-NEXT: [[ENTRY:.*:]] +// AIX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ppc.amo.ldat.csne(ptr [[PTR]], i64 [[VAL1]], i64 [[VAL2]]) +// AIX-NEXT: store i64 [[TMP0]], ptr [[RESP]], align 8, !tbaa [[LONG_TBAA6]] +// AIX-NEXT: ret void +// +void test_unsigned_ldat_csne_s(signed long int *ptr, signed long int val1, signed long int val2, signed long int * resp) { + signed long int res = __builtin_amo_ldat_csne_s(ptr, val1, val2); + *resp = res; +} //. // CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} // CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index ce27e9163560b..33d9d73bafa7e 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -2167,4 +2167,12 @@ let TargetPrefix = "ppc" in { DefaultAttrsIntrinsic<[],[llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty], [IntrArgMemOnly, ImmArg<ArgIndex<2>>]>; + def int_ppc_amo_lwat_csne : ClangBuiltin<"__builtin_amo_lwat_csne">, + DefaultAttrsIntrinsic<[llvm_i32_ty],[llvm_ptr_ty, + llvm_i32_ty, llvm_i32_ty], + [IntrArgMemOnly]>; + def int_ppc_amo_ldat_csne : ClangBuiltin<"__builtin_amo_ldat_csne">, + DefaultAttrsIntrinsic<[llvm_i64_ty],[llvm_ptr_ty, + llvm_i64_ty, llvm_i64_ty], + [IntrArgMemOnly]>; } diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 718d51c5a0673..4d79c0946014f 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1674,6 +1674,27 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::EnforceIEIO)); return; } + case PPC::BL8: + case PPC::BL8_NOP: { + const MachineOperand &MO = MI->getOperand(0); + if (MO.isSymbol()) { + StringRef Name = MO.getSymbolName(); + 
Name.consume_front("."); + Name.consume_back("[PR]"); + bool IsLWAT = Name == "__lwat_csne_dummy"; + bool IsLDAT = Name == "__ldat_csne_dummy"; + if (IsLWAT || IsLDAT) { + EmitToStreamer(*OutStreamer, + MCInstBuilder(IsLWAT ? PPC::LWAT : PPC::LDAT) + .addReg(PPC::X3) + .addReg(PPC::X3) + .addReg(PPC::X6) + .addImm(16)); + return; + } + } + break; + } } LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 641b7804097f8..bae37b71922b7 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -11480,6 +11480,48 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return Flags; } +SDValue PPCTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned IntrinsicID = Op.getConstantOperandVal(1); + SDLoc dl(Op); + switch (IntrinsicID) { + case Intrinsic::ppc_amo_lwat_csne: + case Intrinsic::ppc_amo_ldat_csne: + SDValue Chain = Op.getOperand(0); + SDValue Ptr = Op.getOperand(2); + SDValue CmpVal = Op.getOperand(3); + SDValue NewVal = Op.getOperand(4); + + EVT VT = IntrinsicID == Intrinsic::ppc_amo_ldat_csne ? MVT::i64 : MVT::i32; + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + + TargetLowering::ArgListTy Args; + Args.emplace_back(DAG.getUNDEF(VT), Ty); + Args.emplace_back(CmpVal, Ty); + Args.emplace_back(NewVal, Ty); + Args.emplace_back(Ptr, IntPtrTy); + + // Lower to dummy call to use ABI for consecutive register allocation. + // Places return value, compare value, and new value in X3/X4/X5 as required + // by lwat/ldat FC=16, avoiding a new register class for 3 adjacent + // registers. + const char *SymName = IntrinsicID == Intrinsic::ppc_amo_ldat_csne + ? 
"__ldat_csne_dummy" + : "__lwat_csne_dummy"; + SDValue Callee = + DAG.getExternalSymbol(SymName, getPointerTy(DAG.getDataLayout())); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(CallingConv::C, Ty, Callee, + std::move(Args)); + + auto Result = LowerCallTo(CLI); + return DAG.getMergeValues({Result.first, Result.second}, dl); + } + return SDValue(); +} + SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const { // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to @@ -12689,7 +12731,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { // For counter-based loop handling. case ISD::INTRINSIC_W_CHAIN: - return SDValue(); + return LowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::BITCAST: return LowerBITCAST(Op, DAG); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 52e79469c78da..0279f7d994a56 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -736,6 +736,7 @@ namespace llvm { EVT VT, SDValue V1, SDValue V2) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/PowerPC/amo-enable.ll b/llvm/test/CodeGen/PowerPC/amo-enable.ll index 549f262022ab1..1f7655d80ac5e 100644 --- a/llvm/test/CodeGen/PowerPC/amo-enable.ll +++ b/llvm/test/CodeGen/PowerPC/amo-enable.ll @@ -112,9 +112,99 @@ entry: ret void } +define void @test_lwat_csne(ptr noundef %ptr, i32 noundef %value1, i32 noundef %value2, ptr nocapture %resp) { +; CHECK-LABEL: test_lwat_csne: +; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: mr r30, r6 +; CHECK-NEXT: mr r6, r3 +; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: clrldi r5, r5, 32 +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: lwat r3, r6, 16 +; CHECK-NEXT: stw r3, 0(r30) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_lwat_csne: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mflr r0 +; CHECK-BE-NEXT: stdu r1, -128(r1) +; CHECK-BE-NEXT: std r0, 144(r1) +; CHECK-BE-NEXT: std r31, 120(r1) # 8-byte Folded Spill +; CHECK-BE-NEXT: mr r31, r6 +; CHECK-BE-NEXT: mr r6, r3 +; CHECK-BE-NEXT: li r3, 0 +; CHECK-BE-NEXT: clrldi r4, r4, 32 +; CHECK-BE-NEXT: clrldi r5, r5, 32 +; CHECK-BE-NEXT: lwat r3, r6, 16 +; CHECK-BE-NEXT: stw r3, 0(r31) +; CHECK-BE-NEXT: ld r31, 120(r1) # 8-byte Folded Reload +; CHECK-BE-NEXT: addi r1, r1, 128 +; CHECK-BE-NEXT: ld r0, 16(r1) +; CHECK-BE-NEXT: mtlr r0 +; CHECK-BE-NEXT: blr +entry: + %0 = tail call i32 @llvm.ppc.amo.lwat.csne(ptr %ptr, i32 %value1, i32 %value2) + store i32 %0, ptr %resp, align 4 + ret void +} + +define void @test_ldat_csne(ptr noundef %ptr, i64 noundef %value1, i64 noundef %value2, ptr nocapture %resp) { +; CHECK-LABEL: test_ldat_csne: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: mr r30, r6 +; CHECK-NEXT: mr r6, r3 +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: ldat r3, r6, 16 +; CHECK-NEXT: std r3, 0(r30) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte 
Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_ldat_csne: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mflr r0 +; CHECK-BE-NEXT: stdu r1, -128(r1) +; CHECK-BE-NEXT: std r0, 144(r1) +; CHECK-BE-NEXT: std r31, 120(r1) # 8-byte Folded Spill +; CHECK-BE-NEXT: mr r31, r6 +; CHECK-BE-NEXT: mr r6, r3 +; CHECK-BE-NEXT: ldat r3, r6, 16 +; CHECK-BE-NEXT: std r3, 0(r31) +; CHECK-BE-NEXT: ld r31, 120(r1) # 8-byte Folded Reload +; CHECK-BE-NEXT: addi r1, r1, 128 +; CHECK-BE-NEXT: ld r0, 16(r1) +; CHECK-BE-NEXT: mtlr r0 +; CHECK-BE-NEXT: blr +entry: + %0 = tail call i64 @llvm.ppc.amo.ldat.csne(ptr %ptr, i64 %value1, i64 %value2) + store i64 %0, ptr %resp, align 8 + ret void +} + declare i64 @llvm.ppc.amo.ldat(ptr, i64, i32 immarg) declare i32 @llvm.ppc.amo.lwat(ptr, i32, i32 immarg) declare i64 @llvm.ppc.amo.ldat.cond(ptr, i32 immarg) declare i32 @llvm.ppc.amo.lwat.cond(ptr, i32 immarg) declare void @llvm.ppc.amo.stwat(ptr, i32, i32 immarg) declare void @llvm.ppc.amo.stdat(ptr, i64, i32 immarg) +declare i64 @llvm.ppc.amo.ldat.csne(ptr, i64, i64) +declare i32 @llvm.ppc.amo.lwat.csne(ptr, i32, i32) _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
