llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Pierre van Houtryve (Pierre-vh)

<details>
<summary>Changes</summary>

- Add clang builtins + sema/codegen
- Add IR intrinsics + verifier checks
- Add DAG/GlobalISel codegen for the intrinsics
- Add lowering in SIMemoryLegalizer using an MMO flag

(A short usage sketch of the new builtins follows at the end of this message.)

---

Patch is 156.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156418.diff

24 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+10)
- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+3-1)
- (modified) clang/include/clang/Sema/SemaAMDGPU.h (+2)
- (modified) clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp (+43)
- (modified) clang/lib/Sema/SemaAMDGPU.cpp (+53)
- (added) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-cooperative-atomics.cl (+104)
- (added) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-cooperative-atomics.cl (+66)
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+30)
- (modified) llvm/include/llvm/Target/TargetSelectionDAG.td (+13)
- (modified) llvm/lib/IR/Verifier.cpp (+22)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructions.td (+6)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+14)
- (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+4)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+75)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+1)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+4)
- (modified) llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (+41-3)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-agent.ll (+521)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-basic.ll (+49)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-singlethread.ll (+479)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-system.ll (+533)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-wavefront.ll (+479)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-workgroup.ll (+479)
- (added) llvm/test/Verifier/AMDGPU/llvm.amdgcn.cooperative.atomic.ll (+47)

``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 7b7dbf7043099..0f9c9720a1199 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -831,5 +831,15 @@ TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x128_iu8, "V8iIbV8iIbV16iV8iiIbI
 TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x64_f16, "V8fIbV16hIbV32hV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
 TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x64_f16, "V8hIbV16hIbV32hV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
 
+// GFX12.5 128B cooperative atomics
+TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_32x4B, "ii*IicC*", "nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_32x4B, "vi*iIicC*", "nc", "gfx1250-insts,wavefrontsize32")
+
+TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_16x8B, "V2iV2i*IicC*", "nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_16x8B, "vV2i*V2iIicC*", "nc", "gfx1250-insts,wavefrontsize32")
+
+TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_8x16B, "V4iV4i*IicC*", "nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_8x16B, "vV4i*V4iIicC*", "nc", "gfx1250-insts,wavefrontsize32")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 0f3aa9aea215f..3038763bac31b 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10692,7 +10692,7 @@ def warn_dangling_reference_captured_by_unknown : Warning<
 // Diagnostics based on the Lifetime safety analysis.
 def warn_lifetime_safety_loan_expires_permissive : Warning<
-  "object whose reference is captured does not live long enough">,
+  "object whose reference is captured does not live long enough">,
   InGroup<LifetimeSafetyPermissive>, DefaultIgnore;
 def warn_lifetime_safety_loan_expires_strict : Warning<
   "object whose reference is captured may not live long enough">,
@@ -13603,4 +13603,6 @@ def warn_acc_var_referenced_lacks_op
 // AMDGCN builtins diagnostics
 def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">;
 def note_amdgcn_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
+
+def err_amdgcn_coop_atomic_invalid_as : Error<"cooperative atomic requires a global or generic pointer">;
 } // end of sema component.
diff --git a/clang/include/clang/Sema/SemaAMDGPU.h b/clang/include/clang/Sema/SemaAMDGPU.h
index d62c9bb65fadb..bac812a9d4fcf 100644
--- a/clang/include/clang/Sema/SemaAMDGPU.h
+++ b/clang/include/clang/Sema/SemaAMDGPU.h
@@ -26,6 +26,8 @@ class SemaAMDGPU : public SemaBase {
   bool CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
 
+  bool checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore);
+
   bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
                                unsigned NumDataArgs);
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 4c1953e4b8e34..87a46287c4022 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -701,6 +701,49 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     return emitBuiltinWithOneOverloadedType<5>(*this, E,
                                                Intrinsic::amdgcn_load_to_lds);
   }
+  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
+  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
+  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
+  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
+  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
+  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B: {
+    Intrinsic::ID IID;
+    switch (BuiltinID) {
+    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
+      IID = Intrinsic::amdgcn_cooperative_atomic_load_32x4B;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
+      IID = Intrinsic::amdgcn_cooperative_atomic_store_32x4B;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
+      IID = Intrinsic::amdgcn_cooperative_atomic_load_16x8B;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
+      IID = Intrinsic::amdgcn_cooperative_atomic_store_16x8B;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
+      IID = Intrinsic::amdgcn_cooperative_atomic_load_8x16B;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
+      IID = Intrinsic::amdgcn_cooperative_atomic_store_8x16B;
+      break;
+    }
+
+    LLVMContext &Ctx = CGM.getLLVMContext();
+    SmallVector<Value *, 5> Args;
+    // last argument is a MD string
+    const unsigned ScopeArg = E->getNumArgs() - 1;
+    for (unsigned i = 0; i != ScopeArg; ++i)
+      Args.push_back(EmitScalarExpr(E->getArg(i)));
+    StringRef Arg = cast<StringLiteral>(E->getArg(ScopeArg)->IgnoreParenCasts())
+                        ->getString();
+    llvm::MDNode *MD = llvm::MDNode::get(Ctx, {llvm::MDString::get(Ctx, Arg)});
+    Args.push_back(llvm::MetadataAsValue::get(Ctx, MD));
+    // Intrinsic is typed based on the pointer AS. Pointer is always the first
+    // argument.
+    llvm::Function *F = CGM.getIntrinsic(IID, {Args[0]->getType()});
+    return Builder.CreateCall(F, {Args});
+  }
   case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
     Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
                                    {llvm::Type::getInt64Ty(getLLVMContext())});
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 1913bb830ccd0..baba503239e9f 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -15,6 +15,7 @@
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Sema/Ownership.h"
 #include "clang/Sema/Sema.h"
+#include "llvm/Support/AMDGPUAddrSpace.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include <cstdint>
 
@@ -100,6 +101,14 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_fp6:
   case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_bf6:
     return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 7);
+  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B:
+  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B:
+  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B:
+    return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/false);
+  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_32x4B:
+  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
+  case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
+    return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/true);
   default:
     return false;
   }
@@ -145,6 +154,50 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
   return false;
 }
 
+bool SemaAMDGPU::checkCoopAtomicFunctionCall(CallExpr *TheCall, bool IsStore) {
+  bool Fail = false;
+
+  // First argument is a global or generic pointer.
+  Expr *PtrArg = TheCall->getArg(0);
+  QualType PtrTy = PtrArg->getType()->getPointeeType();
+  unsigned AS = getASTContext().getTargetAddressSpace(PtrTy.getAddressSpace());
+  if (AS != llvm::AMDGPUAS::FLAT_ADDRESS &&
+      AS != llvm::AMDGPUAS::GLOBAL_ADDRESS) {
+    Fail = true;
+    Diag(TheCall->getBeginLoc(), diag::err_amdgcn_coop_atomic_invalid_as)
+        << PtrArg->getSourceRange();
+  }
+
+  // Check atomic ordering
+  Expr *AtomicOrdArg = TheCall->getArg(IsStore ? 2 : 1);
+  Expr::EvalResult AtomicOrdArgRes;
+  if (!AtomicOrdArg->EvaluateAsInt(AtomicOrdArgRes, getASTContext()))
+    llvm_unreachable("Intrinsic requires imm for atomic ordering argument!");
+  auto Ord =
+      llvm::AtomicOrderingCABI(AtomicOrdArgRes.Val.getInt().getZExtValue());
+
+  // Atomic ordering cannot be acq_rel in any case, acquire for stores or
+  // release for loads.
+  if (!llvm::isValidAtomicOrderingCABI((unsigned)Ord) ||
+      (Ord == llvm::AtomicOrderingCABI::acq_rel) ||
+      Ord == (IsStore ? llvm::AtomicOrderingCABI::acquire
+                      : llvm::AtomicOrderingCABI::release)) {
+    return Diag(AtomicOrdArg->getBeginLoc(),
+                diag::warn_atomic_op_has_invalid_memory_order)
+           << 0 << AtomicOrdArg->getSourceRange();
+  }
+
+  // Last argument is a string literal
+  Expr *Arg = TheCall->getArg(TheCall->getNumArgs() - 1);
+  if (!isa<StringLiteral>(Arg->IgnoreParenImpCasts())) {
+    Fail = true;
+    Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal)
+        << Arg->getSourceRange();
+  }
+
+  return Fail;
+}
+
 bool SemaAMDGPU::checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
                                          unsigned NumDataArgs) {
   assert(NumDataArgs <= 2);
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-cooperative-atomics.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-cooperative-atomics.cl
new file mode 100644
index 0000000000000..8768f2f367654
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-cooperative-atomics.cl
@@ -0,0 +1,104 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s
+
+typedef int v2i __attribute__((ext_vector_type(2)));
+typedef int v4i __attribute__((ext_vector_type(4)));
+
+// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_32x4B(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p1(ptr addrspace(1) [[GADDR]], i32 [[VAL]], i32 0, metadata [[META4:![0-9]+]])
+// CHECK-NEXT:    ret void
+//
+void test_amdgcn_cooperative_atomic_store_32x4B(global int* gaddr, int val)
+{
+  __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, "agent");
+}
+
+// CHECK-LABEL: define dso_local i32 @test_amdgcn_cooperative_atomic_load_32x4B(
+// CHECK-SAME: ptr noundef readonly captures(none) [[ADDR:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr [[ADDR]], i32 0, metadata [[META5:![0-9]+]])
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int test_amdgcn_cooperative_atomic_load_32x4B(int* addr)
+{
+  return __builtin_amdgcn_cooperative_atomic_load_32x4B(addr, __ATOMIC_RELAXED, "");
+}
+
+// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_16x8B(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], <2 x i32> noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p1(ptr addrspace(1) [[GADDR]], <2 x i32> [[VAL]], i32 0, metadata [[META5]])
+// CHECK-NEXT:    ret void
+//
+void test_amdgcn_cooperative_atomic_store_16x8B(global v2i* gaddr, v2i val)
+{
+  __builtin_amdgcn_cooperative_atomic_store_16x8B(gaddr, val, __ATOMIC_RELAXED, "");
+}
+
+// CHECK-LABEL: define dso_local <2 x i32> @test_amdgcn_cooperative_atomic_load_16x8B(
+// CHECK-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p1(ptr addrspace(1) [[GADDR]], i32 0, metadata [[META6:![0-9]+]])
+// CHECK-NEXT:    ret <2 x i32> [[TMP0]]
+//
+v2i test_amdgcn_cooperative_atomic_load_16x8B(global v2i* gaddr)
+{
+  return __builtin_amdgcn_cooperative_atomic_load_16x8B(gaddr, __ATOMIC_RELAXED, "workgroup");
+}
+
+// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_8x16B(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], <4 x i32> noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p1(ptr addrspace(1) [[GADDR]], <4 x i32> [[VAL]], i32 0, metadata [[META7:![0-9]+]])
+// CHECK-NEXT:    ret void
+//
+void test_amdgcn_cooperative_atomic_store_8x16B(global v4i* gaddr, v4i val)
+{
+  __builtin_amdgcn_cooperative_atomic_store_8x16B(gaddr, val, __ATOMIC_RELAXED, "singlethread");
+}
+
+// CHECK-LABEL: define dso_local <4 x i32> @test_amdgcn_cooperative_atomic_load_8x16B(
+// CHECK-SAME: ptr addrspace(1) noundef readonly captures(none) [[GADDR:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p1(ptr addrspace(1) [[GADDR]], i32 0, metadata [[META4]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+v4i test_amdgcn_cooperative_atomic_load_8x16B(global v4i* gaddr)
+{
+  return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_RELAXED, "agent");
+}
+
+// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_32x4B_truncated(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], i64 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[VAL]] to i32
+// CHECK-NEXT:    tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p1(ptr addrspace(1) [[GADDR]], i32 [[CONV]], i32 0, metadata [[META4]])
+// CHECK-NEXT:    ret void
+//
+void test_amdgcn_cooperative_atomic_store_32x4B_truncated(global int* gaddr, long val)
+{
+  __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, "agent");
+}
+
+// CHECK-LABEL: define dso_local void @test_amdgcn_cooperative_atomic_store_32x4B_extended(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) [[GADDR:%.*]], i8 noundef signext [[VAL:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[VAL]] to i32
+// CHECK-NEXT:    tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p1(ptr addrspace(1) [[GADDR]], i32 [[CONV]], i32 0, metadata [[META4]])
+// CHECK-NEXT:    ret void
+//
+void test_amdgcn_cooperative_atomic_store_32x4B_extended(global int* gaddr, char val)
+{
+  __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, "agent");
+}
+
+//.
+// CHECK: [[META4]] = !{!"agent"}
+// CHECK: [[META5]] = !{!""}
+// CHECK: [[META6]] = !{!"workgroup"}
+// CHECK: [[META7]] = !{!"singlethread"}
+//.
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-cooperative-atomics.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-cooperative-atomics.cl
new file mode 100644
index 0000000000000..0ab9a5a43e718
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-cooperative-atomics.cl
@@ -0,0 +1,66 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -verify -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s
+
+typedef int v2i __attribute__((ext_vector_type(2)));
+typedef int v4i __attribute__((ext_vector_type(4)));
+
+void test_amdgcn_cooperative_atomic_store_32x4B(global int* gaddr, int val, const char* syncscope)
+{
+  __builtin_amdgcn_cooperative_atomic_store_32x4B(gaddr, val, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
+}
+
+int test_amdgcn_cooperative_atomic_load_32x4B(global int* gaddr, const char* syncscope)
+{
+  return __builtin_amdgcn_cooperative_atomic_load_32x4B(gaddr, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
+}
+
+void test_amdgcn_cooperative_atomic_store_16x8B(global v2i* gaddr, v2i val, const char* syncscope)
+{
+  __builtin_amdgcn_cooperative_atomic_store_16x8B(gaddr, val, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
+}
+
+v2i test_amdgcn_cooperative_atomic_load_16x8B(global v2i* gaddr, const char* syncscope)
+{
+  return __builtin_amdgcn_cooperative_atomic_load_16x8B(gaddr, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
+}
+
+void test_amdgcn_cooperative_atomic_store_8x16B(global v4i* gaddr, v4i val, const char* syncscope)
+{
+  __builtin_amdgcn_cooperative_atomic_store_8x16B(gaddr, val, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
+}
+
+v4i test_amdgcn_cooperative_atomic_load_8x16B(global v4i* gaddr, const char* syncscope)
+{
+  return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_RELAXED, syncscope); // expected-error {{expression is not a string literal}}
+}
+
+v4i test_amdgcn_cooperative_atomic_load_8x16B_release(global v4i* gaddr)
+{
+  return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_RELEASE, ""); // expected-warning {{memory order argument to atomic operation is invalid}}
+}
+
+v4i test_amdgcn_cooperative_atomic_load_8x16B_acq_rel(global v4i* gaddr)
+{
+  return __builtin_amdgcn_cooperative_atomic_load_8x16B(gaddr, __ATOMIC_ACQ_REL, ""); // expected-warning {{memory order argument to atomic operation is invalid}}
+}
+
+void test_amdgcn_cooperative_atomic_store_32x4B__sharedptr(local int* addr, int val)
+{
+  __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, __ATOMIC_RELAXED, ""); // expected-error {{cooperative atomic requires a global or generic pointer}}
+}
+
+void test_amdgcn_cooperative_atomic_store_32x4B__ordering_not_imm(local int* addr, int ord, int val)
+{
+  __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, ord, ""); // expected-error {{argument to '__builtin_amdgcn_cooperative_atomic_store_32x4B' must be a constant integer}}
+}
+
+void test_amdgcn_cooperative_atomic_store_32x4B__acquire(int* addr, int ord, int val)
+{
+  __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, __ATOMIC_ACQUIRE, ""); // expected-warning {{memory order argument to atomic operation is invalid}}
+}
+
+void test_amdgcn_cooperative_atomic_store_32x4B__acq_rel(int* addr, int ord, int val)
+{
+  __builtin_amdgcn_cooperative_atomic_store_32x4B(addr, val, __ATOMIC_ACQ_REL, ""); // expected-warning {{memory order argument to atomic operation is invalid}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 3c5ac99512a64..afb12a910b1ee 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3769,6 +3769,36 @@ def int_amdgcn_perm_pk16_b8_u4 : ClangBuiltin<"__builtin_amdgcn_perm_pk16_b8_u4"
   DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_v2i32_ty],
                         [IntrNoMem, IntrSpeculatable]>;
 
+class AMDGPUCooperativeAtomicStore<LLVMType Ty> : Intrinsic <
+  [],
+  [llvm_anyptr_ty,      // pointer to store to
+   Ty,                  // value to store
+   llvm_i32_ty,         // C ABI Atomic Ordering ID
+   llvm_metadata_ty],   // syncscope
+  [IntrWriteMem, WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<2>>,
+   IntrNoCallback, IntrNoFree, IntrConvergent],
+  "",
+  [SDNPMemOperand, SDNPMayStore]
+>;
+
+class AMDGPUCooperativeAtomicLoad<LLVMType Ty> : Intrinsic <
+  [Ty],
+  [llvm_anyptr_ty,      // pointer to load from
+   llvm_i32_ty,         // C ABI Atomic Ordering ID
+   llvm_metadata_ty],   // syncscope
+  [IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
+   IntrNoCallback, IntrNoFree, IntrConvergent],
+  "",
+  [SDNPMemOperand, SDNPMayLoad]
+>;
+
+def int_amdgcn_cooperative_atomic_load_32x4B : AMDGPUCooperativeAtomicLoad<llvm_i32_ty>;
+def int_amdgcn_cooperative_atomic_store_32x4B : AMDGPUCooperativeAtomicStore<llvm_i32_ty>;
+def int_amdgcn_cooperative_atomic_load_16x8B : AMDGPUCooperativeAtomicLoad<llvm_v2i32_ty>;
+def int_amdgcn_cooperative_atomic_store_16x8B : ...
[truncated]
``````````

</details>

https://github.com/llvm/llvm-project/pull/156418
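
---

For readers skimming the patch, here is a minimal usage sketch distilled from the CodeGenOpenCL test above. The wrapper function `copy_v4i_agent` is illustrative and not part of the patch; the builtin names, argument order (pointer, value for stores, C ABI atomic ordering, syncscope string literal), and the sema restrictions in the comments are taken from the diff.

```c
// Sketch only; assumes what the patch's tests show. Build per the test's RUN
// line: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250
typedef int v4i __attribute__((ext_vector_type(4)));

// Hypothetical wrapper: 8 lanes cooperatively move 16B each (8x16B variant).
void copy_v4i_agent(global v4i* dst, global v4i* src)
{
  // Pointer must be global or generic, and the ordering must be a constant;
  // loads reject __ATOMIC_RELEASE and __ATOMIC_ACQ_REL (per SemaAMDGPU).
  v4i v = __builtin_amdgcn_cooperative_atomic_load_8x16B(src, __ATOMIC_RELAXED, "agent");
  // Stores reject __ATOMIC_ACQUIRE and __ATOMIC_ACQ_REL. The syncscope must
  // be a string literal; "" appears to select the default (system) scope.
  __builtin_amdgcn_cooperative_atomic_store_8x16B(dst, v, __ATOMIC_RELAXED, "agent");
}
```

Per the test's CHECK lines, these lower to `@llvm.amdgcn.cooperative.atomic.load.8x16B.p1` / `@llvm.amdgcn.cooperative.atomic.store.8x16B.p1` calls carrying the ordering as an immediate (`i32 0` for `__ATOMIC_RELAXED`) and the syncscope as metadata, which SIMemoryLegalizer then lowers using the new MMO flag.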