llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Gang Chen (cmc-rep) <details> <summary>Changes</summary> --- Patch is 132.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114550.diff 21 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+6-6) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl (+7) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl (+59-79) - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+32-9) - (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+3-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+122-73) - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h (+2) - (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+3-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp (+124) - (modified) llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp (+7) - (modified) llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp (+22-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.h (+5) - (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+7-16) - (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+6) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+65-42) - (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+2-2) - (modified) llvm/lib/Target/AMDGPU/SOPInstructions.td (+6-4) - (modified) llvm/test/Assembler/target-type-param-errors.ll (+5) - (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll (-1373) - (added) llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll (+66) - (added) llvm/test/CodeGen/AMDGPU/s-barrier.ll (+299) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 29001e32085151..8f44afa4059386 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -439,15 +439,15 @@ TARGET_BUILTIN(__builtin_amdgcn_s_sleep_var, "vUi", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_permlane16_var, "UiUiUiUiIbIb", "nc", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_permlanex16_var, "UiUiUiUiIbIb", "nc", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal, "vIi", "n", "gfx12-insts") -TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_var, "vi", "n", "gfx12-insts") +TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_var, "vv*i", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_barrier_wait, "vIs", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_isfirst, "bIi", "n", "gfx12-insts") -TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_isfirst_var, "bi", "n", "gfx12-insts") -TARGET_BUILTIN(__builtin_amdgcn_s_barrier_init, "vii", "n", "gfx12-insts") -TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vi", "n", "gfx12-insts") -TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vi", "n", "gfx12-insts") -TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "b", "n", "gfx12-insts") +TARGET_BUILTIN(__builtin_amdgcn_s_barrier_init, "vv*i", "n", "gfx12-insts") +TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vv*", "n", "gfx12-insts") +TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vv*", "n", "gfx12-insts") +TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "vIs", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_get_barrier_state, "Uii", "n", "gfx12-insts") +TARGET_BUILTIN(__builtin_amdgcn_s_get_named_barrier_state, "Uiv*", "n", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_prefetch_data, "vvC*Ui", "nc", "gfx12-insts") TARGET_BUILTIN(__builtin_amdgcn_s_buffer_prefetch_data, "vQbIiUi", "nc", "gfx12-insts") diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl index 5d86a9b369429f..1a5043328895ac 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl @@ -23,6 +23,13 @@ kernel void builtins_amdgcn_s_barrier_signal_isfirst_err(global int* in, global *out = *in; } +kernel void builtins_amdgcn_s_barrier_leave_err(global int* in, global int* out, int barrier) { + + __builtin_amdgcn_s_barrier_signal(-1); + __builtin_amdgcn_s_barrier_leave(barrier); // expected-error {{'__builtin_amdgcn_s_barrier_leave' must be a constant integer}} + *out = *in; +} + void test_s_buffer_prefetch_data(__amdgpu_buffer_rsrc_t rsrc, unsigned int off) { __builtin_amdgcn_s_buffer_prefetch_data(rsrc, off, 31); // expected-error {{'__builtin_amdgcn_s_buffer_prefetch_data' must be a constant integer}} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl index 9bfedac0032965..b1866a8e492c84 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl @@ -87,16 +87,21 @@ void test_s_barrier_signal() // CHECK-LABEL: @test_s_barrier_signal_var( // CHECK-NEXT: entry: +// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4 -// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal.var(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3) +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) [[TMP1]], i32 [[TMP2]]) // CHECK-NEXT: ret void // -void test_s_barrier_signal_var(int a) +void test_s_barrier_signal_var(void *bar, int a) { - __builtin_amdgcn_s_barrier_signal_var(a); + __builtin_amdgcn_s_barrier_signal_var(bar, a); } // CHECK-LABEL: @test_s_barrier_signal_isfirst( @@ -134,110 +139,63 @@ void test_s_barrier_signal_isfirst(int* a, int* b, int *c) __builtin_amdgcn_s_barrier_wait(1); } -// CHECK-LABEL: @test_s_barrier_isfirst_var( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr -// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr -// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst.var(i32 [[TMP0]]) -// CHECK-NEXT: br i1 [[TMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -// CHECK: if.then: -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[TMP2]], ptr [[A_ADDR_ASCAST]], align 8 -// CHECK-NEXT: br label [[IF_END:%.*]] -// CHECK: if.else: -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[TMP3]], ptr [[A_ADDR_ASCAST]], align 8 -// CHECK-NEXT: br label [[IF_END]] -// CHECK: if.end: -// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 1) -// CHECK-NEXT: ret void -// -void test_s_barrier_isfirst_var(int* a, int* b, int *c, int d) -{ - if ( __builtin_amdgcn_s_barrier_signal_isfirst_var(d)) - a = b; - else - a = c; - - __builtin_amdgcn_s_barrier_wait(1); - -} - // CHECK-LABEL: @test_s_barrier_init( // CHECK-NEXT: entry: +// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8 // CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4 -// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.init(i32 1, i32 [[TMP0]]) +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3) +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) [[TMP1]], i32 [[TMP2]]) // CHECK-NEXT: ret void // -void test_s_barrier_init(int a) +void test_s_barrier_init(void *bar, int a) { - __builtin_amdgcn_s_barrier_init(1, a); + __builtin_amdgcn_s_barrier_init(bar, a); } // CHECK-LABEL: @test_s_barrier_join( // CHECK-NEXT: entry: -// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.join(i32 1) +// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr +// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3) +// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) [[TMP1]]) // CHECK-NEXT: ret void // -void test_s_barrier_join() +void test_s_barrier_join(void *bar) { - __builtin_amdgcn_s_barrier_join(1); + __builtin_amdgcn_s_barrier_join(bar); } // CHECK-LABEL: @test_s_wakeup_barrier( // CHECK-NEXT: entry: -// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.join(i32 1) +// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr +// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3) +// CHECK-NEXT: call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) [[TMP1]]) // CHECK-NEXT: ret void // -void test_s_wakeup_barrier() +void test_s_wakeup_barrier(void *bar) { - __builtin_amdgcn_s_barrier_join(1); + __builtin_amdgcn_s_wakeup_barrier(bar); } // CHECK-LABEL: @test_s_barrier_leave( // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr -// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr -// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.s.barrier.leave() -// CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -// CHECK: if.then: -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[TMP1]], ptr [[A_ADDR_ASCAST]], align 8 -// CHECK-NEXT: br label [[IF_END:%.*]] -// CHECK: if.else: -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 -// CHECK-NEXT: store ptr [[TMP2]], ptr [[A_ADDR_ASCAST]], align 8 -// CHECK-NEXT: br label [[IF_END]] -// CHECK: if.end: +// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.leave(i16 1) // CHECK-NEXT: ret void // -void test_s_barrier_leave(int* a, int* b, int *c) +void test_s_barrier_leave() { - if (__builtin_amdgcn_s_barrier_leave()) - a = b; - else - a = c; + __builtin_amdgcn_s_barrier_leave(1); } // CHECK-LABEL: @test_s_get_barrier_state( @@ -261,6 +219,28 @@ unsigned test_s_get_barrier_state(int a) return State; } +// CHECK-LABEL: @test_s_get_named_barrier_state( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[STATE:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr +// CHECK-NEXT: [[STATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STATE]] to ptr +// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3) +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) [[TMP1]]) +// CHECK-NEXT: store i32 [[TMP2]], ptr [[STATE_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[STATE_ASCAST]], align 4 +// CHECK-NEXT: ret i32 [[TMP3]] +// +unsigned test_s_get_named_barrier_state(void *bar) +{ + unsigned State = __builtin_amdgcn_s_get_named_barrier_state(bar); + return State; +} + // CHECK-LABEL: @test_s_ttracedata( // CHECK-NEXT: entry: // CHECK-NEXT: call void @llvm.amdgcn.s.ttracedata(i32 1) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 143b538b361c9c..d6375ab77cfb32 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// def global_ptr_ty : LLVMQualPointerType<1>; +def local_ptr_ty : LLVMQualPointerType<3>; // The amdgpu-no-* attributes (ex amdgpu-no-workitem-id-z) typically inferred // by the backend cause whole-program undefined behavior when violated, such as @@ -247,48 +248,70 @@ def int_amdgcn_s_sendmsghalt : ClangBuiltin<"__builtin_amdgcn_s_sendmsghalt">, def int_amdgcn_s_sendmsg_rtn : Intrinsic <[llvm_anyint_ty], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>; +// Vanilla workgroup sync-barrier def int_amdgcn_s_barrier : ClangBuiltin<"__builtin_amdgcn_s_barrier">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; +// Lower-level split-barrier intrinsics + +// void @llvm.amdgcn.s.barrier.signal(i32 %barrierType) +// only for non-named barrier def int_amdgcn_s_barrier_signal : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal">, Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; +// void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) %barrier, i32 %memberCnt) +// The %barrier and %memberCnt argument must be uniform, otherwise behavior is undefined. def int_amdgcn_s_barrier_signal_var : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal_var">, - Intrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, + Intrinsic<[], [local_ptr_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; +// bool @llvm.amdgcn.s.barrier.signal.isfirst(i32 %barrierType) +// only for non-named barrier def int_amdgcn_s_barrier_signal_isfirst : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal_isfirst">, Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; -def int_amdgcn_s_barrier_signal_isfirst_var : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal_isfirst_var">, - Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, - IntrNoCallback, IntrNoFree]>; - +// void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) %barrier, i32 %memberCnt) +// The %barrier and %memberCnt argument must be uniform, otherwise behavior is undefined. def int_amdgcn_s_barrier_init : ClangBuiltin<"__builtin_amdgcn_s_barrier_init">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, + Intrinsic<[], [local_ptr_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; +// void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) %barrier) +// The %barrier argument must be uniform, otherwise behavior is undefined. def int_amdgcn_s_barrier_join : ClangBuiltin<"__builtin_amdgcn_s_barrier_join">, - Intrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, + Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; +// void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) %barrier) +// The %barrier argument must be uniform, otherwise behavior is undefined. def int_amdgcn_s_wakeup_barrier : ClangBuiltin<"__builtin_amdgcn_s_wakeup_barrier">, - Intrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, + Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; +// void @llvm.amdgcn.s.barrier.wait(i16 %barrierType) def int_amdgcn_s_barrier_wait : ClangBuiltin<"__builtin_amdgcn_s_barrier_wait">, Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; +// void @llvm.amdgcn.s.barrier.leave(i16 %barrierType) def int_amdgcn_s_barrier_leave : ClangBuiltin<"__builtin_amdgcn_s_barrier_leave">, - Intrinsic<[llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent, + IntrWillReturn, IntrNoCallback, IntrNoFree]>; +// uint32_t @llvm.amdgcn.s.get.barrier.state(i32 %barrierId) +// The %barrierType argument must be uniform, otherwise behavior is undefined. def int_amdgcn_s_get_barrier_state : ClangBuiltin<"__builtin_amdgcn_s_get_barrier_state">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; +// uint32_t @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) %barrier) +// The %barrier argument must be uniform, otherwise behavior is undefined. +def int_amdgcn_s_get_named_barrier_state : ClangBuiltin<"__builtin_amdgcn_s_get_named_barrier_state">, + Intrinsic<[llvm_i32_ty], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, + IntrNoCallback, IntrNoFree]>; + def int_amdgcn_wave_barrier : ClangBuiltin<"__builtin_amdgcn_wave_barrier">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index e4b54c7d72b083..8c640ec18e1a49 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -16,6 +16,7 @@ #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUMachineFunction.h" +#include "AMDGPUMemoryUtils.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" @@ -1508,7 +1509,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) { if (!MFI->isModuleEntryFunction() && - GV->getName() != "llvm.amdgcn.module.lds") { + GV->getName() != "llvm.amdgcn.module.lds" && + !AMDGPU::isNamedBarrier(*cast<GlobalVariable>(GV))) { SDLoc DL(Op); const Function &Fn = DAG.getMachineFunction().getFunction(); DiagnosticInfoUnsupported BadLDSDecl( diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 800bdbe04cf70d..1873251ea358b1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2181,15 +2181,16 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( case Intrinsic::amdgcn_ds_bvh_stack_rtn: return selectDSBvhStackIntrinsic(I); case Intrinsic::amdgcn_s_barrier_init: + case Intrinsic::amdgcn_s_barrier_signal_var: + return selectNamedBarrierInit(I, IntrinsicID); case Intrinsic::amdgcn_s_barrier_join: case Intrinsic::amdgcn_s_wakeup_barrier: - case Intrinsic::amdgcn_s_get_barrier_state: + case Intrinsic::amdgcn_s_get_named_barrier_state: ret... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/114550 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits