https://github.com/JonChesterfield created https://github.com/llvm/llvm-project/pull/137678
We have a clang builtin for one of four very similar IR intrinsics. This patch adds builtins for the other three. IR intrinsics introduced in https://reviews.llvm.org/D124884. The request from composable kernels was for llvm.amdgcn.struct.buffer.load.lds but as that brings us to 2/4 with clang builtins, filling in the remainder at the same time. The differences are whether the first argument is a v4u32 or a __amdgpu_buffer_rsrc_t and whether there is an index argument added to the list. Test cases are in existing files where possible. Merging the four single-function expected-error cases into a single file stops check-clang passing, don't understand what lit/verify quirk I'm missing there. Existing semantic checking does the right thing with a few more cases, ClangBuiltin<> does the right thing, named the new builtins after the IR intrinsics. Fixes AMD internal SWDEV-529245 >From 23b3dfcb25709ca7c00ce07d22526526c45978d3 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield <jonathanchesterfi...@gmail.com> Date: Mon, 28 Apr 2025 16:07:31 +0100 Subject: [PATCH] [clang][amdgpu] Add builtins for raw/struct buffer lds load --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 3 ++ clang/lib/Sema/SemaAMDGPU.cpp | 5 +++- .../builtins-amdgcn-raw-buffer-load-lds.cl | 29 +++++++++++++++++++ ...amdgcn-raw-buffer-load-lds-target-error.cl | 12 ++++++++ ...ns-amdgcn-raw-ptr-buffer-load-lds-error.cl | 23 +++++++++++++++ ...gcn-struct-buffer-load-lds-target-error.cl | 12 ++++++++ ...struct-ptr-buffer-load-lds-target-error.cl | 10 +++++++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 12 ++++++-- 8 files changed, 102 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-lds-target-error.cl create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-struct-buffer-load-lds-target-error.cl create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-struct-ptr-buffer-load-lds-target-error.cl diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 39fef9e4601f8..98e060658778a 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -163,7 +163,10 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2UiQbiiIi", "n") BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n") BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n") +TARGET_BUILTIN(__builtin_amdgcn_raw_buffer_load_lds, "vV4Uiv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts") TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts") +TARGET_BUILTIN(__builtin_amdgcn_struct_buffer_load_lds, "vV4Uiv*3IUiiiiIiIi", "t", "vmem-to-lds-load-insts") +TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, "vQbv*3IUiiiiIiIi", "t", "vmem-to-lds-load-insts") //===----------------------------------------------------------------------===// // Ballot builtins. diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index a6366aceec2a6..69db969b6bfbb 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -35,8 +35,11 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, Builtin::evaluateRequiredTargetFeatures("gfx950-insts", CallerFeatureMap); switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_global_load_lds: + case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_lds: case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds: - case AMDGPU::BI__builtin_amdgcn_global_load_lds: { + case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_lds: + case AMDGPU::BI__builtin_amdgcn_struct_buffer_load_lds: { constexpr const int SizeIdx = 2; llvm::APSInt Size; Expr *ArgExpr = TheCall->getArg(SizeIdx); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl index 8256b61525f9d..5f38cafb6a21e 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl @@ -2,6 +2,17 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck %s +typedef unsigned int v4u32 __attribute__((ext_vector_type(4))); + +// CHECK-LABEL: @test_amdgcn_raw_buffer_load_lds( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 1, i32 [[OFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3) +// CHECK-NEXT: ret void +// +void test_amdgcn_raw_buffer_load_lds(v4u32 rsrc, __local void * lds, int offset, int soffset) { + __builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, 1, offset, soffset, 2, 3); +} + // CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_lds( // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 1, i32 [[OFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3) @@ -10,3 +21,21 @@ void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int offset, int soffset) { __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 1, offset, soffset, 2, 3); } + +// CHECK-LABEL: @test_amdgcn_struct_buffer_load_lds( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 4, i32 [[VINDEX:%.*]], i32 [[VOFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3) +// CHECK-NEXT: ret void +// +void test_amdgcn_struct_buffer_load_lds(v4u32 rsrc, __local void * lds, int size, int vindex, int voffset, int soffset) { + __builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, 2, 3); +} + +// CHECK-LABEL: @test_amdgcn_struct_ptr_buffer_load_lds( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 4, i32 [[VINDEX:%.*]], i32 [[VOFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3) +// CHECK-NEXT: ret void +// +void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int size, int vindex, int voffset, int soffset) { + __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, voffset, soffset, 2, 3); +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-lds-target-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-lds-target-error.cl new file mode 100644 index 0000000000000..234c6150a4ff5 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-lds-target-error.cl @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tahiti -S -verify -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu bonaire -S -verify -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu carrizo -S -verify -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -verify -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S -verify -o - %s +// REQUIRES: amdgpu-registered-target + +typedef unsigned int v4u32 __attribute__((ext_vector_type(4))); + +void test_amdgcn_raw_buffer_load_lds(v4u32 rsrc, __local void* lds, int offset, int soffset, int x) { + __builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl index 5915393ae7f56..3af99ab58ff93 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl @@ -2,9 +2,32 @@ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -S -verify=gfx950,expected -o - %s // REQUIRES: amdgpu-registered-target +typedef unsigned int v4u32 __attribute__((ext_vector_type(4))); + +void test_amdgcn_raw_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int offset, int soffset, int x) { + __builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, x, offset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_lds' must be a constant integer}} + __builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, 4, offset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_lds' must be a constant integer}} + __builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_lds' must be a constant integer}} + __builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, 3, offset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}} +} + void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int offset, int soffset, int x) { __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, x, offset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}} __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}} __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}} __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 3, offset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}} } + +void test_amdgcn_struct_buffer_load_lds(v4u32 rsrc, __local void* lds, int index, int offset, int soffset, int x) { + __builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, x, index, offset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_struct_buffer_load_lds' must be a constant integer}} + __builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, 4, index, offset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_struct_buffer_load_lds' must be a constant integer}} + __builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, 4, index, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_struct_buffer_load_lds' must be a constant integer}} + __builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, 3, index, offset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}} +} + +void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int index, int offset, int soffset, int x) { + __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, x, index, offset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}} + __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, index, offset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}} + __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, index, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}} + __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 3, index, offset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-struct-buffer-load-lds-target-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-struct-buffer-load-lds-target-error.cl new file mode 100644 index 0000000000000..0b529fa0aa2df --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-struct-buffer-load-lds-target-error.cl @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tahiti -S -verify -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu bonaire -S -verify -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu carrizo -S -verify -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -verify -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S -verify -o - %s +// REQUIRES: amdgpu-registered-target + +typedef unsigned int v4u32 __attribute__((ext_vector_type(4))); + +void test_amdgcn_struct_buffer_load_lds(v4u32 rsrc, __local void* lds, int index, int offset, int soffset, int x) { + __builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, 4, index, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-struct-ptr-buffer-load-lds-target-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-struct-ptr-buffer-load-lds-target-error.cl new file mode 100644 index 0000000000000..d438afcf6ce56 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-struct-ptr-buffer-load-lds-target-error.cl @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tahiti -S -verify -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu bonaire -S -verify -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu carrizo -S -verify -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -verify -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S -verify -o - %s +// REQUIRES: amdgpu-registered-target + +void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int index, int offset, int soffset, int x) { + __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, index, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index a57eb4a6dba49..b3dc2fc9bdf93 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1861,7 +1861,9 @@ def int_amdgcn_struct_tbuffer_store : DefaultAttrsIntrinsic < ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; -class AMDGPURawBufferLoadLDS : Intrinsic < +class AMDGPURawBufferLoadLDS : + ClangBuiltin<"__builtin_amdgcn_raw_buffer_load_lds">, +Intrinsic < [], [llvm_v4i32_ty, // rsrc(SGPR) LLVMQualPointerType<3>, // LDS base offset @@ -1904,7 +1906,9 @@ class AMDGPURawPtrBufferLoadLDS : ImmArg<ArgIndex<6>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_ptr_buffer_load_lds : AMDGPURawPtrBufferLoadLDS; -class AMDGPUStructBufferLoadLDS : Intrinsic < +class AMDGPUStructBufferLoadLDS : + ClangBuiltin<"__builtin_amdgcn_struct_buffer_load_lds">, + Intrinsic < [], [llvm_v4i32_ty, // rsrc(SGPR) LLVMQualPointerType<3>, // LDS base offset @@ -1924,7 +1928,9 @@ class AMDGPUStructBufferLoadLDS : Intrinsic < ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS; -class AMDGPUStructPtrBufferLoadLDS : Intrinsic < +class AMDGPUStructPtrBufferLoadLDS : + ClangBuiltin<"__builtin_amdgcn_struct_ptr_buffer_load_lds">, + Intrinsic < [], [AMDGPUBufferRsrcTy, // rsrc(SGPR) LLVMQualPointerType<3>, // LDS base offset _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits