https://github.com/JonChesterfield created 
https://github.com/llvm/llvm-project/pull/137678

We have a clang builtin for one of four very similar IR intrinsics. This patch 
adds builtins for the other three.

The IR intrinsics were introduced in https://reviews.llvm.org/D124884. The 
request from composable kernels was for llvm.amdgcn.struct.buffer.load.lds, but 
as that only brings us to 2/4 with clang builtins, this patch fills in the 
remainder at the same time.

The differences are whether the first argument is a v4u32 or a 
__amdgpu_buffer_rsrc_t and whether there is an index argument added to the list.

Test cases are in existing files where possible. Merging the four 
single-function expected-error cases into a single file stops check-clang 
from passing; I don't understand what lit/verify quirk I'm missing there.

The existing semantic checking does the right thing once a few more cases are 
added, and ClangBuiltin<> does the right thing as well. The new builtins are 
named after the IR intrinsics.

Fixes AMD internal SWDEV-529245

>From 23b3dfcb25709ca7c00ce07d22526526c45978d3 Mon Sep 17 00:00:00 2001
From: Jon Chesterfield <jonathanchesterfi...@gmail.com>
Date: Mon, 28 Apr 2025 16:07:31 +0100
Subject: [PATCH] [clang][amdgpu] Add builtins for raw/struct buffer lds load

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  |  3 ++
 clang/lib/Sema/SemaAMDGPU.cpp                 |  5 +++-
 .../builtins-amdgcn-raw-buffer-load-lds.cl    | 29 +++++++++++++++++++
 ...amdgcn-raw-buffer-load-lds-target-error.cl | 12 ++++++++
 ...ns-amdgcn-raw-ptr-buffer-load-lds-error.cl | 23 +++++++++++++++
 ...gcn-struct-buffer-load-lds-target-error.cl | 12 ++++++++
 ...struct-ptr-buffer-load-lds-target-error.cl | 10 +++++++
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      | 12 ++++++--
 8 files changed, 102 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-lds-target-error.cl
 create mode 100644 
clang/test/SemaOpenCL/builtins-amdgcn-struct-buffer-load-lds-target-error.cl
 create mode 100644 
clang/test/SemaOpenCL/builtins-amdgcn-struct-ptr-buffer-load-lds-target-error.cl

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 39fef9e4601f8..98e060658778a 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -163,7 +163,10 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, 
"V2UiQbiiIi", "n")
 BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n")
 BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n")
 
+TARGET_BUILTIN(__builtin_amdgcn_raw_buffer_load_lds, "vV4Uiv*3IUiiiIiIi", "t", 
"vmem-to-lds-load-insts")
 TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", 
"t", "vmem-to-lds-load-insts")
+TARGET_BUILTIN(__builtin_amdgcn_struct_buffer_load_lds, "vV4Uiv*3IUiiiiIiIi", 
"t", "vmem-to-lds-load-insts")
+TARGET_BUILTIN(__builtin_amdgcn_struct_ptr_buffer_load_lds, 
"vQbv*3IUiiiiIiIi", "t", "vmem-to-lds-load-insts")
 
 
//===----------------------------------------------------------------------===//
 // Ballot builtins.
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index a6366aceec2a6..69db969b6bfbb 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -35,8 +35,11 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned 
BuiltinID,
       Builtin::evaluateRequiredTargetFeatures("gfx950-insts", 
CallerFeatureMap);
 
   switch (BuiltinID) {
+  case AMDGPU::BI__builtin_amdgcn_global_load_lds:
+  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_lds:
   case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds:
-  case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
+  case AMDGPU::BI__builtin_amdgcn_struct_ptr_buffer_load_lds:
+  case AMDGPU::BI__builtin_amdgcn_struct_buffer_load_lds: {
     constexpr const int SizeIdx = 2;
     llvm::APSInt Size;
     Expr *ArgExpr = TheCall->getArg(SizeIdx);
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl
index 8256b61525f9d..5f38cafb6a21e 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load-lds.cl
@@ -2,6 +2,17 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a 
-emit-llvm -o - %s | FileCheck %s
 
+typedef unsigned int v4u32 __attribute__((ext_vector_type(4)));
+
+// CHECK-LABEL: @test_amdgcn_raw_buffer_load_lds(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> 
[[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 1, i32 [[OFFSET:%.*]], i32 
[[SOFFSET:%.*]], i32 2, i32 3)
+// CHECK-NEXT:    ret void
+//
+void test_amdgcn_raw_buffer_load_lds(v4u32 rsrc, __local void * lds, int 
offset, int soffset) {
+    __builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, 1, offset, soffset, 2, 3);
+}
+
 // CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_lds(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    tail call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr 
addrspace(8) [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 1, i32 
[[OFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3)
@@ -10,3 +21,21 @@
 void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local 
void * lds, int offset, int soffset) {
     __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 1, offset, soffset, 2, 
3);
 }
+
+// CHECK-LABEL: @test_amdgcn_struct_buffer_load_lds(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> 
[[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 4, i32 [[VINDEX:%.*]], i32 
[[VOFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3)
+// CHECK-NEXT:    ret void
+//
+void test_amdgcn_struct_buffer_load_lds(v4u32 rsrc, __local void * lds, int 
size, int vindex, int voffset, int soffset) {
+    __builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, 4, vindex, voffset, 
soffset, 2, 3);
+}
+
+// CHECK-LABEL: @test_amdgcn_struct_ptr_buffer_load_lds(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr 
addrspace(8) [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 4, i32 
[[VINDEX:%.*]], i32 [[VOFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3)
+// CHECK-NEXT:    ret void
+//
+void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, 
__local void * lds, int size, int vindex, int voffset, int soffset) {
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, vindex, voffset, 
soffset, 2, 3);
+}
diff --git 
a/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-lds-target-error.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-lds-target-error.cl
new file mode 100644
index 0000000000000..234c6150a4ff5
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-lds-target-error.cl
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tahiti -S 
-verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu bonaire -S 
-verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu carrizo -S 
-verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S 
-verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S 
-verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef unsigned int v4u32 __attribute__((ext_vector_type(4)));
+
+void test_amdgcn_raw_buffer_load_lds(v4u32 rsrc, __local void* lds, int 
offset, int soffset, int x) {
+  __builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, 0); 
//expected-error{{needs target feature vmem-to-lds-load-insts}}
+}
diff --git 
a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl
index 5915393ae7f56..3af99ab58ff93 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl
@@ -2,9 +2,32 @@
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -S 
-verify=gfx950,expected  -o - %s
 // REQUIRES: amdgpu-registered-target
 
+typedef unsigned int v4u32 __attribute__((ext_vector_type(4)));
+
+void test_amdgcn_raw_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local 
void* lds, int offset, int soffset, int x) {
+  __builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, x, offset, soffset, 0, 0); 
//expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_lds' must be a 
constant integer}}
+  __builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, 4, offset, soffset, x, 0); 
//expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_lds' must be a 
constant integer}}
+  __builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, x); 
//expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_lds' must be a 
constant integer}}
+  __builtin_amdgcn_raw_buffer_load_lds(rsrc, lds, 3, offset, soffset, 0, 0); 
//expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 
16}} gfx90a-note{{size must be 1, 2, or 4}}
+}
+
 void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local 
void* lds, int offset, int soffset, int x) {
   __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, x, offset, soffset, 0, 
0); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' 
must be a constant integer}}
   __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, x, 
0); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' 
must be a constant integer}}
   __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, 
x); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' 
must be a constant integer}}
   __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 3, offset, soffset, 0, 
0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 
12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
 }
+
+void test_amdgcn_struct_buffer_load_lds(v4u32 rsrc, __local void* lds, int 
index, int offset, int soffset, int x) {
+  __builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, x, index, offset, 
soffset, 0, 0); //expected-error{{argument to 
'__builtin_amdgcn_struct_buffer_load_lds' must be a constant integer}}
+  __builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, 4, index, offset, 
soffset, x, 0); //expected-error{{argument to 
'__builtin_amdgcn_struct_buffer_load_lds' must be a constant integer}}
+  __builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, 4, index, offset, 
soffset, 0, x); //expected-error{{argument to 
'__builtin_amdgcn_struct_buffer_load_lds' must be a constant integer}}
+  __builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, 3, index, offset, 
soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must 
be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
+}
+
+void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, 
__local void* lds, int index, int offset, int soffset, int x) {
+  __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, x, index, offset, 
soffset, 0, 0); //expected-error{{argument to 
'__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
+  __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, index, offset, 
soffset, x, 0); //expected-error{{argument to 
'__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
+  __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, index, offset, 
soffset, 0, x); //expected-error{{argument to 
'__builtin_amdgcn_struct_ptr_buffer_load_lds' must be a constant integer}}
+  __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 3, index, offset, 
soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must 
be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
+}
diff --git 
a/clang/test/SemaOpenCL/builtins-amdgcn-struct-buffer-load-lds-target-error.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-struct-buffer-load-lds-target-error.cl
new file mode 100644
index 0000000000000..0b529fa0aa2df
--- /dev/null
+++ 
b/clang/test/SemaOpenCL/builtins-amdgcn-struct-buffer-load-lds-target-error.cl
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tahiti -S 
-verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu bonaire -S 
-verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu carrizo -S 
-verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S 
-verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S 
-verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef unsigned int v4u32 __attribute__((ext_vector_type(4)));
+
+void test_amdgcn_struct_buffer_load_lds(v4u32 rsrc, __local void* lds, int 
index, int offset, int soffset, int x) {
+  __builtin_amdgcn_struct_buffer_load_lds(rsrc, lds, 4, index, offset, 
soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}}
+}
diff --git 
a/clang/test/SemaOpenCL/builtins-amdgcn-struct-ptr-buffer-load-lds-target-error.cl
 
b/clang/test/SemaOpenCL/builtins-amdgcn-struct-ptr-buffer-load-lds-target-error.cl
new file mode 100644
index 0000000000000..d438afcf6ce56
--- /dev/null
+++ 
b/clang/test/SemaOpenCL/builtins-amdgcn-struct-ptr-buffer-load-lds-target-error.cl
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tahiti -S 
-verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu bonaire -S 
-verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu carrizo -S 
-verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S 
-verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S 
-verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+void test_amdgcn_struct_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, 
__local void* lds, int index, int offset, int soffset, int x) {
+  __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, lds, 4, index, offset, 
soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td 
b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index a57eb4a6dba49..b3dc2fc9bdf93 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1861,7 +1861,9 @@ def int_amdgcn_struct_tbuffer_store : 
DefaultAttrsIntrinsic <
      ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<1>;
 
-class AMDGPURawBufferLoadLDS : Intrinsic <
+class AMDGPURawBufferLoadLDS :
+  ClangBuiltin<"__builtin_amdgcn_raw_buffer_load_lds">,
+Intrinsic <
   [],
   [llvm_v4i32_ty,             // rsrc(SGPR)
    LLVMQualPointerType<3>,    // LDS base offset
@@ -1904,7 +1906,9 @@ class AMDGPURawPtrBufferLoadLDS :
    ImmArg<ArgIndex<6>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, 
AMDGPURsrcIntrinsic<0>;
 def int_amdgcn_raw_ptr_buffer_load_lds : AMDGPURawPtrBufferLoadLDS;
 
-class AMDGPUStructBufferLoadLDS : Intrinsic <
+class AMDGPUStructBufferLoadLDS :
+  ClangBuiltin<"__builtin_amdgcn_struct_buffer_load_lds">,
+  Intrinsic <
   [],
   [llvm_v4i32_ty,             // rsrc(SGPR)
    LLVMQualPointerType<3>,    // LDS base offset
@@ -1924,7 +1928,9 @@ class AMDGPUStructBufferLoadLDS : Intrinsic <
    ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, 
AMDGPURsrcIntrinsic<0>;
 def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS;
 
-class AMDGPUStructPtrBufferLoadLDS : Intrinsic <
+class AMDGPUStructPtrBufferLoadLDS :
+  ClangBuiltin<"__builtin_amdgcn_struct_ptr_buffer_load_lds">,
+  Intrinsic <
   [],
   [AMDGPUBufferRsrcTy,        // rsrc(SGPR)
    LLVMQualPointerType<3>,    // LDS base offset

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to