================
@@ -0,0 +1,48 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx950 -emit-llvm 
-fcuda-is-device -o - %s | FileCheck %s
+
+// COM: Most tests are in the OpenCL semastics, this is just a verification 
for HIP
+
+#define __device__ __attribute__((device))
+#define __shared__ __attribute__((shared))
+
+typedef unsigned int u32;
+
+// CHECK-LABEL: define dso_local void @_Z20test_load_to_lds_u32PjS_(
+// CHECK-SAME: ptr noundef [[SRC:%.*]], ptr noundef [[DST:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SRC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[SRC_ADDR]] to ptr
+// CHECK-NEXT:    [[DST_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[DST_ADDR]] to ptr
+// CHECK-NEXT:    store ptr [[SRC]], ptr [[SRC_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store ptr [[DST]], ptr [[DST_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[SRC_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[DST_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(3)
+// CHECK-NEXT:    call void @llvm.amdgcn.load.to.lds.p0(ptr [[TMP0]], ptr 
addrspace(3) [[TMP2]], i32 4, i32 0, i32 0)
+// CHECK-NEXT:    ret void
+//
+__device__ void test_load_to_lds_u32(u32* src, __shared__ u32 *dst) {
+  __builtin_amdgcn_load_to_lds(src, dst, /*size=*/4, /*offset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: define dso_local void @_Z20test_load_to_lds_128PvS_(
+// CHECK-SAME: ptr noundef [[SRC:%.*]], ptr noundef [[DST:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SRC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[SRC_ADDR]] to ptr
+// CHECK-NEXT:    [[DST_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[DST_ADDR]] to ptr
+// CHECK-NEXT:    store ptr [[SRC]], ptr [[SRC_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store ptr [[DST]], ptr [[DST_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[SRC_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[DST_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(3)
+// CHECK-NEXT:    call void @llvm.amdgcn.load.to.lds.p0(ptr [[TMP0]], ptr 
addrspace(3) [[TMP2]], i32 16, i32 0, i32 0)
+// CHECK-NEXT:    ret void
+//
+__device__ void test_load_to_lds_128(void* src, __shared__ void *dst) {
+  __builtin_amdgcn_load_to_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0);
+}
----------------
arsenm wrote:

Also test where dst isn't qualified with __shared__ 

https://github.com/llvm/llvm-project/pull/137425
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to