llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-codegen

Author: Alex Voicu (AlexVlx)

<details>
<summary>Changes</summary>

Back in the dawn of time, 
[D35082](https://github.com/llvm/llvm-project/commit/b7318e02c1094a31fda53dcdb27d3ca60277e091)
 added explicit representation for the OpenCL `private` address space, and 
modelled OpenCL semantics which imply almost always defaulting to it. 
Unfortunately, whilst the patch was comprehensive, it either missed a pair of 
cases or said cases came about after and missed the semantics:

- we should not cast the result of `alloca`s to the Target's mapping for 
`LangAS::Default`, as this is not guaranteed to be `private` (and, for sanity, 
it shouldn't be);
- `sret` params point to a stack i.e. `private` location, and not to the 
generic AS (aka we should handle this similarly to e.g. `byref`).

This patch addresses both. The `alloca` change will allow for the subsequent 
deletion of a bunch of `stripPointerCasts` calls, as @<!-- -->arsenm suggested 
in #<!-- -->112442. An alternative would be to move to always using 
`CreateTempAllocaWithoutCast` when compiling for OCL, but that'd be noisier.

---

Patch is 176.93 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/113930.diff


13 Files Affected:

- (modified) clang/lib/CodeGen/CGCall.cpp (+2) 
- (modified) clang/lib/CodeGen/CGExpr.cpp (+5-1) 
- (modified) clang/test/CodeGenOpenCL/addr-space-struct-arg.cl (+70-99) 
- (modified) clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl (+15-20) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl (+70-99) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+219-276) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-nullptr.cl (+14-14) 
- (modified) clang/test/CodeGenOpenCL/atomic-ops.cl (+4-7) 
- (modified) clang/test/CodeGenOpenCL/blocks.cl (+12-11) 
- (modified) clang/test/CodeGenOpenCL/builtins-alloca.cl (+48-80) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl (+62-93) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl (+12-18) 
- (modified) clang/test/Index/pipe-size.cl (+2-2) 


``````````diff
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 1949b4ceb7f204..23a33dbb78292e 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1648,6 +1648,8 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
   // Add type for sret argument.
   if (IRFunctionArgs.hasSRetArg()) {
     QualType Ret = FI.getReturnType();
+    if (CGM.getLangOpts().OpenCL)
+      Ret = getContext().getAddrSpaceQualType(Ret, LangAS::opencl_private);
     unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(Ret);
     ArgTypes[IRFunctionArgs.getSRetArgNo()] =
         llvm::PointerType::get(getLLVMContext(), AddressSpace);
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index e0ea65bcaf3637..1e2e321a31730f 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -108,11 +108,15 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type 
*Ty, CharUnits Align,
   if (AllocaAddr)
     *AllocaAddr = Alloca;
   llvm::Value *V = Alloca.getPointer();
+  assert((!getLangOpts().OpenCL ||
+          CGM.getTarget().getTargetAddressSpace(getASTAllocaAddressSpace()) ==
+            CGM.getTarget().getTargetAddressSpace(LangAS::opencl_private)) &&
+          "For OpenCL allocas must allocate in the private address space!");
   // Alloca always returns a pointer in alloca address space, which may
   // be different from the type defined by the language. For example,
   // in C++ the auto variables are in the default address space. Therefore
   // cast alloca to the default address space when necessary.
-  if (getASTAllocaAddressSpace() != LangAS::Default) {
+  if (!getLangOpts().OpenCL && getASTAllocaAddressSpace() != LangAS::Default) {
     auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default);
     llvm::IRBuilderBase::InsertPointGuard IPG(Builder);
     // When ArraySize is nullptr, alloca is inserted at AllocaInsertPt,
diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl 
b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 57d056b0ff9d51..7377b5bcbc347a 100644
--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -69,11 +69,9 @@ struct LargeStructOneMember g_s;
 // AMDGCN20-NEXT:  [[ENTRY:.*:]]
 // AMDGCN20-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, 
addrspace(5)
 // AMDGCN20-NEXT:    [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, 
addrspace(5)
-// AMDGCN20-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
-// AMDGCN20-NEXT:    [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
-// AMDGCN20-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw 
[[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
-// AMDGCN20-NEXT:    store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 
4
-// AMDGCN20-NEXT:    [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr 
[[RETVAL_ASCAST]], align 4
+// AMDGCN20-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw 
[[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0
+// AMDGCN20-NEXT:    store [9 x i32] [[IN_COERCE]], ptr addrspace(5) 
[[COERCE_DIVE]], align 4
+// AMDGCN20-NEXT:    [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) 
[[RETVAL]], align 4
 // AMDGCN20-NEXT:    ret [[STRUCT_MAT4X4]] [[TMP0]]
 //
 // SPIR-LABEL: define dso_local spir_func void @foo(
@@ -152,22 +150,19 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
 // AMDGCN20-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
 // AMDGCN20-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
 // AMDGCN20-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, 
addrspace(5)
-// AMDGCN20-NEXT:    [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[IN_ADDR]] to ptr
-// AMDGCN20-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[OUT_ADDR]] to ptr
-// AMDGCN20-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[TMP]] to ptr
-// AMDGCN20-NEXT:    store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], 
align 8
-// AMDGCN20-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], 
align 8
-// AMDGCN20-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr 
[[OUT_ADDR_ASCAST]], align 8
+// AMDGCN20-NEXT:    store ptr addrspace(1) [[IN]], ptr addrspace(5) 
[[IN_ADDR]], align 8
+// AMDGCN20-NEXT:    store ptr addrspace(1) [[OUT]], ptr addrspace(5) 
[[OUT_ADDR]], align 8
+// AMDGCN20-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) 
[[OUT_ADDR]], align 8
 // AMDGCN20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds 
[[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
-// AMDGCN20-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr 
[[IN_ADDR_ASCAST]], align 8
+// AMDGCN20-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) 
[[IN_ADDR]], align 8
 // AMDGCN20-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds 
[[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
 // AMDGCN20-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw 
[[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
 // AMDGCN20-NEXT:    [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], 
align 4
 // AMDGCN20-NEXT:    [[CALL:%.*]] = call [[STRUCT_MAT4X4]] 
@[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] 
[[TMP3]]) #[[ATTR3:[0-9]+]]
-// AMDGCN20-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw 
[[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0
+// AMDGCN20-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw 
[[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
 // AMDGCN20-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
-// AMDGCN20-NEXT:    store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4
-// AMDGCN20-NEXT:    call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 
[[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false)
+// AMDGCN20-NEXT:    store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], 
align 4
+// AMDGCN20-NEXT:    call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 
[[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
 // AMDGCN20-NEXT:    ret void
 //
 // SPIR-LABEL: define dso_local spir_kernel void @ker(
@@ -250,11 +245,10 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
 // AMDGCN-NEXT:    ret void
 //
 // AMDGCN20-LABEL: define dso_local void @foo_large(
-// AMDGCN20-SAME: ptr dead_on_unwind noalias writable 
sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) 
noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
+// AMDGCN20-SAME: ptr addrspace(5) dead_on_unwind noalias writable 
sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) 
noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
 // AMDGCN20-NEXT:  [[ENTRY:.*:]]
-// AMDGCN20-NEXT:    [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, 
addrspace(5)
-// AMDGCN20-NEXT:    [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to 
ptr
-// AMDGCN20-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr 
addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
+// AMDGCN20-NEXT:    [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, 
addrspace(5)
+// AMDGCN20-NEXT:    call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 
[[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
 // AMDGCN20-NEXT:    ret void
 //
 // SPIR-LABEL: define dso_local spir_func void @foo_large(
@@ -325,18 +319,15 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) 
{
 // AMDGCN20-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
 // AMDGCN20-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, 
addrspace(5)
 // AMDGCN20-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], 
align 4, addrspace(5)
-// AMDGCN20-NEXT:    [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[IN_ADDR]] to ptr
-// AMDGCN20-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[OUT_ADDR]] to ptr
-// AMDGCN20-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[TMP]] to ptr
-// AMDGCN20-NEXT:    store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], 
align 8
-// AMDGCN20-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], 
align 8
-// AMDGCN20-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr 
[[OUT_ADDR_ASCAST]], align 8
+// AMDGCN20-NEXT:    store ptr addrspace(1) [[IN]], ptr addrspace(5) 
[[IN_ADDR]], align 8
+// AMDGCN20-NEXT:    store ptr addrspace(1) [[OUT]], ptr addrspace(5) 
[[OUT_ADDR]], align 8
+// AMDGCN20-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) 
[[OUT_ADDR]], align 8
 // AMDGCN20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds 
[[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
-// AMDGCN20-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr 
[[IN_ADDR_ASCAST]], align 8
+// AMDGCN20-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) 
[[IN_ADDR]], align 8
 // AMDGCN20-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds 
[[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
 // AMDGCN20-NEXT:    call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 
[[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
-// AMDGCN20-NEXT:    call void @foo_large(ptr dead_on_unwind writable 
sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef 
byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
-// AMDGCN20-NEXT:    call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 
[[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false)
+// AMDGCN20-NEXT:    call void @foo_large(ptr addrspace(5) dead_on_unwind 
writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef 
byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
+// AMDGCN20-NEXT:    call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 
[[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false)
 // AMDGCN20-NEXT:    ret void
 //
 // SPIR-LABEL: define dso_local spir_kernel void @ker_large(
@@ -428,14 +419,12 @@ kernel void ker_large(global Mat32X32 *in, global 
Mat64X64 *out) {
 // AMDGCN20-NEXT:  [[ENTRY:.*:]]
 // AMDGCN20-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 
8, addrspace(5)
 // AMDGCN20-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, 
addrspace(5)
-// AMDGCN20-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
-// AMDGCN20-NEXT:    [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
-// AMDGCN20-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw 
[[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
-// AMDGCN20-NEXT:    store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
-// AMDGCN20-NEXT:    store <2 x i32> zeroinitializer, ptr 
[[DOTCOMPOUNDLITERAL_ASCAST]], align 8
-// AMDGCN20-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr 
[[DOTCOMPOUNDLITERAL_ASCAST]], align 8
-// AMDGCN20-NEXT:    [[X:%.*]] = getelementptr inbounds nuw 
[[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
-// AMDGCN20-NEXT:    store <2 x i32> [[TMP0]], ptr [[X]], align 8
+// AMDGCN20-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw 
[[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
+// AMDGCN20-NEXT:    store <2 x i32> [[U_COERCE]], ptr addrspace(5) 
[[COERCE_DIVE]], align 8
+// AMDGCN20-NEXT:    store <2 x i32> zeroinitializer, ptr addrspace(5) 
[[DOTCOMPOUNDLITERAL]], align 8
+// AMDGCN20-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) 
[[DOTCOMPOUNDLITERAL]], align 8
+// AMDGCN20-NEXT:    [[X:%.*]] = getelementptr inbounds nuw 
[[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
+// AMDGCN20-NEXT:    store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8
 // AMDGCN20-NEXT:    ret void
 //
 // SPIR-LABEL: define dso_local spir_func void @FuncOneMember(
@@ -508,16 +497,14 @@ void FuncOneMember(struct StructOneMember u) {
 // AMDGCN20-LABEL: define dso_local void @FuncOneLargeMember(
 // AMDGCN20-SAME: ptr addrspace(5) noundef 
byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
 // AMDGCN20-NEXT:  [[ENTRY:.*:]]
-// AMDGCN20-NEXT:    [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], 
align 8, addrspace(5)
+// AMDGCN20-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 
8, addrspace(5)
 // AMDGCN20-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, 
addrspace(5)
-// AMDGCN20-NEXT:    [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to 
ptr
-// AMDGCN20-NEXT:    [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
-// AMDGCN20-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr 
addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
-// AMDGCN20-NEXT:    store <2 x i32> zeroinitializer, ptr 
[[DOTCOMPOUNDLITERAL_ASCAST]], align 8
-// AMDGCN20-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr 
[[DOTCOMPOUNDLITERAL_ASCAST]], align 8
-// AMDGCN20-NEXT:    [[X:%.*]] = getelementptr inbounds nuw 
[[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
-// AMDGCN20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x 
i32>], ptr [[X]], i64 0, i64 0
-// AMDGCN20-NEXT:    store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
+// AMDGCN20-NEXT:    call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 
[[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
+// AMDGCN20-NEXT:    store <2 x i32> zeroinitializer, ptr addrspace(5) 
[[DOTCOMPOUNDLITERAL]], align 8
+// AMDGCN20-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) 
[[DOTCOMPOUNDLITERAL]], align 8
+// AMDGCN20-NEXT:    [[X:%.*]] = getelementptr inbounds nuw 
[[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
+// AMDGCN20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x 
i32>], ptr addrspace(5) [[X]], i64 0, i64 0
+// AMDGCN20-NEXT:    store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], 
align 8
 // AMDGCN20-NEXT:    ret void
 //
 // SPIR-LABEL: define dso_local spir_func void @FuncOneLargeMember(
@@ -656,10 +643,7 @@ kernel void test_indirect_arg_local(void) {
 // AMDGCN20-SAME: ) #[[ATTR0]] {
 // AMDGCN20-NEXT:  [[ENTRY:.*:]]
 // AMDGCN20-NEXT:    [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], 
align 8, addrspace(5)
-// AMDGCN20-NEXT:    [[BYVAL_TEMP:%.*]] = alloca 
[[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
-// AMDGCN20-NEXT:    [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[P_S]] to ptr
-// AMDGCN20-NEXT:    call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 
[[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false)
-// AMDGCN20-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef 
byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
+// AMDGCN20-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef 
byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]]
 // AMDGCN20-NEXT:    ret void
 //
 // SPIR-LABEL: define dso_local spir_func void @test_indirect_arg_private(
@@ -710,11 +694,10 @@ void test_indirect_arg_private(void) {
 // AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] 
!kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual 
[[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type 
[[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
 // AMDGCN20-NEXT:  [[ENTRY:.*:]]
 // AMDGCN20-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 
8, addrspace(5)
-// AMDGCN20-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
-// AMDGCN20-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw 
[[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
-// AMDGCN20-NEXT:    store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
-// AMDGCN20-NEXT:    [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw 
[[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
-// AMDGCN20-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], 
align 8
+// AMDGCN20-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw 
[[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
+// AMDGCN20-NEXT:    store <2 x i32> [[U_COERCE]], ptr addrspace(5) 
[[COERCE_DIVE]], align 8
+// AMDGCN20-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw 
[[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
+// AMDGCN20-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) 
[[COERCE_DIVE1]], align 8
 // AMDGCN20-NEXT:    call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
 // AMDGCN20-NEXT:    ret void
 //
@@ -777,9 +760,8 @@ kernel void KernelOneMember(struct StructOneMember u) {
 // AMDGCN20-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] 
!kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] 
!kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] 
!kernel_arg_type_qual [[META13]] {
 // AMDGCN20-NEXT:  [[ENTRY:.*:]]
 // AMDGCN20-NEXT:    [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
-// AMDGCN20-NEXT:    [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[U_ADDR]] to ptr
-// AMDGCN20-NEXT:    store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], 
align 8
-// AMDGCN20-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr 
[[U_ADDR_ASCAST]], align 8
+// AMDGCN20-NEXT:    store ptr addrspace(1) [[U]], ptr addrspace(5) 
[[U_ADDR]], align 8
+// AMDGCN20-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) 
[[U_ADDR]], align 8
 // AMDGCN20-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw 
[[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
 // AMDGCN20-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) 
[[COERCE_DIVE]], align 8
 // AMDGCN20-NEXT:    call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
@@ -843,13 +825,10 @@ kernel void KernelOneMemberSpir(global struct 
StructOneMember* u) {
 // AMDGCN20-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) 
#[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] 
!kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] 
!kernel_arg_type_qual [[META13]] {
 // AMDGCN20-NEXT:  [[ENTRY:.*:]]
 // AMDGCN20-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 
8, addrspace(5)
-// AMDGCN20-NEXT:    [[BYVAL_TEMP:%.*]] = alloca 
[[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
-// AMDGCN20-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
-// AMDGCN20-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw 
[[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
+// AMDGCN20-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw 
[[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
 // AMDGCN20-NEXT:    [[TMP1:%.*]] = extractvalue 
[[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
-// AMDGCN20-NEXT:    store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
-// AMDGCN20-NEXT:    call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 
[[BYVAL_TEMP]], ptr align 8 [[U1]], i64 800, i1 false)
-// AMDGCN20-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef 
byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
+// AMDGCN20-NEXT:    store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) 
[[TMP0]], align 8
+// AMDGCN20-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef 
byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
 // AMDGCN20-NEXT:    ret void
 //
 // SPIR-LABEL: define dso_local spir_kernel void @KernelLargeOneMember(
@@ -915,16 +894,14 @@ kernel void KernelLargeOneMember(struct 
LargeStructOneMember u) {
 // AMDGCN20-NEXT:  [[ENTRY:.*:]]
 // AMDGCN20-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 
8, addrspace(5)
 // AMDGCN20-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, 
addrspace(5)
-// AMDGCN20-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
-// AMDGCN20-NEXT:    [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
-// AMDGCN20-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw 
[[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
-// AMDGCN20-NEXT:    store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8
-// AMDGCN20-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw 
[[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
-// AMDGCN20-NEXT:    store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8
-// AMDGCN20-NEXT:    store <2 x i32> zeroinitializer, ptr 
[[DOTCOMPOUNDLITERAL_ASCAST]], align 8
-//...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/113930
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to