https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/114062
>From d2d2d3d5db3f639aab178f9ca9a20db2842d2b65 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Tue, 29 Oct 2024 14:20:44 +0000 Subject: [PATCH 1/2] `sret` args should always point to the `alloca` AS, so we can use that. --- clang/lib/CodeGen/CGCall.cpp | 15 ++++++++------- clang/test/CodeGen/partial-reinitialization2.c | 4 ++-- clang/test/CodeGen/sret.c | 11 +++++++++++ 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 8f4f5d3ed81601..56acfae7ae9e51 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1672,8 +1672,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { // Add type for sret argument. if (IRFunctionArgs.hasSRetArg()) { - QualType Ret = FI.getReturnType(); - unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(Ret); + unsigned AddressSpace = CGM.getDataLayout().getAllocaAddrSpace(); ArgTypes[IRFunctionArgs.getSRetArgNo()] = llvm::PointerType::get(getLLVMContext(), AddressSpace); } @@ -5145,7 +5144,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // If the call returns a temporary with struct return, create a temporary // alloca to hold the result, unless one is given to us. Address SRetPtr = Address::invalid(); - RawAddress SRetAlloca = RawAddress::invalid(); llvm::Value *UnusedReturnSizePtr = nullptr; if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { // For virtual function pointer thunks and musttail calls, we must always @@ -5159,16 +5157,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } else if (!ReturnValue.isNull()) { SRetPtr = ReturnValue.getAddress(); } else { - SRetPtr = CreateMemTemp(RetTy, "tmp", &SRetAlloca); + SRetPtr = CreateMemTempWithoutCast(RetTy, "tmp"); if (HaveInsertPoint() && ReturnValue.isUnused()) { llvm::TypeSize size = CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(RetTy)); - UnusedReturnSizePtr = EmitLifetimeStart(size, SRetAlloca.getPointer()); + UnusedReturnSizePtr = EmitLifetimeStart(size, SRetPtr.getBasePointer()); } } if (IRFunctionArgs.hasSRetArg()) { + // If the caller allocated the return slot, it is possible that the + // alloca was AS casted to the default as, so we ensure the cast is + // stripped before binding to the sret arg, which is in the allocaAS. IRCallArgs[IRFunctionArgs.getSRetArgNo()] = - getAsNaturalPointerTo(SRetPtr, RetTy); + getAsNaturalPointerTo(SRetPtr, RetTy)->stripPointerCasts(); } else if (RetAI.isInAlloca()) { Address Addr = Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex()); @@ -5740,7 +5741,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // pop this cleanup later on. Being eager about this is OK, since this // temporary is 'invisible' outside of the callee. if (UnusedReturnSizePtr) - pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetAlloca, + pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetPtr, UnusedReturnSizePtr); llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest(); diff --git a/clang/test/CodeGen/partial-reinitialization2.c b/clang/test/CodeGen/partial-reinitialization2.c index e709c1d4ad1ee1..7949a69555031e 100644 --- a/clang/test/CodeGen/partial-reinitialization2.c +++ b/clang/test/CodeGen/partial-reinitialization2.c @@ -91,8 +91,8 @@ void test5(void) // CHECK-LABEL: test6 void test6(void) { - // CHECK: [[LP:%[a-z0-9]+]] = getelementptr{{.*}}%struct.LLP2P2, ptr{{.*}}, i32 0, i32 0 - // CHECK: call {{.*}}get456789(ptr {{.*}}[[LP]]) + // CHECK: [[VAR:%[a-z0-9]+]] = alloca + // CHECK: call {{.*}}get456789(ptr {{.*}}sret{{.*}} [[VAR]]) // CHECK: [[CALL:%[a-z0-9]+]] = call {{.*}}@get235() // CHECK: store{{.*}}[[CALL]], {{.*}}[[TMP0:%[a-z0-9.]+]] diff --git a/clang/test/CodeGen/sret.c b/clang/test/CodeGen/sret.c index 6d905e89b2c6fd..3b4914f29d2bfe 100644 --- a/clang/test/CodeGen/sret.c +++ b/clang/test/CodeGen/sret.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 %s -Wno-strict-prototypes -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -Wno-strict-prototypes -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefix=NONZEROALLOCAAS %s struct abc { long a; @@ -6,18 +7,28 @@ struct abc { long c; long d; long e; + long f; + long g; + long h; + long i; + long j; }; struct abc foo1(void); // CHECK-DAG: declare {{.*}} @foo1(ptr dead_on_unwind writable sret(%struct.abc) +// NONZEROALLOCAAS-DAG: declare {{.*}} @foo1(ptr addrspace(5) dead_on_unwind writable sret(%struct.abc) struct abc foo2(); // CHECK-DAG: declare {{.*}} @foo2(ptr dead_on_unwind writable sret(%struct.abc) +// NONZEROALLOCAAS-DAG: declare {{.*}} @foo2(ptr addrspace(5) dead_on_unwind writable sret(%struct.abc) struct abc foo3(void){} // CHECK-DAG: define {{.*}} @foo3(ptr dead_on_unwind noalias writable sret(%struct.abc) +// NONZEROALLOCAAS-DAG: define {{.*}} @foo3(ptr addrspace(5) dead_on_unwind noalias writable sret(%struct.abc) void bar(void) { struct abc dummy1 = foo1(); // CHECK-DAG: call {{.*}} @foo1(ptr dead_on_unwind writable sret(%struct.abc) + // NONZEROALLOCAAS-DAG: call {{.*}} @foo1(ptr addrspace(5) dead_on_unwind writable sret(%struct.abc) struct abc dummy2 = foo2(); // CHECK-DAG: call {{.*}} @foo2(ptr dead_on_unwind writable sret(%struct.abc) + // NONZEROALLOCAAS-DAG: call {{.*}} @foo2(ptr addrspace(5) dead_on_unwind writable sret(%struct.abc) } >From b5a7df0a771cb70d60e58a8727a5d856219dacb3 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Tue, 29 Oct 2024 17:16:17 +0000 Subject: [PATCH 2/2] Fix broken tests. --- clang/test/CodeGenOpenCL/addr-space-struct-arg.cl | 4 ++-- clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl index 57d056b0ff9d51..4a1db2c3564a57 100644 --- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl @@ -250,7 +250,7 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { // AMDGCN-NEXT: ret void // // AMDGCN20-LABEL: define dso_local void @foo_large( -// AMDGCN20-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN20-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] // AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) // AMDGCN20-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr @@ -335,7 +335,7 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { // AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 // AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN20-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN20-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] // AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false) // AMDGCN20-NEXT: ret void // diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl index 084281a8cada46..c2b2e00d15e13f 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl @@ -91,7 +91,7 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { } // AMDGCN-LABEL: define dso_local void @foo_large( -// AMDGCN-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) // AMDGCN-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr @@ -120,7 +120,7 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] // AMDGCN-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false) // AMDGCN-NEXT: ret void // _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits