Author: Matt Arsenault Date: 2025-05-08T07:51:57+02:00 New Revision: a11d86461e7d7d9bce3d04a39ded1cad394239ca
URL: https://github.com/llvm/llvm-project/commit/a11d86461e7d7d9bce3d04a39ded1cad394239ca DIFF: https://github.com/llvm/llvm-project/commit/a11d86461e7d7d9bce3d04a39ded1cad394239ca.diff LOG: clang: Fix broken implicit cast to generic address space (#138863) This fixes emitting undefined behavior where a 64-bit generic pointer is written to a 32-bit slot allocated for a private pointer. This can be seen in test/CodeGenOpenCL/amdgcn-automatic-variable.cl's wrong_pointer_alloca. Added: Modified: clang/lib/CodeGen/CGDecl.cpp clang/lib/CodeGen/CGExpr.cpp clang/lib/CodeGen/CodeGenFunction.h clang/test/CodeGenOpenCL/addr-space-struct-arg.cl clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl clang/test/CodeGenOpenCL/amdgpu-nullptr.cl clang/test/CodeGenOpenCL/blocks.cl clang/test/CodeGenOpenCL/builtins-alloca.cl clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl clang/test/Index/pipe-size.cl Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index db34e2738b4cf..1e54e55c5abbb 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -1588,7 +1588,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Create the alloca. Note that we set the name separately from // building the instruction so that it's there even in no-asserts // builds. - address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName(), + address = CreateTempAlloca(allocaTy, Ty.getAddressSpace(), + allocaAlignment, D.getName(), /*ArraySize=*/nullptr, &AllocaAddr); // Don't emit lifetime markers for MSVC catch parameters. The lifetime of diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 6f5ead78f2b23..1a835c97decef 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -100,13 +100,11 @@ CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits Align, return RawAddress(Alloca, Ty, Align, KnownNonNull); } -/// CreateTempAlloca - This creates a alloca and inserts it into the entry -/// block. The alloca is casted to default address space if necessary. -RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, - const Twine &Name, +RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS, + CharUnits Align, const Twine &Name, llvm::Value *ArraySize, RawAddress *AllocaAddr) { - auto Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize); + RawAddress Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize); if (AllocaAddr) *AllocaAddr = Alloca; llvm::Value *V = Alloca.getPointer(); @@ -114,8 +112,9 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, // be diff erent from the type defined by the language. For example, // in C++ the auto variables are in the default address space. Therefore // cast alloca to the default address space when necessary. - if (getASTAllocaAddressSpace() != LangAS::Default) { - auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default); + + unsigned DestAddrSpace = getContext().getTargetAddressSpace(DestLangAS); + if (DestAddrSpace != Alloca.getAddressSpace()) { llvm::IRBuilderBase::InsertPointGuard IPG(Builder); // When ArraySize is nullptr, alloca is inserted at AllocaInsertPt, // otherwise alloca is inserted at the current insertion point of the @@ -123,8 +122,8 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, if (!ArraySize) Builder.SetInsertPoint(getPostAllocaInsertPoint()); V = getTargetHooks().performAddrSpaceCast( - *this, V, getASTAllocaAddressSpace(), LangAS::Default, - Builder.getPtrTy(DestAddrSpace), /*non-null*/ true); + *this, V, getASTAllocaAddressSpace(), DestLangAS, + Builder.getPtrTy(DestAddrSpace), /*IsNonNull=*/true); } return RawAddress(V, Ty, Align, KnownNonNull); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 561f8f6a2a2fb..c0bc3825f0188 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2861,10 +2861,28 @@ class CodeGenFunction : public CodeGenTypeCache { /// more efficient if the caller knows that the address will not be exposed. llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp", llvm::Value *ArraySize = nullptr); + + /// CreateTempAlloca - This creates a alloca and inserts it into the entry + /// block. The alloca is casted to the address space of \p UseAddrSpace if + /// necessary. + RawAddress CreateTempAlloca(llvm::Type *Ty, LangAS UseAddrSpace, + CharUnits align, const Twine &Name = "tmp", + llvm::Value *ArraySize = nullptr, + RawAddress *Alloca = nullptr); + + /// CreateTempAlloca - This creates a alloca and inserts it into the entry + /// block. The alloca is casted to default address space if necessary. + /// + /// FIXME: This version should be removed, and context should provide the + /// context use address space used instead of default. RawAddress CreateTempAlloca(llvm::Type *Ty, CharUnits align, const Twine &Name = "tmp", llvm::Value *ArraySize = nullptr, - RawAddress *Alloca = nullptr); + RawAddress *Alloca = nullptr) { + return CreateTempAlloca(Ty, LangAS::Default, align, Name, ArraySize, + Alloca); + } + RawAddress CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits align, const Twine &Name = "tmp", llvm::Value *ArraySize = nullptr); diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl index 49604c6c5e61b..a70e9af75fa38 100644 --- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl @@ -798,10 +798,7 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // AMDGCN20-SAME: ) #[[ATTR0]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] // AMDGCN20-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false) -// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] +// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR4]] // AMDGCN20-NEXT: ret void // // diff --git a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl index e28120adc0364..af50928d8ecf0 100644 --- a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl +++ b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl @@ -55,22 +55,19 @@ void func1(int *x) { // CL20-NEXT: [[LP1:%.*]] = alloca ptr, align 8, addrspace(5) // CL20-NEXT: [[LP2:%.*]] = alloca ptr, align 8, addrspace(5) // CL20-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5) +// CL20-NEXT: store i32 1, ptr addrspace(5) [[LV1]], align 4 +// CL20-NEXT: store i32 2, ptr addrspace(5) [[LV2]], align 4 +// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0 +// CL20-NEXT: store i32 3, ptr addrspace(5) [[ARRAYIDX]], align 4 // CL20-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr -// CL20-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV2]] to ptr -// CL20-NEXT: [[LA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LA]] to ptr -// CL20-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP1]] to ptr -// CL20-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP2]] to ptr -// CL20-NEXT: [[LVC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LVC]] to ptr -// CL20-NEXT: store i32 1, ptr [[LV1_ASCAST]], align 4 -// CL20-NEXT: store i32 2, ptr [[LV2_ASCAST]], align 4 -// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0 -// CL20-NEXT: store i32 3, ptr [[ARRAYIDX]], align 4 -// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr [[LP1_ASCAST]], align 8 -// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0 -// CL20-NEXT: store ptr [[ARRAYDECAY]], ptr [[LP2_ASCAST]], align 8 -// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST]]) #[[ATTR2:[0-9]+]] -// CL20-NEXT: store i32 4, ptr [[LVC_ASCAST]], align 4 -// CL20-NEXT: store i32 4, ptr [[LV1_ASCAST]], align 4 +// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr addrspace(5) [[LP1]], align 8 +// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0 +// CL20-NEXT: [[ARRAYDECAY_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARRAYDECAY]] to ptr +// CL20-NEXT: store ptr [[ARRAYDECAY_ASCAST]], ptr addrspace(5) [[LP2]], align 8 +// CL20-NEXT: [[LV1_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr +// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST1]]) #[[ATTR2:[0-9]+]] +// CL20-NEXT: store i32 4, ptr addrspace(5) [[LVC]], align 4 +// CL20-NEXT: store i32 4, ptr addrspace(5) [[LV1]], align 4 // CL20-NEXT: ret void // void func2(void) { @@ -102,8 +99,7 @@ void func2(void) { // CL20-SAME: ) #[[ATTR0]] { // CL20-NEXT: [[ENTRY:.*:]] // CL20-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5) -// CL20-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr -// CL20-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A_ASCAST]], i8 0, i64 64, i1 false) +// CL20-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 [[A]], i8 0, i64 64, i1 false) // CL20-NEXT: ret void // void func3(void) { @@ -126,11 +122,9 @@ void func3(void) { // CL20-NEXT: [[ENTRY:.*:]] // CL20-NEXT: [[VAR:%.*]] = alloca i64, align 8, addrspace(5) // CL20-NEXT: [[ALLOCA_ADDR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// CL20-NEXT: [[VAR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAR]] to ptr -// CL20-NEXT: [[ALLOCA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA_ADDR]] to ptr -// CL20-NEXT: store i64 5, ptr [[VAR_ASCAST]], align 8 -// CL20-NEXT: store ptr [[VAR_ASCAST]], ptr [[ALLOCA_ADDR_ASCAST]], align 4 -// CL20-NEXT: [[TMP0:%.*]] = load ptr addrspace(5), ptr [[ALLOCA_ADDR_ASCAST]], align 4 +// CL20-NEXT: store i64 5, ptr addrspace(5) [[VAR]], align 8 +// CL20-NEXT: store ptr addrspace(5) [[VAR]], ptr addrspace(5) [[ALLOCA_ADDR]], align 4 +// CL20-NEXT: [[TMP0:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOCA_ADDR]], align 4 // CL20-NEXT: store i64 8, ptr addrspace(5) [[TMP0]], align 8 // CL20-NEXT: ret void // @@ -159,11 +153,10 @@ void wrong_store_type_private_pointer_alloca() { // CL20-NEXT: [[ENTRY:.*:]] // CL20-NEXT: [[VAR:%.*]] = alloca i64, align 8, addrspace(5) // CL20-NEXT: [[ALLOCA_ADDR_AS_GENERIC:%.*]] = alloca ptr, align 8, addrspace(5) +// CL20-NEXT: store i64 5, ptr addrspace(5) [[VAR]], align 8 // CL20-NEXT: [[VAR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAR]] to ptr -// CL20-NEXT: [[ALLOCA_ADDR_AS_GENERIC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA_ADDR_AS_GENERIC]] to ptr -// CL20-NEXT: store i64 5, ptr [[VAR_ASCAST]], align 8 -// CL20-NEXT: store ptr [[VAR_ASCAST]], ptr [[ALLOCA_ADDR_AS_GENERIC_ASCAST]], align 8 -// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ALLOCA_ADDR_AS_GENERIC_ASCAST]], align 8 +// CL20-NEXT: store ptr [[VAR_ASCAST]], ptr addrspace(5) [[ALLOCA_ADDR_AS_GENERIC]], align 8 +// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[ALLOCA_ADDR_AS_GENERIC]], align 8 // CL20-NEXT: store i64 9, ptr [[TMP0]], align 8 // CL20-NEXT: ret void // diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl index 7d0a66bac1469..a1a114ef129a1 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl @@ -272,10 +272,7 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { // AMDGCN-SAME: ) #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false) -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR4]] // AMDGCN-NEXT: ret void // // diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index a0e11a1b5997e..bbb55b7e14941 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -136,9 +136,6 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr // NOCPU-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr // NOCPU-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr -// NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr -// NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr -// NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr // NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr // NOCPU-NEXT: [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr // NOCPU-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP2]] to ptr @@ -146,17 +143,16 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP11]] to ptr // NOCPU-NEXT: [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr // NOCPU-NEXT: [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr -// NOCPU-NEXT: [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr // NOCPU-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr // NOCPU-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr // NOCPU-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8 // NOCPU-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1 // NOCPU-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8 // NOCPU-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8 -// NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4 -// NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 -// NOCPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 -// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) +// NOCPU-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4 +// NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8 +// NOCPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4 +// NOCPU-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false) // NOCPU-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0 // NOCPU-NEXT: store i32 25, ptr [[BLOCK_SIZE]], align 8 // NOCPU-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1 @@ -170,9 +166,9 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1 // NOCPU-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8 // NOCPU-NEXT: [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr [[BLOCK_ASCAST]]) -// NOCPU-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 -// NOCPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 -// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) +// NOCPU-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8 +// NOCPU-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4 +// NOCPU-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP2_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false) // NOCPU-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0 // NOCPU-NEXT: store i32 41, ptr [[BLOCK_SIZE4]], align 8 // NOCPU-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 1 @@ -192,9 +188,9 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8 // NOCPU-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8 // NOCPU-NEXT: [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr addrspace(5) [[VARTMP2]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr [[BLOCK3_ASCAST]]) -// NOCPU-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 -// NOCPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 -// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) +// NOCPU-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8 +// NOCPU-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4 +// NOCPU-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP11_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false) // NOCPU-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0 // NOCPU-NEXT: store i32 41, ptr [[BLOCK_SIZE13]], align 8 // NOCPU-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 1 @@ -228,11 +224,11 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4 // NOCPU-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8 // NOCPU-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8 -// NOCPU-NEXT: store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 8 -// NOCPU-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 -// NOCPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 -// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) -// NOCPU-NEXT: [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8 +// NOCPU-NEXT: store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], align 8 +// NOCPU-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8 +// NOCPU-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4 +// NOCPU-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP27_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false) +// NOCPU-NEXT: [[TMP24:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], align 8 // NOCPU-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr addrspace(5) [[VARTMP27]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr [[BLOCK21_ASCAST]]) // NOCPU-NEXT: ret void // @@ -259,16 +255,13 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) // NOCPU-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) // NOCPU-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr -// NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr -// NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr -// NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr // NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr // NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8 -// NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4 +// NOCPU-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4 // NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() -// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 -// NOCPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 -// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) +// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8 +// NOCPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4 +// NOCPU-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false) // NOCPU-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) // NOCPU-NEXT: ret void // @@ -517,9 +510,6 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr // GFX900-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr // GFX900-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr -// GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr -// GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr -// GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr // GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr // GFX900-NEXT: [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr // GFX900-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP2]] to ptr @@ -527,7 +517,6 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP11]] to ptr // GFX900-NEXT: [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr // GFX900-NEXT: [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr -// GFX900-NEXT: [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr // GFX900-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr // GFX900-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr // GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]] @@ -536,11 +525,11 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9:[0-9]+]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR9]] -// GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17:![0-9]+]] +// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17:![0-9]+]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] -// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19:![0-9]+]] -// GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] -// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21:![0-9]+]] +// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19:![0-9]+]] +// GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]] +// GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21:![0-9]+]] // GFX900-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0 // GFX900-NEXT: store i32 25, ptr [[BLOCK_SIZE]], align 8 // GFX900-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1 @@ -554,9 +543,9 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]] // GFX900-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8, !tbaa [[TBAA16]] // GFX900-NEXT: [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr [[BLOCK_ASCAST]]) -// GFX900-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]] -// GFX900-NEXT: [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] -// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] +// GFX900-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]] +// GFX900-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]] +// GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP2_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] // GFX900-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0 // GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE4]], align 8 // GFX900-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 1 @@ -576,9 +565,9 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] // GFX900-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8, !tbaa [[TBAA3]] // GFX900-NEXT: [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr addrspace(5) [[VARTMP2]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr [[BLOCK3_ASCAST]]) -// GFX900-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]] -// GFX900-NEXT: [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] -// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] +// GFX900-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]] +// GFX900-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]] +// GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP11_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] // GFX900-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0 // GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE13]], align 8 // GFX900-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 1 @@ -615,11 +604,11 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4 // GFX900-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] // GFX900-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA16]] -// GFX900-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]] -// GFX900-NEXT: [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] -// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] -// GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA16]] +// GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[TBAA16]] +// GFX900-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]] +// GFX900-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]] +// GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP27_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] +// GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[TBAA16]] // GFX900-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr addrspace(5) [[VARTMP27]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr [[BLOCK21_ASCAST]]) // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR9]] // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] @@ -650,19 +639,16 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) // GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) // GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr -// GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr -// GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr -// GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr // GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr // GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA26]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR9]] -// GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] +// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] // GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() -// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]] -// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] -// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] +// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]] +// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]] +// GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] // GFX900-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR9]] diff --git a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl index a0c106bca83c9..d0bcd1fccb7ce 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl @@ -139,12 +139,12 @@ void test_static_var_local(void) { // Test function-scope variable initialization. // NOOPT-LABEL: @test_func_scope_var_private( -// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr %sp1{{.*}}, align 4 -// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr %sp2{{.*}}, align 4 -// NOOPT: store ptr addrspace(5) null, ptr %sp3{{.*}}, align 4 -// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr %sp4{{.*}}, align 4 -// NOOPT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 %SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_private.SS1, i64 32, i1 false) -// NOOPT: call void @llvm.memset.p0.i64(ptr align 8 %SS2{{.*}}, i8 0, i64 24, i1 false) +// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) %sp1{{.*}}, align 4 +// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) %sp2{{.*}}, align 4 +// NOOPT: store ptr addrspace(5) null, ptr addrspace(5) %sp3{{.*}}, align 4 +// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) %sp4{{.*}}, align 4 +// NOOPT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_private.SS1, i64 32, i1 false) +// NOOPT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 %SS2{{.*}}, i8 0, i64 24, i1 false) void test_func_scope_var_private(void) { private char *sp1 = 0; private char *sp2 = NULL; @@ -157,12 +157,12 @@ void test_func_scope_var_private(void) { // Test function-scope variable initialization. // NOOPT-LABEL: @test_func_scope_var_local( -// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr %sp1{{.*}}, align 4 -// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr %sp2{{.*}}, align 4 -// NOOPT: store ptr addrspace(3) null, ptr %sp3{{.*}}, align 4 -// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr %sp4{{.*}}, align 4 -// NOOPT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 %SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_local.SS1, i64 32, i1 false) -// NOOPT: call void @llvm.memset.p0.i64(ptr align 8 %SS2{{.*}}, i8 0, i64 24, i1 false) +// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(5) %sp1{{.*}}, align 4 +// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(5) %sp2{{.*}}, align 4 +// NOOPT: store ptr addrspace(3) null, ptr addrspace(5) %sp3{{.*}}, align 4 +// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(5) %sp4{{.*}}, align 4 +// NOOPT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_local.SS1, i64 32, i1 false) +// NOOPT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 %SS2{{.*}}, i8 0, i64 24, i1 false) void test_func_scope_var_local(void) { local char *sp1 = 0; local char *sp2 = NULL; @@ -603,7 +603,7 @@ int test_and_ptr(private char* p1, local char* p2) { // Test folding of null pointer in function scope. // NOOPT-LABEL: test_fold_private // NOOPT: call void @test_fold_callee -// NOOPT: store ptr addrspace(1) null, ptr %glob{{.*}}, align 8 +// NOOPT: store ptr addrspace(1) null, ptr addrspace(5) %glob{{.*}}, align 8 // NOOPT: %{{.*}} = sub i64 %{{.*}}, 0 // NOOPT: call void @test_fold_callee // NOOPT: %[[SEXT:.*]] = sext i32 ptrtoint (ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)) to i32) to i64 @@ -619,7 +619,7 @@ void test_fold_private(void) { // NOOPT-LABEL: test_fold_local // NOOPT: call void @test_fold_callee -// NOOPT: store ptr addrspace(1) null, ptr %glob{{.*}}, align 8 +// NOOPT: store ptr addrspace(1) null, ptr addrspace(5) %glob{{.*}}, align 8 // NOOPT: %{{.*}} = sub i64 %{{.*}}, 0 // NOOPT: call void @test_fold_callee // NOOPT: %[[SEXT:.*]] = sext i32 ptrtoint (ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)) to i32) to i64 diff --git a/clang/test/CodeGenOpenCL/blocks.cl b/clang/test/CodeGenOpenCL/blocks.cl index 161f1406c96cb..20212cf213ffe 100644 --- a/clang/test/CodeGenOpenCL/blocks.cl +++ b/clang/test/CodeGenOpenCL/blocks.cl @@ -44,10 +44,10 @@ void foo(){ // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr %[[block:.*]], i32 0, i32 2 // AMDGCN: store ptr @__foo_block_invoke, ptr %[[block_invoke]] // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr %[[block]], i32 0, i32 3 - // AMDGCN: %[[i_value:.*]] = load i32, ptr %i + // AMDGCN: %[[i_value:.*]] = load i32, ptr addrspace(5) %i // AMDGCN: store i32 %[[i_value]], ptr %[[block_captured]], - // AMDGCN: store ptr %[[block]], ptr %[[block_B:.*]], - // AMDGCN: %[[block_literal:.*]] = load ptr, ptr %[[block_B]] + // AMDGCN: store ptr %[[block]], ptr addrspace(5) %[[block_B:.*]], + // AMDGCN: %[[block_literal:.*]] = load ptr, ptr addrspace(5) %[[block_B]] // AMDGCN: call {{.*}}i32 @__foo_block_invoke(ptr noundef %[[block_literal]]) int (^ block_B)(void) = ^{ diff --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl b/clang/test/CodeGenOpenCL/builtins-alloca.cl index 85b449e45a0f1..ce7da3aba9e45 100644 --- a/clang/test/CodeGenOpenCL/builtins-alloca.cl +++ b/clang/test/CodeGenOpenCL/builtins-alloca.cl @@ -39,13 +39,12 @@ // OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL20-NEXT: [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR]] to ptr // OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL20-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 // OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ASCAST]], align 4 +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 // OPENCL20-NEXT: ret void // // OPENCL30-LABEL: define dso_local void @test1_builtin_alloca( @@ -67,13 +66,12 @@ // OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL30GAS-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL30GAS-NEXT: [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR]] to ptr // OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL30GAS-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 // OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) -// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 // OPENCL30GAS-NEXT: ret void // void test1_builtin_alloca(unsigned n) { @@ -111,13 +109,12 @@ void test1_builtin_alloca(unsigned n) { // OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr // OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL20-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 // OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4 +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 // OPENCL20-NEXT: ret void // // OPENCL30-LABEL: define dso_local void @test1_builtin_alloca_uninitialized( @@ -139,13 +136,12 @@ void test1_builtin_alloca(unsigned n) { // OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL30GAS-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL30GAS-NEXT: [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr // OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL30GAS-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 // OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) -// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4 +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 // OPENCL30GAS-NEXT: ret void // void test1_builtin_alloca_uninitialized(unsigned n) { @@ -183,13 +179,12 @@ void test1_builtin_alloca_uninitialized(unsigned n) { // OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL20-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr // OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL20-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 // OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_ASCAST]], align 4 +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 // OPENCL20-NEXT: ret void // // OPENCL30-LABEL: define dso_local void @test1_builtin_alloca_with_align( @@ -211,13 +206,12 @@ void test1_builtin_alloca_uninitialized(unsigned n) { // OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr // OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL30GAS-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 // OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) -// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_ASCAST]], align 4 +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 // OPENCL30GAS-NEXT: ret void // void test1_builtin_alloca_with_align(unsigned n) { @@ -255,13 +249,12 @@ void test1_builtin_alloca_with_align(unsigned n) { // OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr // OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL20-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 // OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4 +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 // OPENCL20-NEXT: ret void // // OPENCL30-LABEL: define dso_local void @test1_builtin_alloca_with_align_uninitialized( @@ -283,13 +276,12 @@ void test1_builtin_alloca_with_align(unsigned n) { // OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr // OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL30GAS-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 // OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) -// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4 +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 // OPENCL30GAS-NEXT: ret void // void test1_builtin_alloca_with_align_uninitialized(unsigned n) { @@ -325,12 +317,11 @@ void test1_builtin_alloca_with_align_uninitialized(unsigned n) { // OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL20-NEXT: [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR]] to ptr // OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ASCAST]], align 4 +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 // OPENCL20-NEXT: ret void // // OPENCL30-LABEL: define dso_local void @test2_builtin_alloca( @@ -351,12 +342,11 @@ void test1_builtin_alloca_with_align_uninitialized(unsigned n) { // OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL30GAS-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL30GAS-NEXT: [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR]] to ptr // OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) -// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ASCAST]], align 4 +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 // OPENCL30GAS-NEXT: ret void // void test2_builtin_alloca(unsigned n) { @@ -392,12 +382,11 @@ void test2_builtin_alloca(unsigned n) { // OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr // OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4 +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 // OPENCL20-NEXT: ret void // // OPENCL30-LABEL: define dso_local void @test2_builtin_alloca_uninitialized( @@ -418,12 +407,11 @@ void test2_builtin_alloca(unsigned n) { // OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL30GAS-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL30GAS-NEXT: [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr // OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) -// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4 +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 // OPENCL30GAS-NEXT: ret void // void test2_builtin_alloca_uninitialized(unsigned n) { @@ -459,12 +447,11 @@ void test2_builtin_alloca_uninitialized(unsigned n) { // OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL20-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr // OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_ASCAST]], align 4 +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 // OPENCL20-NEXT: ret void // // OPENCL30-LABEL: define dso_local void @test2_builtin_alloca_with_align( @@ -485,12 +472,11 @@ void test2_builtin_alloca_uninitialized(unsigned n) { // OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr // OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) -// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_ASCAST]], align 4 +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 // OPENCL30GAS-NEXT: ret void // void test2_builtin_alloca_with_align(unsigned n) { @@ -526,12 +512,11 @@ void test2_builtin_alloca_with_align(unsigned n) { // OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr // OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4 +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 // OPENCL20-NEXT: ret void // // OPENCL30-LABEL: define dso_local void @test2_builtin_alloca_with_align_uninitialized( @@ -552,12 +537,11 @@ void test2_builtin_alloca_with_align(unsigned n) { // OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr // OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 // OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) -// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4 +// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 // OPENCL30GAS-NEXT: ret void // void test2_builtin_alloca_with_align_uninitialized(unsigned n) { diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl index 2dba7fb719376..f7641280715c8 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl @@ -146,12 +146,11 @@ void test_s_barrier_signal_isfirst(int* a, int* b, int *c) // CHECK-NEXT: [[STATE:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr -// CHECK-NEXT: [[STATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STATE]] to ptr // CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.s.get.barrier.state(i32 [[TMP0]]) -// CHECK-NEXT: store i32 [[TMP1]], ptr [[STATE_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[STATE_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[STATE]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[STATE]], align 4 // CHECK-NEXT: ret i32 [[TMP2]] // unsigned test_s_get_barrier_state(int a) @@ -167,13 +166,12 @@ unsigned test_s_get_barrier_state(int a) // CHECK-NEXT: [[STATE:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr -// CHECK-NEXT: [[STATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STATE]] to ptr // CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3) // CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) [[TMP1]]) -// CHECK-NEXT: store i32 [[TMP2]], ptr [[STATE_ASCAST]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[STATE_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(5) [[STATE]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[STATE]], align 4 // CHECK-NEXT: ret i32 [[TMP3]] // unsigned test_s_get_named_barrier_state(void *bar) diff --git a/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl b/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl index 86ca9ae509073..8845ffe499fde 100644 --- a/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl +++ b/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl @@ -32,11 +32,10 @@ __kernel void use_of_local_var() // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr // CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[X]]) #[[ATTR5:[0-9]+]] -// CHECK-NEXT: store i32 0, ptr [[X_ASCAST]], align 4, !tbaa [[TBAA4:![0-9]+]] -// CHECK-NEXT: [[X_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[X_ASCAST]] to ptr addrspace(5) -// CHECK-NEXT: call void @private_ptr(ptr addrspace(5) noundef [[X_ASCAST_ASCAST]]) #[[ATTR6:[0-9]+]] +// CHECK-NEXT: store i32 0, ptr addrspace(5) [[X]], align 4, !tbaa [[TBAA4:![0-9]+]] +// CHECK-NEXT: call void @private_ptr(ptr addrspace(5) noundef [[X]]) #[[ATTR6:[0-9]+]] +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr // CHECK-NEXT: call void @generic_ptr(ptr noundef [[X_ASCAST]]) #[[ATTR6]] // CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[X]]) #[[ATTR5]] // CHECK-NEXT: ret void diff --git a/clang/test/Index/pipe-size.cl b/clang/test/Index/pipe-size.cl index a48857baef1a6..f15bbefb68e7f 100644 --- a/clang/test/Index/pipe-size.cl +++ b/clang/test/Index/pipe-size.cl @@ -12,5 +12,5 @@ __kernel void testPipe( pipe int test ) // SPIR64: store target("spirv.Pipe", 0) %test, ptr %test.addr, align 8 // SPIR64: store i32 8, ptr %s, align 4 // AMDGCN: store ptr addrspace(1) %test, ptr %test{{.*}}, align 8 - // AMDGCN: store i32 8, ptr %s{{.*}}, align 4 + // AMDGCN: store i32 8, ptr addrspace(5) %s{{.*}}, align 4 } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits