Author: Matt Arsenault
Date: 2025-05-08T07:51:57+02:00
New Revision: a11d86461e7d7d9bce3d04a39ded1cad394239ca

URL: 
https://github.com/llvm/llvm-project/commit/a11d86461e7d7d9bce3d04a39ded1cad394239ca
DIFF: 
https://github.com/llvm/llvm-project/commit/a11d86461e7d7d9bce3d04a39ded1cad394239ca.diff

LOG: clang: Fix broken implicit cast to generic address space (#138863)

This fixes emitting undefined behavior where a 64-bit generic
pointer is written to a 32-bit slot allocated for a private pointer.
This can be seen in test/CodeGenOpenCL/amdgcn-automatic-variable.cl's
wrong_pointer_alloca.

Added: 
    

Modified: 
    clang/lib/CodeGen/CGDecl.cpp
    clang/lib/CodeGen/CGExpr.cpp
    clang/lib/CodeGen/CodeGenFunction.h
    clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
    clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
    clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
    clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
    clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
    clang/test/CodeGenOpenCL/blocks.cl
    clang/test/CodeGenOpenCL/builtins-alloca.cl
    clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
    clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl
    clang/test/Index/pipe-size.cl

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index db34e2738b4cf..1e54e55c5abbb 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1588,7 +1588,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
       // Create the alloca.  Note that we set the name separately from
       // building the instruction so that it's there even in no-asserts
       // builds.
-      address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName(),
+      address = CreateTempAlloca(allocaTy, Ty.getAddressSpace(),
+                                 allocaAlignment, D.getName(),
                                  /*ArraySize=*/nullptr, &AllocaAddr);
 
       // Don't emit lifetime markers for MSVC catch parameters. The lifetime of

diff  --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 6f5ead78f2b23..1a835c97decef 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -100,13 +100,11 @@ CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type 
*Ty, CharUnits Align,
   return RawAddress(Alloca, Ty, Align, KnownNonNull);
 }
 
-/// CreateTempAlloca - This creates a alloca and inserts it into the entry
-/// block. The alloca is casted to default address space if necessary.
-RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
-                                             const Twine &Name,
+RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS,
+                                             CharUnits Align, const Twine 
&Name,
                                              llvm::Value *ArraySize,
                                              RawAddress *AllocaAddr) {
-  auto Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
+  RawAddress Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
   if (AllocaAddr)
     *AllocaAddr = Alloca;
   llvm::Value *V = Alloca.getPointer();
@@ -114,8 +112,9 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type 
*Ty, CharUnits Align,
   // be 
diff erent from the type defined by the language. For example,
   // in C++ the auto variables are in the default address space. Therefore
   // cast alloca to the default address space when necessary.
-  if (getASTAllocaAddressSpace() != LangAS::Default) {
-    auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default);
+
+  unsigned DestAddrSpace = getContext().getTargetAddressSpace(DestLangAS);
+  if (DestAddrSpace != Alloca.getAddressSpace()) {
     llvm::IRBuilderBase::InsertPointGuard IPG(Builder);
     // When ArraySize is nullptr, alloca is inserted at AllocaInsertPt,
     // otherwise alloca is inserted at the current insertion point of the
@@ -123,8 +122,8 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type 
*Ty, CharUnits Align,
     if (!ArraySize)
       Builder.SetInsertPoint(getPostAllocaInsertPoint());
     V = getTargetHooks().performAddrSpaceCast(
-        *this, V, getASTAllocaAddressSpace(), LangAS::Default,
-        Builder.getPtrTy(DestAddrSpace), /*non-null*/ true);
+        *this, V, getASTAllocaAddressSpace(), DestLangAS,
+        Builder.getPtrTy(DestAddrSpace), /*IsNonNull=*/true);
   }
 
   return RawAddress(V, Ty, Align, KnownNonNull);

diff  --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 561f8f6a2a2fb..c0bc3825f0188 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2861,10 +2861,28 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// more efficient if the caller knows that the address will not be exposed.
   llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp",
                                      llvm::Value *ArraySize = nullptr);
+
+  /// CreateTempAlloca - This creates a alloca and inserts it into the entry
+  /// block. The alloca is casted to the address space of \p UseAddrSpace if
+  /// necessary.
+  RawAddress CreateTempAlloca(llvm::Type *Ty, LangAS UseAddrSpace,
+                              CharUnits align, const Twine &Name = "tmp",
+                              llvm::Value *ArraySize = nullptr,
+                              RawAddress *Alloca = nullptr);
+
+  /// CreateTempAlloca - This creates a alloca and inserts it into the entry
+  /// block. The alloca is casted to default address space if necessary.
+  ///
+  /// FIXME: This version should be removed, and context should provide the
+  /// context use address space used instead of default.
   RawAddress CreateTempAlloca(llvm::Type *Ty, CharUnits align,
                               const Twine &Name = "tmp",
                               llvm::Value *ArraySize = nullptr,
-                              RawAddress *Alloca = nullptr);
+                              RawAddress *Alloca = nullptr) {
+    return CreateTempAlloca(Ty, LangAS::Default, align, Name, ArraySize,
+                            Alloca);
+  }
+
   RawAddress CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits align,
                                          const Twine &Name = "tmp",
                                          llvm::Value *ArraySize = nullptr);

diff  --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl 
b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 49604c6c5e61b..a70e9af75fa38 100644
--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -798,10 +798,7 @@ kernel void KernelLargeTwoMember(struct 
LargeStructTwoMember u) {
 // AMDGCN20-SAME: ) #[[ATTR0]] {
 // AMDGCN20-NEXT:  [[ENTRY:.*:]]
 // AMDGCN20-NEXT:    [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], 
align 8, addrspace(5)
-// AMDGCN20-NEXT:    [[BYVAL_TEMP:%.*]] = alloca 
[[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
-// AMDGCN20-NEXT:    [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[P_S]] to ptr
-// AMDGCN20-NEXT:    call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 
[[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false)
-// AMDGCN20-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef 
byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]]
+// AMDGCN20-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef 
byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR4]]
 // AMDGCN20-NEXT:    ret void
 //
 //

diff  --git a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl 
b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
index e28120adc0364..af50928d8ecf0 100644
--- a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
+++ b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
@@ -55,22 +55,19 @@ void func1(int *x) {
 // CL20-NEXT:    [[LP1:%.*]] = alloca ptr, align 8, addrspace(5)
 // CL20-NEXT:    [[LP2:%.*]] = alloca ptr, align 8, addrspace(5)
 // CL20-NEXT:    [[LVC:%.*]] = alloca i32, align 4, addrspace(5)
+// CL20-NEXT:    store i32 1, ptr addrspace(5) [[LV1]], align 4
+// CL20-NEXT:    store i32 2, ptr addrspace(5) [[LV2]], align 4
+// CL20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr 
addrspace(5) [[LA]], i64 0, i64 0
+// CL20-NEXT:    store i32 3, ptr addrspace(5) [[ARRAYIDX]], align 4
 // CL20-NEXT:    [[LV1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] 
to ptr
-// CL20-NEXT:    [[LV2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV2]] 
to ptr
-// CL20-NEXT:    [[LA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LA]] to 
ptr
-// CL20-NEXT:    [[LP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP1]] 
to ptr
-// CL20-NEXT:    [[LP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP2]] 
to ptr
-// CL20-NEXT:    [[LVC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LVC]] 
to ptr
-// CL20-NEXT:    store i32 1, ptr [[LV1_ASCAST]], align 4
-// CL20-NEXT:    store i32 2, ptr [[LV2_ASCAST]], align 4
-// CL20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr 
[[LA_ASCAST]], i64 0, i64 0
-// CL20-NEXT:    store i32 3, ptr [[ARRAYIDX]], align 4
-// CL20-NEXT:    store ptr [[LV1_ASCAST]], ptr [[LP1_ASCAST]], align 8
-// CL20-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr 
[[LA_ASCAST]], i64 0, i64 0
-// CL20-NEXT:    store ptr [[ARRAYDECAY]], ptr [[LP2_ASCAST]], align 8
-// CL20-NEXT:    call void @func1(ptr noundef [[LV1_ASCAST]]) #[[ATTR2:[0-9]+]]
-// CL20-NEXT:    store i32 4, ptr [[LVC_ASCAST]], align 4
-// CL20-NEXT:    store i32 4, ptr [[LV1_ASCAST]], align 4
+// CL20-NEXT:    store ptr [[LV1_ASCAST]], ptr addrspace(5) [[LP1]], align 8
+// CL20-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr 
addrspace(5) [[LA]], i64 0, i64 0
+// CL20-NEXT:    [[ARRAYDECAY_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[ARRAYDECAY]] to ptr
+// CL20-NEXT:    store ptr [[ARRAYDECAY_ASCAST]], ptr addrspace(5) [[LP2]], 
align 8
+// CL20-NEXT:    [[LV1_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] 
to ptr
+// CL20-NEXT:    call void @func1(ptr noundef [[LV1_ASCAST1]]) 
#[[ATTR2:[0-9]+]]
+// CL20-NEXT:    store i32 4, ptr addrspace(5) [[LVC]], align 4
+// CL20-NEXT:    store i32 4, ptr addrspace(5) [[LV1]], align 4
 // CL20-NEXT:    ret void
 //
 void func2(void) {
@@ -102,8 +99,7 @@ void func2(void) {
 // CL20-SAME: ) #[[ATTR0]] {
 // CL20-NEXT:  [[ENTRY:.*:]]
 // CL20-NEXT:    [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5)
-// CL20-NEXT:    [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
-// CL20-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[A_ASCAST]], i8 0, 
i64 64, i1 false)
+// CL20-NEXT:    call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 [[A]], 
i8 0, i64 64, i1 false)
 // CL20-NEXT:    ret void
 //
 void func3(void) {
@@ -126,11 +122,9 @@ void func3(void) {
 // CL20-NEXT:  [[ENTRY:.*:]]
 // CL20-NEXT:    [[VAR:%.*]] = alloca i64, align 8, addrspace(5)
 // CL20-NEXT:    [[ALLOCA_ADDR:%.*]] = alloca ptr addrspace(5), align 4, 
addrspace(5)
-// CL20-NEXT:    [[VAR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAR]] 
to ptr
-// CL20-NEXT:    [[ALLOCA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[ALLOCA_ADDR]] to ptr
-// CL20-NEXT:    store i64 5, ptr [[VAR_ASCAST]], align 8
-// CL20-NEXT:    store ptr [[VAR_ASCAST]], ptr [[ALLOCA_ADDR_ASCAST]], align 4
-// CL20-NEXT:    [[TMP0:%.*]] = load ptr addrspace(5), ptr 
[[ALLOCA_ADDR_ASCAST]], align 4
+// CL20-NEXT:    store i64 5, ptr addrspace(5) [[VAR]], align 8
+// CL20-NEXT:    store ptr addrspace(5) [[VAR]], ptr addrspace(5) 
[[ALLOCA_ADDR]], align 4
+// CL20-NEXT:    [[TMP0:%.*]] = load ptr addrspace(5), ptr addrspace(5) 
[[ALLOCA_ADDR]], align 4
 // CL20-NEXT:    store i64 8, ptr addrspace(5) [[TMP0]], align 8
 // CL20-NEXT:    ret void
 //
@@ -159,11 +153,10 @@ void wrong_store_type_private_pointer_alloca() {
 // CL20-NEXT:  [[ENTRY:.*:]]
 // CL20-NEXT:    [[VAR:%.*]] = alloca i64, align 8, addrspace(5)
 // CL20-NEXT:    [[ALLOCA_ADDR_AS_GENERIC:%.*]] = alloca ptr, align 8, 
addrspace(5)
+// CL20-NEXT:    store i64 5, ptr addrspace(5) [[VAR]], align 8
 // CL20-NEXT:    [[VAR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAR]] 
to ptr
-// CL20-NEXT:    [[ALLOCA_ADDR_AS_GENERIC_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[ALLOCA_ADDR_AS_GENERIC]] to ptr
-// CL20-NEXT:    store i64 5, ptr [[VAR_ASCAST]], align 8
-// CL20-NEXT:    store ptr [[VAR_ASCAST]], ptr 
[[ALLOCA_ADDR_AS_GENERIC_ASCAST]], align 8
-// CL20-NEXT:    [[TMP0:%.*]] = load ptr, ptr 
[[ALLOCA_ADDR_AS_GENERIC_ASCAST]], align 8
+// CL20-NEXT:    store ptr [[VAR_ASCAST]], ptr addrspace(5) 
[[ALLOCA_ADDR_AS_GENERIC]], align 8
+// CL20-NEXT:    [[TMP0:%.*]] = load ptr, ptr addrspace(5) 
[[ALLOCA_ADDR_AS_GENERIC]], align 8
 // CL20-NEXT:    store i64 9, ptr [[TMP0]], align 8
 // CL20-NEXT:    ret void
 //

diff  --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl 
b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
index 7d0a66bac1469..a1a114ef129a1 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
@@ -272,10 +272,7 @@ kernel void KernelLargeTwoMember(struct 
LargeStructTwoMember u) {
 // AMDGCN-SAME: ) #[[ATTR0]] {
 // AMDGCN-NEXT:  [[ENTRY:.*:]]
 // AMDGCN-NEXT:    [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], 
align 8, addrspace(5)
-// AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca 
[[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
-// AMDGCN-NEXT:    [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] 
to ptr
-// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 
[[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false)
-// AMDGCN-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef 
byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]]
+// AMDGCN-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef 
byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR4]]
 // AMDGCN-NEXT:    ret void
 //
 //

diff  --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl 
b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index a0e11a1b5997e..bbb55b7e14941 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -136,9 +136,6 @@ kernel void test_target_features_kernel(global int *i) {
 // NOCPU-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[B_ADDR]] to ptr
 // NOCPU-NEXT:    [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[C_ADDR]] to ptr
 // NOCPU-NEXT:    [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[D_ADDR]] to ptr
-// NOCPU-NEXT:    [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DEFAULT_QUEUE]] to ptr
-// NOCPU-NEXT:    [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[FLAGS]] to ptr
-// NOCPU-NEXT:    [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[NDRANGE]] to ptr
 // NOCPU-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] 
to ptr
 // NOCPU-NEXT:    [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[BLOCK]] to ptr
 // NOCPU-NEXT:    [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[VARTMP2]] to ptr
@@ -146,17 +143,16 @@ kernel void test_target_features_kernel(global int *i) {
 // NOCPU-NEXT:    [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[VARTMP11]] to ptr
 // NOCPU-NEXT:    [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[BLOCK12]] to ptr
 // NOCPU-NEXT:    [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[BLOCK_SIZES]] to ptr
-// NOCPU-NEXT:    [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[BLOCK20]] to ptr
 // NOCPU-NEXT:    [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[BLOCK21]] to ptr
 // NOCPU-NEXT:    [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[VARTMP27]] to ptr
 // NOCPU-NEXT:    store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8
 // NOCPU-NEXT:    store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1
 // NOCPU-NEXT:    store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
 // NOCPU-NEXT:    store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8
-// NOCPU-NEXT:    store i32 0, ptr [[FLAGS_ASCAST]], align 4
-// NOCPU-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr 
[[DEFAULT_QUEUE_ASCAST]], align 8
-// NOCPU-NEXT:    [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
-// NOCPU-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], 
ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
+// NOCPU-NEXT:    store i32 0, ptr addrspace(5) [[FLAGS]], align 4
+// NOCPU-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) 
[[DEFAULT_QUEUE]], align 8
+// NOCPU-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
+// NOCPU-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], 
ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
 // NOCPU-NEXT:    [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, 
ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0
 // NOCPU-NEXT:    store i32 25, ptr [[BLOCK_SIZE]], align 8
 // NOCPU-NEXT:    [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, 
i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1
@@ -170,9 +166,9 @@ kernel void test_target_features_kernel(global int *i) {
 // NOCPU-NEXT:    [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1
 // NOCPU-NEXT:    store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8
 // NOCPU-NEXT:    [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr 
addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) [[TMP]], ptr 
addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to 
ptr), ptr [[BLOCK_ASCAST]])
-// NOCPU-NEXT:    [[TMP5:%.*]] = load ptr addrspace(1), ptr 
[[DEFAULT_QUEUE_ASCAST]], align 8
-// NOCPU-NEXT:    [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
-// NOCPU-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
+// NOCPU-NEXT:    [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) 
[[DEFAULT_QUEUE]], align 8
+// NOCPU-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
+// NOCPU-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 4 
[[TMP2_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
 // NOCPU-NEXT:    [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, 
i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr 
[[BLOCK3_ASCAST]], i32 0, i32 0
 // NOCPU-NEXT:    store i32 41, ptr [[BLOCK_SIZE4]], align 8
 // NOCPU-NEXT:    [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, 
i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr 
[[BLOCK3_ASCAST]], i32 0, i32 1
@@ -192,9 +188,9 @@ kernel void test_target_features_kernel(global int *i) {
 // NOCPU-NEXT:    [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8
 // NOCPU-NEXT:    store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8
 // NOCPU-NEXT:    [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr 
addrspace(1) [[TMP5]], i32 [[TMP6]], ptr addrspace(5) [[VARTMP2]], ptr 
addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to 
ptr), ptr [[BLOCK3_ASCAST]])
-// NOCPU-NEXT:    [[TMP12:%.*]] = load ptr addrspace(1), ptr 
[[DEFAULT_QUEUE_ASCAST]], align 8
-// NOCPU-NEXT:    [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
-// NOCPU-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
+// NOCPU-NEXT:    [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(5) 
[[DEFAULT_QUEUE]], align 8
+// NOCPU-NEXT:    [[TMP13:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
+// NOCPU-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 4 
[[TMP11_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
 // NOCPU-NEXT:    [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, 
i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr 
[[BLOCK12_ASCAST]], i32 0, i32 0
 // NOCPU-NEXT:    store i32 41, ptr [[BLOCK_SIZE13]], align 8
 // NOCPU-NEXT:    [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, 
i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr 
[[BLOCK12_ASCAST]], i32 0, i32 1
@@ -228,11 +224,11 @@ kernel void test_target_features_kernel(global int *i) {
 // NOCPU-NEXT:    [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ 
i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4
 // NOCPU-NEXT:    [[TMP21:%.*]] = load ptr addrspace(1), ptr 
[[C_ADDR_ASCAST]], align 8
 // NOCPU-NEXT:    store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], 
align 8
-// NOCPU-NEXT:    store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 8
-// NOCPU-NEXT:    [[TMP22:%.*]] = load ptr addrspace(1), ptr 
[[DEFAULT_QUEUE_ASCAST]], align 8
-// NOCPU-NEXT:    [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
-// NOCPU-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
-// NOCPU-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8
+// NOCPU-NEXT:    store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], 
align 8
+// NOCPU-NEXT:    [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(5) 
[[DEFAULT_QUEUE]], align 8
+// NOCPU-NEXT:    [[TMP23:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
+// NOCPU-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 4 
[[TMP27_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
+// NOCPU-NEXT:    [[TMP24:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], 
align 8
 // NOCPU-NEXT:    [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr 
addrspace(1) [[TMP22]], i32 [[TMP23]], ptr addrspace(5) [[VARTMP27]], ptr 
addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to 
ptr), ptr [[BLOCK21_ASCAST]])
 // NOCPU-NEXT:    ret void
 //
@@ -259,16 +255,13 @@ kernel void test_target_features_kernel(global int *i) {
 // NOCPU-NEXT:    [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, 
addrspace(5)
 // NOCPU-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, 
addrspace(5)
 // NOCPU-NEXT:    [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[I_ADDR]] to ptr
-// NOCPU-NEXT:    [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DEFAULT_QUEUE]] to ptr
-// NOCPU-NEXT:    [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[FLAGS]] to ptr
-// NOCPU-NEXT:    [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[NDRANGE]] to ptr
 // NOCPU-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] 
to ptr
 // NOCPU-NEXT:    store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8
-// NOCPU-NEXT:    store i32 0, ptr [[FLAGS_ASCAST]], align 4
+// NOCPU-NEXT:    store i32 0, ptr addrspace(5) [[FLAGS]], align 4
 // NOCPU-NEXT:    [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
-// NOCPU-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr 
[[DEFAULT_QUEUE_ASCAST]], align 8
-// NOCPU-NEXT:    [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
-// NOCPU-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], 
ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
+// NOCPU-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) 
[[DEFAULT_QUEUE]], align 8
+// NOCPU-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
+// NOCPU-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], 
ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
 // NOCPU-NEXT:    [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr 
addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr 
addrspacecast (ptr addrspace(1) 
@__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr 
addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
 // NOCPU-NEXT:    ret void
 //
@@ -517,9 +510,6 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[B_ADDR]] to ptr
 // GFX900-NEXT:    [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[C_ADDR]] to ptr
 // GFX900-NEXT:    [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[D_ADDR]] to ptr
-// GFX900-NEXT:    [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DEFAULT_QUEUE]] to ptr
-// GFX900-NEXT:    [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[FLAGS]] to ptr
-// GFX900-NEXT:    [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[NDRANGE]] to ptr
 // GFX900-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] 
to ptr
 // GFX900-NEXT:    [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[BLOCK]] to ptr
 // GFX900-NEXT:    [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[VARTMP2]] to ptr
@@ -527,7 +517,6 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900-NEXT:    [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[VARTMP11]] to ptr
 // GFX900-NEXT:    [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[BLOCK12]] to ptr
 // GFX900-NEXT:    [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[BLOCK_SIZES]] to ptr
-// GFX900-NEXT:    [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[BLOCK20]] to ptr
 // GFX900-NEXT:    [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[BLOCK21]] to ptr
 // GFX900-NEXT:    [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[VARTMP27]] to ptr
 // GFX900-NEXT:    store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 
8, !tbaa [[TBAA14]]
@@ -536,11 +525,11 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900-NEXT:    store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa 
[[TBAA3]]
 // GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) 
[[DEFAULT_QUEUE]]) #[[ATTR9:[0-9]+]]
 // GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) 
[[FLAGS]]) #[[ATTR9]]
-// GFX900-NEXT:    store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa 
[[TBAA17:![0-9]+]]
+// GFX900-NEXT:    store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa 
[[TBAA17:![0-9]+]]
 // GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) 
[[NDRANGE]]) #[[ATTR9]]
-// GFX900-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr 
[[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19:![0-9]+]]
-// GFX900-NEXT:    [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, 
!tbaa [[TBAA17]]
-// GFX900-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct 
[[TBAA_STRUCT21:![0-9]+]]
+// GFX900-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) 
[[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19:![0-9]+]]
+// GFX900-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 
4, !tbaa [[TBAA17]]
+// GFX900-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 4 
[[TMP_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), 
!tbaa.struct [[TBAA_STRUCT21:![0-9]+]]
 // GFX900-NEXT:    [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, 
i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0
 // GFX900-NEXT:    store i32 25, ptr [[BLOCK_SIZE]], align 8
 // GFX900-NEXT:    [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, 
i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1
@@ -554,9 +543,9 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900-NEXT:    [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, 
!tbaa [[TBAA16]]
 // GFX900-NEXT:    store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8, !tbaa 
[[TBAA16]]
 // GFX900-NEXT:    [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr 
addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) [[TMP]], ptr 
addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to 
ptr), ptr [[BLOCK_ASCAST]])
-// GFX900-NEXT:    [[TMP5:%.*]] = load ptr addrspace(1), ptr 
[[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
-// GFX900-NEXT:    [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, 
!tbaa [[TBAA17]]
-// GFX900-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct 
[[TBAA_STRUCT21]]
+// GFX900-NEXT:    [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) 
[[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]]
+// GFX900-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 
4, !tbaa [[TBAA17]]
+// GFX900-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 4 
[[TMP2_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), 
!tbaa.struct [[TBAA_STRUCT21]]
 // GFX900-NEXT:    [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, 
i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr 
[[BLOCK3_ASCAST]], i32 0, i32 0
 // GFX900-NEXT:    store i32 41, ptr [[BLOCK_SIZE4]], align 8
 // GFX900-NEXT:    [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, 
i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr 
[[BLOCK3_ASCAST]], i32 0, i32 1
@@ -576,9 +565,9 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900-NEXT:    [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, 
!tbaa [[TBAA3]]
 // GFX900-NEXT:    store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8, 
!tbaa [[TBAA3]]
 // GFX900-NEXT:    [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr 
addrspace(1) [[TMP5]], i32 [[TMP6]], ptr addrspace(5) [[VARTMP2]], ptr 
addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to 
ptr), ptr [[BLOCK3_ASCAST]])
-// GFX900-NEXT:    [[TMP12:%.*]] = load ptr addrspace(1), ptr 
[[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
-// GFX900-NEXT:    [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, 
!tbaa [[TBAA17]]
-// GFX900-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), 
!tbaa.struct [[TBAA_STRUCT21]]
+// GFX900-NEXT:    [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(5) 
[[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]]
+// GFX900-NEXT:    [[TMP13:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 
4, !tbaa [[TBAA17]]
+// GFX900-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 4 
[[TMP11_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), 
!tbaa.struct [[TBAA_STRUCT21]]
 // GFX900-NEXT:    [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, 
i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr 
[[BLOCK12_ASCAST]], i32 0, i32 0
 // GFX900-NEXT:    store i32 41, ptr [[BLOCK_SIZE13]], align 8
 // GFX900-NEXT:    [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, 
i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr 
[[BLOCK12_ASCAST]], i32 0, i32 1
@@ -615,11 +604,11 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900-NEXT:    [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ 
i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4
 // GFX900-NEXT:    [[TMP21:%.*]] = load ptr addrspace(1), ptr 
[[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
 // GFX900-NEXT:    store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], 
align 8, !tbaa [[TBAA7]]
-// GFX900-NEXT:    store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 
8, !tbaa [[TBAA16]]
-// GFX900-NEXT:    [[TMP22:%.*]] = load ptr addrspace(1), ptr 
[[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
-// GFX900-NEXT:    [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, 
!tbaa [[TBAA17]]
-// GFX900-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), 
!tbaa.struct [[TBAA_STRUCT21]]
-// GFX900-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8, 
!tbaa [[TBAA16]]
+// GFX900-NEXT:    store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], 
align 8, !tbaa [[TBAA16]]
+// GFX900-NEXT:    [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(5) 
[[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]]
+// GFX900-NEXT:    [[TMP23:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 
4, !tbaa [[TBAA17]]
+// GFX900-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 4 
[[TMP27_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), 
!tbaa.struct [[TBAA_STRUCT21]]
+// GFX900-NEXT:    [[TMP24:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], 
align 8, !tbaa [[TBAA16]]
 // GFX900-NEXT:    [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr 
addrspace(1) [[TMP22]], i32 [[TMP23]], ptr addrspace(5) [[VARTMP27]], ptr 
addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to 
ptr), ptr [[BLOCK21_ASCAST]])
 // GFX900-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) 
[[BLOCK20]]) #[[ATTR9]]
 // GFX900-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) 
[[NDRANGE]]) #[[ATTR9]]
@@ -650,19 +639,16 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900-NEXT:    [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, 
addrspace(5)
 // GFX900-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, 
addrspace(5)
 // GFX900-NEXT:    [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[I_ADDR]] to ptr
-// GFX900-NEXT:    [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DEFAULT_QUEUE]] to ptr
-// GFX900-NEXT:    [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[FLAGS]] to ptr
-// GFX900-NEXT:    [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[NDRANGE]] to ptr
 // GFX900-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] 
to ptr
 // GFX900-NEXT:    store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 
8, !tbaa [[TBAA26]]
 // GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) 
[[DEFAULT_QUEUE]]) #[[ATTR9]]
 // GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) 
[[FLAGS]]) #[[ATTR9]]
-// GFX900-NEXT:    store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
+// GFX900-NEXT:    store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa 
[[TBAA17]]
 // GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) 
[[NDRANGE]]) #[[ATTR9]]
 // GFX900-NEXT:    [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
-// GFX900-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr 
[[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
-// GFX900-NEXT:    [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, 
!tbaa [[TBAA17]]
-// GFX900-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct 
[[TBAA_STRUCT21]]
+// GFX900-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) 
[[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]]
+// GFX900-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 
4, !tbaa [[TBAA17]]
+// GFX900-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 4 
[[TMP_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), 
!tbaa.struct [[TBAA_STRUCT21]]
 // GFX900-NEXT:    [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr 
addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr 
addrspacecast (ptr addrspace(1) 
@__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr 
addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
 // GFX900-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) 
[[NDRANGE]]) #[[ATTR9]]
 // GFX900-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) 
[[FLAGS]]) #[[ATTR9]]

diff  --git a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl 
b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
index a0c106bca83c9..d0bcd1fccb7ce 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
@@ -139,12 +139,12 @@ void test_static_var_local(void) {
 
 // Test function-scope variable initialization.
 // NOOPT-LABEL: @test_func_scope_var_private(
-// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), 
ptr %sp1{{.*}}, align 4
-// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), 
ptr %sp2{{.*}}, align 4
-// NOOPT: store ptr addrspace(5) null, ptr %sp3{{.*}}, align 4
-// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), 
ptr %sp4{{.*}}, align 4
-// NOOPT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 %SS1{{.*}}, ptr 
addrspace(4) align 8 @__const.test_func_scope_var_private.SS1, i64 32, i1 false)
-// NOOPT: call void @llvm.memset.p0.i64(ptr align 8 %SS2{{.*}}, i8 0, i64 24, 
i1 false)
+// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), 
ptr addrspace(5) %sp1{{.*}}, align 4
+// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), 
ptr addrspace(5) %sp2{{.*}}, align 4
+// NOOPT: store ptr addrspace(5) null, ptr addrspace(5) %sp3{{.*}}, align 4
+// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), 
ptr addrspace(5) %sp4{{.*}}, align 4
+// NOOPT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 
%SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_private.SS1, 
i64 32, i1 false)
+// NOOPT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 %SS2{{.*}}, 
i8 0, i64 24, i1 false)
 void test_func_scope_var_private(void) {
   private char *sp1 = 0;
   private char *sp2 = NULL;
@@ -157,12 +157,12 @@ void test_func_scope_var_private(void) {
 
 // Test function-scope variable initialization.
 // NOOPT-LABEL: @test_func_scope_var_local(
-// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), 
ptr %sp1{{.*}}, align 4
-// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), 
ptr %sp2{{.*}}, align 4
-// NOOPT: store ptr addrspace(3) null, ptr %sp3{{.*}}, align 4
-// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), 
ptr %sp4{{.*}}, align 4
-// NOOPT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 %SS1{{.*}}, ptr 
addrspace(4) align 8 @__const.test_func_scope_var_local.SS1, i64 32, i1 false)
-// NOOPT: call void @llvm.memset.p0.i64(ptr align 8 %SS2{{.*}}, i8 0, i64 24, 
i1 false)
+// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), 
ptr addrspace(5) %sp1{{.*}}, align 4
+// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), 
ptr addrspace(5) %sp2{{.*}}, align 4
+// NOOPT: store ptr addrspace(3) null, ptr addrspace(5) %sp3{{.*}}, align 4
+// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), 
ptr addrspace(5) %sp4{{.*}}, align 4
+// NOOPT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 
%SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_local.SS1, 
i64 32, i1 false)
+// NOOPT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 %SS2{{.*}}, 
i8 0, i64 24, i1 false)
 void test_func_scope_var_local(void) {
   local char *sp1 = 0;
   local char *sp2 = NULL;
@@ -603,7 +603,7 @@ int test_and_ptr(private char* p1, local char* p2) {
 // Test folding of null pointer in function scope.
 // NOOPT-LABEL: test_fold_private
 // NOOPT: call void @test_fold_callee
-// NOOPT: store ptr addrspace(1) null, ptr %glob{{.*}}, align 8
+// NOOPT: store ptr addrspace(1) null, ptr addrspace(5) %glob{{.*}}, align 8
 // NOOPT: %{{.*}} = sub i64 %{{.*}}, 0
 // NOOPT: call void @test_fold_callee
 // NOOPT: %[[SEXT:.*]] = sext i32 ptrtoint (ptr addrspace(5) addrspacecast 
(ptr null to ptr addrspace(5)) to i32) to i64
@@ -619,7 +619,7 @@ void test_fold_private(void) {
 
 // NOOPT-LABEL: test_fold_local
 // NOOPT: call void @test_fold_callee
-// NOOPT: store ptr addrspace(1) null, ptr %glob{{.*}}, align 8
+// NOOPT: store ptr addrspace(1) null, ptr addrspace(5) %glob{{.*}}, align 8
 // NOOPT: %{{.*}} = sub i64 %{{.*}}, 0
 // NOOPT: call void @test_fold_callee
 // NOOPT: %[[SEXT:.*]] = sext i32 ptrtoint (ptr addrspace(3) addrspacecast 
(ptr null to ptr addrspace(3)) to i32) to i64

diff  --git a/clang/test/CodeGenOpenCL/blocks.cl 
b/clang/test/CodeGenOpenCL/blocks.cl
index 161f1406c96cb..20212cf213ffe 100644
--- a/clang/test/CodeGenOpenCL/blocks.cl
+++ b/clang/test/CodeGenOpenCL/blocks.cl
@@ -44,10 +44,10 @@ void foo(){
   // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds nuw <{ i32, i32, 
ptr, i32 }>, ptr %[[block:.*]], i32 0, i32 2
   // AMDGCN: store ptr @__foo_block_invoke, ptr %[[block_invoke]]
   // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds nuw <{ i32, i32, 
ptr, i32 }>, ptr %[[block]], i32 0, i32 3
-  // AMDGCN: %[[i_value:.*]] = load i32, ptr %i
+  // AMDGCN: %[[i_value:.*]] = load i32, ptr addrspace(5) %i
   // AMDGCN: store i32 %[[i_value]], ptr %[[block_captured]],
-  // AMDGCN: store ptr %[[block]], ptr %[[block_B:.*]],
-  // AMDGCN: %[[block_literal:.*]] = load ptr, ptr %[[block_B]]
+  // AMDGCN: store ptr %[[block]], ptr addrspace(5) %[[block_B:.*]],
+  // AMDGCN: %[[block_literal:.*]] = load ptr, ptr addrspace(5) %[[block_B]]
   // AMDGCN: call {{.*}}i32 @__foo_block_invoke(ptr noundef %[[block_literal]])
 
   int (^ block_B)(void) = ^{

diff  --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl 
b/clang/test/CodeGenOpenCL/builtins-alloca.cl
index 85b449e45a0f1..ce7da3aba9e45 100644
--- a/clang/test/CodeGenOpenCL/builtins-alloca.cl
+++ b/clang/test/CodeGenOpenCL/builtins-alloca.cl
@@ -39,13 +39,12 @@
 // OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, 
addrspace(5)
 // OPENCL20-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL20-NEXT:    [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[ALLOC_PTR]] to ptr
 // OPENCL20-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL20-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
 // OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, 
addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_ASCAST]], align 4
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR]], align 4
 // OPENCL20-NEXT:    ret void
 //
 // OPENCL30-LABEL: define dso_local void @test1_builtin_alloca(
@@ -67,13 +66,12 @@
 // OPENCL30GAS-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL30GAS-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, 
addrspace(5)
 // OPENCL30GAS-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT:    [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[ALLOC_PTR]] to ptr
 // OPENCL30GAS-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL30GAS-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
 // OPENCL30GAS-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, 
addrspace(5)
-// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_ASCAST]], align 4
+// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR]], align 4
 // OPENCL30GAS-NEXT:    ret void
 //
 void test1_builtin_alloca(unsigned n) {
@@ -111,13 +109,12 @@ void test1_builtin_alloca(unsigned n) {
 // OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL20-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr 
addrspace(5), align 4, addrspace(5)
 // OPENCL20-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL20-NEXT:    [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast 
ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr
 // OPENCL20-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL20-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
 // OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, 
addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR_UNINITIALIZED]], align 4
 // OPENCL20-NEXT:    ret void
 //
 // OPENCL30-LABEL: define dso_local void @test1_builtin_alloca_uninitialized(
@@ -139,13 +136,12 @@ void test1_builtin_alloca(unsigned n) {
 // OPENCL30GAS-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL30GAS-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr 
addrspace(5), align 4, addrspace(5)
 // OPENCL30GAS-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT:    [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast 
ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr
 // OPENCL30GAS-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL30GAS-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
 // OPENCL30GAS-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, 
addrspace(5)
-// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4
+// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR_UNINITIALIZED]], align 4
 // OPENCL30GAS-NEXT:    ret void
 //
 void test1_builtin_alloca_uninitialized(unsigned n) {
@@ -183,13 +179,12 @@ void test1_builtin_alloca_uninitialized(unsigned n) {
 // OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 
4, addrspace(5)
 // OPENCL20-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr
 // OPENCL20-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL20-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
 // OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, 
addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_ALIGN_ASCAST]], align 4
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR_ALIGN]], align 4
 // OPENCL20-NEXT:    ret void
 //
 // OPENCL30-LABEL: define dso_local void @test1_builtin_alloca_with_align(
@@ -211,13 +206,12 @@ void test1_builtin_alloca_uninitialized(unsigned n) {
 // OPENCL30GAS-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL30GAS-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), 
align 4, addrspace(5)
 // OPENCL30GAS-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT:    [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr
 // OPENCL30GAS-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL30GAS-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
 // OPENCL30GAS-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, 
addrspace(5)
-// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_ALIGN_ASCAST]], align 4
+// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR_ALIGN]], align 4
 // OPENCL30GAS-NEXT:    ret void
 //
 void test1_builtin_alloca_with_align(unsigned n) {
@@ -255,13 +249,12 @@ void test1_builtin_alloca_with_align(unsigned n) {
 // OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr 
addrspace(5), align 4, addrspace(5)
 // OPENCL20-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr
 // OPENCL20-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL20-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
 // OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, 
addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
 // OPENCL20-NEXT:    ret void
 //
 // OPENCL30-LABEL: define dso_local void 
@test1_builtin_alloca_with_align_uninitialized(
@@ -283,13 +276,12 @@ void test1_builtin_alloca_with_align(unsigned n) {
 // OPENCL30GAS-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL30GAS-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr 
addrspace(5), align 4, addrspace(5)
 // OPENCL30GAS-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr
 // OPENCL30GAS-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL30GAS-NEXT:    [[MUL:%.*]] = mul i64 [[CONV]], 4
 // OPENCL30GAS-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, 
addrspace(5)
-// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4
+// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
 // OPENCL30GAS-NEXT:    ret void
 //
 void test1_builtin_alloca_with_align_uninitialized(unsigned n) {
@@ -325,12 +317,11 @@ void 
test1_builtin_alloca_with_align_uninitialized(unsigned n) {
 // OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL20-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, 
addrspace(5)
 // OPENCL20-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL20-NEXT:    [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[ALLOC_PTR]] to ptr
 // OPENCL20-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, 
addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_ASCAST]], align 4
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR]], align 4
 // OPENCL20-NEXT:    ret void
 //
 // OPENCL30-LABEL: define dso_local void @test2_builtin_alloca(
@@ -351,12 +342,11 @@ void 
test1_builtin_alloca_with_align_uninitialized(unsigned n) {
 // OPENCL30GAS-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL30GAS-NEXT:    [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, 
addrspace(5)
 // OPENCL30GAS-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT:    [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[ALLOC_PTR]] to ptr
 // OPENCL30GAS-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL30GAS-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, 
addrspace(5)
-// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_ASCAST]], align 4
+// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR]], align 4
 // OPENCL30GAS-NEXT:    ret void
 //
 void test2_builtin_alloca(unsigned n) {
@@ -392,12 +382,11 @@ void test2_builtin_alloca(unsigned n) {
 // OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL20-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr 
addrspace(5), align 4, addrspace(5)
 // OPENCL20-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL20-NEXT:    [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast 
ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr
 // OPENCL20-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, 
addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR_UNINITIALIZED]], align 4
 // OPENCL20-NEXT:    ret void
 //
 // OPENCL30-LABEL: define dso_local void @test2_builtin_alloca_uninitialized(
@@ -418,12 +407,11 @@ void test2_builtin_alloca(unsigned n) {
 // OPENCL30GAS-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL30GAS-NEXT:    [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr 
addrspace(5), align 4, addrspace(5)
 // OPENCL30GAS-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT:    [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast 
ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr
 // OPENCL30GAS-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL30GAS-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, 
addrspace(5)
-// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4
+// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR_UNINITIALIZED]], align 4
 // OPENCL30GAS-NEXT:    ret void
 //
 void test2_builtin_alloca_uninitialized(unsigned n) {
@@ -459,12 +447,11 @@ void test2_builtin_alloca_uninitialized(unsigned n) {
 // OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 
4, addrspace(5)
 // OPENCL20-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr
 // OPENCL20-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, 
addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_ALIGN_ASCAST]], align 4
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR_ALIGN]], align 4
 // OPENCL20-NEXT:    ret void
 //
 // OPENCL30-LABEL: define dso_local void @test2_builtin_alloca_with_align(
@@ -485,12 +472,11 @@ void test2_builtin_alloca_uninitialized(unsigned n) {
 // OPENCL30GAS-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL30GAS-NEXT:    [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), 
align 4, addrspace(5)
 // OPENCL30GAS-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT:    [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr
 // OPENCL30GAS-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL30GAS-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, 
addrspace(5)
-// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_ALIGN_ASCAST]], align 4
+// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR_ALIGN]], align 4
 // OPENCL30GAS-NEXT:    ret void
 //
 void test2_builtin_alloca_with_align(unsigned n) {
@@ -526,12 +512,11 @@ void test2_builtin_alloca_with_align(unsigned n) {
 // OPENCL20-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr 
addrspace(5), align 4, addrspace(5)
 // OPENCL20-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL20-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr
 // OPENCL20-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL20-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL20-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, 
addrspace(5)
-// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4
+// OPENCL20-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
 // OPENCL20-NEXT:    ret void
 //
 // OPENCL30-LABEL: define dso_local void 
@test2_builtin_alloca_with_align_uninitialized(
@@ -552,12 +537,11 @@ void test2_builtin_alloca_with_align(unsigned n) {
 // OPENCL30GAS-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // OPENCL30GAS-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr 
addrspace(5), align 4, addrspace(5)
 // OPENCL30GAS-NEXT:    [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT:    [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr
 // OPENCL30GAS-NEXT:    store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
 // OPENCL30GAS-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
 // OPENCL30GAS-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, 
addrspace(5)
-// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr 
[[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4
+// OPENCL30GAS-NEXT:    store ptr addrspace(5) [[TMP1]], ptr addrspace(5) 
[[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
 // OPENCL30GAS-NEXT:    ret void
 //
 void test2_builtin_alloca_with_align_uninitialized(unsigned n) {

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
index 2dba7fb719376..f7641280715c8 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
@@ -146,12 +146,11 @@ void test_s_barrier_signal_isfirst(int* a, int* b, int *c)
 // CHECK-NEXT:    [[STATE:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[A_ADDR]] to ptr
-// CHECK-NEXT:    [[STATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[STATE]] to ptr
 // CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.s.get.barrier.state(i32 
[[TMP0]])
-// CHECK-NEXT:    store i32 [[TMP1]], ptr [[STATE_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[STATE_ASCAST]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr addrspace(5) [[STATE]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(5) [[STATE]], align 4
 // CHECK-NEXT:    ret i32 [[TMP2]]
 //
 unsigned test_s_get_barrier_state(int a)
@@ -167,13 +166,12 @@ unsigned test_s_get_barrier_state(int a)
 // CHECK-NEXT:    [[STATE:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[BAR_ADDR]] to ptr
-// CHECK-NEXT:    [[STATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[STATE]] to ptr
 // CHECK-NEXT:    store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
 // CHECK-NEXT:    [[TMP2:%.*]] = call i32 
@llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) [[TMP1]])
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[STATE_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[STATE_ASCAST]], align 4
+// CHECK-NEXT:    store i32 [[TMP2]], ptr addrspace(5) [[STATE]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(5) [[STATE]], align 4
 // CHECK-NEXT:    ret i32 [[TMP3]]
 //
 unsigned test_s_get_named_barrier_state(void *bar)

diff  --git 
a/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl 
b/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl
index 86ca9ae509073..8845ffe499fde 100644
--- a/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl
+++ b/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl
@@ -32,11 +32,10 @@ __kernel void use_of_local_var()
 // CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[X:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to 
ptr
 // CHECK-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) 
[[X]]) #[[ATTR5:[0-9]+]]
-// CHECK-NEXT:    store i32 0, ptr [[X_ASCAST]], align 4, !tbaa 
[[TBAA4:![0-9]+]]
-// CHECK-NEXT:    [[X_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[X_ASCAST]] to 
ptr addrspace(5)
-// CHECK-NEXT:    call void @private_ptr(ptr addrspace(5) noundef 
[[X_ASCAST_ASCAST]]) #[[ATTR6:[0-9]+]]
+// CHECK-NEXT:    store i32 0, ptr addrspace(5) [[X]], align 4, !tbaa 
[[TBAA4:![0-9]+]]
+// CHECK-NEXT:    call void @private_ptr(ptr addrspace(5) noundef [[X]]) 
#[[ATTR6:[0-9]+]]
+// CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to 
ptr
 // CHECK-NEXT:    call void @generic_ptr(ptr noundef [[X_ASCAST]]) #[[ATTR6]]
 // CHECK-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) 
[[X]]) #[[ATTR5]]
 // CHECK-NEXT:    ret void

diff  --git a/clang/test/Index/pipe-size.cl b/clang/test/Index/pipe-size.cl
index a48857baef1a6..f15bbefb68e7f 100644
--- a/clang/test/Index/pipe-size.cl
+++ b/clang/test/Index/pipe-size.cl
@@ -12,5 +12,5 @@ __kernel void testPipe( pipe int test )
     // SPIR64: store target("spirv.Pipe", 0) %test, ptr %test.addr, align 8
     // SPIR64: store i32 8, ptr %s, align 4
     // AMDGCN: store ptr addrspace(1) %test, ptr %test{{.*}}, align 8
-    // AMDGCN: store i32 8, ptr %s{{.*}}, align 4
+    // AMDGCN: store i32 8, ptr addrspace(5) %s{{.*}}, align 4
 }


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to