https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/148062

>From b8c027c278248eeb53d561c2acb2784ce3dcfc34 Mon Sep 17 00:00:00 2001
From: jofrn <jofer...@amd.com>
Date: Thu, 10 Jul 2025 17:34:38 -0400
Subject: [PATCH] [clang][CodeGen] Preserve addrspace of enqueue_kernel
 builtin.

__enqueue_kernel_varargs last parameter is in addrspace(5), but
CodeGen currently misses this qualifier. This commit fixes the code
to preserve the qualifier by referencing Alloca, which has its casts
removed, rather than TmpPtr.
---
 clang/lib/CodeGen/CGBuiltin.cpp                   |  4 ++--
 clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 48c91eb4a5b4f..5f2eb76e7bacb 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5987,8 +5987,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
       auto *Zero = llvm::ConstantInt::get(IntTy, 0);
       for (unsigned I = First; I < NumArgs; ++I) {
         auto *Index = llvm::ConstantInt::get(IntTy, I - First);
-        auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
-                                      {Zero, Index});
+        auto *GEP =
+            Builder.CreateGEP(Tmp.getElementType(), Alloca, {Zero, Index});
         if (I == First)
           ElemPtr = GEP;
         auto *V =
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl 
b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index bbb55b7e14941..bfbed79dc7f16 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -209,9 +209,9 @@ kernel void test_target_features_kernel(global int *i) {
 // NOCPU-NEXT:    [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ 
i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr 
[[BLOCK12_ASCAST]], i32 0, i32 5
 // NOCPU-NEXT:    [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8
 // NOCPU-NEXT:    store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8
-// NOCPU-NEXT:    [[TMP18:%.*]] = getelementptr [1 x i64], ptr 
[[BLOCK_SIZES_ASCAST]], i32 0, i32 0
-// NOCPU-NEXT:    store i64 100, ptr [[TMP18]], align 8
-// NOCPU-NEXT:    [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr 
addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr 
addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to 
ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]])
+// NOCPU-NEXT:    [[TMP18:%.*]] = getelementptr [1 x i64], ptr addrspace(5) 
[[BLOCK_SIZES]], i32 0, i32 0
+// NOCPU-NEXT:    store i64 100, ptr addrspace(5) [[TMP18]], align 8
+// NOCPU-NEXT:    [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr 
addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr 
addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to 
ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr addrspace(5) [[TMP18]])
 // NOCPU-NEXT:    [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, 
i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0
 // NOCPU-NEXT:    store i32 32, ptr [[BLOCK_SIZE22]], align 8
 // NOCPU-NEXT:    [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, 
i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1
@@ -587,9 +587,9 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900-NEXT:    [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, 
!tbaa [[TBAA3]]
 // GFX900-NEXT:    store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, 
!tbaa [[TBAA3]]
 // GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) 
[[BLOCK_SIZES]]) #[[ATTR9]]
-// GFX900-NEXT:    [[TMP18:%.*]] = getelementptr [1 x i64], ptr 
[[BLOCK_SIZES_ASCAST]], i32 0, i32 0
-// GFX900-NEXT:    store i64 100, ptr [[TMP18]], align 8
-// GFX900-NEXT:    [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr 
addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr 
addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to 
ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]])
+// GFX900-NEXT:    [[TMP18:%.*]] = getelementptr [1 x i64], ptr addrspace(5) 
[[BLOCK_SIZES]], i32 0, i32 0
+// GFX900-NEXT:    store i64 100, ptr addrspace(5) [[TMP18]], align 8
+// GFX900-NEXT:    [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr 
addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr 
addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to 
ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr addrspace(5) [[TMP18]])
 // GFX900-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) 
[[BLOCK_SIZES]]) #[[ATTR9]]
 // GFX900-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) 
[[BLOCK20]]) #[[ATTR9]]
 // GFX900-NEXT:    [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, 
i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to