https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/148062
>From b8c027c278248eeb53d561c2acb2784ce3dcfc34 Mon Sep 17 00:00:00 2001 From: jofrn <jofer...@amd.com> Date: Thu, 10 Jul 2025 17:34:38 -0400 Subject: [PATCH] [clang][CodeGen] Preserve addrspace of enqueue_kernel builtin. __enqueue_kernel_varargs last parameter is in addrspace(5), but CodeGen currently misses this qualifier. This commit fixes the code to preserve the qualifier by referencing Alloca, which has its casts removed, rather than TmpPtr. --- clang/lib/CodeGen/CGBuiltin.cpp | 4 ++-- clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 48c91eb4a5b4f..5f2eb76e7bacb 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5987,8 +5987,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, auto *Zero = llvm::ConstantInt::get(IntTy, 0); for (unsigned I = First; I < NumArgs; ++I) { auto *Index = llvm::ConstantInt::get(IntTy, I - First); - auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr, - {Zero, Index}); + auto *GEP = + Builder.CreateGEP(Tmp.getElementType(), Alloca, {Zero, Index}); if (I == First) ElemPtr = GEP; auto *V = diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index bbb55b7e14941..bfbed79dc7f16 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -209,9 +209,9 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5 // NOCPU-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8 // NOCPU-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8 -// NOCPU-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0 -// NOCPU-NEXT: store i64 100, ptr [[TMP18]], align 8 -// NOCPU-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]]) +// NOCPU-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr addrspace(5) [[BLOCK_SIZES]], i32 0, i32 0 +// NOCPU-NEXT: store i64 100, ptr addrspace(5) [[TMP18]], align 8 +// NOCPU-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr addrspace(5) [[TMP18]]) // NOCPU-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0 // NOCPU-NEXT: store i32 32, ptr [[BLOCK_SIZE22]], align 8 // NOCPU-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1 @@ -587,9 +587,9 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] // GFX900-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[TBAA3]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR9]] -// GFX900-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0 -// GFX900-NEXT: store i64 100, ptr [[TMP18]], align 8 -// GFX900-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]]) +// GFX900-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr addrspace(5) [[BLOCK_SIZES]], i32 0, i32 0 +// GFX900-NEXT: store i64 100, ptr addrspace(5) [[TMP18]], align 8 +// GFX900-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr addrspace(5) [[TMP18]]) // GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR9]] // GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR9]] // GFX900-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits