https://github.com/guopsh-sugon created https://github.com/llvm/llvm-project/pull/115819
Externalize static global texture variable in CUDA/HIP. Reason: CUDA/HIP runtime needs reference the texture symbol in device elf when program is running. If a texture var has internal linkage type a runtime error will occur when running. To sovle this problem, CUDA nvcc externalizes static global texture var. But clang doesn't do it right now. >From 2219475fcafe81117521ad5d08ef64b9366548d0 Mon Sep 17 00:00:00 2001 From: guopsh <guo...@sugon.com> Date: Tue, 12 Nov 2024 13:36:42 +0800 Subject: [PATCH] externalize static global texture var --- clang/lib/CodeGen/CGCUDANV.cpp | 14 ++++++++ clang/lib/CodeGen/CodeGenModule.cpp | 15 +++++++++ .../CodeGenCUDA/static-global-texture-var.cu | 33 +++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 clang/test/CodeGenCUDA/static-global-texture-var.cu diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index ae14d74f2d9151..333b06a80e0cc7 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -306,6 +306,20 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) { CGM.printPostfixForExternalizedDecl(Out, ND); DeviceSideName = std::string(Out.str()); } + + // Make unique name for static global tetxure variable for HIP/CUDA. + if (const VarDecl *VD = dyn_cast<VarDecl>(ND)) { + if (VD->getType()->isCUDADeviceBuiltinTextureType() && + VD->getStorageClass() == SC_Static && VD->hasGlobalStorage() && + !VD->isStaticDataMember()) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << DeviceSideName; + CGM.printPostfixForExternalizedDecl(Out, ND); + DeviceSideName = std::string(Out.str()); + } + } + return DeviceSideName; } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index ba376f9ecfacde..859f707741e23e 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1952,6 +1952,15 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, CGM.getLangOpts().CUDAIsDevice) CGM.printPostfixForExternalizedDecl(Out, ND); + // Make unique name for static global tetxure variable for HIP/CUDA. + if (const VarDecl *VD = dyn_cast<VarDecl>(ND)) { + if (VD->getType()->isCUDADeviceBuiltinTextureType() && + VD->getStorageClass() == SC_Static && VD->hasGlobalStorage() && + !VD->isStaticDataMember()) { + CGM.printPostfixForExternalizedDecl(Out, ND); + } + } + return std::string(Out.str()); } @@ -5608,6 +5617,12 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // Set the llvm linkage type as appropriate. llvm::GlobalValue::LinkageTypes Linkage = getLLVMLinkageVarDefinition(D); + // Make static global texture variable externally visible. + if (D->getType()->isCUDADeviceBuiltinTextureType() && + D->getStorageClass() == SC_Static && !D->isStaticDataMember()) { + Linkage = llvm::GlobalValue::ExternalLinkage; + } + // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on // the device. [...]" // CUDA B.2.2 "The __constant__ qualifier, optionally used together with diff --git a/clang/test/CodeGenCUDA/static-global-texture-var.cu b/clang/test/CodeGenCUDA/static-global-texture-var.cu new file mode 100644 index 00000000000000..86b5fa8d68548b --- /dev/null +++ b/clang/test/CodeGenCUDA/static-global-texture-var.cu @@ -0,0 +1,33 @@ +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CUDA-DEVICE %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fcuda-is-device -std=c++11 \ +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=HIP-DEVICE %s + +struct textureReference { + int desc; +}; + +enum ReadMode { + ElementType = 0, + NormalizedFloat = 1 +}; + +template <typename T, int dim = 1, enum ReadMode mode = ElementType> +struct __attribute__((device_builtin_texture_type)) texture : public textureReference { +}; + +// Confirm static global texture is externally visible and has a unique name. +static texture<float, 2, ElementType> texRef; +//CUDA-DEVICE: @_ZL6texRef__static__{{.*}} = addrspace(1) externally_initialized global i64 undef, align 4 +//HIP-DEVICE: @_ZL6texRef.static.{{.*}} = addrspace(1) externally_initialized global %struct.texture undef, align 4 + +struct v4f { + float x, y, z, w; +}; + +__attribute__((device)) v4f tex2d_ld(texture<float, 2, ElementType>, float, float) asm("llvm.nvvm.texRef.unified.2d.v4f32.f32"); + +__attribute__((device)) float foo(float x, float y) { + return tex2d_ld(texRef, x, y).x; +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits