yaxunl created this revision. yaxunl added a reviewer: tra. yaxunl requested review of this revision.
A static device variable may be accessed in host code through cudaMemCpyFromSymbol etc. Currently clang does not emit the static device variable if it is only referenced by host code, which causes host code to fail at run time. This patch fixes that. https://reviews.llvm.org/D88115 Files: clang/lib/CodeGen/CodeGenModule.cpp clang/test/CodeGenCUDA/static-device-var-no-rdc.cu Index: clang/test/CodeGenCUDA/static-device-var-no-rdc.cu =================================================================== --- clang/test/CodeGenCUDA/static-device-var-no-rdc.cu +++ clang/test/CodeGenCUDA/static-device-var-no-rdc.cu @@ -63,6 +63,13 @@ // externalized nor registered. // DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat +// Check a static device variable referenced by host function only is externalized. +// DEV-DAG: @_ZL1w = addrspace(1) externally_initialized global i32 0 +// HOST-DAG: @_ZL1w = internal global i32 undef +// HOST-DAG: @[[DEVNAMEW:[0-9]+]] = {{.*}}c"_ZL1w\00" + +static __device__ int w; + inline __device__ void devfun(const int ** b) { const static int p = 2; b[0] = &p; @@ -92,11 +99,13 @@ getDeviceSymbol(&x); getDeviceSymbol(&x5); getDeviceSymbol(&y); + getDeviceSymbol(&w); z = 123; a[0] = &z2; } // HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]] // HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]] +// HOST: __hipRegisterVar({{.*}}@_ZL1w {{.*}}@[[DEVNAMEW]] // HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w // HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -2195,6 +2195,11 @@ assert(DeferredVTables.empty()); } + // Emit CUDA/HIP static device variables referenced by host code only. + if (getLangOpts().CUDA) + for (auto V : getContext().CUDAStaticDeviceVarReferencedByHost) + DeferredDeclsToEmit.push_back(V); + // Stop if we're out of both deferred vtables and deferred declarations. if (DeferredDeclsToEmit.empty()) return;
Index: clang/test/CodeGenCUDA/static-device-var-no-rdc.cu =================================================================== --- clang/test/CodeGenCUDA/static-device-var-no-rdc.cu +++ clang/test/CodeGenCUDA/static-device-var-no-rdc.cu @@ -63,6 +63,13 @@ // externalized nor registered. // DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat +// Check a static device variable referenced by host function only is externalized. +// DEV-DAG: @_ZL1w = addrspace(1) externally_initialized global i32 0 +// HOST-DAG: @_ZL1w = internal global i32 undef +// HOST-DAG: @[[DEVNAMEW:[0-9]+]] = {{.*}}c"_ZL1w\00" + +static __device__ int w; + inline __device__ void devfun(const int ** b) { const static int p = 2; b[0] = &p; @@ -92,11 +99,13 @@ getDeviceSymbol(&x); getDeviceSymbol(&x5); getDeviceSymbol(&y); + getDeviceSymbol(&w); z = 123; a[0] = &z2; } // HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]] // HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]] +// HOST: __hipRegisterVar({{.*}}@_ZL1w {{.*}}@[[DEVNAMEW]] // HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w // HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -2195,6 +2195,11 @@ assert(DeferredVTables.empty()); } + // Emit CUDA/HIP static device variables referenced by host code only. + if (getLangOpts().CUDA) + for (auto V : getContext().CUDAStaticDeviceVarReferencedByHost) + DeferredDeclsToEmit.push_back(V); + // Stop if we're out of both deferred vtables and deferred declarations. if (DeferredDeclsToEmit.empty()) return;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits