https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/110182
>From af1adfafaa09bc7992cf9aaf34a6121cf2d56d5b Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Thu, 26 Sep 2024 04:16:52 +0100 Subject: [PATCH 1/3] Mark globals as `constant` if they have been annotated with `__constant__`/carry the `CUDAConstant` attribute. --- clang/lib/CodeGen/CodeGenModule.cpp | 5 +- clang/test/CodeGenCUDA/address-spaces.cu | 2 +- clang/test/CodeGenCUDA/amdgpu-visibility.cu | 6 +-- clang/test/CodeGenCUDA/anon-ns.cu | 4 +- clang/test/CodeGenCUDA/device-stub.cu | 8 ++-- clang/test/CodeGenCUDA/device-var-init.cu | 48 +++++++++---------- clang/test/CodeGenCUDA/device-var-linkage.cu | 14 +++--- clang/test/CodeGenCUDA/filter-decl.cu | 4 +- .../CodeGenCUDA/static-device-var-no-rdc.cu | 4 +- .../test/CodeGenCUDA/static-device-var-rdc.cu | 6 +-- .../template-class-static-member.cu | 4 +- clang/test/CodeGenHIP/hipspv-addr-spaces.cpp | 2 +- 12 files changed, 54 insertions(+), 53 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index d53d47979f29fb..4fb8a2ec73f4d3 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -5612,8 +5612,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, emitter->finalize(GV); // If it is safe to mark the global 'constant', do so now. - GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor && - D->getType().isConstantStorage(getContext(), true, true)); + GV->setConstant(D->hasAttr<CUDAConstantAttr>() || + (!NeedsGlobalCtor && !NeedsGlobalDtor && + D->getType().isConstantStorage(getContext(), true, true))); // If it is in a read-only section, mark it 'constant'. 
if (const SectionAttr *SA = D->getAttr<SectionAttr>()) { diff --git a/clang/test/CodeGenCUDA/address-spaces.cu b/clang/test/CodeGenCUDA/address-spaces.cu index 0608c9cabd0489..66903c81b93339 100644 --- a/clang/test/CodeGenCUDA/address-spaces.cu +++ b/clang/test/CodeGenCUDA/address-spaces.cu @@ -9,7 +9,7 @@ // CHECK: @i ={{.*}} addrspace(1) externally_initialized global __device__ int i; -// CHECK: @j ={{.*}} addrspace(4) externally_initialized global +// CHECK: @j ={{.*}} addrspace(4) externally_initialized constant __constant__ int j; // CHECK: @k ={{.*}} addrspace(3) global diff --git a/clang/test/CodeGenCUDA/amdgpu-visibility.cu b/clang/test/CodeGenCUDA/amdgpu-visibility.cu index d7dbab112a68c6..ef74d932ee8c8f 100644 --- a/clang/test/CodeGenCUDA/amdgpu-visibility.cu +++ b/clang/test/CodeGenCUDA/amdgpu-visibility.cu @@ -4,11 +4,11 @@ #include "Inputs/cuda.h" -// CHECK-DEFAULT: @c ={{.*}} addrspace(4) externally_initialized global +// CHECK-DEFAULT: @c ={{.*}} addrspace(4) externally_initialized constant // CHECK-DEFAULT: @g ={{.*}} addrspace(1) externally_initialized global -// CHECK-PROTECTED: @c = protected addrspace(4) externally_initialized global +// CHECK-PROTECTED: @c = protected addrspace(4) externally_initialized constant // CHECK-PROTECTED: @g = protected addrspace(1) externally_initialized global -// CHECK-HIDDEN: @c = protected addrspace(4) externally_initialized global +// CHECK-HIDDEN: @c = protected addrspace(4) externally_initialized constant // CHECK-HIDDEN: @g = protected addrspace(1) externally_initialized global __constant__ int c; __device__ int g; diff --git a/clang/test/CodeGenCUDA/anon-ns.cu b/clang/test/CodeGenCUDA/anon-ns.cu index 3c55e9907dd6c1..d931f31d0207c5 100644 --- a/clang/test/CodeGenCUDA/anon-ns.cu +++ b/clang/test/CodeGenCUDA/anon-ns.cu @@ -28,13 +28,13 @@ // HIP-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]]( // HIP-DAG: define weak_odr {{.*}}void 
@[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]]( // HIP-DAG: @[[VM:_ZN12_GLOBAL__N_12vmE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global -// HIP-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE\.static\.b04fd23c98500190]] = addrspace(4) externally_initialized global +// HIP-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE\.static\.b04fd23c98500190]] = addrspace(4) externally_initialized constant // HIP-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global // CUDA-DAG: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]]( // CUDA-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]]( // CUDA-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]]( -// CUDA-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE__static__b04fd23c98500190]] = addrspace(4) externally_initialized global +// CUDA-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE__static__b04fd23c98500190]] = addrspace(4) externally_initialized constant // CUDA-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE__static__b04fd23c98500190]] = addrspace(1) externally_initialized global // COMMON-DAG: @_ZN12_GLOBAL__N_12vdE = internal addrspace(1) global diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu index 8695433f6df10c..c3b8b025448d4f 100644 --- a/clang/test/CodeGenCUDA/device-stub.cu +++ b/clang/test/CodeGenCUDA/device-stub.cu @@ -69,9 +69,9 @@ // WIN-DAG: @"?device_var@@3HA" = internal global i32 __device__ int device_var; -// NORDC-DAG: @constant_var = internal global i32 -// RDC-DAG: @constant_var = global i32 -// WIN-DAG: @"?constant_var@@3HA" = internal global i32 +// NORDC-DAG: @constant_var = internal constant i32 +// RDC-DAG: @constant_var = constant i32 +// WIN-DAG: @"?constant_var@@3HA" = internal constant i32 __constant__ int constant_var; // NORDC-DAG: @shared_var = internal global i32 @@ -105,7 +105,7 @@ extern 
__device__ int ext_device_var_def; __device__ int ext_device_var_def = 1; // NORDC-DAG: @ext_device_var_def = internal global i32 undef, // RDC-DAG: @ext_device_var_def = global i32 undef, -// WIN-DAG: @"?ext_constant_var_def@@3HA" = internal global i32 undef +// WIN-DAG: @"?ext_constant_var_def@@3HA" = internal constant i32 undef __constant__ int ext_constant_var_def = 2; #if __cplusplus > 201402L diff --git a/clang/test/CodeGenCUDA/device-var-init.cu b/clang/test/CodeGenCUDA/device-var-init.cu index 226b7e295f4b45..c80b3127337d00 100644 --- a/clang/test/CodeGenCUDA/device-var-init.cu +++ b/clang/test/CodeGenCUDA/device-var-init.cu @@ -26,8 +26,8 @@ __shared__ int s_v; // DEVICE: @s_v ={{.*}} addrspace(3) global i32 undef, // HOST: @s_v = internal global i32 undef, __constant__ int c_v; -// DEVICE: addrspace(4) externally_initialized global i32 0, -// HOST: @c_v = internal global i32 undef, +// DEVICE: addrspace(4) externally_initialized constant i32 0, +// HOST: @c_v = internal constant i32 undef, __device__ int d_v_i = 1; // DEVICE: @d_v_i ={{.*}} addrspace(1) externally_initialized global i32 1, @@ -51,15 +51,15 @@ __shared__ T s_t; // DEVICE: @s_t ={{.*}} addrspace(3) global %struct.T undef, // HOST: @s_t = internal global %struct.T undef, __constant__ T c_t; -// DEVICE: @c_t ={{.*}} addrspace(4) externally_initialized global %struct.T zeroinitializer, -// HOST: @c_t = internal global %struct.T undef, +// DEVICE: @c_t ={{.*}} addrspace(4) externally_initialized constant %struct.T zeroinitializer, +// HOST: @c_t = internal constant %struct.T undef, __device__ T d_t_i = {2}; // DEVICE: @d_t_i ={{.*}} addrspace(1) externally_initialized global %struct.T { i32 2 }, // HOST: @d_t_i = internal global %struct.T undef, __constant__ T c_t_i = {2}; -// DEVICE: @c_t_i ={{.*}} addrspace(4) externally_initialized global %struct.T { i32 2 }, -// HOST: @c_t_i = internal global %struct.T undef, +// DEVICE: @c_t_i ={{.*}} addrspace(4) externally_initialized constant %struct.T 
{ i32 2 }, +// HOST: @c_t_i = internal constant %struct.T undef, // empty constructor __device__ EC d_ec; @@ -69,8 +69,8 @@ __shared__ EC s_ec; // DEVICE: @s_ec ={{.*}} addrspace(3) global %struct.EC undef, // HOST: @s_ec = internal global %struct.EC undef, __constant__ EC c_ec; -// DEVICE: @c_ec ={{.*}} addrspace(4) externally_initialized global %struct.EC zeroinitializer, -// HOST: @c_ec = internal global %struct.EC undef +// DEVICE: @c_ec ={{.*}} addrspace(4) externally_initialized constant %struct.EC zeroinitializer, +// HOST: @c_ec = internal constant %struct.EC undef // empty destructor __device__ ED d_ed; @@ -80,8 +80,8 @@ __shared__ ED s_ed; // DEVICE: @s_ed ={{.*}} addrspace(3) global %struct.ED undef, // HOST: @s_ed = internal global %struct.ED undef, __constant__ ED c_ed; -// DEVICE: @c_ed ={{.*}} addrspace(4) externally_initialized global %struct.ED zeroinitializer, -// HOST: @c_ed = internal global %struct.ED undef, +// DEVICE: @c_ed ={{.*}} addrspace(4) externally_initialized constant %struct.ED zeroinitializer, +// HOST: @c_ed = internal constant %struct.ED undef, __device__ ECD d_ecd; // DEVICE: @d_ecd ={{.*}} addrspace(1) externally_initialized global %struct.ECD zeroinitializer, @@ -90,8 +90,8 @@ __shared__ ECD s_ecd; // DEVICE: @s_ecd ={{.*}} addrspace(3) global %struct.ECD undef, // HOST: @s_ecd = internal global %struct.ECD undef, __constant__ ECD c_ecd; -// DEVICE: @c_ecd ={{.*}} addrspace(4) externally_initialized global %struct.ECD zeroinitializer, -// HOST: @c_ecd = internal global %struct.ECD undef, +// DEVICE: @c_ecd ={{.*}} addrspace(4) externally_initialized constant %struct.ECD zeroinitializer, +// HOST: @c_ecd = internal constant %struct.ECD undef, // empty templated constructor -- allowed with no arguments __device__ ETC d_etc; @@ -101,15 +101,15 @@ __shared__ ETC s_etc; // DEVICE: @s_etc ={{.*}} addrspace(3) global %struct.ETC undef, // HOST: @s_etc = internal global %struct.ETC undef, __constant__ ETC c_etc; -// DEVICE: @c_etc 
={{.*}} addrspace(4) externally_initialized global %struct.ETC zeroinitializer, -// HOST: @c_etc = internal global %struct.ETC undef, +// DEVICE: @c_etc ={{.*}} addrspace(4) externally_initialized constant %struct.ETC zeroinitializer, +// HOST: @c_etc = internal constant %struct.ETC undef, __device__ NCFS d_ncfs; // DEVICE: @d_ncfs ={{.*}} addrspace(1) externally_initialized global %struct.NCFS { i32 3 } // HOST: @d_ncfs = internal global %struct.NCFS undef, __constant__ NCFS c_ncfs; -// DEVICE: @c_ncfs ={{.*}} addrspace(4) externally_initialized global %struct.NCFS { i32 3 } -// HOST: @c_ncfs = internal global %struct.NCFS undef, +// DEVICE: @c_ncfs ={{.*}} addrspace(4) externally_initialized constant %struct.NCFS { i32 3 } +// HOST: @c_ncfs = internal constant %struct.NCFS undef, // Regular base class -- allowed __device__ T_B_T d_t_b_t; @@ -119,8 +119,8 @@ __shared__ T_B_T s_t_b_t; // DEVICE: @s_t_b_t ={{.*}} addrspace(3) global %struct.T_B_T undef, // HOST: @s_t_b_t = internal global %struct.T_B_T undef, __constant__ T_B_T c_t_b_t; -// DEVICE: @c_t_b_t ={{.*}} addrspace(4) externally_initialized global %struct.T_B_T zeroinitializer, -// HOST: @c_t_b_t = internal global %struct.T_B_T undef, +// DEVICE: @c_t_b_t ={{.*}} addrspace(4) externally_initialized constant %struct.T_B_T zeroinitializer, +// HOST: @c_t_b_t = internal constant %struct.T_B_T undef, // Incapsulated object of allowed class -- allowed __device__ T_F_T d_t_f_t; @@ -130,8 +130,8 @@ __shared__ T_F_T s_t_f_t; // DEVICE: @s_t_f_t ={{.*}} addrspace(3) global %struct.T_F_T undef, // HOST: @s_t_f_t = internal global %struct.T_F_T undef, __constant__ T_F_T c_t_f_t; -// DEVICE: @c_t_f_t ={{.*}} addrspace(4) externally_initialized global %struct.T_F_T zeroinitializer, -// HOST: @c_t_f_t = internal global %struct.T_F_T undef, +// DEVICE: @c_t_f_t ={{.*}} addrspace(4) externally_initialized constant %struct.T_F_T zeroinitializer, +// HOST: @c_t_f_t = internal constant %struct.T_F_T undef, // array of 
allowed objects -- allowed __device__ T_FA_T d_t_fa_t; @@ -141,8 +141,8 @@ __shared__ T_FA_T s_t_fa_t; // DEVICE: @s_t_fa_t ={{.*}} addrspace(3) global %struct.T_FA_T undef, // HOST: @s_t_fa_t = internal global %struct.T_FA_T undef, __constant__ T_FA_T c_t_fa_t; -// DEVICE: @c_t_fa_t ={{.*}} addrspace(4) externally_initialized global %struct.T_FA_T zeroinitializer, -// HOST: @c_t_fa_t = internal global %struct.T_FA_T undef, +// DEVICE: @c_t_fa_t ={{.*}} addrspace(4) externally_initialized constant %struct.T_FA_T zeroinitializer, +// HOST: @c_t_fa_t = internal constant %struct.T_FA_T undef, // Calling empty base class initializer is OK @@ -153,8 +153,8 @@ __shared__ EC_I_EC s_ec_i_ec; // DEVICE: @s_ec_i_ec ={{.*}} addrspace(3) global %struct.EC_I_EC undef, // HOST: @s_ec_i_ec = internal global %struct.EC_I_EC undef, __constant__ EC_I_EC c_ec_i_ec; -// DEVICE: @c_ec_i_ec ={{.*}} addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer, -// HOST: @c_ec_i_ec = internal global %struct.EC_I_EC undef, +// DEVICE: @c_ec_i_ec ={{.*}} addrspace(4) externally_initialized constant %struct.EC_I_EC zeroinitializer, +// HOST: @c_ec_i_ec = internal constant %struct.EC_I_EC undef, // DEVICE: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC undef // DEVICE: @_ZZ2dfvE5s_etc = internal addrspace(3) global %struct.ETC undef diff --git a/clang/test/CodeGenCUDA/device-var-linkage.cu b/clang/test/CodeGenCUDA/device-var-linkage.cu index 3c2efb57525c9c..a4ed170c7865b1 100644 --- a/clang/test/CodeGenCUDA/device-var-linkage.cu +++ b/clang/test/CodeGenCUDA/device-var-linkage.cu @@ -20,9 +20,9 @@ // NORDC-H-DAG: @v1 = internal global i32 undef // RDC-H-DAG: @v1 = global i32 undef __device__ int v1; -// DEV-DAG: @v2 = addrspace(4) externally_initialized global i32 0 -// NORDC-H-DAG: @v2 = internal global i32 undef -// RDC-H-DAG: @v2 = global i32 undef +// DEV-DAG: @v2 = addrspace(4) externally_initialized constant i32 0 +// NORDC-H-DAG: @v2 = internal constant i32 undef 
+// RDC-H-DAG: @v2 = constant i32 undef __constant__ int v2; // DEV-DAG: @v3 = addrspace(1) externally_initialized global ptr addrspace(1) null // NORDC-H-DAG: @v3 = internal externally_initialized global ptr null @@ -48,10 +48,10 @@ extern __managed__ int ev3; // HOST-DAG: @_ZL3sv1 = internal global i32 undef // CUDA-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 static __device__ int sv1; -// NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized global i32 0 -// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized global i32 0 -// HOST-DAG: @_ZL3sv2 = internal global i32 undef -// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0 +// NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized constant i32 0 +// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized constant i32 0 +// HOST-DAG: @_ZL3sv2 = internal constant i32 undef +// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized constant i32 0 static __constant__ int sv2; // NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global ptr addrspace(1) null // RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global ptr addrspace(1) null diff --git a/clang/test/CodeGenCUDA/filter-decl.cu b/clang/test/CodeGenCUDA/filter-decl.cu index 0f4691f7c8aa7c..eff4ece42205a5 100644 --- a/clang/test/CodeGenCUDA/filter-decl.cu +++ b/clang/test/CodeGenCUDA/filter-decl.cu @@ -9,8 +9,8 @@ // CHECK-DEVICE-NOT: module asm "file scope asm is host only" __asm__("file scope asm is host only"); -// CHECK-HOST: constantdata = internal global -// CHECK-DEVICE: constantdata = {{(dso_local )?}}externally_initialized global +// CHECK-HOST: constantdata = internal constant +// CHECK-DEVICE: constantdata = {{(dso_local )?}}externally_initialized constant __constant__ char constantdata[256]; // CHECK-HOST: devicedata = internal global diff --git 
a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu index 80655c2d296047..3c1a3f62419ca0 100644 --- a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu +++ b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu @@ -50,8 +50,8 @@ static __device__ int x5; } // Check a static constant variable referenced by host is externalized. -// DEV-DAG: @_ZL1y ={{.*}} addrspace(4) externally_initialized global i32 0 -// HOST-DAG: @_ZL1y = internal global i32 undef +// DEV-DAG: @_ZL1y ={{.*}} addrspace(4) externally_initialized constant i32 0 +// HOST-DAG: @_ZL1y = internal constant i32 undef // HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00" static __constant__ int y; diff --git a/clang/test/CodeGenCUDA/static-device-var-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-rdc.cu index 16ec413397235a..29eaae63483cb7 100644 --- a/clang/test/CodeGenCUDA/static-device-var-rdc.cu +++ b/clang/test/CodeGenCUDA/static-device-var-rdc.cu @@ -57,7 +57,7 @@ // HOST-DAG: @_ZL1x = internal global i32 undef -// HOST-DAG: @_ZL1y = internal global i32 undef +// HOST-DAG: @_ZL1y = internal constant i32 undef // Test normal static device variables // INT-DEV-DAG: @_ZL1x[[FILEID:.*]] = addrspace(1) externally_initialized global i32 0 @@ -81,11 +81,11 @@ static __device__ int x; static __device__ int x2; // Test normal static device variables -// INT-DEV-DAG: @_ZL1y[[FILEID:.*]] = addrspace(4) externally_initialized global i32 0 +// INT-DEV-DAG: @_ZL1y[[FILEID:.*]] = addrspace(4) externally_initialized constant i32 0 // INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y[[FILEID:.*]]\00" // Test externalized static device variables -// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized global i32 0 +// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized constant i32 0 // EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00" static __constant__ int y; diff --git 
a/clang/test/CodeGenCUDA/template-class-static-member.cu b/clang/test/CodeGenCUDA/template-class-static-member.cu index d790d2dea66bab..e4763ded921c3c 100644 --- a/clang/test/CodeGenCUDA/template-class-static-member.cu +++ b/clang/test/CodeGenCUDA/template-class-static-member.cu @@ -38,13 +38,13 @@ const int A<T>::const_member; template class A<int>; //DEV-DAG: @_ZN1AIiE8d_memberE = internal addrspace(1) global i32 0, comdat, align 4 -//DEV-DAG: @_ZN1AIiE8c_memberE = internal addrspace(4) global i32 0, comdat, align 4 +//DEV-DAG: @_ZN1AIiE8c_memberE = internal addrspace(4) constant i32 0, comdat, align 4 //DEV-DAG: @_ZN1AIiE8m_memberE = internal addrspace(1) externally_initialized global ptr addrspace(1) null //DEV-DAG: @_ZN1AIiE12const_memberE = internal addrspace(4) constant i32 0, comdat, align 4 //DEV-NEG-NOT: @_ZN1AIiE8h_memberE //HOST-DAG: @_ZN1AIiE8h_memberE = weak_odr global i32 0, comdat, align 4 //HOST-DAG: @_ZN1AIiE8d_memberE = internal global i32 undef, comdat, align 4 -//HOST-DAG: @_ZN1AIiE8c_memberE = internal global i32 undef, comdat, align 4 +//HOST-DAG: @_ZN1AIiE8c_memberE = internal constant i32 undef, comdat, align 4 //HOST-DAG: @_ZN1AIiE8m_memberE = internal externally_initialized global ptr null //HOST-DAG: @_ZN1AIiE12const_memberE = weak_odr constant i32 0, comdat, align 4 diff --git a/clang/test/CodeGenHIP/hipspv-addr-spaces.cpp b/clang/test/CodeGenHIP/hipspv-addr-spaces.cpp index c575f49ff69716..05811bb7e1285d 100644 --- a/clang/test/CodeGenHIP/hipspv-addr-spaces.cpp +++ b/clang/test/CodeGenHIP/hipspv-addr-spaces.cpp @@ -12,7 +12,7 @@ // CHECK: @d ={{.*}} addrspace(1) externally_initialized global __device__ int d; -// CHECK: @c ={{.*}} addrspace(1) externally_initialized global +// CHECK: @c ={{.*}} addrspace(1) externally_initialized constant __constant__ int c; // CHECK: @s ={{.*}} addrspace(3) global >From c1ce40bf3d3473441b9edb3f76d1433f2633ecd9 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Thu, 26 Sep 2024 
22:54:28 +0100 Subject: [PATCH 2/3] We only need to set `constant` on the Device side. --- clang/lib/CodeGen/CodeGenModule.cpp | 2 +- clang/test/CodeGenCUDA/device-stub.cu | 8 +++---- clang/test/CodeGenCUDA/device-var-init.cu | 24 +++++++++---------- clang/test/CodeGenCUDA/device-var-linkage.cu | 6 ++--- clang/test/CodeGenCUDA/filter-decl.cu | 2 +- .../CodeGenCUDA/static-device-var-no-rdc.cu | 2 +- .../test/CodeGenCUDA/static-device-var-rdc.cu | 2 +- .../template-class-static-member.cu | 2 +- 8 files changed, 24 insertions(+), 24 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 2558d1ea8eca57..25c1c496a4f27f 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -5622,7 +5622,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, emitter->finalize(GV); // If it is safe to mark the global 'constant', do so now. - GV->setConstant(D->hasAttr<CUDAConstantAttr>() || + GV->setConstant((D->hasAttr<CUDAConstantAttr>() && LangOpts.CUDAIsDevice) || (!NeedsGlobalCtor && !NeedsGlobalDtor && D->getType().isConstantStorage(getContext(), true, true))); diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu index c3b8b025448d4f..8695433f6df10c 100644 --- a/clang/test/CodeGenCUDA/device-stub.cu +++ b/clang/test/CodeGenCUDA/device-stub.cu @@ -69,9 +69,9 @@ // WIN-DAG: @"?device_var@@3HA" = internal global i32 __device__ int device_var; -// NORDC-DAG: @constant_var = internal constant i32 -// RDC-DAG: @constant_var = constant i32 -// WIN-DAG: @"?constant_var@@3HA" = internal constant i32 +// NORDC-DAG: @constant_var = internal global i32 +// RDC-DAG: @constant_var = global i32 +// WIN-DAG: @"?constant_var@@3HA" = internal global i32 __constant__ int constant_var; // NORDC-DAG: @shared_var = internal global i32 @@ -105,7 +105,7 @@ extern __device__ int ext_device_var_def; __device__ int ext_device_var_def = 1; // NORDC-DAG: @ext_device_var_def 
= internal global i32 undef, // RDC-DAG: @ext_device_var_def = global i32 undef, -// WIN-DAG: @"?ext_constant_var_def@@3HA" = internal constant i32 undef +// WIN-DAG: @"?ext_constant_var_def@@3HA" = internal global i32 undef __constant__ int ext_constant_var_def = 2; #if __cplusplus > 201402L diff --git a/clang/test/CodeGenCUDA/device-var-init.cu b/clang/test/CodeGenCUDA/device-var-init.cu index c80b3127337d00..9d62e4126b430d 100644 --- a/clang/test/CodeGenCUDA/device-var-init.cu +++ b/clang/test/CodeGenCUDA/device-var-init.cu @@ -27,7 +27,7 @@ __shared__ int s_v; // HOST: @s_v = internal global i32 undef, __constant__ int c_v; // DEVICE: addrspace(4) externally_initialized constant i32 0, -// HOST: @c_v = internal constant i32 undef, +// HOST: @c_v = internal global i32 undef, __device__ int d_v_i = 1; // DEVICE: @d_v_i ={{.*}} addrspace(1) externally_initialized global i32 1, @@ -52,14 +52,14 @@ __shared__ T s_t; // HOST: @s_t = internal global %struct.T undef, __constant__ T c_t; // DEVICE: @c_t ={{.*}} addrspace(4) externally_initialized constant %struct.T zeroinitializer, -// HOST: @c_t = internal constant %struct.T undef, +// HOST: @c_t = internal global %struct.T undef, __device__ T d_t_i = {2}; // DEVICE: @d_t_i ={{.*}} addrspace(1) externally_initialized global %struct.T { i32 2 }, // HOST: @d_t_i = internal global %struct.T undef, __constant__ T c_t_i = {2}; // DEVICE: @c_t_i ={{.*}} addrspace(4) externally_initialized constant %struct.T { i32 2 }, -// HOST: @c_t_i = internal constant %struct.T undef, +// HOST: @c_t_i = internal global %struct.T undef, // empty constructor __device__ EC d_ec; @@ -70,7 +70,7 @@ __shared__ EC s_ec; // HOST: @s_ec = internal global %struct.EC undef, __constant__ EC c_ec; // DEVICE: @c_ec ={{.*}} addrspace(4) externally_initialized constant %struct.EC zeroinitializer, -// HOST: @c_ec = internal constant %struct.EC undef +// HOST: @c_ec = internal global %struct.EC undef // empty destructor __device__ ED d_ed; @@ -81,7 +81,7 
@@ __shared__ ED s_ed; // HOST: @s_ed = internal global %struct.ED undef, __constant__ ED c_ed; // DEVICE: @c_ed ={{.*}} addrspace(4) externally_initialized constant %struct.ED zeroinitializer, -// HOST: @c_ed = internal constant %struct.ED undef, +// HOST: @c_ed = internal global %struct.ED undef, __device__ ECD d_ecd; // DEVICE: @d_ecd ={{.*}} addrspace(1) externally_initialized global %struct.ECD zeroinitializer, @@ -91,7 +91,7 @@ __shared__ ECD s_ecd; // HOST: @s_ecd = internal global %struct.ECD undef, __constant__ ECD c_ecd; // DEVICE: @c_ecd ={{.*}} addrspace(4) externally_initialized constant %struct.ECD zeroinitializer, -// HOST: @c_ecd = internal constant %struct.ECD undef, +// HOST: @c_ecd = internal global %struct.ECD undef, // empty templated constructor -- allowed with no arguments __device__ ETC d_etc; @@ -102,14 +102,14 @@ __shared__ ETC s_etc; // HOST: @s_etc = internal global %struct.ETC undef, __constant__ ETC c_etc; // DEVICE: @c_etc ={{.*}} addrspace(4) externally_initialized constant %struct.ETC zeroinitializer, -// HOST: @c_etc = internal constant %struct.ETC undef, +// HOST: @c_etc = internal global %struct.ETC undef, __device__ NCFS d_ncfs; // DEVICE: @d_ncfs ={{.*}} addrspace(1) externally_initialized global %struct.NCFS { i32 3 } // HOST: @d_ncfs = internal global %struct.NCFS undef, __constant__ NCFS c_ncfs; // DEVICE: @c_ncfs ={{.*}} addrspace(4) externally_initialized constant %struct.NCFS { i32 3 } -// HOST: @c_ncfs = internal constant %struct.NCFS undef, +// HOST: @c_ncfs = internal global %struct.NCFS undef, // Regular base class -- allowed __device__ T_B_T d_t_b_t; @@ -120,7 +120,7 @@ __shared__ T_B_T s_t_b_t; // HOST: @s_t_b_t = internal global %struct.T_B_T undef, __constant__ T_B_T c_t_b_t; // DEVICE: @c_t_b_t ={{.*}} addrspace(4) externally_initialized constant %struct.T_B_T zeroinitializer, -// HOST: @c_t_b_t = internal constant %struct.T_B_T undef, +// HOST: @c_t_b_t = internal global %struct.T_B_T undef, // Incapsulated 
object of allowed class -- allowed __device__ T_F_T d_t_f_t; @@ -131,7 +131,7 @@ __shared__ T_F_T s_t_f_t; // HOST: @s_t_f_t = internal global %struct.T_F_T undef, __constant__ T_F_T c_t_f_t; // DEVICE: @c_t_f_t ={{.*}} addrspace(4) externally_initialized constant %struct.T_F_T zeroinitializer, -// HOST: @c_t_f_t = internal constant %struct.T_F_T undef, +// HOST: @c_t_f_t = internal global %struct.T_F_T undef, // array of allowed objects -- allowed __device__ T_FA_T d_t_fa_t; @@ -142,7 +142,7 @@ __shared__ T_FA_T s_t_fa_t; // HOST: @s_t_fa_t = internal global %struct.T_FA_T undef, __constant__ T_FA_T c_t_fa_t; // DEVICE: @c_t_fa_t ={{.*}} addrspace(4) externally_initialized constant %struct.T_FA_T zeroinitializer, -// HOST: @c_t_fa_t = internal constant %struct.T_FA_T undef, +// HOST: @c_t_fa_t = internal global %struct.T_FA_T undef, // Calling empty base class initializer is OK @@ -154,7 +154,7 @@ __shared__ EC_I_EC s_ec_i_ec; // HOST: @s_ec_i_ec = internal global %struct.EC_I_EC undef, __constant__ EC_I_EC c_ec_i_ec; // DEVICE: @c_ec_i_ec ={{.*}} addrspace(4) externally_initialized constant %struct.EC_I_EC zeroinitializer, -// HOST: @c_ec_i_ec = internal constant %struct.EC_I_EC undef, +// HOST: @c_ec_i_ec = internal global %struct.EC_I_EC undef, // DEVICE: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC undef // DEVICE: @_ZZ2dfvE5s_etc = internal addrspace(3) global %struct.ETC undef diff --git a/clang/test/CodeGenCUDA/device-var-linkage.cu b/clang/test/CodeGenCUDA/device-var-linkage.cu index a4ed170c7865b1..4c57323d85f9dd 100644 --- a/clang/test/CodeGenCUDA/device-var-linkage.cu +++ b/clang/test/CodeGenCUDA/device-var-linkage.cu @@ -21,8 +21,8 @@ // RDC-H-DAG: @v1 = global i32 undef __device__ int v1; // DEV-DAG: @v2 = addrspace(4) externally_initialized constant i32 0 -// NORDC-H-DAG: @v2 = internal constant i32 undef -// RDC-H-DAG: @v2 = constant i32 undef +// NORDC-H-DAG: @v2 = internal global i32 undef +// RDC-H-DAG: @v2 = global i32 undef 
__constant__ int v2; // DEV-DAG: @v3 = addrspace(1) externally_initialized global ptr addrspace(1) null // NORDC-H-DAG: @v3 = internal externally_initialized global ptr null @@ -50,7 +50,7 @@ extern __managed__ int ev3; static __device__ int sv1; // NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized constant i32 0 // RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized constant i32 0 -// HOST-DAG: @_ZL3sv2 = internal constant i32 undef +// HOST-DAG: @_ZL3sv2 = internal global i32 undef // CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized constant i32 0 static __constant__ int sv2; // NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global ptr addrspace(1) null diff --git a/clang/test/CodeGenCUDA/filter-decl.cu b/clang/test/CodeGenCUDA/filter-decl.cu index eff4ece42205a5..02dacd0ad8ef41 100644 --- a/clang/test/CodeGenCUDA/filter-decl.cu +++ b/clang/test/CodeGenCUDA/filter-decl.cu @@ -9,7 +9,7 @@ // CHECK-DEVICE-NOT: module asm "file scope asm is host only" __asm__("file scope asm is host only"); -// CHECK-HOST: constantdata = internal constant +// CHECK-HOST: constantdata = internal global // CHECK-DEVICE: constantdata = {{(dso_local )?}}externally_initialized constant __constant__ char constantdata[256]; diff --git a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu index 3c1a3f62419ca0..e92b00345e00c2 100644 --- a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu +++ b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu @@ -51,7 +51,7 @@ static __device__ int x5; // Check a static constant variable referenced by host is externalized. 
// DEV-DAG: @_ZL1y ={{.*}} addrspace(4) externally_initialized constant i32 0 -// HOST-DAG: @_ZL1y = internal constant i32 undef +// HOST-DAG: @_ZL1y = internal global i32 undef // HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00" static __constant__ int y; diff --git a/clang/test/CodeGenCUDA/static-device-var-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-rdc.cu index 29eaae63483cb7..9d2811f9385e1e 100644 --- a/clang/test/CodeGenCUDA/static-device-var-rdc.cu +++ b/clang/test/CodeGenCUDA/static-device-var-rdc.cu @@ -57,7 +57,7 @@ // HOST-DAG: @_ZL1x = internal global i32 undef -// HOST-DAG: @_ZL1y = internal constant i32 undef +// HOST-DAG: @_ZL1y = internal global i32 undef // Test normal static device variables // INT-DEV-DAG: @_ZL1x[[FILEID:.*]] = addrspace(1) externally_initialized global i32 0 diff --git a/clang/test/CodeGenCUDA/template-class-static-member.cu b/clang/test/CodeGenCUDA/template-class-static-member.cu index e4763ded921c3c..b614cd9dcbb14d 100644 --- a/clang/test/CodeGenCUDA/template-class-static-member.cu +++ b/clang/test/CodeGenCUDA/template-class-static-member.cu @@ -45,6 +45,6 @@ template class A<int>; //HOST-DAG: @_ZN1AIiE8h_memberE = weak_odr global i32 0, comdat, align 4 //HOST-DAG: @_ZN1AIiE8d_memberE = internal global i32 undef, comdat, align 4 -//HOST-DAG: @_ZN1AIiE8c_memberE = internal constant i32 undef, comdat, align 4 +//HOST-DAG: @_ZN1AIiE8c_memberE = internal global i32 undef, comdat, align 4 //HOST-DAG: @_ZN1AIiE8m_memberE = internal externally_initialized global ptr null //HOST-DAG: @_ZN1AIiE12const_memberE = weak_odr constant i32 0, comdat, align 4 >From f7be5f4f5e1694dc8a8f46251399cb367069a8fc Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Sat, 28 Sep 2024 01:19:22 +0100 Subject: [PATCH 3/3] Extend externally_initialized testing to `constant`s. 
--- .../Transforms/GlobalOpt/externally-initialized.ll | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/llvm/test/Transforms/GlobalOpt/externally-initialized.ll b/llvm/test/Transforms/GlobalOpt/externally-initialized.ll index 7a8244ea8297a6..f0ee7830f273a0 100644 --- a/llvm/test/Transforms/GlobalOpt/externally-initialized.ll +++ b/llvm/test/Transforms/GlobalOpt/externally-initialized.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -S -passes=globalopt | FileCheck %s +; RUN: opt < %s -passes=early-cse | opt -S -passes=globalopt | FileCheck %s --check-prefix=CHECK-CONSTANT ; This global is externally_initialized, which may modify the value between ; it's static initializer and any code in this module being run, so the only @@ -12,6 +13,10 @@ ; CHECK: @b = internal unnamed_addr externally_initialized global i32 undef @b = internal externally_initialized global i32 undef +; This constant global is externally_initialized, which may modify the value +; between its static const initializer and any code in this module being run, so +; the read from it cannot be constant-propagated. +@c = internal externally_initialized constant i32 42 define void @foo() { ; CHECK-LABEL: foo @@ -35,3 +40,11 @@ entry: %val = load i32, ptr @b ret i32 %val } + +define i32 @bam() { +; CHECK-CONSTANT-LABEL: bam +entry: +; CHECK-CONSTANT: %val = load i32, ptr @c + %val = load i32, ptr @c + ret i32 %val +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits