yaxunl created this revision.
yaxunl added a reviewer: rjmccall.
amdgcn does not support zeroinitializer for CUDS shared variable.
Patch by Greg Rodgers.
Lit test added by Yaxun Liu.
https://reviews.llvm.org/D44985
Files:
lib/CodeGen/CGDecl.cpp
test/CodeGenCUDA/device-var-init.cu
Index: test/CodeGenCUDA/device-var-init.cu
===================================================================
--- test/CodeGenCUDA/device-var-init.cu
+++ test/CodeGenCUDA/device-var-init.cu
@@ -1,10 +1,14 @@
// REQUIRES: nvptx-registered-target
+// REQUIRES: amdgpu-registered-target
// Make sure we don't allow dynamic initialization for device
// variables, but accept empty constructors allowed by CUDA.
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \
-// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck %s
+// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,NVPTX %s
+
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \
+// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s
#ifdef __clang__
#include "Inputs/cuda.h"
@@ -105,68 +109,116 @@
__constant__ EC_I_EC c_ec_i_ec;
// CHECK: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer,
+// NVPTX: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC zeroinitializer
+// AMDGCN: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC undef
+// NVPTX: @_ZZ2dfvE5s_etc = internal addrspace(3) global %struct.ETC zeroinitializer
+// AMDGCN: @_ZZ2dfvE5s_etc = internal addrspace(3) global %struct.ETC undef
+
// We should not emit global initializers for device-side variables.
// CHECK-NOT: @__cxx_global_var_init
// Make sure that initialization restrictions do not apply to local
// variables.
__device__ void df() {
+ // AMDGCN: %[[ec:.*]] = addrspacecast %struct.EC addrspace(5)* %ec to %struct.EC*
+ // AMDGCN: %[[ed:.*]] = addrspacecast %struct.ED addrspace(5)* %ed to %struct.ED*
+ // AMDGCN: %[[ecd:.*]] = addrspacecast %struct.ECD addrspace(5)* %ecd to %struct.ECD*
+ // AMDGCN: %[[etc:.*]] = addrspacecast %struct.ETC addrspace(5)* %etc to %struct.ETC*
+ // AMDGCN: %[[uc:.*]] = addrspacecast %struct.UC addrspace(5)* %uc to %struct.UC*
+ // AMDGCN: %[[ud:.*]] = addrspacecast %struct.UD addrspace(5)* %ud to %struct.UD*
+ // AMDGCN: %[[eci:.*]] = addrspacecast %struct.ECI addrspace(5)* %eci to %struct.ECI*
+ // AMDGCN: %[[nec:.*]] = addrspacecast %struct.NEC addrspace(5)* %nec to %struct.NEC*
+ // AMDGCN: %[[ned:.*]] = addrspacecast %struct.NED addrspace(5)* %ned to %struct.NED*
+ // AMDGCN: %[[ncv:.*]] = addrspacecast %struct.NCV addrspace(5)* %ncv to %struct.NCV*
+ // AMDGCN: %[[vd:.*]] = addrspacecast %struct.VD addrspace(5)* %vd to %struct.VD*
+ // AMDGCN: %[[ncf:.*]] = addrspacecast %struct.NCF addrspace(5)* %ncf to %struct.NCF*
+ // AMDGCN: %[[ncfs:.*]] = addrspacecast %struct.NCFS addrspace(5)* %ncfs to %struct.NCFS*
+ // AMDGCN: %[[utc:.*]] = addrspacecast %struct.UTC addrspace(5)* %utc to %struct.UTC*
+ // AMDGCN: %[[netc:.*]] = addrspacecast %struct.NETC addrspace(5)* %netc to %struct.NETC*
+ // AMDGCN: %[[ec_i_ec:.*]] = addrspacecast %struct.EC_I_EC addrspace(5)* %ec_i_ec to %struct.EC_I_EC*
+ // AMDGCN: %[[ec_i_ec1:.*]] = addrspacecast %struct.EC_I_EC1 addrspace(5)* %ec_i_ec1 to %struct.EC_I_EC1*
+ // AMDGCN: %[[t_v_t:.*]] = addrspacecast %struct.T_V_T addrspace(5)* %t_v_t to %struct.T_V_T*
+ // AMDGCN: %[[t_b_nec:.*]] = addrspacecast %struct.T_B_NEC addrspace(5)* %t_b_nec to %struct.T_B_NEC*
+ // AMDGCN: %[[t_f_nec:.*]] = addrspacecast %struct.T_F_NEC addrspace(5)* %t_f_nec to %struct.T_F_NEC*
+ // AMDGCN: %[[t_fa_nec:.*]] = addrspacecast %struct.T_FA_NEC addrspace(5)* %t_fa_nec to %struct.T_FA_NEC*
+ // AMDGCN: %[[t_b_ned:.*]] = addrspacecast %struct.T_B_NED addrspace(5)* %t_b_ned to %struct.T_B_NED*
+ // AMDGCN: %[[t_f_ned:.*]] = addrspacecast %struct.T_F_NED addrspace(5)* %t_f_ned to %struct.T_F_NED*
+ // AMDGCN: %[[t_fa_ned:.*]] = addrspacecast %struct.T_FA_NED addrspace(5)* %t_fa_ned to %struct.T_FA_NED*
+
T t;
// CHECK-NOT: call
EC ec;
- // CHECK: call void @_ZN2ECC1Ev(%struct.EC* %ec)
+ // NVPTX: call void @_ZN2ECC1Ev(%struct.EC* %ec)
+ // AMDGCN: call void @_ZN2ECC1Ev(%struct.EC* %[[ec]])
ED ed;
// CHECK-NOT: call
ECD ecd;
- // CHECK: call void @_ZN3ECDC1Ev(%struct.ECD* %ecd)
+ // NVPTX: call void @_ZN3ECDC1Ev(%struct.ECD* %ecd)
+ // AMDGCN: call void @_ZN3ECDC1Ev(%struct.ECD* %[[ecd]])
ETC etc;
- // CHECK: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %etc)
+ // NVPTX: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %etc)
+ // AMDGCN: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %[[etc]])
UC uc;
// undefined constructor -- not allowed
- // CHECK: call void @_ZN2UCC1Ev(%struct.UC* %uc)
+ // NVPTX: call void @_ZN2UCC1Ev(%struct.UC* %uc)
+ // AMDGCN: call void @_ZN2UCC1Ev(%struct.UC* %[[uc]])
UD ud;
// undefined destructor -- not allowed
// CHECK-NOT: call
ECI eci;
// empty constructor w/ initializer list -- not allowed
- // CHECK: call void @_ZN3ECIC1Ev(%struct.ECI* %eci)
+ // NVPTX: call void @_ZN3ECIC1Ev(%struct.ECI* %eci)
+ // AMDGCN: call void @_ZN3ECIC1Ev(%struct.ECI* %[[eci]])
NEC nec;
// non-empty constructor -- not allowed
- // CHECK: call void @_ZN3NECC1Ev(%struct.NEC* %nec)
+ // NVPTX: call void @_ZN3NECC1Ev(%struct.NEC* %nec)
+ // AMDGCN: call void @_ZN3NECC1Ev(%struct.NEC* %[[nec]])
// non-empty destructor -- not allowed
NED ned;
// no-constructor, virtual method -- not allowed
- // CHECK: call void @_ZN3NCVC1Ev(%struct.NCV* %ncv)
+ // NVPTX: call void @_ZN3NCVC1Ev(%struct.NCV* %ncv)
+ // AMDGCN: call void @_ZN3NCVC1Ev(%struct.NCV* %[[ncv]])
NCV ncv;
// CHECK-NOT: call
VD vd;
- // CHECK: call void @_ZN2VDC1Ev(%struct.VD* %vd)
+ // NVPTX: call void @_ZN2VDC1Ev(%struct.VD* %vd)
+ // AMDGCN: call void @_ZN2VDC1Ev(%struct.VD* %[[vd]])
NCF ncf;
- // CHECK: call void @_ZN3NCFC1Ev(%struct.NCF* %ncf)
+ // NVPTX: call void @_ZN3NCFC1Ev(%struct.NCF* %ncf)
+ // AMDGCN: call void @_ZN3NCFC1Ev(%struct.NCF* %[[ncf]])
NCFS ncfs;
- // CHECK: call void @_ZN4NCFSC1Ev(%struct.NCFS* %ncfs)
+ // NVPTX: call void @_ZN4NCFSC1Ev(%struct.NCFS* %ncfs)
+ // AMDGCN: call void @_ZN4NCFSC1Ev(%struct.NCFS* %[[ncfs]])
UTC utc;
- // CHECK: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %utc)
+ // NVPTX: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %utc)
+ // AMDGCN: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %[[utc]])
NETC netc;
- // CHECK: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc)
+ // NVPTX: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc)
+ // AMDGCN: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %[[netc]])
T_B_T t_b_t;
// CHECK-NOT: call
T_F_T t_f_t;
// CHECK-NOT: call
T_FA_T t_fa_t;
// CHECK-NOT: call
EC_I_EC ec_i_ec;
- // CHECK: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec)
+ // NVPTX: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec)
+ // AMDGCN: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %[[ec_i_ec]])
EC_I_EC1 ec_i_ec1;
- // CHECK: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1)
+ // NVPTX: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1)
+ // AMDGCN: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %[[ec_i_ec1]])
T_V_T t_v_t;
- // CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t)
+ // NVPTX: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t)
+ // AMDGCN: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %[[t_v_t]])
T_B_NEC t_b_nec;
- // CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec)
+ // NVPTX: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec)
+ // AMDGCN: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %[[t_b_nec]])
T_F_NEC t_f_nec;
- // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec)
+ // NVPTX: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec)
+ // AMDGCN: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %[[t_f_nec]])
T_FA_NEC t_fa_nec;
- // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec)
+ // NVPTX: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec)
+ // AMDGCN: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %[[t_fa_nec]])
T_B_NED t_b_ned;
// CHECK-NOT: call
T_F_NED t_f_ned;
@@ -182,14 +234,23 @@
df(); // CHECK: call void @_Z2dfv()
// Verify that we only call non-empty destructors
- // CHECK-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %t_fa_ned)
- // CHECK-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %t_f_ned)
- // CHECK-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %t_b_ned)
- // CHECK-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %vd)
- // CHECK-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %ned)
- // CHECK-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %ud)
- // CHECK-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %ecd)
- // CHECK-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %ed)
+ // NVPTX-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %t_fa_ned)
+ // NVPTX-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %t_f_ned)
+ // NVPTX-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %t_b_ned)
+ // NVPTX-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %vd)
+ // NVPTX-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %ned)
+ // NVPTX-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %ud)
+ // NVPTX-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %ecd)
+ // NVPTX-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %ed)
+
+ // AMDGCN-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %[[t_fa_ned]])
+ // AMDGCN-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %[[t_f_ned]])
+ // AMDGCN-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %[[t_b_ned]])
+ // AMDGCN-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %[[vd]])
+ // AMDGCN-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %[[ned]])
+ // AMDGCN-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %[[ud]])
+ // AMDGCN-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %[[ecd]])
+ // AMDGCN-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %[[ed]])
// CHECK-NEXT: ret void
}
Index: lib/CodeGen/CGDecl.cpp
===================================================================
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -405,6 +405,13 @@
if (D.getInit() && !isCudaSharedVar)
var = AddInitializerToStaticVarDecl(D, var);
+ // FIXME: Currently, the AMDGPU BE does not support zeroinitializer for AS3
+ // Remove the following three lines when it does.
+ if (isCudaSharedVar && (getContext().getTargetInfo().getTriple().getArch() ==
+ llvm::Triple::amdgcn))
+ var->setInitializer(
+ llvm::UndefValue::get(var->getType()->getElementType()));
+
var->setAlignment(alignment.getQuantity());
if (D.hasAttr<AnnotateAttr>())
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits