tra created this revision. tra added reviewers: jingyue, jlebar, rnk. tra added a subscriber: cfe-commits.
While __shared__ variables look like any other variable with a static storage class to compiler, they behave differently on device side. * one instance is created per block of GPUS, so standard "initialize once using guard variable" model does not quite work. * lifetime of the variables ends when the __global__ function exits. Again, it does not fit current assumption about static local vars as we will need to init them again if that function is called again. * with that in mind, deinitialization on app exit does not work either as the variable no longer exists past its kernel's exit. nvcc takes a rather dangerous shortcut and allows non-empty constructors for local __static__ variables. It calls initializer on every entry into the scope and produces a warning that there's going to be a data race as there will be many kernels doing init on many instances of that __shared__ variable. It also calls destructors on exit from the scope. Now, imagine recursive call of a function with a local __static__ variable... Until we figure out better way to deal with this, clang will only allow empty constructors for local __shared__ variables in a way identical to restrictions imposed on dynamic initializers for global variables. http://reviews.llvm.org/D20039 Files: lib/CodeGen/CGDecl.cpp lib/Sema/SemaDecl.cpp test/CodeGenCUDA/device-var-init.cu Index: test/CodeGenCUDA/device-var-init.cu =================================================================== --- test/CodeGenCUDA/device-var-init.cu +++ test/CodeGenCUDA/device-var-init.cu @@ -63,6 +63,8 @@ // static in-class field initializer. NVCC does not allow it, but // clang generates static initializer for this, so we'll accept it. +// We still can't use it on __shared__ vars as they don't allow *any* +// initializers. struct NCFS { int ncfs = 3; }; @@ -367,8 +369,13 @@ T_B_NEC t_b_nec; T_F_NEC t_f_nec; T_FA_NEC t_fa_nec; + static __shared__ EC s_ec; + static __shared__ ETC s_etc; +#if ERROR_CASE + static __shared__ NCFS s_ncfs; + // expected-error@-1 {{initialization is not supported for __shared__ variables.}} static __shared__ UC s_uc; -#if ERROR_CASE + // expected-error@-1 {{initialization is not supported for __shared__ variables.}} static __device__ int ds; // expected-error@-1 {{Within a __device__/__global__ function, only __shared__ variables may be marked "static"}} static __constant__ int dc; @@ -394,7 +401,8 @@ // CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec) // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec) // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec) -// CHECK: call void @_ZN2UCC1Ev(%struct.UC* addrspacecast (%struct.UC addrspace(3)* @_ZZ2dfvE4s_uc to %struct.UC*)) +// CHECK-NOT: call void @_ZN2ECC1Ev(%struct.EC* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC*)) +// CHECK-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC*)) // CHECK: ret void // We should not emit global init function. Index: lib/Sema/SemaDecl.cpp =================================================================== --- lib/Sema/SemaDecl.cpp +++ lib/Sema/SemaDecl.cpp @@ -10413,13 +10413,15 @@ // Perform check for initializers of device-side global variables. // CUDA allows empty constructors as initializers (see E.2.3.1, CUDA // 7.5). CUDA also allows constant initializers for __constant__ and - // __device__ variables. + // __device__ variables. We also must have the same checks applied + // to all __shared__ variables whether they are local or + // not. if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { const Expr *Init = VD->getInit(); - const bool IsGlobal = VD->hasGlobalStorage() && !VD->isStaticLocal(); - if (Init && IsGlobal && + if (Init && VD->hasGlobalStorage() && (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() || VD->hasAttr<CUDASharedAttr>())) { + assert((!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>())); bool AllowedInit = false; if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) AllowedInit = Index: lib/CodeGen/CGDecl.cpp =================================================================== --- lib/CodeGen/CGDecl.cpp +++ lib/CodeGen/CGDecl.cpp @@ -371,8 +371,15 @@ llvm::GlobalVariable *var = cast<llvm::GlobalVariable>(addr->stripPointerCasts()); + + // CUDA's local and local static __shared__ variables should not + // have any non-empty initializers which is ensured by Sema. + // Whatever initializer such variable may have when it gets here is + // a no-op and should not be emitted. + bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + D.hasAttr<CUDASharedAttr>(); // If this value has an initializer, emit it. - if (D.getInit()) + if (D.getInit() && !isCudaSharedVar) var = AddInitializerToStaticVarDecl(D, var); var->setAlignment(alignment.getQuantity()); @@ -1874,4 +1881,3 @@ return; getOpenMPRuntime().emitUserDefinedReduction(CGF, D); } -
Index: test/CodeGenCUDA/device-var-init.cu =================================================================== --- test/CodeGenCUDA/device-var-init.cu +++ test/CodeGenCUDA/device-var-init.cu @@ -63,6 +63,8 @@ // static in-class field initializer. NVCC does not allow it, but // clang generates static initializer for this, so we'll accept it. +// We still can't use it on __shared__ vars as they don't allow *any* +// initializers. struct NCFS { int ncfs = 3; }; @@ -367,8 +369,13 @@ T_B_NEC t_b_nec; T_F_NEC t_f_nec; T_FA_NEC t_fa_nec; + static __shared__ EC s_ec; + static __shared__ ETC s_etc; +#if ERROR_CASE + static __shared__ NCFS s_ncfs; + // expected-error@-1 {{initialization is not supported for __shared__ variables.}} static __shared__ UC s_uc; -#if ERROR_CASE + // expected-error@-1 {{initialization is not supported for __shared__ variables.}} static __device__ int ds; // expected-error@-1 {{Within a __device__/__global__ function, only __shared__ variables may be marked "static"}} static __constant__ int dc; @@ -394,7 +401,8 @@ // CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec) // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec) // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec) -// CHECK: call void @_ZN2UCC1Ev(%struct.UC* addrspacecast (%struct.UC addrspace(3)* @_ZZ2dfvE4s_uc to %struct.UC*)) +// CHECK-NOT: call void @_ZN2ECC1Ev(%struct.EC* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC*)) +// CHECK-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC*)) // CHECK: ret void // We should not emit global init function. Index: lib/Sema/SemaDecl.cpp =================================================================== --- lib/Sema/SemaDecl.cpp +++ lib/Sema/SemaDecl.cpp @@ -10413,13 +10413,15 @@ // Perform check for initializers of device-side global variables. // CUDA allows empty constructors as initializers (see E.2.3.1, CUDA // 7.5). CUDA also allows constant initializers for __constant__ and - // __device__ variables. + // __device__ variables. We also must have the same checks applied + // to all __shared__ variables whether they are local or + // not. if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { const Expr *Init = VD->getInit(); - const bool IsGlobal = VD->hasGlobalStorage() && !VD->isStaticLocal(); - if (Init && IsGlobal && + if (Init && VD->hasGlobalStorage() && (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() || VD->hasAttr<CUDASharedAttr>())) { + assert((!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>())); bool AllowedInit = false; if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) AllowedInit = Index: lib/CodeGen/CGDecl.cpp =================================================================== --- lib/CodeGen/CGDecl.cpp +++ lib/CodeGen/CGDecl.cpp @@ -371,8 +371,15 @@ llvm::GlobalVariable *var = cast<llvm::GlobalVariable>(addr->stripPointerCasts()); + + // CUDA's local and local static __shared__ variables should not + // have any non-empty initializers which is ensured by Sema. + // Whatever initializer such variable may have when it gets here is + // a no-op and should not be emitted. + bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + D.hasAttr<CUDASharedAttr>(); // If this value has an initializer, emit it. - if (D.getInit()) + if (D.getInit() && !isCudaSharedVar) var = AddInitializerToStaticVarDecl(D, var); var->setAlignment(alignment.getQuantity()); @@ -1874,4 +1881,3 @@ return; getOpenMPRuntime().emitUserDefinedReduction(CGF, D); } -
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits