Author: Nick Sarnie Date: 2025-10-22T14:47:29Z New Revision: f6ba21389fb0daad19bbb36a60cbfb4e7ef4efa2
URL: https://github.com/llvm/llvm-project/commit/f6ba21389fb0daad19bbb36a60cbfb4e7ef4efa2 DIFF: https://github.com/llvm/llvm-project/commit/f6ba21389fb0daad19bbb36a60cbfb4e7ef4efa2.diff LOG: [clang] Fix inconsistencies with the device_kernel attr on different targets (#161905) The original [change](https://github.com/llvm/llvm-project/pull/137882) unifying the device kernel attributes had some inexplicable behavior, such as `amdgpu_kernel` resulting in a function ending up with the `spir_kernel` CC but `nvptx_kernel` not doing the same, both cases compiling for SPIR. There was also a [crash](https://github.com/llvm/llvm-project/issues/161077). `sycl_kernel` is now separated out from `device_kernel`, but still there was some weird behavior for the remaining spellings. For the target-specific spellings (`nvptx_kernel` and `amdgpu_kernel`), while not technically required, we warn and ignore the attribute if the spelling doesn't match the target because it's weird from the user's point of view to allow it. Also we make sure that any valid usage actually applies the CC to the generated `llvm:Function`. This worked for `NVPTX` already but was missing for `SPIR/SPIR-V` and `AMDGPU`, it needs to be explicitly done in `TargetInfo`. This allows us to remove the `amdgpu_kernel` specific handing we had. That special handling was previously required because it was the only variation that was allowed on a type, and thus had a separate way to propagate the CC. These issues were reported [here](https://github.com/llvm/llvm-project/issues/161077) and [here](https://github.com/llvm/llvm-project/pull/161349). Closes: https://github.com/llvm/llvm-project/issues/161077 --------- Signed-off-by: Sarnie, Nick <[email protected]> Added: clang/test/Sema/callingconv-devicekernel.cpp Modified: clang/docs/ReleaseNotes.rst clang/include/clang/Basic/Attr.td clang/include/clang/Basic/DiagnosticSemaKinds.td clang/lib/AST/TypePrinter.cpp clang/lib/Basic/Targets/NVPTX.h clang/lib/CodeGen/Targets/AMDGPU.cpp clang/lib/CodeGen/Targets/NVPTX.cpp clang/lib/CodeGen/Targets/SPIR.cpp clang/lib/Sema/SemaDeclAttr.cpp clang/lib/Sema/SemaType.cpp clang/test/Sema/callingconv.c Removed: ################################################################################ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index fe77f917bb801..e6e33e7a9a280 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -447,6 +447,7 @@ Bug Fixes to Attribute Support - Using ``[[gnu::cleanup(some_func)]]`` where some_func is annotated with ``[[gnu::error("some error")]]`` now correctly triggers an error. (#GH146520) - Fix a crash when the function name is empty in the `swift_name` attribute. (#GH157075) +- Fixes crashes or missing diagnostics with the `device_kernel` attribute. (#GH161905) Bug Fixes to C++ Support ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index eb48a0c01fd1e..b320f4bedba0c 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1623,7 +1623,7 @@ def SYCLKernel : InheritableAttr { let Documentation = [SYCLKernelDocs]; } -def DeviceKernel : DeclOrTypeAttr { +def DeviceKernel : InheritableAttr { let Spellings = [Clang<"device_kernel">, Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">, CustomKeyword<"__kernel">, CustomKeyword<"kernel">]; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 5ff4cc4b833d9..20b499462ae94 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -4126,6 +4126,9 @@ def warn_missing_sdksettings_for_availability_checking : Warning< "%0 availability is ignored without a valid 'SDKSettings.json' in the SDK">, InGroup<DiagGroup<"ignored-availability-without-sdk-settings">>; +def err_hidden_device_kernel + : Error<"%0 is specified as a device kernel but it is not externally visible">; + // Thread Safety Attributes def warn_thread_attribute_ignored : Warning< "ignoring %0 attribute because its argument is invalid">, diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 2da7789fe8117..c18b2eafc722c 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -2147,9 +2147,6 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, } case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break; case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break; - case attr::DeviceKernel: - OS << T->getAttr()->getSpelling(); - break; case attr::IntelOclBicc: OS << "inteloclbicc"; break; diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index 33c29586359be..f5c8396f398aa 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -200,7 +200,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo { // a host function. if (HostTarget) return HostTarget->checkCallingConvention(CC); - return CCCR_Warning; + return CC == CC_DeviceKernel ? CCCR_OK : CCCR_Warning; } bool hasBitIntType() const override { return true; } diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 0bc4b4b7025f2..e4ad078dab197 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -439,9 +439,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); - if (FD) + if (FD) { setFunctionDeclAttributes(FD, F, M); - + if (FD->hasAttr<DeviceKernelAttr>() && !M.getLangOpts().OpenCL) + F->setCallingConv(getDeviceKernelCallingConv()); + } if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts) F->addFnAttr("amdgpu-ieee", "false"); } @@ -658,7 +660,7 @@ llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel( // kernel address (only the kernel descriptor). auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name, &Mod); - F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); + F->setCallingConv(getDeviceKernelCallingConv()); llvm::AttrBuilder KernelAttrs(C); // FIXME: The invoke isn't applying the right attributes either diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 53f2fc4a040a7..f6715861d91bc 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -264,7 +264,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( // And kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); if (FD->hasAttr<CUDAGlobalAttr>()) { - F->setCallingConv(llvm::CallingConv::PTX_Kernel); + F->setCallingConv(getDeviceKernelCallingConv()); for (auto IV : llvm::enumerate(FD->parameters())) if (IV.value()->hasAttr<CUDAGridConstantAttr>()) @@ -278,7 +278,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( } // Attach kernel metadata directly if compiling for NVPTX. if (FD->hasAttr<DeviceKernelAttr>()) - F->setCallingConv(llvm::CallingConv::PTX_Kernel); + F->setCallingConv(getDeviceKernelCallingConv()); } void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV, diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index 80e096ecf5ae9..15d0b353d748c 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -64,6 +64,8 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo { llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const override; + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const override; }; class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo { public: @@ -268,6 +270,22 @@ CommonSPIRTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::ConstantPointerNull::get(NPT), PT); } +void CommonSPIRTargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (M.getLangOpts().OpenCL || GV->isDeclaration()) + return; + + const FunctionDecl *FD = dyn_cast<FunctionDecl>(D); + if (!FD) + return; + + llvm::Function *F = dyn_cast<llvm::Function>(GV); + assert(F && "Expected GlobalValue to be a Function"); + + if (FD->hasAttr<DeviceKernelAttr>()) + F->setCallingConv(getDeviceKernelCallingConv()); +} + LangAS SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const { @@ -292,19 +310,23 @@ SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, void SPIRVTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { - if (!M.getLangOpts().HIP || - M.getTarget().getTriple().getVendor() != llvm::Triple::AMD) - return; if (GV->isDeclaration()) return; - auto F = dyn_cast<llvm::Function>(GV); - if (!F) + const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) return; - auto FD = dyn_cast_or_null<FunctionDecl>(D); - if (!FD) + llvm::Function *F = dyn_cast<llvm::Function>(GV); + assert(F && "Expected GlobalValue to be a Function"); + + if (FD->hasAttr<DeviceKernelAttr>()) + F->setCallingConv(getDeviceKernelCallingConv()); + + if (!M.getLangOpts().HIP || + M.getTarget().getTriple().getVendor() != llvm::Triple::AMD) return; + if (!FD->hasAttr<CUDAGlobalAttr>()) return; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 9475b8a684082..964a2a791e18f 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5206,16 +5206,36 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) { static void handleDeviceKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { const auto *FD = dyn_cast_or_null<FunctionDecl>(D); bool IsFunctionTemplate = FD && FD->getDescribedFunctionTemplate(); - if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) { + llvm::Triple Triple = S.getASTContext().getTargetInfo().getTriple(); + const LangOptions &LangOpts = S.getLangOpts(); + // OpenCL has its own error messages. + if (!LangOpts.OpenCL && FD && !FD->isExternallyVisible()) { + S.Diag(AL.getLoc(), diag::err_hidden_device_kernel) << FD; + AL.setInvalid(); + return; + } + if (Triple.isNVPTX()) { handleGlobalAttr(S, D, AL); } else { // OpenCL C++ will throw a more specific error. - if (!S.getLangOpts().OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) { + if (!LangOpts.OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) { S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type_str) << AL << AL.isRegularKeywordAttribute() << "functions"; + AL.setInvalid(); + return; } handleSimpleAttribute<DeviceKernelAttr>(S, D, AL); } + // TODO: isGPU() should probably return true for SPIR. + bool TargetDeviceEnvironment = Triple.isGPU() || Triple.isSPIR() || + LangOpts.isTargetDevice() || LangOpts.OpenCL; + if (!TargetDeviceEnvironment) { + S.Diag(AL.getLoc(), diag::warn_cconv_unsupported) + << AL << (int)Sema::CallingConventionIgnoredReason::ForThisTarget; + AL.setInvalid(); + return; + } + // Make sure we validate the CC with the target // and warn/error if necessary. handleCallConvAttr(S, D, AL); diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 7c1fb12a549e6..280b3c92cce14 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -134,7 +134,6 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr, case ParsedAttr::AT_VectorCall: \ case ParsedAttr::AT_AArch64VectorPcs: \ case ParsedAttr::AT_AArch64SVEPcs: \ - case ParsedAttr::AT_DeviceKernel: \ case ParsedAttr::AT_MSABI: \ case ParsedAttr::AT_SysVABI: \ case ParsedAttr::AT_Pcs: \ @@ -3786,7 +3785,8 @@ static CallingConv getCCForDeclaratorChunk( } } } - for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) { + for (const ParsedAttr &AL : llvm::concat<ParsedAttr>( + D.getDeclSpec().getAttributes(), D.getAttributes())) { if (AL.getKind() == ParsedAttr::AT_DeviceKernel) { CC = CC_DeviceKernel; break; @@ -7569,8 +7569,6 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) { return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr); case ParsedAttr::AT_ArmStreaming: return createSimpleAttr<ArmStreamingAttr>(Ctx, Attr); - case ParsedAttr::AT_DeviceKernel: - return createSimpleAttr<DeviceKernelAttr>(Ctx, Attr); case ParsedAttr::AT_Pcs: { // The attribute may have had a fixit applied where we treated an // identifier as a string literal. The contents of the string are valid, @@ -8809,16 +8807,6 @@ static void HandleHLSLParamModifierAttr(TypeProcessingState &State, } } -static bool isMultiSubjectAttrAllowedOnType(const ParsedAttr &Attr) { - // The DeviceKernel attribute is shared for many targets, and - // it is only allowed to be a type attribute with the AMDGPU - // spelling, so skip processing the attr as a type attr - // unless it has that spelling. - if (Attr.getKind() != ParsedAttr::AT_DeviceKernel) - return true; - return DeviceKernelAttr::isAMDGPUSpelling(Attr); -} - static void processTypeAttrs(TypeProcessingState &state, QualType &type, TypeAttrLocation TAL, const ParsedAttributesView &attrs, @@ -9072,8 +9060,6 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, break; [[fallthrough]]; FUNCTION_TYPE_ATTRS_CASELIST: - if (!isMultiSubjectAttrAllowedOnType(attr)) - break; attr.setUsedAsTypeAttr(); diff --git a/clang/test/Sema/callingconv-devicekernel.cpp b/clang/test/Sema/callingconv-devicekernel.cpp new file mode 100644 index 0000000000000..f5da873fb68bd --- /dev/null +++ b/clang/test/Sema/callingconv-devicekernel.cpp @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda- -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple spir64 -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple spirv64 -fsyntax-only -verify %s + +[[clang::device_kernel]] void kernel1() {} + +namespace { +[[clang::device_kernel]] void kernel2() {} // expected-error {{'kernel2' is specified as a device kernel but it is not externally visible}} +} + +namespace ns { + [[clang::device_kernel]] void kernel3() {} +} + +[[clang::device_kernel]] static void kernel4() {} // expected-error {{'kernel4' is specified as a device kernel but it is not externally visible}} diff --git a/clang/test/Sema/callingconv.c b/clang/test/Sema/callingconv.c index f0b8b80a32974..28342b56af39a 100644 --- a/clang/test/Sema/callingconv.c +++ b/clang/test/Sema/callingconv.c @@ -55,6 +55,10 @@ int __attribute__((aarch64_vector_pcs)) aavpcs(void); // expected-warning {{'aar int __attribute__((aarch64_sve_pcs)) aasvepcs(void); // expected-warning {{'aarch64_sve_pcs' calling convention is not supported for this target}} int __attribute__((amdgpu_kernel)) amdgpu_kernel(void); // expected-warning {{'amdgpu_kernel' calling convention is not supported for this target}} +int __attribute__((device_kernel)) device_kernel(void) { // expected-warning {{'device_kernel' calling convention is not supported for this target}} +} +int __attribute__((sycl_kernel)) sycl_kernel(void) { // expected-warning {{'sycl_kernel' attribute ignored}} +} // PR6361 void ctest3(); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
