Author: Nick Sarnie Date: 2025-06-05T14:15:38Z New Revision: 3b9ebe92011b033523217a9b9a2f03f4c8c37aab
URL: https://github.com/llvm/llvm-project/commit/3b9ebe92011b033523217a9b9a2f03f4c8c37aab DIFF: https://github.com/llvm/llvm-project/commit/3b9ebe92011b033523217a9b9a2f03f4c8c37aab.diff LOG: [clang] Simplify device kernel attributes (#137882) We have multiple different attributes in clang representing device kernels for specific targets/languages. Refactor them into one attribute with different spellings to make it more easily scalable for new languages/targets. --------- Signed-off-by: Sarnie, Nick <nick.sar...@intel.com> Added: Modified: clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp clang/include/clang/AST/GlobalDecl.h clang/include/clang/Basic/Attr.td clang/include/clang/Basic/AttrDocs.td clang/include/clang/Basic/Specifiers.h clang/lib/AST/Decl.cpp clang/lib/AST/ItaniumMangle.cpp clang/lib/AST/MicrosoftMangle.cpp clang/lib/AST/Type.cpp clang/lib/AST/TypePrinter.cpp clang/lib/Basic/Targets/AArch64.cpp clang/lib/Basic/Targets/AMDGPU.h clang/lib/Basic/Targets/ARM.cpp clang/lib/Basic/Targets/BPF.h clang/lib/Basic/Targets/Mips.cpp clang/lib/Basic/Targets/SPIR.h clang/lib/Basic/Targets/SystemZ.h clang/lib/Basic/Targets/X86.h clang/lib/CodeGen/CGCall.cpp clang/lib/CodeGen/CGDebugInfo.cpp clang/lib/CodeGen/CGExpr.cpp clang/lib/CodeGen/CodeGenFunction.cpp clang/lib/CodeGen/CodeGenModule.cpp clang/lib/CodeGen/TargetInfo.cpp clang/lib/CodeGen/Targets/AMDGPU.cpp clang/lib/CodeGen/Targets/NVPTX.cpp clang/lib/CodeGen/Targets/SPIR.cpp clang/lib/CodeGen/Targets/TCE.cpp clang/lib/Sema/SemaDecl.cpp clang/lib/Sema/SemaDeclAttr.cpp clang/lib/Sema/SemaSYCL.cpp clang/lib/Sema/SemaTemplateInstantiateDecl.cpp clang/lib/Sema/SemaType.cpp clang/test/Misc/pragma-attribute-supported-attributes-list.test clang/tools/libclang/CXType.cpp llvm/include/llvm/BinaryFormat/Dwarf.def llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll Removed: ################################################################################ diff --git a/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp b/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp index c5da66a1f28b6..c21b7cab1b8da 100644 --- a/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp +++ b/clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp @@ -16,14 +16,14 @@ namespace clang::tidy::altera { void SingleWorkItemBarrierCheck::registerMatchers(MatchFinder *Finder) { // Find any function that calls barrier but does not call an ID function. - // hasAttr(attr::Kind::OpenCLKernel) restricts it to only kernel functions. + // hasAttr(attr::Kind::DeviceKernel) restricts it to only kernel functions. // FIXME: Have it accept all functions but check for a parameter that gets an // ID from one of the four ID functions. Finder->addMatcher( // Find function declarations... functionDecl( - // That are OpenCL kernels... - hasAttr(attr::Kind::OpenCLKernel), + // That are device kernels... + hasAttr(attr::Kind::DeviceKernel), // And call a barrier function (either 1.x or 2.x version)... forEachDescendant(callExpr(callee(functionDecl(hasAnyName( "barrier", "work_group_barrier")))) diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h index baf5371d2682d..97caff0198cb0 100644 --- a/clang/include/clang/AST/GlobalDecl.h +++ b/clang/include/clang/AST/GlobalDecl.h @@ -164,7 +164,7 @@ class GlobalDecl { } static KernelReferenceKind getDefaultKernelReference(const FunctionDecl *D) { - return (D->hasAttr<OpenCLKernelAttr>() || D->getLangOpts().CUDAIsDevice) + return (D->hasAttr<DeviceKernelAttr>() || D->getLangOpts().CUDAIsDevice) ? KernelReferenceKind::Kernel : KernelReferenceKind::Stub; } diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index c7834d491f453..f889e41c8699f 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -196,8 +196,10 @@ def FunctionPointer : SubsetSubject<DeclBase, "functions pointers">; def OpenCLKernelFunction - : SubsetSubject<Function, [{S->hasAttr<OpenCLKernelAttr>()}], - "kernel functions">; + : SubsetSubject<Function, [{S->getASTContext().getLangOpts().OpenCL && + DeviceKernelAttr::isOpenCLSpelling( + S->getAttr<DeviceKernelAttr>()}], + "kernel functions">; // HasFunctionProto is a more strict version of FunctionLike, so it should // never be specified in a Subjects list along with FunctionLike (due to the @@ -1515,12 +1517,6 @@ def CUDAGridConstant : InheritableAttr { let Documentation = [CUDAGridConstantAttrDocs]; } -def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> { - let Spellings = [Clang<"nvptx_kernel">]; - let Subjects = SubjectList<[Function]>; - let Documentation = [Undocumented]; -} - def HIPManaged : InheritableAttr { let Spellings = [GNU<"managed">, Declspec<"__managed__">]; let Subjects = SubjectList<[Var]>; @@ -1555,11 +1551,52 @@ def CUDAShared : InheritableAttr { } def : MutualExclusions<[CUDAConstant, CUDAShared, HIPManaged]>; -def SYCLKernel : InheritableAttr { - let Spellings = [Clang<"sycl_kernel">]; - let Subjects = SubjectList<[FunctionTmpl]>; - let LangOpts = [SYCLDevice]; - let Documentation = [SYCLKernelDocs]; +def DeviceKernel : DeclOrTypeAttr { + let Spellings = [Clang<"device_kernel">, Clang<"sycl_kernel">, + Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">, + CustomKeyword<"__kernel">, CustomKeyword<"kernel">]; + let Documentation = [DeviceKernelDocs]; + let AdditionalMembers = + [{ + static inline bool isAMDGPUSpelling(const AttributeCommonInfo& A) { + return A.getAttributeSpellingListIndex() == GNU_amdgpu_kernel || + A.getAttributeSpellingListIndex() == CXX11_clang_amdgpu_kernel || + A.getAttributeSpellingListIndex() == C23_clang_amdgpu_kernel; + } + static inline bool isAMDGPUSpelling(const AttributeCommonInfo* A) { + if(!A) return false; + return isAMDGPUSpelling(*A); + } + static inline bool isNVPTXSpelling(const AttributeCommonInfo& A) { + return A.getAttributeSpellingListIndex() == GNU_nvptx_kernel || + A.getAttributeSpellingListIndex() == CXX11_clang_nvptx_kernel || + A.getAttributeSpellingListIndex() == C23_clang_nvptx_kernel; + } + static inline bool isNVPTXSpelling(const AttributeCommonInfo* A) { + if(!A) return false; + return isNVPTXSpelling(*A); + } + static inline bool isSYCLSpelling(const AttributeCommonInfo& A) { + return A.getAttributeSpellingListIndex() == GNU_sycl_kernel || + A.getAttributeSpellingListIndex() == CXX11_clang_sycl_kernel || + A.getAttributeSpellingListIndex() == C23_clang_sycl_kernel; + } + static inline bool isSYCLSpelling(const AttributeCommonInfo* A) { + if(!A) return false; + return isSYCLSpelling(*A); + } + static inline bool isOpenCLSpelling(const AttributeCommonInfo& A) { + // Tablegen trips underscores from spellings to build the spelling + // list, but here we have the same spelling with unscores and without, + // so handle that case manually. + return A.getAttributeSpellingListIndex() == Keyword_kernel || + A.getAttrName()->getName() == "kernel"; + } + static inline bool isOpenCLSpelling(const AttributeCommonInfo* A) { + if (!A) return false; + return isOpenCLSpelling(*A); + } +}]; } def SYCLKernelEntryPoint : InheritableAttr { @@ -1625,15 +1662,6 @@ def Allocating : TypeAttr { let Documentation = [AllocatingDocs]; } -// Similar to CUDA, OpenCL attributes do not receive a [[]] spelling because -// the specification does not expose them with one currently. -def OpenCLKernel : InheritableAttr { - let Spellings = [CustomKeyword<"__kernel">, CustomKeyword<"kernel">]; - let Subjects = SubjectList<[Function], ErrorDiag>; - let Documentation = [Undocumented]; - let SimpleHandler = 1; -} - def OpenCLUnrollHint : StmtAttr { let Spellings = [GNU<"opencl_unroll_hint">]; let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt], @@ -2370,11 +2398,6 @@ def AMDGPUMaxNumWorkGroups : InheritableAttr { let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; } -def AMDGPUKernelCall : DeclOrTypeAttr { - let Spellings = [Clang<"amdgpu_kernel">]; - let Documentation = [Undocumented]; -} - def BPFPreserveAccessIndex : InheritableAttr, TargetSpecificAttr<TargetBPF> { let Spellings = [Clang<"preserve_access_index">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 40b9f8142bb69..a16218f038518 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -396,9 +396,13 @@ any option of a multiversioned function is undefined. }]; } -def SYCLKernelDocs : Documentation { +def DeviceKernelDocs : Documentation { let Category = DocCatFunction; + let Heading = "device_kernel, sycl_kernel, nvptx_kernel, amdgpu_kernel, " + "kernel, __kernel"; let Content = [{ +These attributes specify that the function represents a kernel for device offloading. +The specific semantics depend on the offloading language, target, and attribute spelling. The ``sycl_kernel`` attribute specifies that a function template will be used to outline device code and to generate an OpenCL kernel. Here is a code example of the SYCL program, which demonstrates the compiler's diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h index 491badcc804e7..698fd9da5ced1 100644 --- a/clang/include/clang/Basic/Specifiers.h +++ b/clang/include/clang/Basic/Specifiers.h @@ -289,14 +289,13 @@ namespace clang { CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp"))) CC_IntelOclBicc, // __attribute__((intel_ocl_bicc)) CC_SpirFunction, // default for OpenCL functions on SPIR target - CC_OpenCLKernel, // inferred for OpenCL kernels + CC_DeviceKernel, // __attribute__((device_kernel)) CC_Swift, // __attribute__((swiftcall)) CC_SwiftAsync, // __attribute__((swiftasynccall)) CC_PreserveMost, // __attribute__((preserve_most)) CC_PreserveAll, // __attribute__((preserve_all)) CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs)) CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs)) - CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel)) CC_M68kRTD, // __attribute__((m68k_rtd)) CC_PreserveNone, // __attribute__((preserve_none)) CC_RISCVVectorCall, // __attribute__((riscv_vector_cc)) @@ -326,7 +325,7 @@ namespace clang { case CC_X86Pascal: case CC_X86VectorCall: case CC_SpirFunction: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_Swift: case CC_SwiftAsync: case CC_M68kRTD: diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 8425e40567b27..aad2d82401111 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3541,7 +3541,7 @@ bool FunctionDecl::isExternC() const { } bool FunctionDecl::isInExternCContext() const { - if (hasAttr<OpenCLKernelAttr>()) + if (DeviceKernelAttr::isOpenCLSpelling(getAttr<DeviceKernelAttr>())) return true; return getLexicalDeclContext()->isExternCContext(); } @@ -5510,7 +5510,8 @@ FunctionDecl *FunctionDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { } bool FunctionDecl::isReferenceableKernel() const { - return hasAttr<CUDAGlobalAttr>() || hasAttr<OpenCLKernelAttr>(); + return hasAttr<CUDAGlobalAttr>() || + DeviceKernelAttr::isOpenCLSpelling(getAttr<DeviceKernelAttr>()); } BlockDecl *BlockDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) { diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index f7c620dc09df7..ecf5be220439b 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -1557,7 +1557,8 @@ void CXXNameMangler::mangleUnqualifiedName( FD && FD->hasAttr<CUDAGlobalAttr>() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; bool IsOCLDeviceStub = - FD && FD->hasAttr<OpenCLKernelAttr>() && + FD && + DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()) && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; if (IsDeviceStub) mangleDeviceStubName(II); @@ -3532,10 +3533,9 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) { case CC_AAPCS_VFP: case CC_AArch64VectorCall: case CC_AArch64SVEPCS: - case CC_AMDGPUKernelCall: case CC_IntelOclBicc: case CC_SpirFunction: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_PreserveMost: case CC_PreserveAll: case CC_M68kRTD: diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index d6339029a65c9..bc47e0506add0 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -1164,7 +1164,9 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD, ->hasAttr<CUDAGlobalAttr>())) && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; bool IsOCLDeviceStub = - ND && isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>() && + ND && isa<FunctionDecl>(ND) && + DeviceKernelAttr::isOpenCLSpelling( + ND->getAttr<DeviceKernelAttr>()) && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; if (IsDeviceStub) mangleSourceName( diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 5c990b1b02bc2..5bb39b12693fb 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3606,14 +3606,12 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) { return "aarch64_vector_pcs"; case CC_AArch64SVEPCS: return "aarch64_sve_pcs"; - case CC_AMDGPUKernelCall: - return "amdgpu_kernel"; case CC_IntelOclBicc: return "intel_ocl_bicc"; case CC_SpirFunction: return "spir_function"; - case CC_OpenCLKernel: - return "opencl_kernel"; + case CC_DeviceKernel: + return "device_kernel"; case CC_Swift: return "swiftcall"; case CC_SwiftAsync: @@ -4328,7 +4326,7 @@ bool AttributedType::isCallingConv() const { case attr::VectorCall: case attr::AArch64VectorPcs: case attr::AArch64SVEPcs: - case attr::AMDGPUKernelCall: + case attr::DeviceKernel: case attr::Pascal: case attr::MSABI: case attr::SysVABI: diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 694cd121a603b..330cfcd962825 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1100,8 +1100,8 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info, case CC_AArch64SVEPCS: OS << "__attribute__((aarch64_sve_pcs))"; break; - case CC_AMDGPUKernelCall: - OS << "__attribute__((amdgpu_kernel))"; + case CC_DeviceKernel: + OS << "__attribute__((device_kernel))"; break; case CC_IntelOclBicc: OS << " __attribute__((intel_ocl_bicc))"; @@ -1116,7 +1116,6 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info, OS << " __attribute__((regcall))"; break; case CC_SpirFunction: - case CC_OpenCLKernel: // Do nothing. These CCs are not available as attributes. break; case CC_Swift: @@ -2069,7 +2068,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, } case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break; case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break; - case attr::AMDGPUKernelCall: OS << "amdgpu_kernel"; break; + case attr::DeviceKernel: + OS << T->getAttr()->getSpelling(); + break; case attr::IntelOclBicc: OS << "inteloclbicc"; break; case attr::PreserveMost: OS << "preserve_most"; diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index d0dde3d4ce177..e8abdf9aafd82 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -1400,7 +1400,7 @@ AArch64TargetInfo::checkCallingConvention(CallingConv CC) const { case CC_PreserveMost: case CC_PreserveAll: case CC_PreserveNone: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_AArch64VectorCall: case CC_AArch64SVEPCS: case CC_Win64: @@ -1758,7 +1758,7 @@ WindowsARM64TargetInfo::checkCallingConvention(CallingConv CC) const { case CC_X86FastCall: return CCCR_Ignore; case CC_C: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_PreserveMost: case CC_PreserveAll: case CC_PreserveNone: diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 8ea544ba28b10..509128f3cf070 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -415,8 +415,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { default: return CCCR_Warning; case CC_C: - case CC_OpenCLKernel: - case CC_AMDGPUKernelCall: + case CC_DeviceKernel: return CCCR_OK; } } diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index bd12350367ce7..65d4ed1e96540 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -1404,7 +1404,7 @@ ARMTargetInfo::checkCallingConvention(CallingConv CC) const { case CC_AAPCS_VFP: case CC_Swift: case CC_SwiftAsync: - case CC_OpenCLKernel: + case CC_DeviceKernel: return CCCR_OK; default: return CCCR_Warning; @@ -1479,7 +1479,7 @@ WindowsARMTargetInfo::checkCallingConvention(CallingConv CC) const { case CC_X86VectorCall: return CCCR_Ignore; case CC_C: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_PreserveMost: case CC_PreserveAll: case CC_Swift: diff --git a/clang/lib/Basic/Targets/BPF.h b/clang/lib/Basic/Targets/BPF.h index d1f68b842348e..d9e5cf4d8a92f 100644 --- a/clang/lib/Basic/Targets/BPF.h +++ b/clang/lib/Basic/Targets/BPF.h @@ -94,7 +94,7 @@ class LLVM_LIBRARY_VISIBILITY BPFTargetInfo : public TargetInfo { default: return CCCR_Warning; case CC_C: - case CC_OpenCLKernel: + case CC_DeviceKernel: return CCCR_OK; } } diff --git a/clang/lib/Basic/Targets/Mips.cpp b/clang/lib/Basic/Targets/Mips.cpp index d693b19a29025..34837cc363a37 100644 --- a/clang/lib/Basic/Targets/Mips.cpp +++ b/clang/lib/Basic/Targets/Mips.cpp @@ -336,7 +336,7 @@ WindowsMipsTargetInfo::checkCallingConvention(CallingConv CC) const { case CC_X86VectorCall: return CCCR_Ignore; case CC_C: - case CC_OpenCLKernel: + case CC_DeviceKernel: case CC_PreserveMost: case CC_PreserveAll: case CC_Swift: diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index 470e578520939..0eaf82eee756b 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -193,7 +193,7 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo { } CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { - return (CC == CC_SpirFunction || CC == CC_OpenCLKernel) ? CCCR_OK + return (CC == CC_SpirFunction || CC == CC_DeviceKernel) ? CCCR_OK : CCCR_Warning; } diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h index 6431be0b505ce..1af6122c7048b 100644 --- a/clang/lib/Basic/Targets/SystemZ.h +++ b/clang/lib/Basic/Targets/SystemZ.h @@ -245,7 +245,7 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo { switch (CC) { case CC_C: case CC_Swift: - case CC_OpenCLKernel: + case CC_DeviceKernel: return CCCR_OK; case CC_SwiftAsync: return CCCR_Error; diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index babea81758d52..3d58be8f898c6 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -409,10 +409,11 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { case CC_Swift: case CC_X86Pascal: case CC_IntelOclBicc: - case CC_OpenCLKernel: return CCCR_OK; case CC_SwiftAsync: return CCCR_Error; + case CC_DeviceKernel: + return IsOpenCL ? CCCR_OK : CCCR_Warning; default: return CCCR_Warning; } @@ -440,7 +441,13 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { uint64_t getPointerAlignV(LangAS AddrSpace) const override { return getPointerWidthV(AddrSpace); } + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override { + TargetInfo::adjust(Diags, Opts); + IsOpenCL = Opts.OpenCL; + } +private: + bool IsOpenCL = false; }; // X86-32 generic target @@ -786,8 +793,9 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo { case CC_PreserveAll: case CC_PreserveNone: case CC_X86RegCall: - case CC_OpenCLKernel: return CCCR_OK; + case CC_DeviceKernel: + return IsOpenCL ? CCCR_OK : CCCR_Warning; default: return CCCR_Warning; } @@ -818,7 +826,6 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo { return X86TargetInfo::validateGlobalRegisterVariable(RegName, RegSize, HasSizeMismatch); } - void setMaxAtomicWidth() override { if (hasFeature("cx16")) MaxAtomicInlineWidth = 128; @@ -830,6 +837,14 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo { size_t getMaxBitIntWidth() const override { return llvm::IntegerType::MAX_INT_BITS; } + + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override { + TargetInfo::adjust(Diags, Opts); + IsOpenCL = Opts.OpenCL; + } + +private: + bool IsOpenCL = false; }; // x86-64 UEFI target @@ -915,7 +930,7 @@ class LLVM_LIBRARY_VISIBILITY WindowsX86_64TargetInfo case CC_Swift: case CC_SwiftAsync: case CC_X86RegCall: - case CC_OpenCLKernel: + case CC_DeviceKernel: return CCCR_OK; default: return CCCR_Warning; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index a67b0d8a91afb..46a5d64412275 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -81,12 +81,19 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { return llvm::CallingConv::AArch64_VectorCall; case CC_AArch64SVEPCS: return llvm::CallingConv::AArch64_SVE_VectorCall; - case CC_AMDGPUKernelCall: - return llvm::CallingConv::AMDGPU_KERNEL; case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC; - case CC_OpenCLKernel: - return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv(); + case CC_DeviceKernel: { + if (CGM.getLangOpts().OpenCL) + return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv(); + if (CGM.getTriple().isSPIROrSPIRV()) + return llvm::CallingConv::SPIR_KERNEL; + if (CGM.getTriple().isAMDGPU()) + return llvm::CallingConv::AMDGPU_KERNEL; + if (CGM.getTriple().isNVPTX()) + return llvm::CallingConv::PTX_Kernel; + llvm_unreachable("Unknown kernel calling convention"); + } case CC_PreserveMost: return llvm::CallingConv::PreserveMost; case CC_PreserveAll: @@ -284,8 +291,8 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D, if (D->hasAttr<AArch64SVEPcsAttr>()) return CC_AArch64SVEPCS; - if (D->hasAttr<AMDGPUKernelCallAttr>()) - return CC_AMDGPUKernelCall; + if (D->hasAttr<DeviceKernelAttr>()) + return CC_DeviceKernel; if (D->hasAttr<IntelOclBiccAttr>()) return CC_IntelOclBicc; @@ -533,7 +540,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const GlobalDecl GD) { assert(isa<FunctionType>(FTy)); setCUDAKernelCallingConvention(FTy, CGM, FD); - if (FD->hasAttr<OpenCLKernelAttr>() && + if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()) && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { const FunctionType *FT = FTy->getAs<FunctionType>(); CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FT); @@ -761,7 +768,7 @@ CodeGenTypes::arrangeSYCLKernelCallerDeclaration(QualType resultType, return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None, argTypes, - FunctionType::ExtInfo(CC_OpenCLKernel), + FunctionType::ExtInfo(CC_DeviceKernel), /*paramInfos=*/{}, RequiredArgs::All); } @@ -2536,7 +2543,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, NumElemsParam); } - if (TargetDecl->hasAttr<OpenCLKernelAttr>() && + if (DeviceKernelAttr::isOpenCLSpelling( + TargetDecl->getAttr<DeviceKernelAttr>()) && CallingConv != CallingConv::CC_C && CallingConv != CallingConv::CC_SpirFunction) { // Check CallingConv to avoid adding uniform-work-group-size attribute to @@ -2919,7 +2927,9 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // > For arguments to a __kernel function declared to be a pointer to a // > data type, the OpenCL compiler can assume that the pointee is always // > appropriately aligned as required by the data type. - if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>() && + if (TargetDecl && + DeviceKernelAttr::isOpenCLSpelling( + TargetDecl->getAttr<DeviceKernelAttr>()) && ParamType->isPointerType()) { QualType PTy = ParamType->getPointeeType(); if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) { diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 7cb52597d9a00..fbcc330aca6bb 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1692,9 +1692,8 @@ static unsigned getDwarfCC(CallingConv CC) { return llvm::dwarf::DW_CC_LLVM_IntelOclBicc; case CC_SpirFunction: return llvm::dwarf::DW_CC_LLVM_SpirFunction; - case CC_OpenCLKernel: - case CC_AMDGPUKernelCall: - return llvm::dwarf::DW_CC_LLVM_OpenCLKernel; + case CC_DeviceKernel: + return llvm::dwarf::DW_CC_LLVM_DeviceKernel; case CC_Swift: return llvm::dwarf::DW_CC_LLVM_Swift; case CC_SwiftAsync: diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 5fc98b6a692cc..1099a547caa5a 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5944,7 +5944,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) { } static GlobalDecl getGlobalDeclForDirectCall(const FunctionDecl *FD) { - if (FD->hasAttr<OpenCLKernelAttr>()) + if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>())) return GlobalDecl(FD, KernelReferenceKind::Stub); return GlobalDecl(FD); } @@ -6375,7 +6375,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const auto *FnType = cast<FunctionType>(PointeeType); if (const auto *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl); - FD && FD->hasAttr<OpenCLKernelAttr>()) + FD && DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>())) CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FnType); bool CFIUnchecked = diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 2ac7e9d498044..3302abad87d65 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -626,7 +626,7 @@ CodeGenFunction::getUBSanFunctionTypeHash(QualType Ty) const { void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn) { - if (!FD->hasAttr<OpenCLKernelAttr>() && !FD->hasAttr<CUDAGlobalAttr>()) + if (!FD->hasAttr<DeviceKernelAttr>() && !FD->hasAttr<CUDAGlobalAttr>()) return; llvm::LLVMContext &Context = getLLVMContext(); @@ -1598,7 +1598,8 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // Implicit copy-assignment gets the same special treatment as implicit // copy-constructors. emitImplicitAssignmentOperatorBody(Args); - } else if (FD->hasAttr<OpenCLKernelAttr>() && + } else if (DeviceKernelAttr::isOpenCLSpelling( + FD->getAttr<DeviceKernelAttr>()) && GD.getKernelReferenceKind() == KernelReferenceKind::Kernel) { CallArgList CallArgs; for (unsigned i = 0; i < Args.size(); ++i) { diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 468fc6e0e5c56..84166dd567942 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1913,7 +1913,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, } else if (FD && FD->hasAttr<CUDAGlobalAttr>() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { Out << "__device_stub__" << II->getName(); - } else if (FD && FD->hasAttr<OpenCLKernelAttr>() && + } else if (FD && + DeviceKernelAttr::isOpenCLSpelling( + FD->getAttr<DeviceKernelAttr>()) && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { Out << "__clang_ocl_kern_imp_" << II->getName(); } else { @@ -3930,7 +3932,8 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // Ignore declarations, they will be emitted on their first use. if (const auto *FD = dyn_cast<FunctionDecl>(Global)) { - if (FD->hasAttr<OpenCLKernelAttr>() && FD->doesThisDeclarationHaveABody()) + if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()) && + FD->doesThisDeclarationHaveABody()) addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub)); // Update deferred annotations with the latest declaration if the function @@ -4895,7 +4898,7 @@ CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, bool ForVTable, if (!Ty) { const auto *FD = cast<FunctionDecl>(GD.getDecl()); Ty = getTypes().ConvertType(FD->getType()); - if (FD->hasAttr<OpenCLKernelAttr>() && + if (DeviceKernelAttr::isOpenCLSpelling(FD->getAttr<DeviceKernelAttr>()) && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); Ty = getTypes().GetFunctionType(FI); @@ -6195,7 +6198,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, (CodeGenOpts.OptimizationLevel == 0) && !D->hasAttr<MinSizeAttr>(); - if (D->hasAttr<OpenCLKernelAttr>()) { + if (DeviceKernelAttr::isOpenCLSpelling(D->getAttr<DeviceKernelAttr>())) { if (GD.getKernelReferenceKind() == KernelReferenceKind::Stub && !D->hasAttr<NoInlineAttr>() && !Fn->hasFnAttribute(llvm::Attribute::NoInline) && diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 7d176e421ac4e..f3df92c44bb6b 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -191,7 +191,7 @@ llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel( auto *F = llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage, Name, &CGF.CGM.getModule()); llvm::CallingConv::ID KernelCC = - CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel); + CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_DeviceKernel); F->setCallingConv(KernelCC); llvm::AttrBuilder KernelAttrs(C); diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 452b2e6858673..8660373c3927f 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -337,7 +337,7 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D, return false; return !D->hasAttr<OMPDeclareTargetDeclAttr>() && - (D->hasAttr<OpenCLKernelAttr>() || + (D->hasAttr<DeviceKernelAttr>() || (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) || (isa<VarDecl>(D) && (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || @@ -350,7 +350,7 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( const auto *ReqdWGS = M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr; const bool IsOpenCLKernel = - M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>(); + M.getLangOpts().OpenCL && FD->hasAttr<DeviceKernelAttr>(); const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>(); const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); @@ -572,7 +572,7 @@ bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const { void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention( const FunctionType *&FT) const { FT = getABIInfo().getContext().adjustFunctionType( - FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel)); + FT, FT->getExtInfo().withCallingConv(CC_DeviceKernel)); } /// Return IR struct type for rtinfo struct in rocm-device-libs used for device diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 0ceca6192d8ea..ad802c9131de0 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -260,40 +260,31 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( llvm::Function *F = cast<llvm::Function>(GV); - // Perform special handling in OpenCL mode - if (M.getLangOpts().OpenCL) { - // Use OpenCL function attributes to check for kernel functions + // Perform special handling in OpenCL/CUDA mode + if (M.getLangOpts().OpenCL || M.getLangOpts().CUDA) { + // Use function attributes to check for kernel functions // By default, all functions are device functions - if (FD->hasAttr<OpenCLKernelAttr>()) { - // OpenCL __kernel functions get kernel metadata + if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) { + // OpenCL/CUDA kernel functions get kernel metadata // Create !{<func-ref>, metadata !"kernel", i32 1} node - F->setCallingConv(llvm::CallingConv::PTX_Kernel); // And kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); + if (FD->hasAttr<CUDAGlobalAttr>()) { + SmallVector<int, 10> GCI; + for (auto IV : llvm::enumerate(FD->parameters())) + if (IV.value()->hasAttr<CUDAGridConstantAttr>()) + // For some reason arg indices are 1-based in NVVM + GCI.push_back(IV.index() + 1); + // Create !{<func-ref>, metadata !"kernel", i32 1} node + F->setCallingConv(llvm::CallingConv::PTX_Kernel); + addGridConstantNVVMMetadata(F, GCI); + } + if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) + M.handleCUDALaunchBoundsAttr(F, Attr); } } - - // Perform special handling in CUDA mode. - if (M.getLangOpts().CUDA) { - // CUDA __global__ functions get a kernel metadata entry. Since - // __global__ functions cannot be called from the device, we do not - // need to set the noinline attribute. - if (FD->hasAttr<CUDAGlobalAttr>()) { - SmallVector<int, 10> GCI; - for (auto IV : llvm::enumerate(FD->parameters())) - if (IV.value()->hasAttr<CUDAGridConstantAttr>()) - // For some reason arg indices are 1-based in NVVM - GCI.push_back(IV.index() + 1); - // Create !{<func-ref>, metadata !"kernel", i32 1} node - F->setCallingConv(llvm::CallingConv::PTX_Kernel); - addGridConstantNVVMMetadata(F, GCI); - } - if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) - M.handleCUDALaunchBoundsAttr(F, Attr); - } - // Attach kernel metadata directly if compiling for NVPTX. - if (FD->hasAttr<NVPTXKernelAttr>()) { + if (FD->hasAttr<DeviceKernelAttr>()) { F->setCallingConv(llvm::CallingConv::PTX_Kernel); } } diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index 92ae46234e6b1..2f1e43cdc8cc3 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -228,7 +228,7 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention( // Convert HIP kernels to SPIR-V kernels. if (getABIInfo().getContext().getLangOpts().HIP) { FT = getABIInfo().getContext().adjustFunctionType( - FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel)); + FT, FT->getExtInfo().withCallingConv(CC_DeviceKernel)); return; } } diff --git a/clang/lib/CodeGen/Targets/TCE.cpp b/clang/lib/CodeGen/Targets/TCE.cpp index f3685ccd9825a..df49aea49a1e3 100644 --- a/clang/lib/CodeGen/Targets/TCE.cpp +++ b/clang/lib/CodeGen/Targets/TCE.cpp @@ -39,7 +39,7 @@ void TCETargetCodeGenInfo::setTargetAttributes( llvm::Function *F = cast<llvm::Function>(GV); if (M.getLangOpts().OpenCL) { - if (FD->hasAttr<OpenCLKernelAttr>()) { + if (FD->hasAttr<DeviceKernelAttr>()) { // OpenCL C Kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>(); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index c662b0edbf2ac..60e911b9fecc0 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -8789,7 +8789,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { FunctionDecl *FD = getCurFunctionDecl(); // OpenCL v1.1 s6.5.2 and s6.5.3: no local or constant variables // in functions. - if (FD && !FD->hasAttr<OpenCLKernelAttr>()) { + if (FD && !FD->hasAttr<DeviceKernelAttr>()) { if (T.getAddressSpace() == LangAS::opencl_constant) Diag(NewVD->getLocation(), diag::err_opencl_function_variable) << 0 /*non-kernel only*/ << "constant"; @@ -8801,7 +8801,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { } // OpenCL v2.0 s6.5.2 and s6.5.3: local and constant variables must be // in the outermost scope of a kernel function. - if (FD && FD->hasAttr<OpenCLKernelAttr>()) { + if (FD && FD->hasAttr<DeviceKernelAttr>()) { if (!getCurScope()->isFunctionScope()) { if (T.getAddressSpace() == LangAS::opencl_constant) Diag(NewVD->getLocation(), diag::err_opencl_addrspace_scope) @@ -10930,9 +10930,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, MarkUnusedFileScopedDecl(NewFD); - - - if (getLangOpts().OpenCL && NewFD->hasAttr<OpenCLKernelAttr>()) { + if (getLangOpts().OpenCL && NewFD->hasAttr<DeviceKernelAttr>()) { // OpenCL v1.2 s6.8 static is invalid for kernel functions. if (SC == SC_Static) { Diag(D.getIdentifierLoc(), diag::err_static_kernel); @@ -12437,7 +12435,7 @@ void Sema::CheckMain(FunctionDecl *FD, const DeclSpec &DS) { if (getLangOpts().OpenCL) { Diag(FD->getLocation(), diag::err_opencl_no_main) - << FD->hasAttr<OpenCLKernelAttr>(); + << FD->hasAttr<DeviceKernelAttr>(); FD->setInvalidDecl(); return; } @@ -15713,7 +15711,7 @@ ShouldWarnAboutMissingPrototype(const FunctionDecl *FD, return false; // Don't warn for OpenCL kernels. - if (FD->hasAttr<OpenCLKernelAttr>()) + if (FD->hasAttr<DeviceKernelAttr>()) return false; // Don't warn on explicitly deleted functions. @@ -20607,7 +20605,7 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD, // SYCL functions can be template, so we check if they have appropriate // attribute prior to checking if it is a template. - if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelAttr>()) + if (LangOpts.SYCLIsDevice && FD->hasAttr<DeviceKernelAttr>()) return FunctionEmissionStatus::Emitted; // Templates are emitted when they're instantiated. diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 334e112cc9a4e..da0e3265767d8 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5108,8 +5108,8 @@ static void handleGlobalAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice) S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD; - if (AL.getKind() == ParsedAttr::AT_NVPTXKernel) - D->addAttr(::new (S.Context) NVPTXKernelAttr(S.Context, AL)); + if (AL.getKind() == ParsedAttr::AT_DeviceKernel) + D->addAttr(::new (S.Context) DeviceKernelAttr(S.Context, AL)); else D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL)); // In host compilation the kernel is emitted as a stub function, which is @@ -5244,9 +5244,11 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) { case ParsedAttr::AT_AArch64SVEPcs: D->addAttr(::new (S.Context) AArch64SVEPcsAttr(S.Context, AL)); return; - case ParsedAttr::AT_AMDGPUKernelCall: - D->addAttr(::new (S.Context) AMDGPUKernelCallAttr(S.Context, AL)); + case ParsedAttr::AT_DeviceKernel: { + // The attribute should already be applied. + assert(D->hasAttr<DeviceKernelAttr>() && "Expected attribute"); return; + } case ParsedAttr::AT_IntelOclBicc: D->addAttr(::new (S.Context) IntelOclBiccAttr(S.Context, AL)); return; @@ -5289,6 +5291,33 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) { } } +static void handleDeviceKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + const auto *FD = dyn_cast_or_null<FunctionDecl>(D); + bool IsFunctionTemplate = FD && FD->getDescribedFunctionTemplate(); + if (S.getLangOpts().SYCLIsDevice) { + if (!IsFunctionTemplate) { + S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type_str) + << AL << AL.isRegularKeywordAttribute() << "function templates"; + } else { + S.SYCL().handleKernelAttr(D, AL); + } + } else if (DeviceKernelAttr::isSYCLSpelling(AL)) { + S.Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL; + } else if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) { + handleGlobalAttr(S, D, AL); + } else { + // OpenCL C++ will throw a more specific error. + if (!S.getLangOpts().OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) { + S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type_str) + << AL << AL.isRegularKeywordAttribute() << "functions"; + } + handleSimpleAttribute<DeviceKernelAttr>(S, D, AL); + } + // Make sure we validate the CC with the target + // and warn/error if necessary. + handleCallConvAttr(S, D, AL); +} + static void handleSuppressAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (AL.getAttributeSpellingListIndex() == SuppressAttr::CXX11_gsl_suppress) { // Suppression attribute with GSL spelling requires at least 1 argument. @@ -5453,9 +5482,6 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC, case ParsedAttr::AT_AArch64SVEPcs: CC = CC_AArch64SVEPCS; break; - case ParsedAttr::AT_AMDGPUKernelCall: - CC = CC_AMDGPUKernelCall; - break; case ParsedAttr::AT_RegCall: CC = CC_X86RegCall; break; @@ -5525,6 +5551,11 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC, llvm::Log2_64(ABIVLen) - 5); break; } + case ParsedAttr::AT_DeviceKernel: { + // Validation was handled in handleDeviceKernelAttr. + CC = CC_DeviceKernel; + break; + } default: llvm_unreachable("unexpected attribute kind"); } @@ -7148,9 +7179,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_EnumExtensibility: handleEnumExtensibilityAttr(S, D, AL); break; - case ParsedAttr::AT_SYCLKernel: - S.SYCL().handleKernelAttr(D, AL); - break; case ParsedAttr::AT_SYCLKernelEntryPoint: S.SYCL().handleKernelEntryPointAttr(D, AL); break; @@ -7175,7 +7203,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_CalledOnce: handleCalledOnceAttr(S, D, AL); break; - case ParsedAttr::AT_NVPTXKernel: case ParsedAttr::AT_CUDAGlobal: handleGlobalAttr(S, D, AL); break; @@ -7439,13 +7466,15 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_PreserveAll: case ParsedAttr::AT_AArch64VectorPcs: case ParsedAttr::AT_AArch64SVEPcs: - case ParsedAttr::AT_AMDGPUKernelCall: case ParsedAttr::AT_M68kRTD: case ParsedAttr::AT_PreserveNone: case ParsedAttr::AT_RISCVVectorCC: case ParsedAttr::AT_RISCVVLSCC: handleCallConvAttr(S, D, AL); break; + case ParsedAttr::AT_DeviceKernel: + handleDeviceKernelAttr(S, D, AL); + break; case ParsedAttr::AT_Suppress: handleSuppressAttr(S, D, AL); break; @@ -7764,9 +7793,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, static bool isKernelDecl(Decl *D) { const FunctionType *FnTy = D->getFunctionType(); - return D->hasAttr<OpenCLKernelAttr>() || - (FnTy && FnTy->getCallConv() == CallingConv::CC_AMDGPUKernelCall) || - D->hasAttr<CUDAGlobalAttr>() || D->getAttr<NVPTXKernelAttr>(); + return D->hasAttr<DeviceKernelAttr>() || + (FnTy && FnTy->getCallConv() == CallingConv::CC_DeviceKernel) || + D->hasAttr<CUDAGlobalAttr>(); } void Sema::ProcessDeclAttributeList( @@ -7793,7 +7822,7 @@ void Sema::ProcessDeclAttributeList( // good to have a way to specify "these attributes must appear as a group", // for these. Additionally, it would be good to have a way to specify "these // attribute must never appear as a group" for attributes like cold and hot. - if (!(D->hasAttr<OpenCLKernelAttr>() || + if (!(D->hasAttr<DeviceKernelAttr>() || (D->hasAttr<CUDAGlobalAttr>() && Context.getTargetInfo().getTriple().isSPIRV()))) { // These attributes cannot be applied to a non-kernel function. diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 1969d7b0ba837..3e03cb4bd5f99 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -199,7 +199,7 @@ void SemaSYCL::handleKernelAttr(Decl *D, const ParsedAttr &AL) { return; } - handleSimpleAttribute<SYCLKernelAttr>(*this, D, AL); + handleSimpleAttribute<DeviceKernelAttr>(*this, D, AL); } void SemaSYCL::handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL) { diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index bcad815e1587f..b8e830cc30be1 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -676,9 +676,9 @@ static void instantiateDependentAMDGPUMaxNumWorkGroupsAttr( // This doesn't take any template parameters, but we have a custom action that // needs to happen when the kernel itself is instantiated. We need to run the // ItaniumMangler to mark the names required to name this kernel. -static void instantiateDependentSYCLKernelAttr( +static void instantiateDependentDeviceKernelAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, - const SYCLKernelAttr &Attr, Decl *New) { + const DeviceKernelAttr &Attr, Decl *New) { New->addAttr(Attr.clone(S.getASTContext())); } @@ -920,8 +920,8 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, continue; } - if (auto *A = dyn_cast<SYCLKernelAttr>(TmplAttr)) { - instantiateDependentSYCLKernelAttr(*this, TemplateArgs, *A, New); + if (auto *A = dyn_cast<DeviceKernelAttr>(TmplAttr)) { + instantiateDependentDeviceKernelAttr(*this, TemplateArgs, *A, New); continue; } diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index f863531580f38..a0cd2d1615243 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -134,7 +134,7 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr, case ParsedAttr::AT_VectorCall: \ case ParsedAttr::AT_AArch64VectorPcs: \ case ParsedAttr::AT_AArch64SVEPcs: \ - case ParsedAttr::AT_AMDGPUKernelCall: \ + case ParsedAttr::AT_DeviceKernel: \ case ParsedAttr::AT_MSABI: \ case ParsedAttr::AT_SysVABI: \ case ParsedAttr::AT_Pcs: \ @@ -3755,18 +3755,7 @@ static CallingConv getCCForDeclaratorChunk( CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic, IsCXXInstanceMethod); - // Attribute AT_OpenCLKernel affects the calling convention for SPIR - // and AMDGPU targets, hence it cannot be treated as a calling - // convention attribute. This is the simplest place to infer - // calling convention for OpenCL kernels. - if (S.getLangOpts().OpenCL) { - for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) { - if (AL.getKind() == ParsedAttr::AT_OpenCLKernel) { - CC = CC_OpenCLKernel; - break; - } - } - } else if (S.getLangOpts().CUDA) { + if (S.getLangOpts().CUDA) { // If we're compiling CUDA/HIP code and targeting HIPSPV we need to make // sure the kernels will be marked with the right calling convention so that // they will be visible by the APIs that ingest SPIR-V. We do not do this @@ -3775,13 +3764,20 @@ static CallingConv getCCForDeclaratorChunk( if (Triple.isSPIRV() && Triple.getVendor() != llvm::Triple::AMD) { for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) { if (AL.getKind() == ParsedAttr::AT_CUDAGlobal) { - CC = CC_OpenCLKernel; + CC = CC_DeviceKernel; break; } } } } - + if (!S.getLangOpts().isSYCL()) { + for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) { + if (AL.getKind() == ParsedAttr::AT_DeviceKernel) { + CC = CC_DeviceKernel; + break; + } + } + } return CC; } @@ -7532,8 +7528,8 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) { return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr); case ParsedAttr::AT_ArmStreaming: return createSimpleAttr<ArmStreamingAttr>(Ctx, Attr); - case ParsedAttr::AT_AMDGPUKernelCall: - return createSimpleAttr<AMDGPUKernelCallAttr>(Ctx, Attr); + case ParsedAttr::AT_DeviceKernel: + return createSimpleAttr<DeviceKernelAttr>(Ctx, Attr); case ParsedAttr::AT_Pcs: { // The attribute may have had a fixit applied where we treated an // identifier as a string literal. The contents of the string are valid, @@ -8742,6 +8738,16 @@ static void HandleHLSLParamModifierAttr(TypeProcessingState &State, } } +static bool isMultiSubjectAttrAllowedOnType(const ParsedAttr &Attr) { + // The DeviceKernel attribute is shared for many targets, and + // it is only allowed to be a type attribute with the AMDGPU + // spelling, so skip processing the attr as a type attr + // unless it has that spelling. + if (Attr.getKind() != ParsedAttr::AT_DeviceKernel) + return true; + return DeviceKernelAttr::isAMDGPUSpelling(Attr); +} + static void processTypeAttrs(TypeProcessingState &state, QualType &type, TypeAttrLocation TAL, const ParsedAttributesView &attrs, @@ -8995,6 +9001,9 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, break; [[fallthrough]]; FUNCTION_TYPE_ATTRS_CASELIST: + if (!isMultiSubjectAttrAllowedOnType(attr)) + break; + attr.setUsedAsTypeAttr(); // Attributes with standard syntax have strict rules for what they diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index bf64c388b0436..41d00dae3f69a 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -109,7 +109,6 @@ // CHECK-NEXT: NSConsumed (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: NSConsumesSelf (SubjectMatchRule_objc_method) // CHECK-NEXT: NSErrorDomain (SubjectMatchRule_enum) -// CHECK-NEXT: NVPTXKernel (SubjectMatchRule_function) // CHECK-NEXT: Naked (SubjectMatchRule_function) // CHECK-NEXT: NoBuiltin (SubjectMatchRule_function) // CHECK-NEXT: NoCommon (SubjectMatchRule_variable) diff --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp index 586d7edf93343..e7864e6d62e4d 100644 --- a/clang/tools/libclang/CXType.cpp +++ b/clang/tools/libclang/CXType.cpp @@ -732,8 +732,8 @@ CXCallingConv clang_getFunctionTypeCallingConv(CXType X) { TCALLINGCONV(RISCVVLSCall_32768); TCALLINGCONV(RISCVVLSCall_65536); case CC_SpirFunction: return CXCallingConv_Unexposed; - case CC_AMDGPUKernelCall: return CXCallingConv_Unexposed; - case CC_OpenCLKernel: return CXCallingConv_Unexposed; + case CC_DeviceKernel: + return CXCallingConv_Unexposed; break; } #undef TCALLINGCONV diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index e52324a8ebc12..803ed67d534ea 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -1117,7 +1117,7 @@ HANDLE_DW_CC(0xc3, LLVM_AAPCS) HANDLE_DW_CC(0xc4, LLVM_AAPCS_VFP) HANDLE_DW_CC(0xc5, LLVM_IntelOclBicc) HANDLE_DW_CC(0xc6, LLVM_SpirFunction) -HANDLE_DW_CC(0xc7, LLVM_OpenCLKernel) +HANDLE_DW_CC(0xc7, LLVM_DeviceKernel) HANDLE_DW_CC(0xc8, LLVM_Swift) HANDLE_DW_CC(0xc9, LLVM_PreserveMost) HANDLE_DW_CC(0xca, LLVM_PreserveAll) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h index bd25f6c30ebf1..a760f773055d2 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h @@ -734,13 +734,15 @@ void DWARFTypePrinter<DieType>::appendSubroutineNameAfter( OS << " __attribute__((intel_ocl_bicc))"; break; case dwarf::CallingConvention::DW_CC_LLVM_SpirFunction: - case dwarf::CallingConvention::DW_CC_LLVM_OpenCLKernel: - // These aren't available as attributes, but maybe we should still - // render them somehow? (Clang doesn't render them, but that's an issue + // This isn't available as an attribute, but maybe we should still + // render it somehow? (Clang doesn't render it, but that's an issue // for template names too - since then the DWARF names of templates // instantiated with function types with these calling conventions won't // have distinct names - so we'd need to fix that too) break; + case dwarf::CallingConvention::DW_CC_LLVM_DeviceKernel: + OS << " __attribute__((device_kernel))"; + break; case dwarf::CallingConvention::DW_CC_LLVM_Swift: // SwiftAsync missing OS << " __attribute__((swiftcall))"; diff --git a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll index 1055abe6d3499..0f9a08a85a8cd 100644 --- a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll +++ b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll @@ -28,6 +28,6 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo !2 = !{i32 7, !"Dwarf Version", i32 5} !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) -!5 = !DISubroutineType(cc: DW_CC_LLVM_OpenCLKernel, types: !6) +!5 = !DISubroutineType(cc: DW_CC_LLVM_DeviceKernel, types: !6) !6 = !{null} !7 = !{i32 1024, i32 1, i32 1} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits