[clang] [clang] Fix inconsistencies with the device_kernel attr on different targets (PR #161905)

Nick Sarnie via cfe-commits Fri, 17 Oct 2025 16:08:57 -0700

https://github.com/sarnex created 
https://github.com/llvm/llvm-project/pull/161905


The original [change](https://github.com/intel/llvm/pull/14114) unifying the 
device kernel attributes had some inexplicable behavior, such as 
`amdgpu_kernel` resulting in a function ending up with the `spir_kernel` CC but 
`nvptx_kernel` not doing the same.

For the target-specific spellings (`nvptx_kernel` and `amdgpu_kernel`), we warn 
and ignore the attribute if the spelling doesn't match the target.
Also, we make sure that any valid spelling actually applies the CC. This worked 
for `NVPTX` already but was missing for `SPIR-V` and `AMDGPU`.

We actually don't need to do that: if we change `amdgpu_kernel` to apply the 
calling convention the same way that NVPTX does (setting the CC on the function 
in `TargetCodeGenInfo::setTargetAttributes`), we can remove all handling of 
`device_kernel` as a type attribute.

These issues were reported 
[here](https://github.com/llvm/llvm-project/issues/161077) and 
[here](https://github.com/llvm/llvm-project/pull/161349).

Closes: https://github.com/llvm/llvm-project/issues/161077


>From cff89261fdf1e367bd5865f5014d4d9c115d19a7 Mon Sep 17 00:00:00 2001
From: "Sarnie, Nick" <[email protected]>
Date: Fri, 3 Oct 2025 12:33:38 -0700
Subject: [PATCH] [clang] Fix inconsistencies with the device_kernel attr on
 different targets

Signed-off-by: Sarnie, Nick <[email protected]>
---
 clang/docs/ReleaseNotes.rst                |  1 +
 clang/include/clang/Basic/Attr.td          |  2 +-
 clang/lib/AST/TypePrinter.cpp              |  3 --
 clang/lib/Basic/Targets/NVPTX.h            |  2 +-
 clang/lib/CodeGen/Targets/AMDGPU.cpp       |  6 ++--
 clang/lib/CodeGen/Targets/SPIR.cpp         | 33 ++++++++++++++++++--
 clang/lib/Sema/SemaDeclAttr.cpp            | 36 ++++++++++++++++++++--
 clang/lib/Sema/SemaType.cpp                | 18 ++---------
 clang/test/Sema/callingconv-devicekernel.c | 24 +++++++++++++++
 clang/test/Sema/callingconv.c              |  4 +++
 10 files changed, 100 insertions(+), 29 deletions(-)
 create mode 100644 clang/test/Sema/callingconv-devicekernel.c

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index d2e5bd284d350..9b57949112c22 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -401,6 +401,7 @@ Bug Fixes to Attribute Support
 - Using ``[[gnu::cleanup(some_func)]]`` where some_func is annotated with
   ``[[gnu::error("some error")]]`` now correctly triggers an error. (#GH146520)
 - Fix a crash when the function name is empty in the `swift_name` attribute. 
(#GH157075)
+- Fixes crashes or missing diagnostics with the `device_kernel` attribute. 
(#GHTODO)
 
 Bug Fixes to C++ Support
 ^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index 3c697ed8dd882..6ec0eac529245 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1599,7 +1599,7 @@ def CUDAShared : InheritableAttr {
 }
 def : MutualExclusions<[CUDAConstant, CUDAShared, HIPManaged]>;
 
-def DeviceKernel : DeclOrTypeAttr {
+def DeviceKernel : InheritableAttr {
   let Spellings = [Clang<"device_kernel">, Clang<"sycl_kernel">,
                    Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">,
                    CustomKeyword<"__kernel">, CustomKeyword<"kernel">];
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 66a1b684ec68b..568d56ab0b911 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -2147,9 +2147,6 @@ void TypePrinter::printAttributedAfter(const 
AttributedType *T,
   }
   case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break;
   case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break;
-  case attr::DeviceKernel:
-    OS << T->getAttr()->getSpelling();
-    break;
   case attr::IntelOclBicc:
     OS << "inteloclbicc";
     break;
diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
index 33c29586359be..f5c8396f398aa 100644
--- a/clang/lib/Basic/Targets/NVPTX.h
+++ b/clang/lib/Basic/Targets/NVPTX.h
@@ -200,7 +200,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public 
TargetInfo {
     // a host function.
     if (HostTarget)
       return HostTarget->checkCallingConvention(CC);
-    return CCCR_Warning;
+    return CC == CC_DeviceKernel ? CCCR_OK : CCCR_Warning;
   }
 
   bool hasBitIntType() const override { return true; }
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp 
b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 0fcbf7e458a34..d54b1dc128254 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -419,9 +419,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
     return;
 
   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
-  if (FD)
+  if (FD) {
     setFunctionDeclAttributes(FD, F, M);
-
+    if (FD->hasAttr<DeviceKernelAttr>() && !M.getLangOpts().OpenCL)
+      F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+  }
   if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
     F->addFnAttr("amdgpu-ieee", "false");
 }
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp 
b/clang/lib/CodeGen/Targets/SPIR.cpp
index 4aa63143a66cd..42ef1c704831c 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -61,6 +61,8 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
       QualType SampledType, CodeGenModule &CGM) const;
   void
   setOCLKernelStubCallingConvention(const FunctionType *&FT) const override;
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &M) const override;
 };
 class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
 public:
@@ -240,6 +242,26 @@ void 
CommonSPIRTargetCodeGenInfo::setOCLKernelStubCallingConvention(
       FT, FT->getExtInfo().withCallingConv(CC_SpirFunction));
 }
 
+void CommonSPIRTargetCodeGenInfo::setTargetAttributes(
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+  if (M.getLangOpts().OpenCL)
+    return;
+
+  if (GV->isDeclaration())
+    return;
+
+  llvm::Function *F = dyn_cast<llvm::Function>(GV);
+  if (!F)
+    return;
+
+  const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
+  if (!FD)
+    return;
+
+  if (FD->hasAttr<DeviceKernelAttr>())
+    F->setCallingConv(getDeviceKernelCallingConv());
+}
+
 LangAS
 SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
                                                  const VarDecl *D) const {
@@ -264,9 +286,6 @@ 
SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
 
 void SPIRVTargetCodeGenInfo::setTargetAttributes(
     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
-  if (!M.getLangOpts().HIP ||
-      M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
-    return;
   if (GV->isDeclaration())
     return;
 
@@ -277,6 +296,14 @@ void SPIRVTargetCodeGenInfo::setTargetAttributes(
   auto FD = dyn_cast_or_null<FunctionDecl>(D);
   if (!FD)
     return;
+
+  if (FD->hasAttr<DeviceKernelAttr>())
+    F->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
+
+  if (!M.getLangOpts().HIP ||
+      M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
+    return;
+
   if (!FD->hasAttr<CUDAGlobalAttr>())
     return;
 
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 328ccf6694073..551d00b3c7476 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5204,25 +5204,55 @@ static void handleCallConvAttr(Sema &S, Decl *D, const 
ParsedAttr &AL) {
 static void handleDeviceKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
   bool IsFunctionTemplate = FD && FD->getDescribedFunctionTemplate();
-  if (S.getLangOpts().SYCLIsDevice) {
+  llvm::Triple Triple = S.getASTContext().getTargetInfo().getTriple();
+  const LangOptions &LangOpts = S.getLangOpts();
+
+  if (LangOpts.SYCLIsDevice) {
     if (!IsFunctionTemplate) {
       S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type_str)
           << AL << AL.isRegularKeywordAttribute() << "function templates";
+      AL.setInvalid();
+      return;
     } else {
       S.SYCL().handleKernelAttr(D, AL);
     }
   } else if (DeviceKernelAttr::isSYCLSpelling(AL)) {
     S.Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL;
-  } else if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) {
+    AL.setInvalid();
+
+    return;
+  } else if (Triple.isNVPTX()) {
     handleGlobalAttr(S, D, AL);
   } else {
     // OpenCL C++ will throw a more specific error.
-    if (!S.getLangOpts().OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) {
+    if (!LangOpts.OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) {
       S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type_str)
           << AL << AL.isRegularKeywordAttribute() << "functions";
+      AL.setInvalid();
+      return;
     }
     handleSimpleAttribute<DeviceKernelAttr>(S, D, AL);
   }
+  // TODO: isGPU() should probably return true for SPIR.
+  bool TargetDeviceEnvironment = Triple.isGPU() || Triple.isSPIR() ||
+                                 LangOpts.isTargetDevice() || LangOpts.OpenCL;
+  bool IsAMDGPUMismatch =
+      DeviceKernelAttr::isAMDGPUSpelling(AL) && !Triple.isAMDGPU();
+  bool IsNVPTXMismatch =
+      DeviceKernelAttr::isNVPTXSpelling(AL) && !Triple.isNVPTX();
+  if (IsAMDGPUMismatch || IsNVPTXMismatch || !TargetDeviceEnvironment) {
+    // While both are just different spellings of the same underlying
+    // attribute, it makes more sense to the user if amdgpu_kernel can only
+    // be used on AMDGPU and the equivalent for NVPTX, so warn and ignore
+    // the attribute if there's a mismatch.
+    // Also warn if this is not an environment where a device kernel makes
+    // sense.
+    S.Diag(AL.getLoc(), diag::warn_cconv_unsupported)
+        << AL << (int)Sema::CallingConventionIgnoredReason::ForThisTarget;
+    AL.setInvalid();
+    return;
+  }
+
   // Make sure we validate the CC with the target
   // and warn/error if necessary.
   handleCallConvAttr(S, D, AL);
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index bee613aa5f1c5..0d5b0e7e842b3 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -134,7 +134,6 @@ static void diagnoseBadTypeAttribute(Sema &S, const 
ParsedAttr &attr,
   case ParsedAttr::AT_VectorCall:                                              
\
   case ParsedAttr::AT_AArch64VectorPcs:                                        
\
   case ParsedAttr::AT_AArch64SVEPcs:                                           
\
-  case ParsedAttr::AT_DeviceKernel:                                            
\
   case ParsedAttr::AT_MSABI:                                                   
\
   case ParsedAttr::AT_SysVABI:                                                 
\
   case ParsedAttr::AT_Pcs:                                                     
\
@@ -3781,7 +3780,8 @@ static CallingConv getCCForDeclaratorChunk(
     }
   }
   if (!S.getLangOpts().isSYCL()) {
-    for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
+    for (const ParsedAttr &AL : llvm::concat<ParsedAttr>(
+             D.getDeclSpec().getAttributes(), D.getAttributes())) {
       if (AL.getKind() == ParsedAttr::AT_DeviceKernel) {
         CC = CC_DeviceKernel;
         break;
@@ -7565,8 +7565,6 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr 
&Attr) {
     return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr);
   case ParsedAttr::AT_ArmStreaming:
     return createSimpleAttr<ArmStreamingAttr>(Ctx, Attr);
-  case ParsedAttr::AT_DeviceKernel:
-    return createSimpleAttr<DeviceKernelAttr>(Ctx, Attr);
   case ParsedAttr::AT_Pcs: {
     // The attribute may have had a fixit applied where we treated an
     // identifier as a string literal.  The contents of the string are valid,
@@ -8805,16 +8803,6 @@ static void 
HandleHLSLParamModifierAttr(TypeProcessingState &State,
   }
 }
 
-static bool isMultiSubjectAttrAllowedOnType(const ParsedAttr &Attr) {
-  // The DeviceKernel attribute is shared for many targets, and
-  // it is only allowed to be a type attribute with the AMDGPU
-  // spelling, so skip processing the attr as a type attr
-  // unless it has that spelling.
-  if (Attr.getKind() != ParsedAttr::AT_DeviceKernel)
-    return true;
-  return DeviceKernelAttr::isAMDGPUSpelling(Attr);
-}
-
 static void processTypeAttrs(TypeProcessingState &state, QualType &type,
                              TypeAttrLocation TAL,
                              const ParsedAttributesView &attrs,
@@ -9068,8 +9056,6 @@ static void processTypeAttrs(TypeProcessingState &state, 
QualType &type,
         break;
       [[fallthrough]];
     FUNCTION_TYPE_ATTRS_CASELIST:
-      if (!isMultiSubjectAttrAllowedOnType(attr))
-        break;
 
       attr.setUsedAsTypeAttr();
 
diff --git a/clang/test/Sema/callingconv-devicekernel.c 
b/clang/test/Sema/callingconv-devicekernel.c
new file mode 100644
index 0000000000000..869687f8ca65d
--- /dev/null
+++ b/clang/test/Sema/callingconv-devicekernel.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s 2>&1 -o -| 
FileCheck -check-prefix=CHECK-AMDGPU %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda- -emit-llvm %s 2>&1 -o -| 
FileCheck -check-prefix=CHECK-NVPTX %s
+// RUN: %clang_cc1 -triple spir64 -emit-llvm %s 2>&1 -o - | FileCheck 
-check-prefix=CHECK-SPIR %s
+// RUN: %clang_cc1 -triple spirv64 -emit-llvm %s 2>&1 -o - | FileCheck 
-check-prefix=CHECK-SPIR %s
+
+// CHECK-AMDGPU-DAG: amdgpu_kernel void @kernel1()
+// CHECK-NVPTX-DAG: ptx_kernel void @kernel1()
+// CHECK-SPIR-DAG: spir_kernel void @kernel1()
+[[clang::device_kernel]] void kernel1() {}
+
+// CHECK-AMDGPU-DAG: amdgpu_kernel void @kernel2()
+// CHECK-NVPTX-DAG: 14:3: warning: 'clang::amdgpu_kernel' calling convention 
is not supported for this target
+// CHECK-SPIR-DAG: 14:3: warning: 'clang::amdgpu_kernel' calling convention is 
not supported for this target
+[[clang::amdgpu_kernel]] void kernel2() {}
+
+// CHECK-AMDGPU-DAG: 19:3: warning: 'clang::nvptx_kernel' calling convention 
is not supported for this target
+// CHECK-NVPTX-DAG: ptx_kernel void @kernel3()
+// CHECK-SPIR-DAG: 19:3: warning: 'clang::nvptx_kernel' calling convention is 
not supported for this target
+[[clang::nvptx_kernel]] void kernel3() {}
+
+// CHECK-AMDGPU-DAG: 24:3: warning: 'clang::sycl_kernel' attribute ignored
+// CHECK-NVPTX-DAG: 24:3: warning: 'clang::sycl_kernel' attribute ignored
+// CHECK-SPIR-DAG: 24:3: warning: 'clang::sycl_kernel' attribute ignored
+[[clang::sycl_kernel]] void kernel4() {}
diff --git a/clang/test/Sema/callingconv.c b/clang/test/Sema/callingconv.c
index f0b8b80a32974..28342b56af39a 100644
--- a/clang/test/Sema/callingconv.c
+++ b/clang/test/Sema/callingconv.c
@@ -55,6 +55,10 @@ int __attribute__((aarch64_vector_pcs)) aavpcs(void); // 
expected-warning {{'aar
 int __attribute__((aarch64_sve_pcs)) aasvepcs(void);  // expected-warning 
{{'aarch64_sve_pcs' calling convention is not supported for this target}}
 
 int __attribute__((amdgpu_kernel)) amdgpu_kernel(void); // expected-warning 
{{'amdgpu_kernel' calling convention is not supported for this target}}
+int __attribute__((device_kernel)) device_kernel(void) { // expected-warning 
{{'device_kernel' calling convention is not supported for this target}}
+}
+int __attribute__((sycl_kernel)) sycl_kernel(void) { // expected-warning 
{{'sycl_kernel' attribute ignored}}
+}
 
 // PR6361
 void ctest3();

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Fix inconsistencies with the device_kernel attr on different targets (PR #161905)

Reply via email to