llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang Author: Artem Belevich (Artem-B) <details> <summary>Changes</summary> LLVM support for the attribute has been implemented already, so it just plumbs it through to the CUDA front-end. One notable difference from NVCC is that the attribute can be used regardless of the targeted GPU. On the older GPUs it will just be ignored. The attribute is a performance hint, and does not warrant a hard error if compiler can't benefit from it on a particular GPU variant. --- Full diff: https://github.com/llvm/llvm-project/pull/114589.diff 12 Files Affected: - (modified) clang/docs/ReleaseNotes.rst (+1) - (modified) clang/include/clang/Basic/Attr.td (+7) - (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+2) - (modified) clang/lib/CodeGen/Targets/NVPTX.cpp (+29-7) - (modified) clang/lib/Sema/SemaDecl.cpp (+10-1) - (modified) clang/lib/Sema/SemaDeclAttr.cpp (+12) - (modified) clang/lib/Sema/SemaTemplateInstantiateDecl.cpp (+6) - (modified) clang/test/CodeGenCUDA/Inputs/cuda.h (+2) - (added) clang/test/CodeGenCUDA/grid-constant.cu (+31) - (modified) clang/test/Misc/pragma-attribute-supported-attributes-list.test (+1) - (modified) clang/test/SemaCUDA/Inputs/cuda.h (+1) - (added) clang/test/SemaCUDA/grid-constant.cu (+33) ``````````diff diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 402203f89e23a0..9466df98747e27 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -699,6 +699,7 @@ CUDA Support ^^^^^^^^^^^^ - Clang now supports CUDA SDK up to 12.6 - Added support for sm_100 +- Added support for `__grid_constant__` attribute. AIX Support ^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 47c93b48175fc8..9925b46ab2505e 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1450,6 +1450,13 @@ def CUDAHost : InheritableAttr { } def : MutualExclusions<[CUDAGlobal, CUDAHost]>; +def CUDAGridConstant : InheritableAttr { + let Spellings = [GNU<"grid_constant">, Declspec<"__grid_constant__">]; + let Subjects = SubjectList<[ParmVar]>; + let LangOpts = [CUDA]; + let Documentation = [Undocumented]; +} + def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> { let Spellings = [Clang<"nvptx_kernel">]; let Subjects = SubjectList<[Function]>; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 34ff49d7238a7f..61ff4c4fb5d646 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9100,6 +9100,8 @@ def err_cuda_host_shared : Error< "%select{__device__|__global__|__host__|__host__ __device__}0 functions">; def err_cuda_nonstatic_constdev: Error<"__constant__, __device__, and " "__managed__ are not allowed on non-static local variables">; +def err_cuda_grid_constant_not_allowed : Error< + "__grid_constant__ is only allowed on const-qualified kernel parameters">; def err_cuda_ovl_target : Error< "%select{__device__|__global__|__host__|__host__ __device__}0 function %1 " "cannot overload %select{__device__|__global__|__host__|__host__ __device__}2 function %3">; diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index ec7f1c439b1881..0431d2cc4ddc39 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -8,6 +8,7 @@ #include "ABIInfoImpl.h" #include "TargetInfo.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/IR/IntrinsicsNVPTX.h" using namespace clang; @@ -78,7 +79,13 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, - int Operand); + int Operand, + const SmallVectorImpl<int> &GridConstantArgs); + + static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, + int Operand) { + addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0)); + } private: static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst, @@ -240,7 +247,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( } const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); - if (!FD) return; + if (!FD) + return; llvm::Function *F = cast<llvm::Function>(GV); @@ -263,8 +271,13 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( // __global__ functions cannot be called from the device, we do not // need to set the noinline attribute. if (FD->hasAttr<CUDAGlobalAttr>()) { + SmallVector<int, 10> GCI; + for (auto IV : llvm::enumerate(FD->parameters())) + if (IV.value()->hasAttr<CUDAGridConstantAttr>()) + // For some reason arg indices are 1-based in NVVM + GCI.push_back(IV.index() + 1); // Create !{<func-ref>, metadata !"kernel", i32 1} node - addNVVMMetadata(F, "kernel", 1); + addNVVMMetadata(F, "kernel", 1, GCI); } if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) M.handleCUDALaunchBoundsAttr(F, Attr); @@ -276,18 +289,27 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( } } -void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV, - StringRef Name, int Operand) { +void NVPTXTargetCodeGenInfo::addNVVMMetadata( + llvm::GlobalValue *GV, StringRef Name, int Operand, + const SmallVectorImpl<int> &GridConstantArgs) { llvm::Module *M = GV->getParent(); llvm::LLVMContext &Ctx = M->getContext(); // Get "nvvm.annotations" metadata node llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); - llvm::Metadata *MDVals[] = { + SmallVector<llvm::Metadata *, 5> MDVals = { llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))}; + if (!GridConstantArgs.empty()) { + SmallVector<llvm::Metadata *, 10> GCM; + for (int I : GridConstantArgs) + GCM.push_back(llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I))); + MDVals.append({llvm::MDString::get(Ctx, "grid_constant"), + llvm::MDNode::get(Ctx, GCM)}); + } // Append metadata to nvvm.annotations MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } @@ -309,7 +331,7 @@ NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM, return llvm::ConstantExpr::getAddrSpaceCast( llvm::ConstantPointerNull::get(NPT), PT); } -} +} // namespace void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F, const CUDALaunchBoundsAttr *Attr, diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index f8e5f3c6d309d6..9de8cbc303016c 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -12222,8 +12222,17 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD, << NewFD; } - if (!Redeclaration && LangOpts.CUDA) + if (!Redeclaration && LangOpts.CUDA) { + bool IsKernel = NewFD->hasAttr<CUDAGlobalAttr>(); + for (auto *Parm : NewFD->parameters()) { + if (!Parm->getType()->isDependentType() && + Parm->hasAttr<CUDAGridConstantAttr>() && + !(IsKernel && Parm->getType().isConstQualified())) + Diag(Parm->getAttr<CUDAGridConstantAttr>()->getLocation(), + diag::err_cuda_grid_constant_not_allowed); + } CUDA().checkTargetOverload(NewFD, Previous); + } } // Check if the function definition uses any AArch64 SME features without diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 601c6f2eef1d9c..d8550bab3eddd2 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -4748,6 +4748,15 @@ static void handleManagedAttr(Sema &S, Decl *D, const ParsedAttr &AL) { D->addAttr(CUDADeviceAttr::CreateImplicit(S.Context)); } +static void handleGridConstantAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (D->isInvalidDecl()) + return; + // Whether __grid_constant__ is allowed to be used will be checked in + // Sema::CheckFunctionDeclaration as we need complete function decl to make + // the call. + D->addAttr(::new (S.Context) CUDAGridConstantAttr(S.Context, AL)); +} + static void handleGNUInlineAttr(Sema &S, Decl *D, const ParsedAttr &AL) { const auto *Fn = cast<FunctionDecl>(D); if (!Fn->isInlineSpecified()) { @@ -6642,6 +6651,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_CUDADevice: handleDeviceAttr(S, D, AL); break; + case ParsedAttr::AT_CUDAGridConstant: + handleGridConstantAttr(S, D, AL); + break; case ParsedAttr::AT_HIPManaged: handleManagedAttr(S, D, AL); break; diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 3e948232057afe..ec3c3ce6057264 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -876,6 +876,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, continue; } + if (auto *A = dyn_cast<CUDAGridConstantAttr>(TmplAttr)) { + if (!New->hasAttr<CUDAGridConstantAttr>()) + New->addAttr(A->clone(Context)); + continue; + } + assert(!TmplAttr->isPackExpansion()); if (TmplAttr->isLateParsed() && LateAttrs) { // Late parsed attributes must be instantiated and attached after the diff --git a/clang/test/CodeGenCUDA/Inputs/cuda.h b/clang/test/CodeGenCUDA/Inputs/cuda.h index dc760500e65d41..a8d85afb7cd21c 100644 --- a/clang/test/CodeGenCUDA/Inputs/cuda.h +++ b/clang/test/CodeGenCUDA/Inputs/cuda.h @@ -12,6 +12,7 @@ #define __managed__ __attribute__((managed)) #endif #define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__))) +#define __grid_constant__ __attribute__((grid_constant)) #else #define __constant__ #define __device__ @@ -20,6 +21,7 @@ #define __shared__ #define __managed__ #define __launch_bounds__(...) +#define __grid_constant__ #endif struct dim3 { diff --git a/clang/test/CodeGenCUDA/grid-constant.cu b/clang/test/CodeGenCUDA/grid-constant.cu new file mode 100644 index 00000000000000..8d4be9c9dc7e1e --- /dev/null +++ b/clang/test/CodeGenCUDA/grid-constant.cu @@ -0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5 +// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s + +#include "Inputs/cuda.h" + +struct S {}; + +__global__ void kernel(__grid_constant__ const S gc_arg1, int arg2, __grid_constant__ const int gc_arg3) {} + +// dependent arguments get diagnosed after instantiation. +template <typename T> +__global__ void tkernel_const(__grid_constant__ const T arg) {} + +template <typename T> +__global__ void tkernel(int dummy, __grid_constant__ T arg) {} + +void foo() { + tkernel_const<const S><<<1,1>>>({}); + tkernel_const<S><<<1,1>>>({}); + tkernel<const S><<<1,1>>>(1, {}); +} +//. +//. +// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]} +// CHECK: [[META1]] = !{i32 1, i32 3} +// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]} +// CHECK: [[META3]] = !{i32 1} +// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]} +// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]} +// CHECK: [[META6]] = !{i32 2} +//. diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index e28b0775410c0a..b159a45c25a7f4 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -39,6 +39,7 @@ // CHECK-NEXT: CUDADeviceBuiltinSurfaceType (SubjectMatchRule_record) // CHECK-NEXT: CUDADeviceBuiltinTextureType (SubjectMatchRule_record) // CHECK-NEXT: CUDAGlobal (SubjectMatchRule_function) +// CHECK-NEXT: CUDAGridConstant (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: CUDAHost (SubjectMatchRule_function) // CHECK-NEXT: CUDALaunchBounds (SubjectMatchRule_objc_method, SubjectMatchRule_hasType_functionType) // CHECK-NEXT: CUDAShared (SubjectMatchRule_variable) diff --git a/clang/test/SemaCUDA/Inputs/cuda.h b/clang/test/SemaCUDA/Inputs/cuda.h index 405ef8bb807d90..10db947d8246ca 100644 --- a/clang/test/SemaCUDA/Inputs/cuda.h +++ b/clang/test/SemaCUDA/Inputs/cuda.h @@ -11,6 +11,7 @@ #define __host__ __attribute__((host)) #define __shared__ __attribute__((shared)) #define __managed__ __attribute__((managed)) +#define __grid_constant__ __attribute__((grid_constant)) #define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__))) struct dim3 { diff --git a/clang/test/SemaCUDA/grid-constant.cu b/clang/test/SemaCUDA/grid-constant.cu new file mode 100644 index 00000000000000..876e389355fd4b --- /dev/null +++ b/clang/test/SemaCUDA/grid-constant.cu @@ -0,0 +1,33 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s +// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s +#include "Inputs/cuda.h" + +struct S {}; + +__global__ void kernel_struct(__grid_constant__ const S arg) {} +__global__ void kernel_scalar(__grid_constant__ const int arg) {} + +__global__ void gc_kernel_non_const(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}} + +void non_kernel(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}} + +// templates w/ non-dependent argument types get diagnosed right +// away, without instantiation. +template <typename T> +__global__ void tkernel_nd_const(__grid_constant__ const S arg, T dummy) {} +template <typename T> +__global__ void tkernel_nd_non_const(__grid_constant__ S arg, T dummy) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}} + +// dependent arguments get diagnosed after instantiation. +template <typename T> +__global__ void tkernel_const(__grid_constant__ const T arg) {} + +template <typename T> +__global__ void tkernel(__grid_constant__ T arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}} + +void foo() { + tkernel_const<const S><<<1,1>>>({}); + tkernel_const<S><<<1,1>>>({}); + tkernel<const S><<<1,1>>>({}); + tkernel<S><<<1,1>>>({}); // expected-note {{in instantiation of function template specialization 'tkernel<S>' requested here}} +} `````````` </details> https://github.com/llvm/llvm-project/pull/114589 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits