Author: Sam McCall Date: 2023-09-27T10:59:04+02:00 New Revision: 0afbcb20fd908f8bf9073697423da097be7db592
URL: https://github.com/llvm/llvm-project/commit/0afbcb20fd908f8bf9073697423da097be7db592 DIFF: https://github.com/llvm/llvm-project/commit/0afbcb20fd908f8bf9073697423da097be7db592.diff LOG: Revert "[NVPTX] Add support for maxclusterrank in launch_bounds (#66496)" This reverts commit dfab31b41b4988b6dc8129840eba68f0c36c0f13. SemaDeclAttr.cpp cannot depend on Basic's private headers (lib/Basic/Targets/NVPTX.h) Added: Modified: clang/include/clang/Basic/Attr.td clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Sema/Sema.h clang/lib/Basic/Targets/NVPTX.h clang/lib/CodeGen/Targets/NVPTX.cpp clang/lib/Parse/ParseOpenMP.cpp clang/lib/Sema/SemaDeclAttr.cpp clang/lib/Sema/SemaTemplateInstantiateDecl.cpp clang/test/CodeGenCUDA/launch-bounds.cu clang/test/SemaCUDA/launch_bounds.cu llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp llvm/lib/Target/NVPTX/NVPTXUtilities.cpp llvm/lib/Target/NVPTX/NVPTXUtilities.h Removed: clang/test/SemaCUDA/launch_bounds_sm_90.cu llvm/test/CodeGen/NVPTX/maxclusterrank.ll ################################################################################ diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index fbc27d166ed9dd1..dd4d45171db4899 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1267,8 +1267,7 @@ def CUDAInvalidTarget : InheritableAttr { def CUDALaunchBounds : InheritableAttr { let Spellings = [GNU<"launch_bounds">, Declspec<"__launch_bounds__">]; - let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>, - ExprArgument<"MaxBlocks", 1>]; + let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>]; let LangOpts = [CUDA]; let Subjects = SubjectList<[ObjCMethod, FunctionLike]>; // An AST node is created for this attribute, but is not used by other parts diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index d3910bbce3b7614..3f30681a378e24f 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11853,10 +11853,6 @@ def err_sycl_special_type_num_init_method : Error< "types with 'sycl_special_class' attribute must have one and only one '__init' " "method defined">; -def warn_cuda_maxclusterrank_sm_90 : Warning< - "maxclusterrank requires sm_90 or higher, CUDA arch provided: %0, ignoring " - "%1 attribute">, InGroup<IgnoredAttributes>; - def err_bit_int_bad_size : Error<"%select{signed|unsigned}0 _BitInt must " "have a bit size of at least %select{2|1}0">; def err_bit_int_max_size : Error<"%select{signed|unsigned}0 _BitInt of bit " diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index c36088ac38fb8cf..e4366170005a044 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11061,13 +11061,12 @@ class Sema final { /// Create an CUDALaunchBoundsAttr attribute. CUDALaunchBoundsAttr *CreateLaunchBoundsAttr(const AttributeCommonInfo &CI, Expr *MaxThreads, - Expr *MinBlocks, - Expr *MaxBlocks); + Expr *MinBlocks); /// AddLaunchBoundsAttr - Adds a launch_bounds attribute to a particular /// declaration. void AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *MaxThreads, Expr *MinBlocks, Expr *MaxBlocks); + Expr *MaxThreads, Expr *MinBlocks); /// AddModeAttr - Adds a mode attribute to a particular declaration. void AddModeAttr(Decl *D, const AttributeCommonInfo &CI, IdentifierInfo *Name, diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index 20d76b702a9426e..6fa0b8df97d7894 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -181,8 +181,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo { bool hasBitIntType() const override { return true; } bool hasBFloat16Type() const override { return true; } - - CudaArch getGPU() const { return GPU; } }; } // namespace targets } // namespace clang diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 64d019a10514d60..0d4bbd795648008 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -296,8 +296,8 @@ void CodeGenModule::handleCUDALaunchBoundsAttr( NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue()); - // min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it - // was not specified in __launch_bounds__ or if the user specified a 0 value, + // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was + // not specified in __launch_bounds__ or if the user specified a 0 value, // we don't have to add a PTX directive. if (Attr->getMinBlocks()) { llvm::APSInt MinBlocks(32); @@ -307,14 +307,6 @@ void CodeGenModule::handleCUDALaunchBoundsAttr( NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue()); } - if (Attr->getMaxBlocks()) { - llvm::APSInt MaxBlocks(32); - MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext()); - if (MaxBlocks > 0) - // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node - NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank", - MaxBlocks.getExtValue()); - } } std::unique_ptr<TargetCodeGenInfo> diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 8a8a126bf7244d4..605b97617432ed3 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3739,8 +3739,7 @@ OMPClause *Parser::ParseOpenMPOMPXAttributesClause(bool ParseOnly) { continue; if (auto *A = Actions.CreateLaunchBoundsAttr( PA, PA.getArgAsExpr(0), - PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr, - PA.getNumArgs() > 2 ? PA.getArgAsExpr(2) : nullptr)) + PA.getNumArgs() > 1 ? PA.getArgAsExpr(1) : nullptr)) Attrs.push_back(A); continue; default: diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 49c78fb2ffd3992..090a54eedaa07d0 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "../Basic/Targets/NVPTX.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTMutationListener.h" @@ -5609,13 +5608,6 @@ bool Sema::CheckRegparmAttr(const ParsedAttr &AL, unsigned &numParams) { return false; } -// Helper to get CudaArch. -static CudaArch getCudaArch(const TargetInfo &TI) { - if (!TI.getTriple().isNVPTX()) - llvm_unreachable("getCudaArch is only valid for NVPTX triple"); - return static_cast<const targets::NVPTXTargetInfo *>(&TI)->getGPU(); -} - // Checks whether an argument of launch_bounds attribute is // acceptable, performs implicit conversion to Rvalue, and returns // non-nullptr Expr result on success. Otherwise, it returns nullptr @@ -5659,51 +5651,34 @@ static Expr *makeLaunchBoundsArgExpr(Sema &S, Expr *E, CUDALaunchBoundsAttr * Sema::CreateLaunchBoundsAttr(const AttributeCommonInfo &CI, Expr *MaxThreads, - Expr *MinBlocks, Expr *MaxBlocks) { - CUDALaunchBoundsAttr TmpAttr(Context, CI, MaxThreads, MinBlocks, MaxBlocks); + Expr *MinBlocks) { + CUDALaunchBoundsAttr TmpAttr(Context, CI, MaxThreads, MinBlocks); MaxThreads = makeLaunchBoundsArgExpr(*this, MaxThreads, TmpAttr, 0); - if (!MaxThreads) + if (MaxThreads == nullptr) return nullptr; if (MinBlocks) { MinBlocks = makeLaunchBoundsArgExpr(*this, MinBlocks, TmpAttr, 1); - if (!MinBlocks) + if (MinBlocks == nullptr) return nullptr; } - if (MaxBlocks) { - // '.maxclusterrank' ptx directive requires .target sm_90 or higher. - auto SM = getCudaArch(Context.getTargetInfo()); - if (SM == CudaArch::UNKNOWN || SM < CudaArch::SM_90) { - Diag(MaxBlocks->getBeginLoc(), diag::warn_cuda_maxclusterrank_sm_90) - << CudaArchToString(SM) << CI << MaxBlocks->getSourceRange(); - // Ignore it by setting MaxBlocks to null; - MaxBlocks = nullptr; - } else { - MaxBlocks = makeLaunchBoundsArgExpr(*this, MaxBlocks, TmpAttr, 2); - if (!MaxBlocks) - return nullptr; - } - } - return ::new (Context) - CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks, MaxBlocks); + CUDALaunchBoundsAttr(Context, CI, MaxThreads, MinBlocks); } void Sema::AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *MaxThreads, Expr *MinBlocks, - Expr *MaxBlocks) { - if (auto *Attr = CreateLaunchBoundsAttr(CI, MaxThreads, MinBlocks, MaxBlocks)) + Expr *MaxThreads, Expr *MinBlocks) { + if (auto *Attr = CreateLaunchBoundsAttr(CI, MaxThreads, MinBlocks)) D->addAttr(Attr); } static void handleLaunchBoundsAttr(Sema &S, Decl *D, const ParsedAttr &AL) { - if (!AL.checkAtLeastNumArgs(S, 1) || !AL.checkAtMostNumArgs(S, 3)) + if (!AL.checkAtLeastNumArgs(S, 1) || !AL.checkAtMostNumArgs(S, 2)) return; S.AddLaunchBoundsAttr(D, AL, AL.getArgAsExpr(0), - AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr, - AL.getNumArgs() > 2 ? AL.getArgAsExpr(2) : nullptr); + AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr); } static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D, diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index e9877056a197921..c4f1c4e06ac83b3 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -302,15 +302,7 @@ static void instantiateDependentCUDALaunchBoundsAttr( MinBlocks = Result.getAs<Expr>(); } - Expr *MaxBlocks = nullptr; - if (Attr.getMaxBlocks()) { - Result = S.SubstExpr(Attr.getMaxBlocks(), TemplateArgs); - if (Result.isInvalid()) - return; - MaxBlocks = Result.getAs<Expr>(); - } - - S.AddLaunchBoundsAttr(New, Attr, MaxThreads, MinBlocks, MaxBlocks); + S.AddLaunchBoundsAttr(New, Attr, MaxThreads, MinBlocks); } static void diff --git a/clang/test/CodeGenCUDA/launch-bounds.cu b/clang/test/CodeGenCUDA/launch-bounds.cu index 31ca9216b413e92..58bcc410201f35f 100644 --- a/clang/test/CodeGenCUDA/launch-bounds.cu +++ b/clang/test/CodeGenCUDA/launch-bounds.cu @@ -1,13 +1,9 @@ // RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -target-cpu sm_90 -DUSE_MAX_BLOCKS -fcuda-is-device -emit-llvm -o - | FileCheck -check-prefix=CHECK_MAX_BLOCKS %s #include "Inputs/cuda.h" #define MAX_THREADS_PER_BLOCK 256 #define MIN_BLOCKS_PER_MP 2 -#ifdef USE_MAX_BLOCKS -#define MAX_BLOCKS_PER_MP 4 -#endif // Test both max threads per block and Min cta per sm. extern "C" { @@ -21,21 +17,6 @@ Kernel1() // CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"maxntidx", i32 256} // CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"minctasm", i32 2} -#ifdef USE_MAX_BLOCKS -// Test max threads per block and min/max cta per sm. -extern "C" { -__global__ void -__launch_bounds__( MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP ) -Kernel1_sm_90() -{ -} -} - -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxntidx", i32 256} -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"minctasm", i32 2} -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxclusterrank", i32 4} -#endif // USE_MAX_BLOCKS - // Test only max threads per block. Min cta per sm defaults to 0, and // CodeGen doesn't output a zero value for minctasm. extern "C" { @@ -69,20 +50,6 @@ template __global__ void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>(); // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256} // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2} -#ifdef USE_MAX_BLOCKS -template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp> -__global__ void -__launch_bounds__(max_threads_per_block, min_blocks_per_mp, max_blocks_per_mp) -Kernel4_sm_90() -{ -} -template __global__ void Kernel4_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>(); - -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxntidx", i32 256} -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"minctasm", i32 2} -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxclusterrank", i32 4} -#endif //USE_MAX_BLOCKS - const int constint = 100; template <int max_threads_per_block, int min_blocks_per_mp> __global__ void @@ -96,23 +63,6 @@ template __global__ void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>(); // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356} // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258} -#ifdef USE_MAX_BLOCKS - -template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp> -__global__ void -__launch_bounds__(max_threads_per_block + constint, - min_blocks_per_mp + max_threads_per_block, - max_blocks_per_mp + max_threads_per_block) -Kernel5_sm_90() -{ -} -template __global__ void Kernel5_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>(); - -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxntidx", i32 356} -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"minctasm", i32 258} -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxclusterrank", i32 260} -#endif //USE_MAX_BLOCKS - // Make sure we don't emit negative launch bounds values. __global__ void __launch_bounds__( -MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP ) @@ -130,26 +80,7 @@ Kernel7() // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"maxntidx", // CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"minctasm", -#ifdef USE_MAX_BLOCKS -__global__ void -__launch_bounds__( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP, -MAX_BLOCKS_PER_MP ) -Kernel7_sm_90() -{ -} -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"maxntidx", -// CHECK_MAX_BLOCKS-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"minctasm", -// CHECK_MAX_BLOCKS-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"maxclusterrank", -#endif // USE_MAX_BLOCKS - const char constchar = 12; __global__ void __launch_bounds__(constint, constchar) Kernel8() {} // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100 // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12 - -#ifdef USE_MAX_BLOCKS -const char constchar_2 = 14; -__global__ void __launch_bounds__(constint, constchar, constchar_2) Kernel8_sm_90() {} -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxntidx", i32 100 -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"minctasm", i32 12 -// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxclusterrank", i32 14 -#endif // USE_MAX_BLOCKS diff --git a/clang/test/SemaCUDA/launch_bounds.cu b/clang/test/SemaCUDA/launch_bounds.cu index 045f4756929593c..0ca0c0145d8bbb6 100644 --- a/clang/test/SemaCUDA/launch_bounds.cu +++ b/clang/test/SemaCUDA/launch_bounds.cu @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -std=c++11 -fsyntax-only -triple nvptx-unknown-unknown -target-cpu sm_75 -verify %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s #include "Inputs/cuda.h" @@ -11,9 +11,8 @@ __launch_bounds__(0x10000000000000000) void TestWayTooBigArg(void); // expected- __launch_bounds__(-128, 7) void TestNegArg1(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative and will be ignored}} __launch_bounds__(128, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}} -__launch_bounds__(128, 2, -8) void TestNegArg2(void); // expected-warning {{maxclusterrank requires sm_90 or higher, CUDA arch provided: sm_75, ignoring 'launch_bounds' attribute}} -__launch_bounds__(1, 2, 3, 4) void Test4Args(void); // expected-error {{'launch_bounds' attribute takes no more than 3 arguments}} +__launch_bounds__(1, 2, 3) void Test3Args(void); // expected-error {{'launch_bounds' attribute takes no more than 2 arguments}} __launch_bounds__() void TestNoArgs(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}} int TestNoFunction __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to Objective-C methods, functions, and function pointers}} @@ -48,5 +47,3 @@ __launch_bounds__(Args) void TestTemplateVariadicArgs(void) {} // expected-error template <int... Args> __launch_bounds__(1, Args) void TestTemplateVariadicArgs2(void) {} // expected-error {{expression contains unexpanded parameter pack 'Args'}} - -__launch_bounds__(1, 2, 3) void Test3Args(void); // expected-warning {{maxclusterrank requires sm_90 or higher, CUDA arch provided: sm_75, ignoring 'launch_bounds' attribute}} diff --git a/clang/test/SemaCUDA/launch_bounds_sm_90.cu b/clang/test/SemaCUDA/launch_bounds_sm_90.cu deleted file mode 100644 index d5d902816c64c62..000000000000000 --- a/clang/test/SemaCUDA/launch_bounds_sm_90.cu +++ /dev/null @@ -1,57 +0,0 @@ -// RUN: %clang_cc1 -std=c++11 -fsyntax-only -triple nvptx-unknown-unknown -target-cpu sm_90 -verify %s - -#include "Inputs/cuda.h" - -__launch_bounds__(128, 7) void Test2Args(void); -__launch_bounds__(128) void Test1Arg(void); - -__launch_bounds__(0xffffffff) void TestMaxArg(void); -__launch_bounds__(0x100000000) void TestTooBigArg(void); // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} -__launch_bounds__(0x10000000000000000) void TestWayTooBigArg(void); // expected-error {{integer literal is too large to be represented in any integer type}} -__launch_bounds__(1, 1, 0x10000000000000000) void TestWayTooBigArg(void); // expected-error {{integer literal is too large to be represented in any integer type}} - -__launch_bounds__(-128, 7) void TestNegArg1(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative and will be ignored}} -__launch_bounds__(128, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}} -__launch_bounds__(-128, 1, 7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative and will be ignored}} -__launch_bounds__(128, -1, 7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative and will be ignored}} -__launch_bounds__(128, 1, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 2 is negative and will be ignored}} -// expected-warning@20 {{'launch_bounds' attribute parameter 0 is negative and will be ignored}} -// expected-warning@20 {{'launch_bounds' attribute parameter 1 is negative and will be ignored}} -__launch_bounds__(-128, -1, 7) void TestNegArg2(void); -// expected-warning@23 {{'launch_bounds' attribute parameter 0 is negative and will be ignored}} -// expected-warning@23 {{'launch_bounds' attribute parameter 2 is negative and will be ignored}} -__launch_bounds__(-128, 1, -7) void TestNegArg2(void); -// expected-warning@27 {{'launch_bounds' attribute parameter 0 is negative and will be ignored}} -// expected-warning@27 {{'launch_bounds' attribute parameter 1 is negative and will be ignored}} -// expected-warning@27 {{'launch_bounds' attribute parameter 2 is negative and will be ignored}} -__launch_bounds__(-128, -1, -7) void TestNegArg2(void); - - -__launch_bounds__(1, 2, 3, 4) void Test4Args(void); // expected-error {{'launch_bounds' attribute takes no more than 3 arguments}} -__launch_bounds__() void TestNoArgs(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}} - -int TestNoFunction __launch_bounds__(128, 7, 13); // expected-warning {{'launch_bounds' attribute only applies to Objective-C methods, functions, and function pointers}} - -__launch_bounds__(true) void TestBool(void); -__launch_bounds__(128, 1, 128.0) void TestFP(void); // expected-error {{'launch_bounds' attribute requires parameter 2 to be an integer constant}} -__launch_bounds__(128, 1, (void*)0) void TestNullptr(void); // expected-error {{'launch_bounds' attribute requires parameter 2 to be an integer constant}} - -int nonconstint = 256; -__launch_bounds__(125, 1, nonconstint) void TestNonConstInt(void); // expected-error {{'launch_bounds' attribute requires parameter 2 to be an integer constant}} - -const int constint = 512; -__launch_bounds__(128, 1, constint) void TestConstInt(void); -__launch_bounds__(128, 1, constint * 2 + 3) void TestConstIntExpr(void); - -template <int a, int b, int c> __launch_bounds__(a, b, c) void TestTemplate2Args(void) {} -template void TestTemplate2Args<128,7, 13>(void); - -template <int a, int b, int c> -__launch_bounds__(a + b, c + constint, a + b + c + constint) void TestTemplateExpr(void) {} -template void TestTemplateExpr<128+constint, 3, 7>(void); - -template <int... Args> -__launch_bounds__(Args) void TestTemplateVariadicArgs(void) {} // expected-error {{expression contains unexpanded parameter pack 'Args'}} - -template <int... Args> -__launch_bounds__(1, 22, Args) void TestTemplateVariadicArgs2(void) {} // expected-error {{expression contains unexpanded parameter pack 'Args'}} diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index c1df063d80f5ffb..5d6127419d6318e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -537,50 +537,59 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const { // If the NVVM IR has some of reqntid* specified, then output // the reqntid directive, and set the unspecified ones to 1. - // If none of Reqntid* is specified, don't output reqntid directive. - unsigned Reqntidx, Reqntidy, Reqntidz; - Reqntidx = Reqntidy = Reqntidz = 1; - bool ReqSpecified = false; - ReqSpecified |= getReqNTIDx(F, Reqntidx); - ReqSpecified |= getReqNTIDy(F, Reqntidy); - ReqSpecified |= getReqNTIDz(F, Reqntidz); - - if (ReqSpecified) - O << ".reqntid " << Reqntidx << ", " << Reqntidy << ", " << Reqntidz + // If none of reqntid* is specified, don't output reqntid directive. + unsigned reqntidx, reqntidy, reqntidz; + bool specified = false; + if (!getReqNTIDx(F, reqntidx)) + reqntidx = 1; + else + specified = true; + if (!getReqNTIDy(F, reqntidy)) + reqntidy = 1; + else + specified = true; + if (!getReqNTIDz(F, reqntidz)) + reqntidz = 1; + else + specified = true; + + if (specified) + O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz << "\n"; // If the NVVM IR has some of maxntid* specified, then output // the maxntid directive, and set the unspecified ones to 1. // If none of maxntid* is specified, don't output maxntid directive. - unsigned Maxntidx, Maxntidy, Maxntidz; - Maxntidx = Maxntidy = Maxntidz = 1; - bool MaxSpecified = false; - MaxSpecified |= getMaxNTIDx(F, Maxntidx); - MaxSpecified |= getMaxNTIDy(F, Maxntidy); - MaxSpecified |= getMaxNTIDz(F, Maxntidz); - - if (MaxSpecified) - O << ".maxntid " << Maxntidx << ", " << Maxntidy << ", " << Maxntidz - << "\n"; + unsigned maxntidx, maxntidy, maxntidz; + specified = false; + if (!getMaxNTIDx(F, maxntidx)) + maxntidx = 1; + else + specified = true; + if (!getMaxNTIDy(F, maxntidy)) + maxntidy = 1; + else + specified = true; + if (!getMaxNTIDz(F, maxntidz)) + maxntidz = 1; + else + specified = true; - unsigned Mincta = 0; - if (getMinCTASm(F, Mincta)) - O << ".minnctapersm " << Mincta << "\n"; + if (specified) + O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz + << "\n"; - unsigned Maxnreg = 0; - if (getMaxNReg(F, Maxnreg)) - O << ".maxnreg " << Maxnreg << "\n"; + unsigned mincta; + if (getMinCTASm(F, mincta)) + O << ".minnctapersm " << mincta << "\n"; - // .maxclusterrank directive requires SM_90 or higher, make sure that we - // filter it out for lower SM versions, as it causes a hard ptxas crash. - const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); - const auto *STI = static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl()); - unsigned Maxclusterrank = 0; - if (getMaxClusterRank(F, Maxclusterrank) && STI->getSmVersion() >= 90) - O << ".maxclusterrank " << Maxclusterrank << "\n"; + unsigned maxnreg; + if (getMaxNReg(F, maxnreg)) + O << ".maxnreg " << maxnreg << "\n"; } -std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { +std::string +NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { const TargetRegisterClass *RC = MRI->getRegClass(Reg); std::string Name; diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp index 35302889095f862..c3737f9fcca82a6 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -262,10 +262,6 @@ bool getMaxNTIDz(const Function &F, unsigned &z) { return findOneNVVMAnnotation(&F, "maxntidz", z); } -bool getMaxClusterRank(const Function &F, unsigned &x) { - return findOneNVVMAnnotation(&F, "maxclusterrank", x); -} - bool getReqNTIDx(const Function &F, unsigned &x) { return findOneNVVMAnnotation(&F, "reqntidx", x); } diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h index 449973bb53de75c..521f8198911f29e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h @@ -55,7 +55,6 @@ bool getReqNTIDx(const Function &, unsigned &); bool getReqNTIDy(const Function &, unsigned &); bool getReqNTIDz(const Function &, unsigned &); -bool getMaxClusterRank(const Function &, unsigned &); bool getMinCTASm(const Function &, unsigned &); bool getMaxNReg(const Function &, unsigned &); bool isKernelFunction(const Function &); diff --git a/llvm/test/CodeGen/NVPTX/maxclusterrank.ll b/llvm/test/CodeGen/NVPTX/maxclusterrank.ll deleted file mode 100644 index 828dd5e4cc400c6..000000000000000 --- a/llvm/test/CodeGen/NVPTX/maxclusterrank.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s --check-prefixes=CHECK,CHECK_SM_90 -; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK,CHECK_SM_80 - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" -target triple = "nvptx64-unknown-unknown" - -; CHECK: .maxntid 128, 1, 1 -; CHECK: .minnctapersm 2 -; CHECK_SM_90: .maxclusterrank 8 -; CHECK_SM_80-NOT: .maxclusterrank 8 - -; Make sure that for SM version prior to 90 `.maxclusterrank` directive is -; sielently ignored. -define dso_local void @_Z18TestMaxClusterRankv() { -entry: - %a = alloca i32, align 4 - store volatile i32 1, ptr %a, align 4 - ret void -} - -!nvvm.annotations = !{!0, !1, !2, !3} - -!0 = !{ptr @_Z18TestMaxClusterRankv, !"kernel", i32 1} -!1 = !{ptr @_Z18TestMaxClusterRankv, !"maxntidx", i32 128} -!2 = !{ptr @_Z18TestMaxClusterRankv, !"minctasm", i32 2} -!3 = !{ptr @_Z18TestMaxClusterRankv, !"maxclusterrank", i32 8} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits