llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: Sergey Kozub (sergey-kozub) <details> <summary>Changes</summary> CUDA 12.8 supports PTX 8.6, which enables the "sm_100a" architecture (supports Blackwell-specific instructions). CUDA 12.7 technically does not exist; map it to PTX 8.5 (same as 12.6). --- Full diff: https://github.com/llvm/llvm-project/pull/123398.diff 7 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsNVPTX.def (+4-1) - (modified) clang/include/clang/Basic/Cuda.h (+5-1) - (modified) clang/lib/Basic/Cuda.cpp (+6-2) - (modified) clang/lib/Basic/Targets/NVPTX.cpp (+3) - (modified) clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp (+1) - (modified) clang/lib/Driver/ToolChains/Cuda.cpp (+9) - (modified) llvm/lib/Target/NVPTX/NVPTX.td (+2) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index 969dd9e41ebfa3..37b4e6ff77fda6 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -28,7 +28,9 @@ #pragma push_macro("SM_90") #pragma push_macro("SM_90a") #pragma push_macro("SM_100") -#define SM_100 "sm_100" +#pragma push_macro("SM_100a") +#define SM_100a "sm_100a" +#define SM_100 "sm_100|" SM_100a #define SM_90a "sm_90a" #define SM_90 "sm_90|" SM_90a "|" SM_100 #define SM_89 "sm_89|" SM_90 @@ -1091,6 +1093,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) #pragma pop_macro("SM_90") #pragma pop_macro("SM_90a") #pragma pop_macro("SM_100") +#pragma pop_macro("SM_100a") #pragma pop_macro("PTX42") #pragma pop_macro("PTX60") #pragma pop_macro("PTX61") diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index c2a4addf488df1..1cdfc8178db843 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -44,9 +44,12 @@ enum class CudaVersion { CUDA_124, CUDA_125, CUDA_126, + CUDA_127, + CUDA_128, + CUDA_129, 
FULLY_SUPPORTED = CUDA_123, PARTIALLY_SUPPORTED = - CUDA_126, // Partially supported. Proceed with a warning. + CUDA_129, // Partially supported. Proceed with a warning. NEW = 10000, // Too new. Issue a warning, but allow using it. }; const char *CudaVersionToString(CudaVersion V); @@ -80,6 +83,7 @@ enum class OffloadArch { SM_90, SM_90a, SM_100, + SM_100a, GFX600, GFX601, GFX602, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index d56609a2a8f24a..692ab7c319d8bd 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -44,6 +44,9 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = { CUDA_ENTRY(12, 4), CUDA_ENTRY(12, 5), CUDA_ENTRY(12, 6), + CUDA_ENTRY(12, 7), + CUDA_ENTRY(12, 8), + CUDA_ENTRY(12, 9), {"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())}, {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone. }; @@ -98,6 +101,7 @@ static const OffloadArchToStringMap arch_names[] = { SM(90), // Hopper SM(90a), // Hopper SM(100), // Blackwell + SM(100a), // Blackwell GFX(600), // gfx600 GFX(601), // gfx601 GFX(602), // gfx602 @@ -227,8 +231,8 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) { case OffloadArch::SM_90a: return CudaVersion::CUDA_120; case OffloadArch::SM_100: - return CudaVersion::NEW; // TODO: use specific CUDA version once it's - // public. 
+ case OffloadArch::SM_100a: + return CudaVersion::CUDA_128; default: llvm_unreachable("invalid enum"); } diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index dbc3fec3657610..56efad90cb7c84 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -285,6 +285,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case OffloadArch::SM_90a: return "900"; case OffloadArch::SM_100: + case OffloadArch::SM_100a: return "1000"; } llvm_unreachable("unhandled OffloadArch"); @@ -292,6 +293,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode); if (GPU == OffloadArch::SM_90a) Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1"); + if (GPU == OffloadArch::SM_100a) + Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1"); } } diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 87c3635ed3f70e..c13928f61a7481 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2277,6 +2277,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) { case OffloadArch::SM_90: case OffloadArch::SM_90a: case OffloadArch::SM_100: + case OffloadArch::SM_100a: case OffloadArch::GFX600: case OffloadArch::GFX601: case OffloadArch::GFX602: diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 8967115bcc73d9..8cb82fe4c07d3d 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -89,6 +89,12 @@ CudaVersion getCudaVersion(uint32_t raw_version) { return CudaVersion::CUDA_125; if (raw_version < 12070) return CudaVersion::CUDA_126; + if (raw_version < 12080) + return CudaVersion::CUDA_127; + if (raw_version < 12090) + return CudaVersion::CUDA_128; + if (raw_version < 12100) + return CudaVersion::CUDA_129; return CudaVersion::NEW; } @@ -682,6 +688,9 @@ void 
NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple, case CudaVersion::CUDA_##CUDA_VER: \ PtxFeature = "+ptx" #PTX_VER; \ break; + CASE_CUDA_VERSION(129, 86); + CASE_CUDA_VERSION(128, 86); + CASE_CUDA_VERSION(127, 85); CASE_CUDA_VERSION(126, 85); CASE_CUDA_VERSION(125, 85); CASE_CUDA_VERSION(124, 84); diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td index 9af8715ef52ae7..3ca8b4d294079c 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.td +++ b/llvm/lib/Target/NVPTX/NVPTX.td @@ -39,6 +39,7 @@ foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53, def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>; def SM90a: FeatureSM<"90a", 901>; +def SM100a: FeatureSM<"100a", 1001>; foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65, 70, 71, 72, 73, 74, 75, 76, 77, 78, @@ -74,6 +75,7 @@ def : Proc<"sm_89", [SM89, PTX78]>; def : Proc<"sm_90", [SM90, PTX78]>; def : Proc<"sm_90a", [SM90a, PTX80]>; def : Proc<"sm_100", [SM100, PTX86]>; +def : Proc<"sm_100a", [SM100a, PTX86]>; def NVPTXInstrInfo : InstrInfo { } `````````` </details> https://github.com/llvm/llvm-project/pull/123398 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits