Author: Artem Belevich Date: 2021-02-09T11:01:10-08:00 New Revision: 2aa01ccec30109fbcc65934c5d7c8907793e0660
URL: https://github.com/llvm/llvm-project/commit/2aa01ccec30109fbcc65934c5d7c8907793e0660 DIFF: https://github.com/llvm/llvm-project/commit/2aa01ccec30109fbcc65934c5d7c8907793e0660.diff LOG: [CUDA, NVPTX] Allow targeting sm_86 GPUs. The patch only plumbs through the option necessary for targeting sm_86 GPUs w/o adding any new functionality. Differential Revision: https://reviews.llvm.org/D95974 Added: Modified: clang/include/clang/Basic/BuiltinsNVPTX.def clang/include/clang/Basic/Cuda.h clang/lib/Basic/Cuda.cpp clang/lib/Basic/Targets/NVPTX.cpp clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp clang/lib/Driver/ToolChains/Cuda.cpp llvm/lib/Target/NVPTX/NVPTX.td Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index d149fa0127b9..44a5e4ae01c1 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -21,7 +21,9 @@ #pragma push_macro("SM_72") #pragma push_macro("SM_75") #pragma push_macro("SM_80") -#define SM_80 "sm_80" +#pragma push_macro("SM_86") +#define SM_86 "sm_86" +#define SM_80 "sm_80|" SM_86 #define SM_75 "sm_75|" SM_80 #define SM_72 "sm_72|" SM_75 #define SM_70 "sm_70|" SM_72 @@ -35,7 +37,9 @@ #pragma push_macro("PTX64") #pragma push_macro("PTX65") #pragma push_macro("PTX70") -#define PTX70 "ptx70" +#pragma push_macro("PTX71") +#define PTX71 "ptx71" +#define PTX70 "ptx70|" PTX71 #define PTX65 "ptx65|" PTX70 #define PTX64 "ptx64|" PTX65 #define PTX63 "ptx63|" PTX64 @@ -728,9 +732,11 @@ TARGET_BUILTIN(__imma_m8n8k32_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63)) #pragma pop_macro("SM_72") #pragma pop_macro("SM_75") #pragma pop_macro("SM_80") +#pragma pop_macro("SM_86") #pragma pop_macro("PTX60") #pragma pop_macro("PTX61") #pragma pop_macro("PTX63") #pragma pop_macro("PTX64") #pragma pop_macro("PTX65") #pragma pop_macro("PTX70") +#pragma pop_macro("PTX71") diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index b3a2e99fe931..12ffa3e04fb8 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -29,7 +29,9 @@ enum class CudaVersion { CUDA_101, CUDA_102, CUDA_110, - LATEST = CUDA_110, + CUDA_111, + CUDA_112, + LATEST = CUDA_112, LATEST_SUPPORTED = CUDA_101, }; const char *CudaVersionToString(CudaVersion V); @@ -55,6 +57,7 @@ enum class CudaArch { SM_72, SM_75, SM_80, + SM_86, GFX600, GFX601, GFX602, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 144113f2d2e7..22eea1fb9cf6 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -32,6 +32,10 @@ const char *CudaVersionToString(CudaVersion V) { return "10.2"; case CudaVersion::CUDA_110: return "11.0"; + case CudaVersion::CUDA_111: + return "11.1"; + case CudaVersion::CUDA_112: + return "11.2"; } llvm_unreachable("invalid enum"); } @@ -48,6 +52,8 @@ CudaVersion CudaStringToVersion(const llvm::Twine &S) { .Case("10.1", CudaVersion::CUDA_101) .Case("10.2", CudaVersion::CUDA_102) .Case("11.0", CudaVersion::CUDA_110) + .Case("11.1", CudaVersion::CUDA_111) + .Case("11.2", CudaVersion::CUDA_112) .Default(CudaVersion::UNKNOWN); } @@ -71,7 +77,7 @@ CudaArchToStringMap arch_names[] = { SM(60), SM(61), SM(62), // Pascal SM(70), SM(72), // Volta SM(75), // Turing - SM(80), // Ampere + SM(80), SM(86), // Ampere GFX(600), // gfx600 GFX(601), // gfx601 GFX(602), // gfx602 @@ -164,6 +170,8 @@ CudaVersion MinVersionForCudaArch(CudaArch A) { return CudaVersion::CUDA_100; case CudaArch::SM_80: return CudaVersion::CUDA_110; + case CudaArch::SM_86: + return CudaVersion::CUDA_111; default: llvm_unreachable("invalid enum"); } @@ -209,6 +217,10 @@ CudaVersion ToCudaVersion(llvm::VersionTuple Version) { return CudaVersion::CUDA_102; case 110: return CudaVersion::CUDA_110; + case 111: + return CudaVersion::CUDA_111; + case 112: + return CudaVersion::CUDA_112; default: return CudaVersion::UNKNOWN; } diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index b7f0dce33d2b..da8a578fa557 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -45,6 +45,8 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, if (!Feature.startswith("+ptx")) continue; PTXVersion = llvm::StringSwitch<unsigned>(Feature) + .Case("+ptx72", 72) + .Case("+ptx71", 71) .Case("+ptx70", 70) .Case("+ptx65", 65) .Case("+ptx64", 64) @@ -246,6 +248,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, return "750"; case CudaArch::SM_80: return "800"; + case CudaArch::SM_86: + return "860"; } llvm_unreachable("unhandled CudaArch"); }(); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 9c8c7b83d1d2..75be9372716f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -4613,6 +4613,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective( case CudaArch::SM_72: case CudaArch::SM_75: case CudaArch::SM_80: + case CudaArch::SM_86: case CudaArch::GFX600: case CudaArch::GFX601: case CudaArch::GFX602: @@ -4680,6 +4681,7 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) { case CudaArch::SM_72: case CudaArch::SM_75: case CudaArch::SM_80: + case CudaArch::SM_86: return {84, 32}; case CudaArch::GFX600: case CudaArch::GFX601: diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 9d050b944b5b..d01056ba878b 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -75,6 +75,8 @@ CudaVersion getCudaVersion(uint32_t raw_version) { return CudaVersion::CUDA_102; if (raw_version < 11010) return CudaVersion::CUDA_110; + if (raw_version < 11020) + return CudaVersion::CUDA_111; return CudaVersion::LATEST; } @@ -720,6 +722,8 @@ void CudaToolChain::addClangTargetOptions( CudaVersionStr = #CUDA_VER; \ PtxFeature = "+ptx" #PTX_VER; \ break; + CASE_CUDA_VERSION(112, 72); + CASE_CUDA_VERSION(111, 71); CASE_CUDA_VERSION(110, 70); CASE_CUDA_VERSION(102, 65); CASE_CUDA_VERSION(101, 64); diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td index 2b39e9f412f7..2b0972b8531e 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.td +++ b/llvm/lib/Target/NVPTX/NVPTX.td @@ -57,6 +57,8 @@ def SM75 : SubtargetFeature<"sm_75", "SmVersion", "75", "Target SM 7.5">; def SM80 : SubtargetFeature<"sm_80", "SmVersion", "80", "Target SM 8.0">; +def SM86 : SubtargetFeature<"sm_86", "SmVersion", "86", + "Target SM 8.6">; // PTX Versions def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32", @@ -83,6 +85,10 @@ def PTX65 : SubtargetFeature<"ptx65", "PTXVersion", "65", "Use PTX version 6.5">; def PTX70 : SubtargetFeature<"ptx70", "PTXVersion", "70", "Use PTX version 7.0">; +def PTX71 : SubtargetFeature<"ptx71", "PTXVersion", "71", + "Use PTX version 7.1">; +def PTX72 : SubtargetFeature<"ptx72", "PTXVersion", "72", + "Use PTX version 7.2">; //===----------------------------------------------------------------------===// // NVPTX supported processors. @@ -107,6 +113,7 @@ def : Proc<"sm_70", [SM70, PTX60]>; def : Proc<"sm_72", [SM72, PTX61]>; def : Proc<"sm_75", [SM75, PTX63]>; def : Proc<"sm_80", [SM80, PTX70]>; +def : Proc<"sm_86", [SM86, PTX71]>; def NVPTXInstrInfo : InstrInfo { } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits