https://github.com/jzc updated https://github.com/llvm/llvm-project/pull/133194
>From 8f22fbe1f6272beec61e62bfae72832d75b4f25b Mon Sep 17 00:00:00 2001 From: "Cai, Justin" <justin....@intel.com> Date: Fri, 14 Feb 2025 21:16:27 +0000 Subject: [PATCH 1/5] [SYCL] Add AOT compilation support for Intel GPUs in clang-sycl-linker --- clang/include/clang/Basic/SYCL.h | 131 ++++++++++ clang/lib/Basic/CMakeLists.txt | 1 + clang/lib/Basic/SYCL.cpp | 226 ++++++++++++++++++ clang/test/Driver/clang-sycl-linker-test.cpp | 36 +++ .../clang-sycl-linker/ClangSYCLLinker.cpp | 112 ++++++++- clang/tools/clang-sycl-linker/SYCLLinkOpts.td | 8 + 6 files changed, 506 insertions(+), 8 deletions(-) create mode 100644 clang/include/clang/Basic/SYCL.h create mode 100644 clang/lib/Basic/SYCL.cpp diff --git a/clang/include/clang/Basic/SYCL.h b/clang/include/clang/Basic/SYCL.h new file mode 100644 index 0000000000000..c7cad37639b91 --- /dev/null +++ b/clang/include/clang/Basic/SYCL.h @@ -0,0 +1,131 @@ +//===--- SYCL.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_SYCL_H +#define LLVM_CLANG_BASIC_SYCL_H + +#include "clang/Basic/Cuda.h" + +namespace llvm { +class StringRef; +template <unsigned InternalLen> class SmallString; +} // namespace llvm + +namespace clang { +// List of architectures (Intel CPUs and Intel GPUs) +// that support SYCL offloading. 
+enum class SYCLSupportedIntelArchs { + // Intel CPUs + UNKNOWN, + SKYLAKEAVX512, + COREAVX2, + COREI7AVX, + COREI7, + WESTMERE, + SANDYBRIDGE, + IVYBRIDGE, + BROADWELL, + COFFEELAKE, + ALDERLAKE, + SKYLAKE, + SKX, + CASCADELAKE, + ICELAKECLIENT, + ICELAKESERVER, + SAPPHIRERAPIDS, + GRANITERAPIDS, + // Intel GPUs + BDW, + SKL, + KBL, + CFL, + APL, + BXT, + GLK, + WHL, + AML, + CML, + ICLLP, + ICL, + EHL, + JSL, + TGLLP, + TGL, + RKL, + ADL_S, + RPL_S, + ADL_P, + ADL_N, + DG1, + ACM_G10, + DG2_G10, + ACM_G11, + DG2_G11, + ACM_G12, + DG2_G12, + PVC, + PVC_VG, + MTL_U, + MTL_S, + ARL_U, + ARL_S, + MTL_H, + ARL_H, + BMG_G21, + LNL_M, +}; + +// Check if the given Arch value is a Generic AMD GPU. +// Currently GFX*_GENERIC AMD GPUs do not support SYCL offloading. +// This list is used to filter out GFX*_GENERIC AMD GPUs in +// `IsSYCLSupportedAMDGPUArch`. +static inline bool IsAMDGenericGPUArch(OffloadArch Arch) { + return Arch == OffloadArch::GFX9_GENERIC || + Arch == OffloadArch::GFX10_1_GENERIC || + Arch == OffloadArch::GFX10_3_GENERIC || + Arch == OffloadArch::GFX11_GENERIC || + Arch == OffloadArch::GFX12_GENERIC; +} + +// Check if the given Arch value is a valid SYCL supported AMD GPU. +static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::GFX700 && Arch < OffloadArch::AMDGCNSPIRV && + !IsAMDGenericGPUArch(Arch); +} + +// Check if the given Arch value is a valid SYCL supported NVidia GPU. +static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::SM_50 && Arch <= OffloadArch::SM_90a; +} + +// Check if the given Arch value is a valid SYCL supported Intel CPU. +static inline bool IsSYCLSupportedIntelCPUArch(SYCLSupportedIntelArchs Arch) { + return Arch >= SYCLSupportedIntelArchs::SKYLAKEAVX512 && + Arch <= SYCLSupportedIntelArchs::GRANITERAPIDS; +} + +// Check if the given Arch value is a valid SYCL supported Intel GPU. 
+static inline bool IsSYCLSupportedIntelGPUArch(SYCLSupportedIntelArchs Arch) { + return Arch >= SYCLSupportedIntelArchs::BDW && + Arch <= SYCLSupportedIntelArchs::LNL_M; +} + +// Check if the user provided value for --offload-arch is a valid +// SYCL supported Intel AOT target. +SYCLSupportedIntelArchs +StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString); + +// This is a mapping between the user provided --offload-arch value for Intel +// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU +// AOT compiler). +llvm::StringRef mapIntelGPUArchName(llvm::StringRef ArchName); +llvm::SmallString<64> getGenDeviceMacro(llvm::StringRef DeviceName); + +} // namespace clang + +#endif // LLVM_CLANG_BASIC_SYCL_H diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index 331dfbb3f4b67..be6d915e01b0a 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -90,6 +90,7 @@ add_clang_library(clangBasic SourceMgrAdapter.cpp Stack.cpp StackExhaustionHandler.cpp + SYCL.cpp TargetID.cpp TargetInfo.cpp Targets.cpp diff --git a/clang/lib/Basic/SYCL.cpp b/clang/lib/Basic/SYCL.cpp new file mode 100644 index 0000000000000..9ac5470cdbe5a --- /dev/null +++ b/clang/lib/Basic/SYCL.cpp @@ -0,0 +1,226 @@ +#include "clang/Basic/SYCL.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" + +using namespace llvm; + +namespace clang { + +// Struct that relates an AOT target value with +// Intel CPUs and Intel GPUs. +struct StringToOffloadArchSYCLMap { + const char *ArchName; + SYCLSupportedIntelArchs IntelArch; +}; + +// Mapping of supported SYCL offloading architectures. +static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = { + // Intel CPU mapping. 
+ {"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512}, + {"core-avx2", SYCLSupportedIntelArchs::COREAVX2}, + {"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX}, + {"corei7", SYCLSupportedIntelArchs::COREI7}, + {"westmere", SYCLSupportedIntelArchs::WESTMERE}, + {"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE}, + {"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE}, + {"broadwell", SYCLSupportedIntelArchs::BROADWELL}, + {"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE}, + {"alderlake", SYCLSupportedIntelArchs::ALDERLAKE}, + {"skylake", SYCLSupportedIntelArchs::SKYLAKE}, + {"skx", SYCLSupportedIntelArchs::SKX}, + {"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE}, + {"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT}, + {"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER}, + {"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS}, + {"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS}, + // Intel GPU mapping. + {"bdw", SYCLSupportedIntelArchs::BDW}, + {"skl", SYCLSupportedIntelArchs::SKL}, + {"kbl", SYCLSupportedIntelArchs::KBL}, + {"cfl", SYCLSupportedIntelArchs::CFL}, + {"apl", SYCLSupportedIntelArchs::APL}, + {"bxt", SYCLSupportedIntelArchs::BXT}, + {"glk", SYCLSupportedIntelArchs::GLK}, + {"whl", SYCLSupportedIntelArchs::WHL}, + {"aml", SYCLSupportedIntelArchs::AML}, + {"cml", SYCLSupportedIntelArchs::CML}, + {"icllp", SYCLSupportedIntelArchs::ICLLP}, + {"icl", SYCLSupportedIntelArchs::ICL}, + {"ehl", SYCLSupportedIntelArchs::EHL}, + {"jsl", SYCLSupportedIntelArchs::JSL}, + {"tgllp", SYCLSupportedIntelArchs::TGLLP}, + {"tgl", SYCLSupportedIntelArchs::TGL}, + {"rkl", SYCLSupportedIntelArchs::RKL}, + {"adl_s", SYCLSupportedIntelArchs::ADL_S}, + {"rpl_s", SYCLSupportedIntelArchs::RPL_S}, + {"adl_p", SYCLSupportedIntelArchs::ADL_P}, + {"adl_n", SYCLSupportedIntelArchs::ADL_N}, + {"dg1", SYCLSupportedIntelArchs::DG1}, + {"acm_g10", SYCLSupportedIntelArchs::ACM_G10}, + {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10}, + {"acm_g11", 
SYCLSupportedIntelArchs::ACM_G11}, + {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10}, + {"dg2_g11", SYCLSupportedIntelArchs::DG2_G11}, + {"acm_g12", SYCLSupportedIntelArchs::ACM_G12}, + {"dg2_g12", SYCLSupportedIntelArchs::DG2_G12}, + {"pvc", SYCLSupportedIntelArchs::PVC}, + {"pvc_vg", SYCLSupportedIntelArchs::PVC_VG}, + {"mtl_u", SYCLSupportedIntelArchs::MTL_U}, + {"mtl_s", SYCLSupportedIntelArchs::MTL_S}, + {"arl_u", SYCLSupportedIntelArchs::ARL_U}, + {"arl_s", SYCLSupportedIntelArchs::ARL_S}, + {"mtl_h", SYCLSupportedIntelArchs::MTL_H}, + {"arl_h", SYCLSupportedIntelArchs::ARL_H}, + {"bmg_g21", SYCLSupportedIntelArchs::BMG_G21}, + {"lnl_m", SYCLSupportedIntelArchs::LNL_M}}; + +// Check if the user provided value for --offload-arch is a valid +// SYCL supported Intel AOT target. +SYCLSupportedIntelArchs StringToOffloadArchSYCL(StringRef ArchNameAsString) { + auto result = + llvm::find_if(StringToArchNamesMap, + [ArchNameAsString](const StringToOffloadArchSYCLMap &map) { + return ArchNameAsString == map.ArchName; + }); + if (result == std::end(StringToArchNamesMap)) + return SYCLSupportedIntelArchs::UNKNOWN; + return result->IntelArch; +} + +// This is a mapping between the user provided --offload-arch value for Intel +// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU +// AOT compiler). 
+StringRef mapIntelGPUArchName(StringRef ArchName) { + StringRef Arch; + Arch = llvm::StringSwitch<StringRef>(ArchName) + .Case("bdw", "bdw") + .Case("skl", "skl") + .Case("kbl", "kbl") + .Case("cfl", "cfl") + .Cases("apl", "bxt", "apl") + .Case("glk", "glk") + .Case("whl", "whl") + .Case("aml", "aml") + .Case("cml", "cml") + .Cases("icllp", "icl", "icllp") + .Cases("ehl", "jsl", "ehl") + .Cases("tgllp", "tgl", "tgllp") + .Case("rkl", "rkl") + .Cases("adl_s", "rpl_s", "adl_s") + .Case("adl_p", "adl_p") + .Case("adl_n", "adl_n") + .Case("dg1", "dg1") + .Cases("acm_g10", "dg2_g10", "acm_g10") + .Cases("acm_g11", "dg2_g11", "acm_g11") + .Cases("acm_g12", "dg2_g12", "acm_g12") + .Case("pvc", "pvc") + .Case("pvc_vg", "pvc_vg") + .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u") + .Case("mtl_h", "mtl_h") + .Case("arl_h", "arl_h") + .Case("bmg_g21", "bmg_g21") + .Case("lnl_m", "lnl_m") + .Default(""); + return Arch; +} + +SmallString<64> getGenDeviceMacro(StringRef DeviceName) { + SmallString<64> Macro; + StringRef Ext = llvm::StringSwitch<StringRef>(DeviceName) + .Case("bdw", "INTEL_GPU_BDW") + .Case("skl", "INTEL_GPU_SKL") + .Case("kbl", "INTEL_GPU_KBL") + .Case("cfl", "INTEL_GPU_CFL") + .Case("apl", "INTEL_GPU_APL") + .Case("glk", "INTEL_GPU_GLK") + .Case("whl", "INTEL_GPU_WHL") + .Case("aml", "INTEL_GPU_AML") + .Case("cml", "INTEL_GPU_CML") + .Case("icllp", "INTEL_GPU_ICLLP") + .Case("ehl", "INTEL_GPU_EHL") + .Case("tgllp", "INTEL_GPU_TGLLP") + .Case("rkl", "INTEL_GPU_RKL") + .Case("adl_s", "INTEL_GPU_ADL_S") + .Case("adl_p", "INTEL_GPU_ADL_P") + .Case("adl_n", "INTEL_GPU_ADL_N") + .Case("dg1", "INTEL_GPU_DG1") + .Case("acm_g10", "INTEL_GPU_ACM_G10") + .Case("acm_g11", "INTEL_GPU_ACM_G11") + .Case("acm_g12", "INTEL_GPU_ACM_G12") + .Case("pvc", "INTEL_GPU_PVC") + .Case("pvc_vg", "INTEL_GPU_PVC_VG") + .Case("mtl_u", "INTEL_GPU_MTL_U") + .Case("mtl_h", "INTEL_GPU_MTL_H") + .Case("arl_h", "INTEL_GPU_ARL_H") + .Case("bmg_g21", "INTEL_GPU_BMG_G21") + .Case("lnl_m", 
"INTEL_GPU_LNL_M") + .Case("ptl_h", "INTEL_GPU_PTL_H") + .Case("ptl_u", "INTEL_GPU_PTL_U") + .Case("sm_50", "NVIDIA_GPU_SM_50") + .Case("sm_52", "NVIDIA_GPU_SM_52") + .Case("sm_53", "NVIDIA_GPU_SM_53") + .Case("sm_60", "NVIDIA_GPU_SM_60") + .Case("sm_61", "NVIDIA_GPU_SM_61") + .Case("sm_62", "NVIDIA_GPU_SM_62") + .Case("sm_70", "NVIDIA_GPU_SM_70") + .Case("sm_72", "NVIDIA_GPU_SM_72") + .Case("sm_75", "NVIDIA_GPU_SM_75") + .Case("sm_80", "NVIDIA_GPU_SM_80") + .Case("sm_86", "NVIDIA_GPU_SM_86") + .Case("sm_87", "NVIDIA_GPU_SM_87") + .Case("sm_89", "NVIDIA_GPU_SM_89") + .Case("sm_90", "NVIDIA_GPU_SM_90") + .Case("sm_90a", "NVIDIA_GPU_SM_90A") + .Case("gfx700", "AMD_GPU_GFX700") + .Case("gfx701", "AMD_GPU_GFX701") + .Case("gfx702", "AMD_GPU_GFX702") + .Case("gfx703", "AMD_GPU_GFX703") + .Case("gfx704", "AMD_GPU_GFX704") + .Case("gfx705", "AMD_GPU_GFX705") + .Case("gfx801", "AMD_GPU_GFX801") + .Case("gfx802", "AMD_GPU_GFX802") + .Case("gfx803", "AMD_GPU_GFX803") + .Case("gfx805", "AMD_GPU_GFX805") + .Case("gfx810", "AMD_GPU_GFX810") + .Case("gfx900", "AMD_GPU_GFX900") + .Case("gfx902", "AMD_GPU_GFX902") + .Case("gfx904", "AMD_GPU_GFX904") + .Case("gfx906", "AMD_GPU_GFX906") + .Case("gfx908", "AMD_GPU_GFX908") + .Case("gfx909", "AMD_GPU_GFX909") + .Case("gfx90a", "AMD_GPU_GFX90A") + .Case("gfx90c", "AMD_GPU_GFX90C") + .Case("gfx940", "AMD_GPU_GFX940") + .Case("gfx941", "AMD_GPU_GFX941") + .Case("gfx942", "AMD_GPU_GFX942") + .Case("gfx1010", "AMD_GPU_GFX1010") + .Case("gfx1011", "AMD_GPU_GFX1011") + .Case("gfx1012", "AMD_GPU_GFX1012") + .Case("gfx1013", "AMD_GPU_GFX1013") + .Case("gfx1030", "AMD_GPU_GFX1030") + .Case("gfx1031", "AMD_GPU_GFX1031") + .Case("gfx1032", "AMD_GPU_GFX1032") + .Case("gfx1033", "AMD_GPU_GFX1033") + .Case("gfx1034", "AMD_GPU_GFX1034") + .Case("gfx1035", "AMD_GPU_GFX1035") + .Case("gfx1036", "AMD_GPU_GFX1036") + .Case("gfx1100", "AMD_GPU_GFX1100") + .Case("gfx1101", "AMD_GPU_GFX1101") + .Case("gfx1102", "AMD_GPU_GFX1102") + .Case("gfx1103", 
"AMD_GPU_GFX1103") + .Case("gfx1150", "AMD_GPU_GFX1150") + .Case("gfx1151", "AMD_GPU_GFX1151") + .Case("gfx1200", "AMD_GPU_GFX1200") + .Case("gfx1201", "AMD_GPU_GFX1201") + .Default(""); + if (!Ext.empty()) { + Macro = "__SYCL_TARGET_"; + Macro += Ext; + Macro += "__"; + } + return Macro; +} + +} // namespace clang diff --git a/clang/test/Driver/clang-sycl-linker-test.cpp b/clang/test/Driver/clang-sycl-linker-test.cpp index f358900b4fbd8..07850dc41b4fc 100644 --- a/clang/test/Driver/clang-sycl-linker-test.cpp +++ b/clang/test/Driver/clang-sycl-linker-test.cpp @@ -46,3 +46,39 @@ // RUN: clang-sycl-linker --dry-run -triple spirv64 %t_1.bc %t_2.bc -o a.spv 2>&1 \ // RUN: | FileCheck %s --check-prefix=LLVMOPTSLIN // LLVMOPTSLIN: -spirv-debug-info-version=nonsemantic-shader-200 -spirv-allow-unknown-intrinsics=llvm.genx. -spirv-ext= +// +// Test AOT compilation for an Intel GPU. +// RUN: clang-sycl-linker --dry-run -arch pvc %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU +// AOT-INTEL-GPU: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings +// AOT-INTEL-GPU-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc +// AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device pvc {{.*}}-output a.out -file [[SPIRVTRANSLATIONOUT]] +// +// Test AOT compilation for an Intel GPU with additional options. +// RUN: clang-sycl-linker --dry-run -arch pvc %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: --ocloc-options="-a -b" \ +// RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU-2 +// AOT-INTEL-GPU-2: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings +// AOT-INTEL-GPU-2-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc +// AOT-INTEL-GPU-2-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device pvc -a -b {{.*}}-output a.out -file [[SPIRVTRANSLATIONOUT]] +// +// Test AOT compilation for an Intel CPU. 
+// RUN: clang-sycl-linker --dry-run -arch corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU +// AOT-INTEL-CPU: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings +// AOT-INTEL-CPU-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc +// AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu {{.*}}-o a.out [[SPIRVTRANSLATIONOUT]] +// +// Test AOT compilation for an Intel CPU with additional options. +// RUN: clang-sycl-linker --dry-run -arch corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: --opencl-aot-options="-a -b" \ +// RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU-2 +// AOT-INTEL-CPU-2: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings +// AOT-INTEL-CPU-2-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc +// AOT-INTEL-CPU-2-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b {{.*}}-o a.out [[SPIRVTRANSLATIONOUT]] +// +// Check that the output file must be specified. +// RUN: not clang-sycl-linker --dry-run %t_1.bc %t_2.bc 2>& 1 \ +// RUN: | FileCheck %s --check-prefix=NOOUTPUT +// NOOUTPUT: Output file is not specified +// diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index 2bcb3757d49d0..1798907c1f3e0 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -14,6 +14,7 @@ // target-specific device code. //===---------------------------------------------------------------------===// +#include "clang/Basic/SYCL.h" #include "clang/Basic/Version.h" #include "llvm/ADT/StringExtras.h" @@ -50,6 +51,7 @@ using namespace llvm; using namespace llvm::opt; using namespace llvm::object; +using namespace clang; /// Save intermediary results. 
static bool SaveTemps = false; @@ -66,6 +68,8 @@ static StringRef OutputFile; /// Directory to dump SPIR-V IR if requested by user. static SmallString<128> SPIRVDumpDir; +static bool IsAOTCompileNeeded = false; + static void printVersion(raw_ostream &OS) { OS << clang::getClangToolFullVersion("clang-sycl-linker") << '\n'; } @@ -392,7 +396,15 @@ static Expected<StringRef> runLLVMToSPIRVTranslation(StringRef File, LLVMToSPIRVOptions = A->getValue(); LLVMToSPIRVOptions.split(CmdArgs, " ", /* MaxSplit = */ -1, /* KeepEmpty = */ false); - CmdArgs.append({"-o", OutputFile}); + + Expected<StringRef> OutFileOrErr = + IsAOTCompileNeeded + ? createTempFile(Args, sys::path::filename(OutputFile), "spv") + : OutputFile; + if (!OutFileOrErr) + return OutFileOrErr.takeError(); + + CmdArgs.append({"-o", *OutFileOrErr}); CmdArgs.push_back(File); if (Error Err = executeCommands(*LLVMToSPIRVProg, CmdArgs)) return std::move(Err); @@ -406,7 +418,7 @@ static Expected<StringRef> runLLVMToSPIRVTranslation(StringRef File, formatv("failed to create dump directory. path: {0}, error_code: {1}", SPIRVDumpDir, EC.value())); - StringRef Path = OutputFile; + StringRef Path = *OutFileOrErr; StringRef Filename = llvm::sys::path::filename(Path); SmallString<128> CopyPath = SPIRVDumpDir; CopyPath.append(Filename); @@ -419,7 +431,83 @@ static Expected<StringRef> runLLVMToSPIRVTranslation(StringRef File, Path, CopyPath, EC.value())); } - return OutputFile; + return *OutFileOrErr; +} + +/// Run AOT compilation for Intel CPU. +/// Calls opencl-aot tool to generate device code for Intel CPU backend. +/// 'InputFile' is the input SPIR-V file. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// SYCL AOT compilation step. 
+static Error runAOTCompileIntelCPU(StringRef InputFile, const ArgList &Args) { + SmallVector<StringRef, 8> CmdArgs; + Expected<std::string> OpenCLAOTPath = + findProgram(Args, "opencl-aot", {getMainExecutable("opencl-aot")}); + if (!OpenCLAOTPath) + return OpenCLAOTPath.takeError(); + + CmdArgs.push_back(*OpenCLAOTPath); + CmdArgs.push_back("--device=cpu"); + StringRef ExtraArgs = Args.getLastArgValue(OPT_opencl_aot_options_EQ); + ExtraArgs.split(CmdArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + CmdArgs.push_back("-o"); + CmdArgs.push_back(OutputFile); + CmdArgs.push_back(InputFile); + if (Error Err = executeCommands(*OpenCLAOTPath, CmdArgs)) + return std::move(Err); + return Error::success(); +} + +/// Run AOT compilation for Intel GPU +/// Calls ocloc tool to generate device code for Intel GPU backend. +/// 'InputFile' is the input SPIR-V file. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// SYCL AOT compilation step. 
+static Error runAOTCompileIntelGPU(StringRef InputFile, const ArgList &Args) { + SmallVector<StringRef, 8> CmdArgs; + Expected<std::string> OclocPath = + findProgram(Args, "ocloc", {getMainExecutable("ocloc")}); + if (!OclocPath) + return OclocPath.takeError(); + + CmdArgs.push_back(*OclocPath); + // The next line prevents ocloc from modifying the image name + CmdArgs.push_back("-output_no_suffix"); + CmdArgs.push_back("-spirv_input"); + + StringRef Arch(Args.getLastArgValue(OPT_arch)); + assert(!Arch.empty() && "Arch must be specified for AOT compilation"); + CmdArgs.push_back("-device"); + CmdArgs.push_back(Arch); + + StringRef ExtraArgs = Args.getLastArgValue(OPT_ocloc_options_EQ); + ExtraArgs.split(CmdArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + + CmdArgs.push_back("-output"); + CmdArgs.push_back(OutputFile); + CmdArgs.push_back("-file"); + CmdArgs.push_back(InputFile); + if (Error Err = executeCommands(*OclocPath, CmdArgs)) + return std::move(Err); + return Error::success(); +} + +/// Run AOT compilation for Intel CPU/GPU. +/// 'InputFile' is the input SPIR-V file. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// SYCL AOT compilation step. 
+static Error runAOTCompile(StringRef InputFile, const ArgList &Args) { + StringRef Arch = Args.getLastArgValue(OPT_arch); + SYCLSupportedIntelArchs OffloadArch = StringToOffloadArchSYCL(Arch); + if (IsSYCLSupportedIntelGPUArch(OffloadArch)) + return runAOTCompileIntelGPU(InputFile, Args); + if (IsSYCLSupportedIntelCPUArch(OffloadArch)) + return runAOTCompileIntelCPU(InputFile, Args); + + return createStringError(inconvertibleErrorCode(), "Unsupported arch"); } Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) { @@ -427,17 +515,23 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) { // First llvm-link step auto LinkedFile = linkDeviceInputFiles(Files, Args); if (!LinkedFile) - reportError(LinkedFile.takeError()); + return LinkedFile.takeError(); // second llvm-link step auto DeviceLinkedFile = linkDeviceLibFiles(*LinkedFile, Args); if (!DeviceLinkedFile) - reportError(DeviceLinkedFile.takeError()); + return DeviceLinkedFile.takeError(); // LLVM to SPIR-V translation step auto SPVFile = runLLVMToSPIRVTranslation(*DeviceLinkedFile, Args); if (!SPVFile) return SPVFile.takeError(); + + if (IsAOTCompileNeeded) { + if (Error Err = runAOTCompile(*SPVFile, Args)) + return Err; + } + return Error::success(); } @@ -474,9 +568,11 @@ int main(int argc, char **argv) { DryRun = Args.hasArg(OPT_dry_run); SaveTemps = Args.hasArg(OPT_save_temps); - OutputFile = "a.spv"; - if (Args.hasArg(OPT_o)) - OutputFile = Args.getLastArgValue(OPT_o); + IsAOTCompileNeeded = Args.hasArg(OPT_arch); + + if (!Args.hasArg(OPT_o)) + reportError(createStringError("Output file is not specified")); + OutputFile = Args.getLastArgValue(OPT_o); if (Args.hasArg(OPT_spirv_dump_device_code_EQ)) { Arg *A = Args.getLastArg(OPT_spirv_dump_device_code_EQ); diff --git a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td index 959fd6c3e867c..abcacc9daed6e 100644 --- a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td +++ 
b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td @@ -50,3 +50,11 @@ def llvm_spirv_path_EQ : Joined<["--"], "llvm-spirv-path=">, def llvm_spirv_options_EQ : Joined<["--", "-"], "llvm-spirv-options=">, Flags<[LinkerOnlyOption]>, HelpText<"Options that will control llvm-spirv step">; + +def ocloc_options_EQ : Joined<["--", "-"], "ocloc-options=">, + Flags<[LinkerOnlyOption]>, + HelpText<"Options passed to ocloc for Intel GPU AOT compilation">; + +def opencl_aot_options_EQ : Joined<["--", "-"], "opencl-aot-options=">, + Flags<[LinkerOnlyOption]>, + HelpText<"Options passed to opencl-aot for CPU AOT compilation">; >From abf2b4be9d2f093f5c2e23f2b359836cf6824a64 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" <justin....@intel.com> Date: Wed, 2 Apr 2025 20:23:24 +0000 Subject: [PATCH 2/5] Return error instead of assert --- clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index be855c124e466..23c14c8f07200 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -430,7 +430,9 @@ static Error runAOTCompileIntelGPU(StringRef InputFile, const ArgList &Args) { CmdArgs.push_back("-spirv_input"); StringRef Arch(Args.getLastArgValue(OPT_arch_EQ)); - assert(!Arch.empty() && "Arch must be specified for AOT compilation"); + if (Arch.empty()) + return createStringError(inconvertibleErrorCode(), + "Arch must be specified for AOT compilation"); CmdArgs.push_back("-device"); CmdArgs.push_back(Arch); >From ff997fab0f621d1bf195d6a854c9e2c59be5fd5a Mon Sep 17 00:00:00 2001 From: "Cai, Justin" <justin....@intel.com> Date: Thu, 3 Apr 2025 07:34:28 +0000 Subject: [PATCH 3/5] Add Intel arches to OffloadArch enum --- clang/include/clang/Basic/Cuda.h | 66 +++++ clang/include/clang/Basic/SYCL.h | 131 ---------- clang/lib/Basic/CMakeLists.txt | 1 - 
clang/lib/Basic/Cuda.cpp | 59 +++++ clang/lib/Basic/SYCL.cpp | 226 ------------------ clang/lib/Basic/Targets/NVPTX.cpp | 55 +++++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 55 +++++ .../clang-sycl-linker/ClangSYCLLinker.cpp | 8 +- 8 files changed, 239 insertions(+), 362 deletions(-) delete mode 100644 clang/include/clang/Basic/SYCL.h delete mode 100644 clang/lib/Basic/SYCL.cpp diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index c4eb7b7cac1d6..be8922be5167f 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -140,6 +140,63 @@ enum class OffloadArch { AMDGCNSPIRV, Generic, // A processor model named 'generic' if the target backend defines a // public one. + // Intel CPUs + SKYLAKEAVX512, + COREAVX2, + COREI7AVX, + COREI7, + WESTMERE, + SANDYBRIDGE, + IVYBRIDGE, + BROADWELL, + COFFEELAKE, + ALDERLAKE, + SKYLAKE, + SKX, + CASCADELAKE, + ICELAKECLIENT, + ICELAKESERVER, + SAPPHIRERAPIDS, + GRANITERAPIDS, + // Intel GPUs + BDW, + SKL, + KBL, + CFL, + APL, + BXT, + GLK, + WHL, + AML, + CML, + ICLLP, + ICL, + EHL, + JSL, + TGLLP, + TGL, + RKL, + ADL_S, + RPL_S, + ADL_P, + ADL_N, + DG1, + ACM_G10, + DG2_G10, + ACM_G11, + DG2_G11, + ACM_G12, + DG2_G12, + PVC, + PVC_VG, + MTL_U, + MTL_S, + ARL_U, + ARL_S, + MTL_H, + ARL_H, + BMG_G21, + LNL_M, LAST, CudaDefault = OffloadArch::SM_52, @@ -163,6 +220,15 @@ static inline bool IsAMDOffloadArch(OffloadArch A) { return A >= OffloadArch::GFX600 && A < OffloadArch::Generic; } +static inline bool IsIntelCPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::SKYLAKEAVX512 && + Arch <= OffloadArch::GRANITERAPIDS; +} + +static inline bool IsIntelGPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::BDW && Arch <= OffloadArch::LNL_M; +} + const char *OffloadArchToString(OffloadArch A); const char *OffloadArchToVirtualArchString(OffloadArch A); diff --git a/clang/include/clang/Basic/SYCL.h b/clang/include/clang/Basic/SYCL.h deleted file mode 100644 index 
c7cad37639b91..0000000000000 --- a/clang/include/clang/Basic/SYCL.h +++ /dev/null @@ -1,131 +0,0 @@ -//===--- SYCL.h -------------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_BASIC_SYCL_H -#define LLVM_CLANG_BASIC_SYCL_H - -#include "clang/Basic/Cuda.h" - -namespace llvm { -class StringRef; -template <unsigned InternalLen> class SmallString; -} // namespace llvm - -namespace clang { -// List of architectures (Intel CPUs and Intel GPUs) -// that support SYCL offloading. -enum class SYCLSupportedIntelArchs { - // Intel CPUs - UNKNOWN, - SKYLAKEAVX512, - COREAVX2, - COREI7AVX, - COREI7, - WESTMERE, - SANDYBRIDGE, - IVYBRIDGE, - BROADWELL, - COFFEELAKE, - ALDERLAKE, - SKYLAKE, - SKX, - CASCADELAKE, - ICELAKECLIENT, - ICELAKESERVER, - SAPPHIRERAPIDS, - GRANITERAPIDS, - // Intel GPUs - BDW, - SKL, - KBL, - CFL, - APL, - BXT, - GLK, - WHL, - AML, - CML, - ICLLP, - ICL, - EHL, - JSL, - TGLLP, - TGL, - RKL, - ADL_S, - RPL_S, - ADL_P, - ADL_N, - DG1, - ACM_G10, - DG2_G10, - ACM_G11, - DG2_G11, - ACM_G12, - DG2_G12, - PVC, - PVC_VG, - MTL_U, - MTL_S, - ARL_U, - ARL_S, - MTL_H, - ARL_H, - BMG_G21, - LNL_M, -}; - -// Check if the given Arch value is a Generic AMD GPU. -// Currently GFX*_GENERIC AMD GPUs do not support SYCL offloading. -// This list is used to filter out GFX*_GENERIC AMD GPUs in -// `IsSYCLSupportedAMDGPUArch`. 
-static inline bool IsAMDGenericGPUArch(OffloadArch Arch) { - return Arch == OffloadArch::GFX9_GENERIC || - Arch == OffloadArch::GFX10_1_GENERIC || - Arch == OffloadArch::GFX10_3_GENERIC || - Arch == OffloadArch::GFX11_GENERIC || - Arch == OffloadArch::GFX12_GENERIC; -} - -// Check if the given Arch value is a valid SYCL supported AMD GPU. -static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) { - return Arch >= OffloadArch::GFX700 && Arch < OffloadArch::AMDGCNSPIRV && - !IsAMDGenericGPUArch(Arch); -} - -// Check if the given Arch value is a valid SYCL supported NVidia GPU. -static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) { - return Arch >= OffloadArch::SM_50 && Arch <= OffloadArch::SM_90a; -} - -// Check if the given Arch value is a valid SYCL supported Intel CPU. -static inline bool IsSYCLSupportedIntelCPUArch(SYCLSupportedIntelArchs Arch) { - return Arch >= SYCLSupportedIntelArchs::SKYLAKEAVX512 && - Arch <= SYCLSupportedIntelArchs::GRANITERAPIDS; -} - -// Check if the given Arch value is a valid SYCL supported Intel GPU. -static inline bool IsSYCLSupportedIntelGPUArch(SYCLSupportedIntelArchs Arch) { - return Arch >= SYCLSupportedIntelArchs::BDW && - Arch <= SYCLSupportedIntelArchs::LNL_M; -} - -// Check if the user provided value for --offload-arch is a valid -// SYCL supported Intel AOT target. -SYCLSupportedIntelArchs -StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString); - -// This is a mapping between the user provided --offload-arch value for Intel -// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU -// AOT compiler). 
-llvm::StringRef mapIntelGPUArchName(llvm::StringRef ArchName); -llvm::SmallString<64> getGenDeviceMacro(llvm::StringRef DeviceName); - -} // namespace clang - -#endif // LLVM_CLANG_BASIC_SYCL_H diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index be6d915e01b0a..331dfbb3f4b67 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -90,7 +90,6 @@ add_clang_library(clangBasic SourceMgrAdapter.cpp Stack.cpp StackExhaustionHandler.cpp - SYCL.cpp TargetID.cpp TargetInfo.cpp Targets.cpp diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 68d042eca2492..f1015c47f314f 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -84,6 +84,7 @@ struct OffloadArchToStringMap { #define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca} #define SM(sm) SM2(sm, "compute_" #sm) #define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"} +#define INTEL(name, value) {OffloadArch::value, #name, ""} static const OffloadArchToStringMap arch_names[] = { // clang-format off {OffloadArch::UNUSED, "", ""}, @@ -156,12 +157,70 @@ static const OffloadArchToStringMap arch_names[] = { GFX(1200), // gfx1200 GFX(1201), // gfx1201 {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"}, + // Intel CPUs + INTEL(skylake-avx512, SKYLAKEAVX512), + INTEL(core-avx2, COREAVX2), + INTEL(corei7-avx, COREI7AVX), + INTEL(corei7, COREI7), + INTEL(westmere, WESTMERE), + INTEL(sandybridge, SANDYBRIDGE), + INTEL(ivybridge, IVYBRIDGE), + INTEL(broadwell, BROADWELL), + INTEL(coffeelake, COFFEELAKE), + INTEL(alderlake, ALDERLAKE), + INTEL(skylake, SKYLAKE), + INTEL(skx, SKX), + INTEL(cascadelake, CASCADELAKE), + INTEL(icelake-client, ICELAKECLIENT), + INTEL(icelakeserver, ICELAKESERVER), + INTEL(sapphirerapids, SAPPHIRERAPIDS), + INTEL(graniterapids, GRANITERAPIDS), + // Intel GPUs + INTEL(bdw, BDW), + INTEL(skl, SKL), + INTEL(kbl, KBL), + INTEL(cfl, CFL), + INTEL(apl, APL), + INTEL(bxt, BXT), + INTEL(glk, GLK), + 
INTEL(whl, WHL), + INTEL(aml, AML), + INTEL(cml, CML), + INTEL(icllp, ICLLP), + INTEL(icl, ICL), + INTEL(ehl, EHL), + INTEL(jsl, JSL), + INTEL(tgllp, TGLLP), + INTEL(tgl, TGL), + INTEL(rkl, RKL), + INTEL(adl_s, ADL_S), + INTEL(rpl_s, RPL_S), + INTEL(adl_p, ADL_P), + INTEL(adl_n, ADL_N), + INTEL(dg1, DG1), + INTEL(acm_g10, ACM_G10), + INTEL(dg2_g10, DG2_G10), + INTEL(acm_g11, ACM_G11), + INTEL(dg2_g11, DG2_G11), + INTEL(acm_g12, ACM_G12), + INTEL(dg2_g12, DG2_G12), + INTEL(pvc, PVC), + INTEL(pvc_vg, PVC_VG), + INTEL(mtl_u, MTL_U), + INTEL(mtl_s, MTL_S), + INTEL(arl_u, ARL_U), + INTEL(arl_s, ARL_S), + INTEL(mtl_h, MTL_H), + INTEL(arl_h, ARL_H), + INTEL(bmg_g21, BMG_G21), + INTEL(lnl_m, LNL_M), {OffloadArch::Generic, "generic", ""}, // clang-format on }; #undef SM #undef SM2 #undef GFX +#undef INTEL const char *OffloadArchToString(OffloadArch A) { auto result = std::find_if( diff --git a/clang/lib/Basic/SYCL.cpp b/clang/lib/Basic/SYCL.cpp deleted file mode 100644 index 9ac5470cdbe5a..0000000000000 --- a/clang/lib/Basic/SYCL.cpp +++ /dev/null @@ -1,226 +0,0 @@ -#include "clang/Basic/SYCL.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" - -using namespace llvm; - -namespace clang { - -// Struct that relates an AOT target value with -// Intel CPUs and Intel GPUs. -struct StringToOffloadArchSYCLMap { - const char *ArchName; - SYCLSupportedIntelArchs IntelArch; -}; - -// Mapping of supported SYCL offloading architectures. -static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = { - // Intel CPU mapping. 
- {"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512}, - {"core-avx2", SYCLSupportedIntelArchs::COREAVX2}, - {"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX}, - {"corei7", SYCLSupportedIntelArchs::COREI7}, - {"westmere", SYCLSupportedIntelArchs::WESTMERE}, - {"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE}, - {"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE}, - {"broadwell", SYCLSupportedIntelArchs::BROADWELL}, - {"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE}, - {"alderlake", SYCLSupportedIntelArchs::ALDERLAKE}, - {"skylake", SYCLSupportedIntelArchs::SKYLAKE}, - {"skx", SYCLSupportedIntelArchs::SKX}, - {"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE}, - {"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT}, - {"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER}, - {"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS}, - {"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS}, - // Intel GPU mapping. - {"bdw", SYCLSupportedIntelArchs::BDW}, - {"skl", SYCLSupportedIntelArchs::SKL}, - {"kbl", SYCLSupportedIntelArchs::KBL}, - {"cfl", SYCLSupportedIntelArchs::CFL}, - {"apl", SYCLSupportedIntelArchs::APL}, - {"bxt", SYCLSupportedIntelArchs::BXT}, - {"glk", SYCLSupportedIntelArchs::GLK}, - {"whl", SYCLSupportedIntelArchs::WHL}, - {"aml", SYCLSupportedIntelArchs::AML}, - {"cml", SYCLSupportedIntelArchs::CML}, - {"icllp", SYCLSupportedIntelArchs::ICLLP}, - {"icl", SYCLSupportedIntelArchs::ICL}, - {"ehl", SYCLSupportedIntelArchs::EHL}, - {"jsl", SYCLSupportedIntelArchs::JSL}, - {"tgllp", SYCLSupportedIntelArchs::TGLLP}, - {"tgl", SYCLSupportedIntelArchs::TGL}, - {"rkl", SYCLSupportedIntelArchs::RKL}, - {"adl_s", SYCLSupportedIntelArchs::ADL_S}, - {"rpl_s", SYCLSupportedIntelArchs::RPL_S}, - {"adl_p", SYCLSupportedIntelArchs::ADL_P}, - {"adl_n", SYCLSupportedIntelArchs::ADL_N}, - {"dg1", SYCLSupportedIntelArchs::DG1}, - {"acm_g10", SYCLSupportedIntelArchs::ACM_G10}, - {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10}, - {"acm_g11", 
SYCLSupportedIntelArchs::ACM_G11}, - {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10}, - {"dg2_g11", SYCLSupportedIntelArchs::DG2_G11}, - {"acm_g12", SYCLSupportedIntelArchs::ACM_G12}, - {"dg2_g12", SYCLSupportedIntelArchs::DG2_G12}, - {"pvc", SYCLSupportedIntelArchs::PVC}, - {"pvc_vg", SYCLSupportedIntelArchs::PVC_VG}, - {"mtl_u", SYCLSupportedIntelArchs::MTL_U}, - {"mtl_s", SYCLSupportedIntelArchs::MTL_S}, - {"arl_u", SYCLSupportedIntelArchs::ARL_U}, - {"arl_s", SYCLSupportedIntelArchs::ARL_S}, - {"mtl_h", SYCLSupportedIntelArchs::MTL_H}, - {"arl_h", SYCLSupportedIntelArchs::ARL_H}, - {"bmg_g21", SYCLSupportedIntelArchs::BMG_G21}, - {"lnl_m", SYCLSupportedIntelArchs::LNL_M}}; - -// Check if the user provided value for --offload-arch is a valid -// SYCL supported Intel AOT target. -SYCLSupportedIntelArchs StringToOffloadArchSYCL(StringRef ArchNameAsString) { - auto result = - llvm::find_if(StringToArchNamesMap, - [ArchNameAsString](const StringToOffloadArchSYCLMap &map) { - return ArchNameAsString == map.ArchName; - }); - if (result == std::end(StringToArchNamesMap)) - return SYCLSupportedIntelArchs::UNKNOWN; - return result->IntelArch; -} - -// This is a mapping between the user provided --offload-arch value for Intel -// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU -// AOT compiler). 
-StringRef mapIntelGPUArchName(StringRef ArchName) { - StringRef Arch; - Arch = llvm::StringSwitch<StringRef>(ArchName) - .Case("bdw", "bdw") - .Case("skl", "skl") - .Case("kbl", "kbl") - .Case("cfl", "cfl") - .Cases("apl", "bxt", "apl") - .Case("glk", "glk") - .Case("whl", "whl") - .Case("aml", "aml") - .Case("cml", "cml") - .Cases("icllp", "icl", "icllp") - .Cases("ehl", "jsl", "ehl") - .Cases("tgllp", "tgl", "tgllp") - .Case("rkl", "rkl") - .Cases("adl_s", "rpl_s", "adl_s") - .Case("adl_p", "adl_p") - .Case("adl_n", "adl_n") - .Case("dg1", "dg1") - .Cases("acm_g10", "dg2_g10", "acm_g10") - .Cases("acm_g11", "dg2_g11", "acm_g11") - .Cases("acm_g12", "dg2_g12", "acm_g12") - .Case("pvc", "pvc") - .Case("pvc_vg", "pvc_vg") - .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u") - .Case("mtl_h", "mtl_h") - .Case("arl_h", "arl_h") - .Case("bmg_g21", "bmg_g21") - .Case("lnl_m", "lnl_m") - .Default(""); - return Arch; -} - -SmallString<64> getGenDeviceMacro(StringRef DeviceName) { - SmallString<64> Macro; - StringRef Ext = llvm::StringSwitch<StringRef>(DeviceName) - .Case("bdw", "INTEL_GPU_BDW") - .Case("skl", "INTEL_GPU_SKL") - .Case("kbl", "INTEL_GPU_KBL") - .Case("cfl", "INTEL_GPU_CFL") - .Case("apl", "INTEL_GPU_APL") - .Case("glk", "INTEL_GPU_GLK") - .Case("whl", "INTEL_GPU_WHL") - .Case("aml", "INTEL_GPU_AML") - .Case("cml", "INTEL_GPU_CML") - .Case("icllp", "INTEL_GPU_ICLLP") - .Case("ehl", "INTEL_GPU_EHL") - .Case("tgllp", "INTEL_GPU_TGLLP") - .Case("rkl", "INTEL_GPU_RKL") - .Case("adl_s", "INTEL_GPU_ADL_S") - .Case("adl_p", "INTEL_GPU_ADL_P") - .Case("adl_n", "INTEL_GPU_ADL_N") - .Case("dg1", "INTEL_GPU_DG1") - .Case("acm_g10", "INTEL_GPU_ACM_G10") - .Case("acm_g11", "INTEL_GPU_ACM_G11") - .Case("acm_g12", "INTEL_GPU_ACM_G12") - .Case("pvc", "INTEL_GPU_PVC") - .Case("pvc_vg", "INTEL_GPU_PVC_VG") - .Case("mtl_u", "INTEL_GPU_MTL_U") - .Case("mtl_h", "INTEL_GPU_MTL_H") - .Case("arl_h", "INTEL_GPU_ARL_H") - .Case("bmg_g21", "INTEL_GPU_BMG_G21") - .Case("lnl_m", 
"INTEL_GPU_LNL_M") - .Case("ptl_h", "INTEL_GPU_PTL_H") - .Case("ptl_u", "INTEL_GPU_PTL_U") - .Case("sm_50", "NVIDIA_GPU_SM_50") - .Case("sm_52", "NVIDIA_GPU_SM_52") - .Case("sm_53", "NVIDIA_GPU_SM_53") - .Case("sm_60", "NVIDIA_GPU_SM_60") - .Case("sm_61", "NVIDIA_GPU_SM_61") - .Case("sm_62", "NVIDIA_GPU_SM_62") - .Case("sm_70", "NVIDIA_GPU_SM_70") - .Case("sm_72", "NVIDIA_GPU_SM_72") - .Case("sm_75", "NVIDIA_GPU_SM_75") - .Case("sm_80", "NVIDIA_GPU_SM_80") - .Case("sm_86", "NVIDIA_GPU_SM_86") - .Case("sm_87", "NVIDIA_GPU_SM_87") - .Case("sm_89", "NVIDIA_GPU_SM_89") - .Case("sm_90", "NVIDIA_GPU_SM_90") - .Case("sm_90a", "NVIDIA_GPU_SM_90A") - .Case("gfx700", "AMD_GPU_GFX700") - .Case("gfx701", "AMD_GPU_GFX701") - .Case("gfx702", "AMD_GPU_GFX702") - .Case("gfx703", "AMD_GPU_GFX703") - .Case("gfx704", "AMD_GPU_GFX704") - .Case("gfx705", "AMD_GPU_GFX705") - .Case("gfx801", "AMD_GPU_GFX801") - .Case("gfx802", "AMD_GPU_GFX802") - .Case("gfx803", "AMD_GPU_GFX803") - .Case("gfx805", "AMD_GPU_GFX805") - .Case("gfx810", "AMD_GPU_GFX810") - .Case("gfx900", "AMD_GPU_GFX900") - .Case("gfx902", "AMD_GPU_GFX902") - .Case("gfx904", "AMD_GPU_GFX904") - .Case("gfx906", "AMD_GPU_GFX906") - .Case("gfx908", "AMD_GPU_GFX908") - .Case("gfx909", "AMD_GPU_GFX909") - .Case("gfx90a", "AMD_GPU_GFX90A") - .Case("gfx90c", "AMD_GPU_GFX90C") - .Case("gfx940", "AMD_GPU_GFX940") - .Case("gfx941", "AMD_GPU_GFX941") - .Case("gfx942", "AMD_GPU_GFX942") - .Case("gfx1010", "AMD_GPU_GFX1010") - .Case("gfx1011", "AMD_GPU_GFX1011") - .Case("gfx1012", "AMD_GPU_GFX1012") - .Case("gfx1013", "AMD_GPU_GFX1013") - .Case("gfx1030", "AMD_GPU_GFX1030") - .Case("gfx1031", "AMD_GPU_GFX1031") - .Case("gfx1032", "AMD_GPU_GFX1032") - .Case("gfx1033", "AMD_GPU_GFX1033") - .Case("gfx1034", "AMD_GPU_GFX1034") - .Case("gfx1035", "AMD_GPU_GFX1035") - .Case("gfx1036", "AMD_GPU_GFX1036") - .Case("gfx1100", "AMD_GPU_GFX1100") - .Case("gfx1101", "AMD_GPU_GFX1101") - .Case("gfx1102", "AMD_GPU_GFX1102") - .Case("gfx1103", 
"AMD_GPU_GFX1103") - .Case("gfx1150", "AMD_GPU_GFX1150") - .Case("gfx1151", "AMD_GPU_GFX1151") - .Case("gfx1200", "AMD_GPU_GFX1200") - .Case("gfx1201", "AMD_GPU_GFX1201") - .Default(""); - if (!Ext.empty()) { - Macro = "__SYCL_TARGET_"; - Macro += Ext; - Macro += "__"; - } - return Macro; -} - -} // namespace clang diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 5931a77a85fec..4f04d83c9c068 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -240,6 +240,61 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case OffloadArch::GFX1201: case OffloadArch::AMDGCNSPIRV: case OffloadArch::Generic: + case OffloadArch::SKYLAKEAVX512: + case OffloadArch::COREAVX2: + case OffloadArch::COREI7AVX: + case OffloadArch::COREI7: + case OffloadArch::WESTMERE: + case OffloadArch::SANDYBRIDGE: + case OffloadArch::IVYBRIDGE: + case OffloadArch::BROADWELL: + case OffloadArch::COFFEELAKE: + case OffloadArch::ALDERLAKE: + case OffloadArch::SKYLAKE: + case OffloadArch::SKX: + case OffloadArch::CASCADELAKE: + case OffloadArch::ICELAKECLIENT: + case OffloadArch::ICELAKESERVER: + case OffloadArch::SAPPHIRERAPIDS: + case OffloadArch::GRANITERAPIDS: + case OffloadArch::BDW: + case OffloadArch::SKL: + case OffloadArch::KBL: + case OffloadArch::CFL: + case OffloadArch::APL: + case OffloadArch::BXT: + case OffloadArch::GLK: + case OffloadArch::WHL: + case OffloadArch::AML: + case OffloadArch::CML: + case OffloadArch::ICLLP: + case OffloadArch::ICL: + case OffloadArch::EHL: + case OffloadArch::JSL: + case OffloadArch::TGLLP: + case OffloadArch::TGL: + case OffloadArch::RKL: + case OffloadArch::ADL_S: + case OffloadArch::RPL_S: + case OffloadArch::ADL_P: + case OffloadArch::ADL_N: + case OffloadArch::DG1: + case OffloadArch::ACM_G10: + case OffloadArch::DG2_G10: + case OffloadArch::ACM_G11: + case OffloadArch::DG2_G11: + case OffloadArch::ACM_G12: + case OffloadArch::DG2_G12: + case OffloadArch::PVC: + case 
OffloadArch::PVC_VG: + case OffloadArch::MTL_U: + case OffloadArch::MTL_S: + case OffloadArch::ARL_U: + case OffloadArch::ARL_S: + case OffloadArch::MTL_H: + case OffloadArch::ARL_H: + case OffloadArch::BMG_G21: + case OffloadArch::LNL_M: case OffloadArch::LAST: break; case OffloadArch::UNKNOWN: diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index feb2448297542..80990eeed7511 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2335,6 +2335,61 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) { case OffloadArch::GFX1201: case OffloadArch::AMDGCNSPIRV: case OffloadArch::Generic: + case OffloadArch::SKYLAKEAVX512: + case OffloadArch::COREAVX2: + case OffloadArch::COREI7AVX: + case OffloadArch::COREI7: + case OffloadArch::WESTMERE: + case OffloadArch::SANDYBRIDGE: + case OffloadArch::IVYBRIDGE: + case OffloadArch::BROADWELL: + case OffloadArch::COFFEELAKE: + case OffloadArch::ALDERLAKE: + case OffloadArch::SKYLAKE: + case OffloadArch::SKX: + case OffloadArch::CASCADELAKE: + case OffloadArch::ICELAKECLIENT: + case OffloadArch::ICELAKESERVER: + case OffloadArch::SAPPHIRERAPIDS: + case OffloadArch::GRANITERAPIDS: + case OffloadArch::BDW: + case OffloadArch::SKL: + case OffloadArch::KBL: + case OffloadArch::CFL: + case OffloadArch::APL: + case OffloadArch::BXT: + case OffloadArch::GLK: + case OffloadArch::WHL: + case OffloadArch::AML: + case OffloadArch::CML: + case OffloadArch::ICLLP: + case OffloadArch::ICL: + case OffloadArch::EHL: + case OffloadArch::JSL: + case OffloadArch::TGLLP: + case OffloadArch::TGL: + case OffloadArch::RKL: + case OffloadArch::ADL_S: + case OffloadArch::RPL_S: + case OffloadArch::ADL_P: + case OffloadArch::ADL_N: + case OffloadArch::DG1: + case OffloadArch::ACM_G10: + case OffloadArch::DG2_G10: + case OffloadArch::ACM_G11: + case OffloadArch::DG2_G11: + case OffloadArch::ACM_G12: + case OffloadArch::DG2_G12: + case 
OffloadArch::PVC: + case OffloadArch::PVC_VG: + case OffloadArch::MTL_U: + case OffloadArch::MTL_S: + case OffloadArch::ARL_U: + case OffloadArch::ARL_S: + case OffloadArch::MTL_H: + case OffloadArch::ARL_H: + case OffloadArch::BMG_G21: + case OffloadArch::LNL_M: case OffloadArch::UNUSED: case OffloadArch::UNKNOWN: break; diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index 23c14c8f07200..20e677e6f84f1 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -14,7 +14,7 @@ // target-specific device code. //===---------------------------------------------------------------------===// -#include "clang/Basic/SYCL.h" +#include "clang/Basic/Cuda.h" #include "clang/Basic/Version.h" #include "llvm/ADT/StringExtras.h" @@ -455,10 +455,10 @@ static Error runAOTCompileIntelGPU(StringRef InputFile, const ArgList &Args) { /// SYCL AOT compilation step. static Error runAOTCompile(StringRef InputFile, const ArgList &Args) { StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); - SYCLSupportedIntelArchs OffloadArch = StringToOffloadArchSYCL(Arch); - if (IsSYCLSupportedIntelGPUArch(OffloadArch)) + OffloadArch OffloadArch = StringToOffloadArch(Arch); + if (IsIntelGPUArch(OffloadArch)) return runAOTCompileIntelGPU(InputFile, Args); - if (IsSYCLSupportedIntelCPUArch(OffloadArch)) + if (IsIntelCPUArch(OffloadArch)) return runAOTCompileIntelCPU(InputFile, Args); return createStringError(inconvertibleErrorCode(), "Unsupported arch"); >From e64a41738269886bc2b8e77a266516c897ef33cc Mon Sep 17 00:00:00 2001 From: "Cai, Justin" <justin....@intel.com> Date: Thu, 3 Apr 2025 18:17:17 +0000 Subject: [PATCH 4/5] Fix test failures --- clang/test/Driver/clang-sycl-linker-test.cpp | 32 +++++++++++-------- .../clang-sycl-linker/ClangSYCLLinker.cpp | 9 ++++-- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/clang/test/Driver/clang-sycl-linker-test.cpp 
b/clang/test/Driver/clang-sycl-linker-test.cpp index 4b566eb5b4a2f..2ef7afaa69aac 100644 --- a/clang/test/Driver/clang-sycl-linker-test.cpp +++ b/clang/test/Driver/clang-sycl-linker-test.cpp @@ -20,7 +20,7 @@ // // Test a simple case with a random file (not bitcode) as input. // RUN: touch %t.o -// RUN: not clang-sycl-linker -triple spirv64 %t.o -o a.spv 2>&1 \ +// RUN: not clang-sycl-linker -triple=spirv64 %t.o -o a.spv 2>&1 \ // RUN: | FileCheck %s --check-prefix=FILETYPEERROR // FILETYPEERROR: Unsupported file type // @@ -33,37 +33,41 @@ // DEVLIBSERR2: '{{.*}}lib3.bc' SYCL device library file is not found // // Test AOT compilation for an Intel GPU. -// RUN: clang-sycl-linker --dry-run -arch pvc %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=pvc %t_1.bc %t_2.bc -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU -// AOT-INTEL-GPU: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings -// AOT-INTEL-GPU-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc +// AOT-INTEL-GPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc +// AOT-INTEL-GPU-NEXT: SPIR-V Backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]].spv // AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device pvc {{.*}}-output a.out -file [[SPIRVTRANSLATIONOUT]] // // Test AOT compilation for an Intel GPU with additional options. 
-// RUN: clang-sycl-linker --dry-run -arch pvc %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=pvc %t_1.bc %t_2.bc -o a.out 2>&1 \ // RUN: --ocloc-options="-a -b" \ // RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU-2 -// AOT-INTEL-GPU-2: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings -// AOT-INTEL-GPU-2-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc +// AOT-INTEL-GPU-2: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc +// AOT-INTEL-GPU-2-NEXT: SPIR-V Backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]].spv // AOT-INTEL-GPU-2-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device pvc -a -b {{.*}}-output a.out -file [[SPIRVTRANSLATIONOUT]] // // Test AOT compilation for an Intel CPU. -// RUN: clang-sycl-linker --dry-run -arch corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU -// AOT-INTEL-CPU: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings -// AOT-INTEL-CPU-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc +// AOT-INTEL-CPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc +// AOT-INTEL-CPU-NEXT: SPIR-V Backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]].spv // AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu {{.*}}-o a.out [[SPIRVTRANSLATIONOUT]] // // Test AOT compilation for an Intel CPU with additional options. 
-// RUN: clang-sycl-linker --dry-run -arch corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \ // RUN: --opencl-aot-options="-a -b" \ // RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU-2 -// AOT-INTEL-CPU-2: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings -// AOT-INTEL-CPU-2-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc +// AOT-INTEL-CPU-2: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc +// AOT-INTEL-CPU-2-NEXT: SPIR-V Backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]].spv // AOT-INTEL-CPU-2-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b {{.*}}-o a.out [[SPIRVTRANSLATIONOUT]] // // Check that the output file must be specified. // RUN: not clang-sycl-linker --dry-run %t_1.bc %t_2.bc 2>& 1 \ // RUN: | FileCheck %s --check-prefix=NOOUTPUT -// NOOUTPUT: Output file is not specified +// NOOUTPUT: Output file must be specified // +// Check that the target triple must be specified. +// RUN: not clang-sycl-linker --dry-run %t_1.bc %t_2.bc -o a.out 2>& 1 \ +// RUN: | FileCheck %s --check-prefix=NOTARGET +// NOTARGET: Target triple must be specified diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index 20e677e6f84f1..6a9ef4c0fc42c 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -381,9 +381,9 @@ static Expected<StringRef> runSPIRVCodeGen(StringRef File, const ArgList &Args, if (Verbose) errs() << formatv("SPIR-V Backend: input: {0}, output: {1}\n", File, - OutputFile); + *OutFileOrErr); - return OutputFile; + return *OutFileOrErr; } /// Run AOT compilation for Intel CPU. 
@@ -531,9 +531,12 @@ int main(int argc, char **argv) { IsAOTCompileNeeded = Args.hasArg(OPT_arch_EQ); if (!Args.hasArg(OPT_o)) - reportError(createStringError("Output file is not specified")); + reportError(createStringError("Output file must be specified")); OutputFile = Args.getLastArgValue(OPT_o); + if (!Args.hasArg(OPT_triple_EQ)) + reportError(createStringError("Target triple must be specified")); + if (Args.hasArg(OPT_spirv_dump_device_code_EQ)) { Arg *A = Args.getLastArg(OPT_spirv_dump_device_code_EQ); SmallString<128> Dir(A->getValue()); >From 9002f9a743b83077eeeb0fe316eadd33f4880e96 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" <justin....@intel.com> Date: Thu, 3 Apr 2025 18:36:49 +0000 Subject: [PATCH 5/5] Add Offloading.cpp/h --- clang/include/clang/Basic/Cuda.h | 175 +--------------------- clang/include/clang/Basic/Offloading.h | 193 +++++++++++++++++++++++++ clang/lib/Basic/CMakeLists.txt | 1 + clang/lib/Basic/Cuda.cpp | 176 ---------------------- clang/lib/Basic/Offloading.cpp | 185 ++++++++++++++++++++++++ 5 files changed, 381 insertions(+), 349 deletions(-) create mode 100644 clang/include/clang/Basic/Offloading.h create mode 100644 clang/lib/Basic/Offloading.cpp diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index be8922be5167f..007cf80223dec 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -9,6 +9,8 @@ #ifndef LLVM_CLANG_BASIC_CUDA_H #define LLVM_CLANG_BASIC_CUDA_H +#include "clang/Basic/Offloading.h" + namespace llvm { class StringRef; class Twine; @@ -54,155 +56,6 @@ const char *CudaVersionToString(CudaVersion V); // Input is "Major.Minor" CudaVersion CudaStringToVersion(const llvm::Twine &S); -enum class OffloadArch { - UNUSED, - UNKNOWN, - // TODO: Deprecate and remove GPU architectures older than sm_52. - SM_20, - SM_21, - SM_30, - // This has a name conflict with sys/mac.h on AIX, rename it as a workaround. 
- SM_32_, - SM_35, - SM_37, - SM_50, - SM_52, - SM_53, - SM_60, - SM_61, - SM_62, - SM_70, - SM_72, - SM_75, - SM_80, - SM_86, - SM_87, - SM_89, - SM_90, - SM_90a, - SM_100, - SM_100a, - SM_101, - SM_101a, - SM_120, - SM_120a, - GFX600, - GFX601, - GFX602, - GFX700, - GFX701, - GFX702, - GFX703, - GFX704, - GFX705, - GFX801, - GFX802, - GFX803, - GFX805, - GFX810, - GFX9_GENERIC, - GFX900, - GFX902, - GFX904, - GFX906, - GFX908, - GFX909, - GFX90a, - GFX90c, - GFX9_4_GENERIC, - GFX942, - GFX950, - GFX10_1_GENERIC, - GFX1010, - GFX1011, - GFX1012, - GFX1013, - GFX10_3_GENERIC, - GFX1030, - GFX1031, - GFX1032, - GFX1033, - GFX1034, - GFX1035, - GFX1036, - GFX11_GENERIC, - GFX1100, - GFX1101, - GFX1102, - GFX1103, - GFX1150, - GFX1151, - GFX1152, - GFX1153, - GFX12_GENERIC, - GFX1200, - GFX1201, - AMDGCNSPIRV, - Generic, // A processor model named 'generic' if the target backend defines a - // public one. - // Intel CPUs - SKYLAKEAVX512, - COREAVX2, - COREI7AVX, - COREI7, - WESTMERE, - SANDYBRIDGE, - IVYBRIDGE, - BROADWELL, - COFFEELAKE, - ALDERLAKE, - SKYLAKE, - SKX, - CASCADELAKE, - ICELAKECLIENT, - ICELAKESERVER, - SAPPHIRERAPIDS, - GRANITERAPIDS, - // Intel GPUs - BDW, - SKL, - KBL, - CFL, - APL, - BXT, - GLK, - WHL, - AML, - CML, - ICLLP, - ICL, - EHL, - JSL, - TGLLP, - TGL, - RKL, - ADL_S, - RPL_S, - ADL_P, - ADL_N, - DG1, - ACM_G10, - DG2_G10, - ACM_G11, - DG2_G11, - ACM_G12, - DG2_G12, - PVC, - PVC_VG, - MTL_U, - MTL_S, - ARL_U, - ARL_S, - MTL_H, - ARL_H, - BMG_G21, - LNL_M, - LAST, - - CudaDefault = OffloadArch::SM_52, - HIPDefault = OffloadArch::GFX906, -}; - enum class CUDAFunctionTarget { Device, Global, @@ -211,30 +64,6 @@ enum class CUDAFunctionTarget { InvalidTarget }; -static inline bool IsNVIDIAOffloadArch(OffloadArch A) { - return A >= OffloadArch::SM_20 && A < OffloadArch::GFX600; -} - -static inline bool IsAMDOffloadArch(OffloadArch A) { - // Generic processor model is for testing only. 
- return A >= OffloadArch::GFX600 && A < OffloadArch::Generic; -} - -static inline bool IsIntelCPUArch(OffloadArch Arch) { - return Arch >= OffloadArch::SKYLAKEAVX512 && - Arch <= OffloadArch::GRANITERAPIDS; -} - -static inline bool IsIntelGPUArch(OffloadArch Arch) { - return Arch >= OffloadArch::BDW && Arch <= OffloadArch::LNL_M; -} - -const char *OffloadArchToString(OffloadArch A); -const char *OffloadArchToVirtualArchString(OffloadArch A); - -// The input should have the form "sm_20". -OffloadArch StringToOffloadArch(llvm::StringRef S); - /// Get the earliest CudaVersion that supports the given OffloadArch. CudaVersion MinVersionForOffloadArch(OffloadArch A); diff --git a/clang/include/clang/Basic/Offloading.h b/clang/include/clang/Basic/Offloading.h new file mode 100644 index 0000000000000..2b49973775991 --- /dev/null +++ b/clang/include/clang/Basic/Offloading.h @@ -0,0 +1,193 @@ +//===--- Offloading.h - Utilities for offloading ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_OFFLOADING_H +#define LLVM_CLANG_BASIC_OFFLOADING_H + +namespace llvm { +class StringRef; +} // namespace llvm + +namespace clang { + +enum class OffloadArch { + UNUSED, + UNKNOWN, + // TODO: Deprecate and remove GPU architectures older than sm_52. + SM_20, + SM_21, + SM_30, + // This has a name conflict with sys/mac.h on AIX, rename it as a workaround. 
+ SM_32_, + SM_35, + SM_37, + SM_50, + SM_52, + SM_53, + SM_60, + SM_61, + SM_62, + SM_70, + SM_72, + SM_75, + SM_80, + SM_86, + SM_87, + SM_89, + SM_90, + SM_90a, + SM_100, + SM_100a, + SM_101, + SM_101a, + SM_120, + SM_120a, + GFX600, + GFX601, + GFX602, + GFX700, + GFX701, + GFX702, + GFX703, + GFX704, + GFX705, + GFX801, + GFX802, + GFX803, + GFX805, + GFX810, + GFX9_GENERIC, + GFX900, + GFX902, + GFX904, + GFX906, + GFX908, + GFX909, + GFX90a, + GFX90c, + GFX9_4_GENERIC, + GFX942, + GFX950, + GFX10_1_GENERIC, + GFX1010, + GFX1011, + GFX1012, + GFX1013, + GFX10_3_GENERIC, + GFX1030, + GFX1031, + GFX1032, + GFX1033, + GFX1034, + GFX1035, + GFX1036, + GFX11_GENERIC, + GFX1100, + GFX1101, + GFX1102, + GFX1103, + GFX1150, + GFX1151, + GFX1152, + GFX1153, + GFX12_GENERIC, + GFX1200, + GFX1201, + AMDGCNSPIRV, + Generic, // A processor model named 'generic' if the target backend defines a + // public one. + // Intel CPUs + SKYLAKEAVX512, + COREAVX2, + COREI7AVX, + COREI7, + WESTMERE, + SANDYBRIDGE, + IVYBRIDGE, + BROADWELL, + COFFEELAKE, + ALDERLAKE, + SKYLAKE, + SKX, + CASCADELAKE, + ICELAKECLIENT, + ICELAKESERVER, + SAPPHIRERAPIDS, + GRANITERAPIDS, + // Intel GPUs + BDW, + SKL, + KBL, + CFL, + APL, + BXT, + GLK, + WHL, + AML, + CML, + ICLLP, + ICL, + EHL, + JSL, + TGLLP, + TGL, + RKL, + ADL_S, + RPL_S, + ADL_P, + ADL_N, + DG1, + ACM_G10, + DG2_G10, + ACM_G11, + DG2_G11, + ACM_G12, + DG2_G12, + PVC, + PVC_VG, + MTL_U, + MTL_S, + ARL_U, + ARL_S, + MTL_H, + ARL_H, + BMG_G21, + LNL_M, + LAST, + + CudaDefault = OffloadArch::SM_52, + HIPDefault = OffloadArch::GFX906, +}; + +static inline bool IsNVIDIAOffloadArch(OffloadArch A) { + return A >= OffloadArch::SM_20 && A < OffloadArch::GFX600; +} + +static inline bool IsAMDOffloadArch(OffloadArch A) { + // Generic processor model is for testing only. 
+ return A >= OffloadArch::GFX600 && A < OffloadArch::Generic; +} + +static inline bool IsIntelCPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::SKYLAKEAVX512 && + Arch <= OffloadArch::GRANITERAPIDS; +} + +static inline bool IsIntelGPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::BDW && Arch <= OffloadArch::LNL_M; +} + +const char *OffloadArchToString(OffloadArch A); +const char *OffloadArchToVirtualArchString(OffloadArch A); + +// The input should have the form "sm_20". +OffloadArch StringToOffloadArch(llvm::StringRef S); + +} // namespace clang + +#endif // LLVM_CLANG_BASIC_OFFLOADING_H diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index 331dfbb3f4b67..5c91dc43df9b6 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -76,6 +76,7 @@ add_clang_library(clangBasic MakeSupport.cpp Module.cpp ObjCRuntime.cpp + Offloading.cpp OpenCLOptions.cpp OpenMPKinds.cpp OperatorPrecedence.cpp diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index f1015c47f314f..8ea242911a2ba 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -73,182 +73,6 @@ CudaVersion ToCudaVersion(llvm::VersionTuple Version) { return CudaVersion::UNKNOWN; } -namespace { -struct OffloadArchToStringMap { - OffloadArch arch; - const char *arch_name; - const char *virtual_arch_name; -}; -} // namespace - -#define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca} -#define SM(sm) SM2(sm, "compute_" #sm) -#define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"} -#define INTEL(name, value) {OffloadArch::value, #name, ""} -static const OffloadArchToStringMap arch_names[] = { - // clang-format off - {OffloadArch::UNUSED, "", ""}, - SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi - SM(30), {OffloadArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler - SM(50), SM(52), SM(53), // Maxwell - SM(60), SM(61), SM(62), // Pascal - SM(70), SM(72), // Volta - SM(75), // Turing - 
SM(80), SM(86), // Ampere - SM(87), // Jetson/Drive AGX Orin - SM(89), // Ada Lovelace - SM(90), // Hopper - SM(90a), // Hopper - SM(100), // Blackwell - SM(100a), // Blackwell - SM(101), // Blackwell - SM(101a), // Blackwell - SM(120), // Blackwell - SM(120a), // Blackwell - GFX(600), // gfx600 - GFX(601), // gfx601 - GFX(602), // gfx602 - GFX(700), // gfx700 - GFX(701), // gfx701 - GFX(702), // gfx702 - GFX(703), // gfx703 - GFX(704), // gfx704 - GFX(705), // gfx705 - GFX(801), // gfx801 - GFX(802), // gfx802 - GFX(803), // gfx803 - GFX(805), // gfx805 - GFX(810), // gfx810 - {OffloadArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"}, - GFX(900), // gfx900 - GFX(902), // gfx902 - GFX(904), // gfx903 - GFX(906), // gfx906 - GFX(908), // gfx908 - GFX(909), // gfx909 - GFX(90a), // gfx90a - GFX(90c), // gfx90c - {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"}, - GFX(942), // gfx942 - GFX(950), // gfx950 - {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"}, - GFX(1010), // gfx1010 - GFX(1011), // gfx1011 - GFX(1012), // gfx1012 - GFX(1013), // gfx1013 - {OffloadArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"}, - GFX(1030), // gfx1030 - GFX(1031), // gfx1031 - GFX(1032), // gfx1032 - GFX(1033), // gfx1033 - GFX(1034), // gfx1034 - GFX(1035), // gfx1035 - GFX(1036), // gfx1036 - {OffloadArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"}, - GFX(1100), // gfx1100 - GFX(1101), // gfx1101 - GFX(1102), // gfx1102 - GFX(1103), // gfx1103 - GFX(1150), // gfx1150 - GFX(1151), // gfx1151 - GFX(1152), // gfx1152 - GFX(1153), // gfx1153 - {OffloadArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"}, - GFX(1200), // gfx1200 - GFX(1201), // gfx1201 - {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"}, - // Intel CPUs - INTEL(skylake-avx512, SKYLAKEAVX512), - INTEL(core-avx2, COREAVX2), - INTEL(corei7-avx, COREI7AVX), - INTEL(corei7, COREI7), - INTEL(westmere, WESTMERE), - INTEL(sandybridge, SANDYBRIDGE), - 
INTEL(ivybridge, IVYBRIDGE), - INTEL(broadwell, BROADWELL), - INTEL(coffeelake, COFFEELAKE), - INTEL(alderlake, ALDERLAKE), - INTEL(skylake, SKYLAKE), - INTEL(skx, SKX), - INTEL(cascadelake, CASCADELAKE), - INTEL(icelake-client, ICELAKECLIENT), - INTEL(icelakeserver, ICELAKESERVER), - INTEL(sapphirerapids, SAPPHIRERAPIDS), - INTEL(graniterapids, GRANITERAPIDS), - // Intel GPUs - INTEL(bdw, BDW), - INTEL(skl, SKL), - INTEL(kbl, KBL), - INTEL(cfl, CFL), - INTEL(apl, APL), - INTEL(bxt, BXT), - INTEL(glk, GLK), - INTEL(whl, WHL), - INTEL(aml, AML), - INTEL(cml, CML), - INTEL(icllp, ICLLP), - INTEL(icl, ICL), - INTEL(ehl, EHL), - INTEL(jsl, JSL), - INTEL(tgllp, TGLLP), - INTEL(tgl, TGL), - INTEL(rkl, RKL), - INTEL(adl_s, ADL_S), - INTEL(rpl_s, RPL_S), - INTEL(adl_p, ADL_P), - INTEL(adl_n, ADL_N), - INTEL(dg1, DG1), - INTEL(acm_g10, ACM_G10), - INTEL(dg2_g10, DG2_G10), - INTEL(acm_g11, ACM_G11), - INTEL(dg2_g11, DG2_G11), - INTEL(acm_g12, ACM_G12), - INTEL(dg2_g12, DG2_G12), - INTEL(pvc, PVC), - INTEL(pvc_vg, PVC_VG), - INTEL(mtl_u, MTL_U), - INTEL(mtl_s, MTL_S), - INTEL(arl_u, ARL_U), - INTEL(arl_s, ARL_S), - INTEL(mtl_h, MTL_H), - INTEL(arl_h, ARL_H), - INTEL(bmg_g21, BMG_G21), - INTEL(lnl_m, LNL_M), - {OffloadArch::Generic, "generic", ""}, - // clang-format on -}; -#undef SM -#undef SM2 -#undef GFX -#undef INTEL - -const char *OffloadArchToString(OffloadArch A) { - auto result = std::find_if( - std::begin(arch_names), std::end(arch_names), - [A](const OffloadArchToStringMap &map) { return A == map.arch; }); - if (result == std::end(arch_names)) - return "unknown"; - return result->arch_name; -} - -const char *OffloadArchToVirtualArchString(OffloadArch A) { - auto result = std::find_if( - std::begin(arch_names), std::end(arch_names), - [A](const OffloadArchToStringMap &map) { return A == map.arch; }); - if (result == std::end(arch_names)) - return "unknown"; - return result->virtual_arch_name; -} - -OffloadArch StringToOffloadArch(llvm::StringRef S) { - auto result = 
std::find_if( - std::begin(arch_names), std::end(arch_names), - [S](const OffloadArchToStringMap &map) { return S == map.arch_name; }); - if (result == std::end(arch_names)) - return OffloadArch::UNKNOWN; - return result->arch; -} - CudaVersion MinVersionForOffloadArch(OffloadArch A) { if (A == OffloadArch::UNKNOWN) return CudaVersion::UNKNOWN; diff --git a/clang/lib/Basic/Offloading.cpp b/clang/lib/Basic/Offloading.cpp new file mode 100644 index 0000000000000..63313a6777dd8 --- /dev/null +++ b/clang/lib/Basic/Offloading.cpp @@ -0,0 +1,185 @@ +#include "clang/Basic/Offloading.h" + +#include "llvm/ADT/StringRef.h" + +#include <algorithm> + +namespace clang { + +namespace { +struct OffloadArchToStringMap { + OffloadArch arch; + const char *arch_name; + const char *virtual_arch_name; +}; +} // namespace + +#define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca} +#define SM(sm) SM2(sm, "compute_" #sm) +#define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"} +#define INTEL(name, value) {OffloadArch::value, #name, ""} +static const OffloadArchToStringMap arch_names[] = { + // clang-format off + {OffloadArch::UNUSED, "", ""}, + SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi + SM(30), {OffloadArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler + SM(50), SM(52), SM(53), // Maxwell + SM(60), SM(61), SM(62), // Pascal + SM(70), SM(72), // Volta + SM(75), // Turing + SM(80), SM(86), // Ampere + SM(87), // Jetson/Drive AGX Orin + SM(89), // Ada Lovelace + SM(90), // Hopper + SM(90a), // Hopper + SM(100), // Blackwell + SM(100a), // Blackwell + SM(101), // Blackwell + SM(101a), // Blackwell + SM(120), // Blackwell + SM(120a), // Blackwell + GFX(600), // gfx600 + GFX(601), // gfx601 + GFX(602), // gfx602 + GFX(700), // gfx700 + GFX(701), // gfx701 + GFX(702), // gfx702 + GFX(703), // gfx703 + GFX(704), // gfx704 + GFX(705), // gfx705 + GFX(801), // gfx801 + GFX(802), // gfx802 + GFX(803), // gfx803 + GFX(805), // gfx805 + GFX(810), // gfx810 + 
{OffloadArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"}, + GFX(900), // gfx900 + GFX(902), // gfx902 + GFX(904), // gfx904 + GFX(906), // gfx906 + GFX(908), // gfx908 + GFX(909), // gfx909 + GFX(90a), // gfx90a + GFX(90c), // gfx90c + {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"}, + GFX(942), // gfx942 + GFX(950), // gfx950 + {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"}, + GFX(1010), // gfx1010 + GFX(1011), // gfx1011 + GFX(1012), // gfx1012 + GFX(1013), // gfx1013 + {OffloadArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"}, + GFX(1030), // gfx1030 + GFX(1031), // gfx1031 + GFX(1032), // gfx1032 + GFX(1033), // gfx1033 + GFX(1034), // gfx1034 + GFX(1035), // gfx1035 + GFX(1036), // gfx1036 + {OffloadArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"}, + GFX(1100), // gfx1100 + GFX(1101), // gfx1101 + GFX(1102), // gfx1102 + GFX(1103), // gfx1103 + GFX(1150), // gfx1150 + GFX(1151), // gfx1151 + GFX(1152), // gfx1152 + GFX(1153), // gfx1153 + {OffloadArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"}, + GFX(1200), // gfx1200 + GFX(1201), // gfx1201 + {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"}, + // Intel CPUs + INTEL(skylake-avx512, SKYLAKEAVX512), + INTEL(core-avx2, COREAVX2), + INTEL(corei7-avx, COREI7AVX), + INTEL(corei7, COREI7), + INTEL(westmere, WESTMERE), + INTEL(sandybridge, SANDYBRIDGE), + INTEL(ivybridge, IVYBRIDGE), + INTEL(broadwell, BROADWELL), + INTEL(coffeelake, COFFEELAKE), + INTEL(alderlake, ALDERLAKE), + INTEL(skylake, SKYLAKE), + INTEL(skx, SKX), + INTEL(cascadelake, CASCADELAKE), + INTEL(icelake-client, ICELAKECLIENT), + INTEL(icelakeserver, ICELAKESERVER), + INTEL(sapphirerapids, SAPPHIRERAPIDS), + INTEL(graniterapids, GRANITERAPIDS), + // Intel GPUs + INTEL(bdw, BDW), + INTEL(skl, SKL), + INTEL(kbl, KBL), + INTEL(cfl, CFL), + INTEL(apl, APL), + INTEL(bxt, BXT), + INTEL(glk, GLK), + INTEL(whl, WHL), + INTEL(aml, AML), + INTEL(cml, CML), + INTEL(icllp, ICLLP), + 
INTEL(icl, ICL), + INTEL(ehl, EHL), + INTEL(jsl, JSL), + INTEL(tgllp, TGLLP), + INTEL(tgl, TGL), + INTEL(rkl, RKL), + INTEL(adl_s, ADL_S), + INTEL(rpl_s, RPL_S), + INTEL(adl_p, ADL_P), + INTEL(adl_n, ADL_N), + INTEL(dg1, DG1), + INTEL(acm_g10, ACM_G10), + INTEL(dg2_g10, DG2_G10), + INTEL(acm_g11, ACM_G11), + INTEL(dg2_g11, DG2_G11), + INTEL(acm_g12, ACM_G12), + INTEL(dg2_g12, DG2_G12), + INTEL(pvc, PVC), + INTEL(pvc_vg, PVC_VG), + INTEL(mtl_u, MTL_U), + INTEL(mtl_s, MTL_S), + INTEL(arl_u, ARL_U), + INTEL(arl_s, ARL_S), + INTEL(mtl_h, MTL_H), + INTEL(arl_h, ARL_H), + INTEL(bmg_g21, BMG_G21), + INTEL(lnl_m, LNL_M), + {OffloadArch::Generic, "generic", ""}, + // clang-format on +}; +#undef SM +#undef SM2 +#undef GFX +#undef INTEL + +const char *OffloadArchToString(OffloadArch A) { + auto result = std::find_if( + std::begin(arch_names), std::end(arch_names), + [A](const OffloadArchToStringMap &map) { return A == map.arch; }); + if (result == std::end(arch_names)) + return "unknown"; + return result->arch_name; +} + +const char *OffloadArchToVirtualArchString(OffloadArch A) { + auto result = std::find_if( + std::begin(arch_names), std::end(arch_names), + [A](const OffloadArchToStringMap &map) { return A == map.arch; }); + if (result == std::end(arch_names)) + return "unknown"; + return result->virtual_arch_name; +} + +OffloadArch StringToOffloadArch(llvm::StringRef S) { + auto result = std::find_if( + std::begin(arch_names), std::end(arch_names), + [S](const OffloadArchToStringMap &map) { return S == map.arch_name; }); + if (result == std::end(arch_names)) + return OffloadArch::UNKNOWN; + return result->arch; +} + +} // namespace clang _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits