https://github.com/jzc updated https://github.com/llvm/llvm-project/pull/133194
>From 8f22fbe1f6272beec61e62bfae72832d75b4f25b Mon Sep 17 00:00:00 2001 From: "Cai, Justin" <justin....@intel.com> Date: Fri, 14 Feb 2025 21:16:27 +0000 Subject: [PATCH 1/3] [SYCL] Add support AOT compilation support for Intel GPUs in clang-sycl-linker --- clang/include/clang/Basic/SYCL.h | 131 ++++++++++ clang/lib/Basic/CMakeLists.txt | 1 + clang/lib/Basic/SYCL.cpp | 226 ++++++++++++++++++ clang/test/Driver/clang-sycl-linker-test.cpp | 36 +++ .../clang-sycl-linker/ClangSYCLLinker.cpp | 112 ++++++++- clang/tools/clang-sycl-linker/SYCLLinkOpts.td | 8 + 6 files changed, 506 insertions(+), 8 deletions(-) create mode 100644 clang/include/clang/Basic/SYCL.h create mode 100644 clang/lib/Basic/SYCL.cpp diff --git a/clang/include/clang/Basic/SYCL.h b/clang/include/clang/Basic/SYCL.h new file mode 100644 index 0000000000000..c7cad37639b91 --- /dev/null +++ b/clang/include/clang/Basic/SYCL.h @@ -0,0 +1,131 @@ +//===--- SYCL.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_SYCL_H +#define LLVM_CLANG_BASIC_SYCL_H + +#include "clang/Basic/Cuda.h" + +namespace llvm { +class StringRef; +template <unsigned InternalLen> class SmallString; +} // namespace llvm + +namespace clang { +// List of architectures (Intel CPUs and Intel GPUs) +// that support SYCL offloading. +enum class SYCLSupportedIntelArchs { + // Intel CPUs + UNKNOWN, + SKYLAKEAVX512, + COREAVX2, + COREI7AVX, + COREI7, + WESTMERE, + SANDYBRIDGE, + IVYBRIDGE, + BROADWELL, + COFFEELAKE, + ALDERLAKE, + SKYLAKE, + SKX, + CASCADELAKE, + ICELAKECLIENT, + ICELAKESERVER, + SAPPHIRERAPIDS, + GRANITERAPIDS, + // Intel GPUs + BDW, + SKL, + KBL, + CFL, + APL, + BXT, + GLK, + WHL, + AML, + CML, + ICLLP, + ICL, + EHL, + JSL, + TGLLP, + TGL, + RKL, + ADL_S, + RPL_S, + ADL_P, + ADL_N, + DG1, + ACM_G10, + DG2_G10, + ACM_G11, + DG2_G11, + ACM_G12, + DG2_G12, + PVC, + PVC_VG, + MTL_U, + MTL_S, + ARL_U, + ARL_S, + MTL_H, + ARL_H, + BMG_G21, + LNL_M, +}; + +// Check if the given Arch value is a Generic AMD GPU. +// Currently GFX*_GENERIC AMD GPUs do not support SYCL offloading. +// This list is used to filter out GFX*_GENERIC AMD GPUs in +// `IsSYCLSupportedAMDGPUArch`. +static inline bool IsAMDGenericGPUArch(OffloadArch Arch) { + return Arch == OffloadArch::GFX9_GENERIC || + Arch == OffloadArch::GFX10_1_GENERIC || + Arch == OffloadArch::GFX10_3_GENERIC || + Arch == OffloadArch::GFX11_GENERIC || + Arch == OffloadArch::GFX12_GENERIC; +} + +// Check if the given Arch value is a valid SYCL supported AMD GPU. +static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::GFX700 && Arch < OffloadArch::AMDGCNSPIRV && + !IsAMDGenericGPUArch(Arch); +} + +// Check if the given Arch value is a valid SYCL supported NVidia GPU. +static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::SM_50 && Arch <= OffloadArch::SM_90a; +} + +// Check if the given Arch value is a valid SYCL supported Intel CPU. +static inline bool IsSYCLSupportedIntelCPUArch(SYCLSupportedIntelArchs Arch) { + return Arch >= SYCLSupportedIntelArchs::SKYLAKEAVX512 && + Arch <= SYCLSupportedIntelArchs::GRANITERAPIDS; +} + +// Check if the given Arch value is a valid SYCL supported Intel GPU. +static inline bool IsSYCLSupportedIntelGPUArch(SYCLSupportedIntelArchs Arch) { + return Arch >= SYCLSupportedIntelArchs::BDW && + Arch <= SYCLSupportedIntelArchs::LNL_M; +} + +// Check if the user provided value for --offload-arch is a valid +// SYCL supported Intel AOT target. +SYCLSupportedIntelArchs +StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString); + +// This is a mapping between the user provided --offload-arch value for Intel +// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU +// AOT compiler). +llvm::StringRef mapIntelGPUArchName(llvm::StringRef ArchName); +llvm::SmallString<64> getGenDeviceMacro(llvm::StringRef DeviceName); + +} // namespace clang + +#endif // LLVM_CLANG_BASIC_SYCL_H diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index 331dfbb3f4b67..be6d915e01b0a 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -90,6 +90,7 @@ add_clang_library(clangBasic SourceMgrAdapter.cpp Stack.cpp StackExhaustionHandler.cpp + SYCL.cpp TargetID.cpp TargetInfo.cpp Targets.cpp diff --git a/clang/lib/Basic/SYCL.cpp b/clang/lib/Basic/SYCL.cpp new file mode 100644 index 0000000000000..9ac5470cdbe5a --- /dev/null +++ b/clang/lib/Basic/SYCL.cpp @@ -0,0 +1,226 @@ +#include "clang/Basic/SYCL.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" + +using namespace llvm; + +namespace clang { + +// Struct that relates an AOT target value with +// Intel CPUs and Intel GPUs. +struct StringToOffloadArchSYCLMap { + const char *ArchName; + SYCLSupportedIntelArchs IntelArch; +}; + +// Mapping of supported SYCL offloading architectures. +static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = { + // Intel CPU mapping. + {"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512}, + {"core-avx2", SYCLSupportedIntelArchs::COREAVX2}, + {"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX}, + {"corei7", SYCLSupportedIntelArchs::COREI7}, + {"westmere", SYCLSupportedIntelArchs::WESTMERE}, + {"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE}, + {"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE}, + {"broadwell", SYCLSupportedIntelArchs::BROADWELL}, + {"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE}, + {"alderlake", SYCLSupportedIntelArchs::ALDERLAKE}, + {"skylake", SYCLSupportedIntelArchs::SKYLAKE}, + {"skx", SYCLSupportedIntelArchs::SKX}, + {"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE}, + {"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT}, + {"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER}, + {"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS}, + {"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS}, + // Intel GPU mapping. + {"bdw", SYCLSupportedIntelArchs::BDW}, + {"skl", SYCLSupportedIntelArchs::SKL}, + {"kbl", SYCLSupportedIntelArchs::KBL}, + {"cfl", SYCLSupportedIntelArchs::CFL}, + {"apl", SYCLSupportedIntelArchs::APL}, + {"bxt", SYCLSupportedIntelArchs::BXT}, + {"glk", SYCLSupportedIntelArchs::GLK}, + {"whl", SYCLSupportedIntelArchs::WHL}, + {"aml", SYCLSupportedIntelArchs::AML}, + {"cml", SYCLSupportedIntelArchs::CML}, + {"icllp", SYCLSupportedIntelArchs::ICLLP}, + {"icl", SYCLSupportedIntelArchs::ICL}, + {"ehl", SYCLSupportedIntelArchs::EHL}, + {"jsl", SYCLSupportedIntelArchs::JSL}, + {"tgllp", SYCLSupportedIntelArchs::TGLLP}, + {"tgl", SYCLSupportedIntelArchs::TGL}, + {"rkl", SYCLSupportedIntelArchs::RKL}, + {"adl_s", SYCLSupportedIntelArchs::ADL_S}, + {"rpl_s", SYCLSupportedIntelArchs::RPL_S}, + {"adl_p", SYCLSupportedIntelArchs::ADL_P}, + {"adl_n", SYCLSupportedIntelArchs::ADL_N}, + {"dg1", SYCLSupportedIntelArchs::DG1}, + {"acm_g10", SYCLSupportedIntelArchs::ACM_G10}, + {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10}, + {"acm_g11", SYCLSupportedIntelArchs::ACM_G11}, + {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10}, + {"dg2_g11", SYCLSupportedIntelArchs::DG2_G11}, + {"acm_g12", SYCLSupportedIntelArchs::ACM_G12}, + {"dg2_g12", SYCLSupportedIntelArchs::DG2_G12}, + {"pvc", SYCLSupportedIntelArchs::PVC}, + {"pvc_vg", SYCLSupportedIntelArchs::PVC_VG}, + {"mtl_u", SYCLSupportedIntelArchs::MTL_U}, + {"mtl_s", SYCLSupportedIntelArchs::MTL_S}, + {"arl_u", SYCLSupportedIntelArchs::ARL_U}, + {"arl_s", SYCLSupportedIntelArchs::ARL_S}, + {"mtl_h", SYCLSupportedIntelArchs::MTL_H}, + {"arl_h", SYCLSupportedIntelArchs::ARL_H}, + {"bmg_g21", SYCLSupportedIntelArchs::BMG_G21}, + {"lnl_m", SYCLSupportedIntelArchs::LNL_M}}; + +// Check if the user provided value for --offload-arch is a valid +// SYCL supported Intel AOT target. +SYCLSupportedIntelArchs StringToOffloadArchSYCL(StringRef ArchNameAsString) { + auto result = + llvm::find_if(StringToArchNamesMap, + [ArchNameAsString](const StringToOffloadArchSYCLMap &map) { + return ArchNameAsString == map.ArchName; + }); + if (result == std::end(StringToArchNamesMap)) + return SYCLSupportedIntelArchs::UNKNOWN; + return result->IntelArch; +} + +// This is a mapping between the user provided --offload-arch value for Intel +// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU +// AOT compiler). +StringRef mapIntelGPUArchName(StringRef ArchName) { + StringRef Arch; + Arch = llvm::StringSwitch<StringRef>(ArchName) + .Case("bdw", "bdw") + .Case("skl", "skl") + .Case("kbl", "kbl") + .Case("cfl", "cfl") + .Cases("apl", "bxt", "apl") + .Case("glk", "glk") + .Case("whl", "whl") + .Case("aml", "aml") + .Case("cml", "cml") + .Cases("icllp", "icl", "icllp") + .Cases("ehl", "jsl", "ehl") + .Cases("tgllp", "tgl", "tgllp") + .Case("rkl", "rkl") + .Cases("adl_s", "rpl_s", "adl_s") + .Case("adl_p", "adl_p") + .Case("adl_n", "adl_n") + .Case("dg1", "dg1") + .Cases("acm_g10", "dg2_g10", "acm_g10") + .Cases("acm_g11", "dg2_g11", "acm_g11") + .Cases("acm_g12", "dg2_g12", "acm_g12") + .Case("pvc", "pvc") + .Case("pvc_vg", "pvc_vg") + .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u") + .Case("mtl_h", "mtl_h") + .Case("arl_h", "arl_h") + .Case("bmg_g21", "bmg_g21") + .Case("lnl_m", "lnl_m") + .Default(""); + return Arch; +} + +SmallString<64> getGenDeviceMacro(StringRef DeviceName) { + SmallString<64> Macro; + StringRef Ext = llvm::StringSwitch<StringRef>(DeviceName) + .Case("bdw", "INTEL_GPU_BDW") + .Case("skl", "INTEL_GPU_SKL") + .Case("kbl", "INTEL_GPU_KBL") + .Case("cfl", "INTEL_GPU_CFL") + .Case("apl", "INTEL_GPU_APL") + .Case("glk", "INTEL_GPU_GLK") + .Case("whl", "INTEL_GPU_WHL") + .Case("aml", "INTEL_GPU_AML") + .Case("cml", "INTEL_GPU_CML") + .Case("icllp", "INTEL_GPU_ICLLP") + .Case("ehl", "INTEL_GPU_EHL") + .Case("tgllp", "INTEL_GPU_TGLLP") + .Case("rkl", "INTEL_GPU_RKL") + .Case("adl_s", "INTEL_GPU_ADL_S") + .Case("adl_p", "INTEL_GPU_ADL_P") + .Case("adl_n", "INTEL_GPU_ADL_N") + .Case("dg1", "INTEL_GPU_DG1") + .Case("acm_g10", "INTEL_GPU_ACM_G10") + .Case("acm_g11", "INTEL_GPU_ACM_G11") + .Case("acm_g12", "INTEL_GPU_ACM_G12") + .Case("pvc", "INTEL_GPU_PVC") + .Case("pvc_vg", "INTEL_GPU_PVC_VG") + .Case("mtl_u", "INTEL_GPU_MTL_U") + .Case("mtl_h", "INTEL_GPU_MTL_H") + .Case("arl_h", "INTEL_GPU_ARL_H") + .Case("bmg_g21", "INTEL_GPU_BMG_G21") + .Case("lnl_m", "INTEL_GPU_LNL_M") + .Case("ptl_h", "INTEL_GPU_PTL_H") + .Case("ptl_u", "INTEL_GPU_PTL_U") + .Case("sm_50", "NVIDIA_GPU_SM_50") + .Case("sm_52", "NVIDIA_GPU_SM_52") + .Case("sm_53", "NVIDIA_GPU_SM_53") + .Case("sm_60", "NVIDIA_GPU_SM_60") + .Case("sm_61", "NVIDIA_GPU_SM_61") + .Case("sm_62", "NVIDIA_GPU_SM_62") + .Case("sm_70", "NVIDIA_GPU_SM_70") + .Case("sm_72", "NVIDIA_GPU_SM_72") + .Case("sm_75", "NVIDIA_GPU_SM_75") + .Case("sm_80", "NVIDIA_GPU_SM_80") + .Case("sm_86", "NVIDIA_GPU_SM_86") + .Case("sm_87", "NVIDIA_GPU_SM_87") + .Case("sm_89", "NVIDIA_GPU_SM_89") + .Case("sm_90", "NVIDIA_GPU_SM_90") + .Case("sm_90a", "NVIDIA_GPU_SM_90A") + .Case("gfx700", "AMD_GPU_GFX700") + .Case("gfx701", "AMD_GPU_GFX701") + .Case("gfx702", "AMD_GPU_GFX702") + .Case("gfx703", "AMD_GPU_GFX703") + .Case("gfx704", "AMD_GPU_GFX704") + .Case("gfx705", "AMD_GPU_GFX705") + .Case("gfx801", "AMD_GPU_GFX801") + .Case("gfx802", "AMD_GPU_GFX802") + .Case("gfx803", "AMD_GPU_GFX803") + .Case("gfx805", "AMD_GPU_GFX805") + .Case("gfx810", "AMD_GPU_GFX810") + .Case("gfx900", "AMD_GPU_GFX900") + .Case("gfx902", "AMD_GPU_GFX902") + .Case("gfx904", "AMD_GPU_GFX904") + .Case("gfx906", "AMD_GPU_GFX906") + .Case("gfx908", "AMD_GPU_GFX908") + .Case("gfx909", "AMD_GPU_GFX909") + .Case("gfx90a", "AMD_GPU_GFX90A") + .Case("gfx90c", "AMD_GPU_GFX90C") + .Case("gfx940", "AMD_GPU_GFX940") + .Case("gfx941", "AMD_GPU_GFX941") + .Case("gfx942", "AMD_GPU_GFX942") + .Case("gfx1010", "AMD_GPU_GFX1010") + .Case("gfx1011", "AMD_GPU_GFX1011") + .Case("gfx1012", "AMD_GPU_GFX1012") + .Case("gfx1013", "AMD_GPU_GFX1013") + .Case("gfx1030", "AMD_GPU_GFX1030") + .Case("gfx1031", "AMD_GPU_GFX1031") + .Case("gfx1032", "AMD_GPU_GFX1032") + .Case("gfx1033", "AMD_GPU_GFX1033") + .Case("gfx1034", "AMD_GPU_GFX1034") + .Case("gfx1035", "AMD_GPU_GFX1035") + .Case("gfx1036", "AMD_GPU_GFX1036") + .Case("gfx1100", "AMD_GPU_GFX1100") + .Case("gfx1101", "AMD_GPU_GFX1101") + .Case("gfx1102", "AMD_GPU_GFX1102") + .Case("gfx1103", "AMD_GPU_GFX1103") + .Case("gfx1150", "AMD_GPU_GFX1150") + .Case("gfx1151", "AMD_GPU_GFX1151") + .Case("gfx1200", "AMD_GPU_GFX1200") + .Case("gfx1201", "AMD_GPU_GFX1201") + .Default(""); + if (!Ext.empty()) { + Macro = "__SYCL_TARGET_"; + Macro += Ext; + Macro += "__"; + } + return Macro; +} + +} // namespace clang diff --git a/clang/test/Driver/clang-sycl-linker-test.cpp b/clang/test/Driver/clang-sycl-linker-test.cpp index f358900b4fbd8..07850dc41b4fc 100644 --- a/clang/test/Driver/clang-sycl-linker-test.cpp +++ b/clang/test/Driver/clang-sycl-linker-test.cpp @@ -46,3 +46,39 @@ // RUN: clang-sycl-linker --dry-run -triple spirv64 %t_1.bc %t_2.bc -o a.spv 2>&1 \ // RUN: | FileCheck %s --check-prefix=LLVMOPTSLIN // LLVMOPTSLIN: -spirv-debug-info-version=nonsemantic-shader-200 -spirv-allow-unknown-intrinsics=llvm.genx. -spirv-ext= +// +// Test AOT compilation for an Intel GPU. +// RUN: clang-sycl-linker --dry-run -arch pvc %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU +// AOT-INTEL-GPU: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings +// AOT-INTEL-GPU-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc +// AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device pvc {{.*}}-output a.out -file [[SPIRVTRANSLATIONOUT]] +// +// Test AOT compilation for an Intel GPU with additional options. +// RUN: clang-sycl-linker --dry-run -arch pvc %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: --ocloc-options="-a -b" \ +// RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU-2 +// AOT-INTEL-GPU-2: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings +// AOT-INTEL-GPU-2-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc +// AOT-INTEL-GPU-2-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device pvc -a -b {{.*}}-output a.out -file [[SPIRVTRANSLATIONOUT]] +// +// Test AOT compilation for an Intel CPU. +// RUN: clang-sycl-linker --dry-run -arch corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU +// AOT-INTEL-CPU: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings +// AOT-INTEL-CPU-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc +// AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu {{.*}}-o a.out [[SPIRVTRANSLATIONOUT]] +// +// Test AOT compilation for an Intel CPU with additional options. +// RUN: clang-sycl-linker --dry-run -arch corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \ +// RUN: --opencl-aot-options="-a -b" \ +// RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU-2 +// AOT-INTEL-CPU-2: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings +// AOT-INTEL-CPU-2-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc +// AOT-INTEL-CPU-2-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b {{.*}}-o a.out [[SPIRVTRANSLATIONOUT]] +// +// Check that the output file must be specified. +// RUN: not clang-sycl-linker --dry-run %t_1.bc %t_2.bc 2>& 1 \ +// RUN: | FileCheck %s --check-prefix=NOOUTPUT +// NOOUTPUT: Output file is not specified +// diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index 2bcb3757d49d0..1798907c1f3e0 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -14,6 +14,7 @@ // target-specific device code. //===---------------------------------------------------------------------===// +#include "clang/Basic/SYCL.h" #include "clang/Basic/Version.h" #include "llvm/ADT/StringExtras.h" @@ -50,6 +51,7 @@ using namespace llvm; using namespace llvm::opt; using namespace llvm::object; +using namespace clang; /// Save intermediary results. static bool SaveTemps = false; @@ -66,6 +68,8 @@ static StringRef OutputFile; /// Directory to dump SPIR-V IR if requested by user. static SmallString<128> SPIRVDumpDir; +static bool IsAOTCompileNeeded = false; + static void printVersion(raw_ostream &OS) { OS << clang::getClangToolFullVersion("clang-sycl-linker") << '\n'; } @@ -392,7 +396,15 @@ static Expected<StringRef> runLLVMToSPIRVTranslation(StringRef File, LLVMToSPIRVOptions = A->getValue(); LLVMToSPIRVOptions.split(CmdArgs, " ", /* MaxSplit = */ -1, /* KeepEmpty = */ false); - CmdArgs.append({"-o", OutputFile}); + + Expected<StringRef> OutFileOrErr = + IsAOTCompileNeeded + ? createTempFile(Args, sys::path::filename(OutputFile), "spv") + : OutputFile; + if (!OutFileOrErr) + return OutFileOrErr.takeError(); + + CmdArgs.append({"-o", *OutFileOrErr}); CmdArgs.push_back(File); if (Error Err = executeCommands(*LLVMToSPIRVProg, CmdArgs)) return std::move(Err); @@ -406,7 +418,7 @@ static Expected<StringRef> runLLVMToSPIRVTranslation(StringRef File, formatv("failed to create dump directory. path: {0}, error_code: {1}", SPIRVDumpDir, EC.value())); - StringRef Path = OutputFile; + StringRef Path = *OutFileOrErr; StringRef Filename = llvm::sys::path::filename(Path); SmallString<128> CopyPath = SPIRVDumpDir; CopyPath.append(Filename); @@ -419,7 +431,83 @@ static Expected<StringRef> runLLVMToSPIRVTranslation(StringRef File, Path, CopyPath, EC.value())); } - return OutputFile; + return *OutFileOrErr; +} + +/// Run AOT compilation for Intel CPU. +/// Calls opencl-aot tool to generate device code for Intel CPU backend. +/// 'InputFile' is the input SPIR-V file. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// SYCL AOT compilation step. +static Error runAOTCompileIntelCPU(StringRef InputFile, const ArgList &Args) { + SmallVector<StringRef, 8> CmdArgs; + Expected<std::string> OpenCLAOTPath = + findProgram(Args, "opencl-aot", {getMainExecutable("opencl-aot")}); + if (!OpenCLAOTPath) + return OpenCLAOTPath.takeError(); + + CmdArgs.push_back(*OpenCLAOTPath); + CmdArgs.push_back("--device=cpu"); + StringRef ExtraArgs = Args.getLastArgValue(OPT_opencl_aot_options_EQ); + ExtraArgs.split(CmdArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + CmdArgs.push_back("-o"); + CmdArgs.push_back(OutputFile); + CmdArgs.push_back(InputFile); + if (Error Err = executeCommands(*OpenCLAOTPath, CmdArgs)) + return std::move(Err); + return Error::success(); +} + +/// Run AOT compilation for Intel GPU +/// Calls ocloc tool to generate device code for Intel GPU backend. +/// 'InputFile' is the input SPIR-V file. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// SYCL AOT compilation step. +static Error runAOTCompileIntelGPU(StringRef InputFile, const ArgList &Args) { + SmallVector<StringRef, 8> CmdArgs; + Expected<std::string> OclocPath = + findProgram(Args, "ocloc", {getMainExecutable("ocloc")}); + if (!OclocPath) + return OclocPath.takeError(); + + CmdArgs.push_back(*OclocPath); + // The next line prevents ocloc from modifying the image name + CmdArgs.push_back("-output_no_suffix"); + CmdArgs.push_back("-spirv_input"); + + StringRef Arch(Args.getLastArgValue(OPT_arch)); + assert(!Arch.empty() && "Arch must be specified for AOT compilation"); + CmdArgs.push_back("-device"); + CmdArgs.push_back(Arch); + + StringRef ExtraArgs = Args.getLastArgValue(OPT_ocloc_options_EQ); + ExtraArgs.split(CmdArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + + CmdArgs.push_back("-output"); + CmdArgs.push_back(OutputFile); + CmdArgs.push_back("-file"); + CmdArgs.push_back(InputFile); + if (Error Err = executeCommands(*OclocPath, CmdArgs)) + return std::move(Err); + return Error::success(); +} + +/// Run AOT compilation for Intel CPU/GPU. +/// 'InputFile' is the input SPIR-V file. +/// 'Args' encompasses all arguments required for linking and wrapping device +/// code and will be parsed to generate options required to be passed into the +/// SYCL AOT compilation step. +static Error runAOTCompile(StringRef InputFile, const ArgList &Args) { + StringRef Arch = Args.getLastArgValue(OPT_arch); + SYCLSupportedIntelArchs OffloadArch = StringToOffloadArchSYCL(Arch); + if (IsSYCLSupportedIntelGPUArch(OffloadArch)) + return runAOTCompileIntelGPU(InputFile, Args); + if (IsSYCLSupportedIntelCPUArch(OffloadArch)) + return runAOTCompileIntelCPU(InputFile, Args); + + return createStringError(inconvertibleErrorCode(), "Unsupported arch"); } Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) { @@ -427,17 +515,23 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) { // First llvm-link step auto LinkedFile = linkDeviceInputFiles(Files, Args); if (!LinkedFile) - reportError(LinkedFile.takeError()); + return LinkedFile.takeError(); // second llvm-link step auto DeviceLinkedFile = linkDeviceLibFiles(*LinkedFile, Args); if (!DeviceLinkedFile) - reportError(DeviceLinkedFile.takeError()); + return DeviceLinkedFile.takeError(); // LLVM to SPIR-V translation step auto SPVFile = runLLVMToSPIRVTranslation(*DeviceLinkedFile, Args); if (!SPVFile) return SPVFile.takeError(); + + if (IsAOTCompileNeeded) { + if (Error Err = runAOTCompile(*SPVFile, Args)) + return Err; + } + return Error::success(); } @@ -474,9 +568,11 @@ int main(int argc, char **argv) { DryRun = Args.hasArg(OPT_dry_run); SaveTemps = Args.hasArg(OPT_save_temps); - OutputFile = "a.spv"; - if (Args.hasArg(OPT_o)) - OutputFile = Args.getLastArgValue(OPT_o); + IsAOTCompileNeeded = Args.hasArg(OPT_arch); + + if (!Args.hasArg(OPT_o)) + reportError(createStringError("Output file is not specified")); + OutputFile = Args.getLastArgValue(OPT_o); if (Args.hasArg(OPT_spirv_dump_device_code_EQ)) { Arg *A = Args.getLastArg(OPT_spirv_dump_device_code_EQ); diff --git a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td index 959fd6c3e867c..abcacc9daed6e 100644 --- a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td +++ b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td @@ -50,3 +50,11 @@ def llvm_spirv_path_EQ : Joined<["--"], "llvm-spirv-path=">, def llvm_spirv_options_EQ : Joined<["--", "-"], "llvm-spirv-options=">, Flags<[LinkerOnlyOption]>, HelpText<"Options that will control llvm-spirv step">; + +def ocloc_options_EQ : Joined<["--", "-"], "ocloc-options=">, + Flags<[LinkerOnlyOption]>, + HelpText<"Options passed to ocloc for Intel GPU AOT compilation">; + +def opencl_aot_options_EQ : Joined<["--", "-"], "opencl-aot-options=">, + Flags<[LinkerOnlyOption]>, + HelpText<"Options passed to opencl-aot for CPU AOT compilation">; >From abf2b4be9d2f093f5c2e23f2b359836cf6824a64 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" <justin....@intel.com> Date: Wed, 2 Apr 2025 20:23:24 +0000 Subject: [PATCH 2/3] Return error instead of assert --- clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index be855c124e466..23c14c8f07200 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -430,7 +430,9 @@ static Error runAOTCompileIntelGPU(StringRef InputFile, const ArgList &Args) { CmdArgs.push_back("-spirv_input"); StringRef Arch(Args.getLastArgValue(OPT_arch_EQ)); - assert(!Arch.empty() && "Arch must be specified for AOT compilation"); + if (Arch.empty()) + return createStringError(inconvertibleErrorCode(), + "Arch must be specified for AOT compilation"); CmdArgs.push_back("-device"); CmdArgs.push_back(Arch); >From ff997fab0f621d1bf195d6a854c9e2c59be5fd5a Mon Sep 17 00:00:00 2001 From: "Cai, Justin" <justin....@intel.com> Date: Thu, 3 Apr 2025 07:34:28 +0000 Subject: [PATCH 3/3] Add Intel arches to OffloadArch enum --- clang/include/clang/Basic/Cuda.h | 66 +++++ clang/include/clang/Basic/SYCL.h | 131 ---------- clang/lib/Basic/CMakeLists.txt | 1 - clang/lib/Basic/Cuda.cpp | 59 +++++ clang/lib/Basic/SYCL.cpp | 226 ------------------ clang/lib/Basic/Targets/NVPTX.cpp | 55 +++++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 55 +++++ .../clang-sycl-linker/ClangSYCLLinker.cpp | 8 +- 8 files changed, 239 insertions(+), 362 deletions(-) delete mode 100644 clang/include/clang/Basic/SYCL.h delete mode 100644 clang/lib/Basic/SYCL.cpp diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index c4eb7b7cac1d6..be8922be5167f 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -140,6 +140,63 @@ enum class OffloadArch { AMDGCNSPIRV, Generic, // A processor model named 'generic' if the target backend defines a // public one. + // Intel CPUs + SKYLAKEAVX512, + COREAVX2, + COREI7AVX, + COREI7, + WESTMERE, + SANDYBRIDGE, + IVYBRIDGE, + BROADWELL, + COFFEELAKE, + ALDERLAKE, + SKYLAKE, + SKX, + CASCADELAKE, + ICELAKECLIENT, + ICELAKESERVER, + SAPPHIRERAPIDS, + GRANITERAPIDS, + // Intel GPUs + BDW, + SKL, + KBL, + CFL, + APL, + BXT, + GLK, + WHL, + AML, + CML, + ICLLP, + ICL, + EHL, + JSL, + TGLLP, + TGL, + RKL, + ADL_S, + RPL_S, + ADL_P, + ADL_N, + DG1, + ACM_G10, + DG2_G10, + ACM_G11, + DG2_G11, + ACM_G12, + DG2_G12, + PVC, + PVC_VG, + MTL_U, + MTL_S, + ARL_U, + ARL_S, + MTL_H, + ARL_H, + BMG_G21, + LNL_M, LAST, CudaDefault = OffloadArch::SM_52, @@ -163,6 +220,15 @@ static inline bool IsAMDOffloadArch(OffloadArch A) { return A >= OffloadArch::GFX600 && A < OffloadArch::Generic; } +static inline bool IsIntelCPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::SKYLAKEAVX512 && + Arch <= OffloadArch::GRANITERAPIDS; +} + +static inline bool IsIntelGPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::BDW && Arch <= OffloadArch::LNL_M; +} + const char *OffloadArchToString(OffloadArch A); const char *OffloadArchToVirtualArchString(OffloadArch A); diff --git a/clang/include/clang/Basic/SYCL.h b/clang/include/clang/Basic/SYCL.h deleted file mode 100644 index c7cad37639b91..0000000000000 --- a/clang/include/clang/Basic/SYCL.h +++ /dev/null @@ -1,131 +0,0 @@ -//===--- SYCL.h -------------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_BASIC_SYCL_H -#define LLVM_CLANG_BASIC_SYCL_H - -#include "clang/Basic/Cuda.h" - -namespace llvm { -class StringRef; -template <unsigned InternalLen> class SmallString; -} // namespace llvm - -namespace clang { -// List of architectures (Intel CPUs and Intel GPUs) -// that support SYCL offloading. -enum class SYCLSupportedIntelArchs { - // Intel CPUs - UNKNOWN, - SKYLAKEAVX512, - COREAVX2, - COREI7AVX, - COREI7, - WESTMERE, - SANDYBRIDGE, - IVYBRIDGE, - BROADWELL, - COFFEELAKE, - ALDERLAKE, - SKYLAKE, - SKX, - CASCADELAKE, - ICELAKECLIENT, - ICELAKESERVER, - SAPPHIRERAPIDS, - GRANITERAPIDS, - // Intel GPUs - BDW, - SKL, - KBL, - CFL, - APL, - BXT, - GLK, - WHL, - AML, - CML, - ICLLP, - ICL, - EHL, - JSL, - TGLLP, - TGL, - RKL, - ADL_S, - RPL_S, - ADL_P, - ADL_N, - DG1, - ACM_G10, - DG2_G10, - ACM_G11, - DG2_G11, - ACM_G12, - DG2_G12, - PVC, - PVC_VG, - MTL_U, - MTL_S, - ARL_U, - ARL_S, - MTL_H, - ARL_H, - BMG_G21, - LNL_M, -}; - -// Check if the given Arch value is a Generic AMD GPU. -// Currently GFX*_GENERIC AMD GPUs do not support SYCL offloading. -// This list is used to filter out GFX*_GENERIC AMD GPUs in -// `IsSYCLSupportedAMDGPUArch`. -static inline bool IsAMDGenericGPUArch(OffloadArch Arch) { - return Arch == OffloadArch::GFX9_GENERIC || - Arch == OffloadArch::GFX10_1_GENERIC || - Arch == OffloadArch::GFX10_3_GENERIC || - Arch == OffloadArch::GFX11_GENERIC || - Arch == OffloadArch::GFX12_GENERIC; -} - -// Check if the given Arch value is a valid SYCL supported AMD GPU. -static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) { - return Arch >= OffloadArch::GFX700 && Arch < OffloadArch::AMDGCNSPIRV && - !IsAMDGenericGPUArch(Arch); -} - -// Check if the given Arch value is a valid SYCL supported NVidia GPU. -static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) { - return Arch >= OffloadArch::SM_50 && Arch <= OffloadArch::SM_90a; -} - -// Check if the given Arch value is a valid SYCL supported Intel CPU. -static inline bool IsSYCLSupportedIntelCPUArch(SYCLSupportedIntelArchs Arch) { - return Arch >= SYCLSupportedIntelArchs::SKYLAKEAVX512 && - Arch <= SYCLSupportedIntelArchs::GRANITERAPIDS; -} - -// Check if the given Arch value is a valid SYCL supported Intel GPU. -static inline bool IsSYCLSupportedIntelGPUArch(SYCLSupportedIntelArchs Arch) { - return Arch >= SYCLSupportedIntelArchs::BDW && - Arch <= SYCLSupportedIntelArchs::LNL_M; -} - -// Check if the user provided value for --offload-arch is a valid -// SYCL supported Intel AOT target. -SYCLSupportedIntelArchs -StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString); - -// This is a mapping between the user provided --offload-arch value for Intel -// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU -// AOT compiler). -llvm::StringRef mapIntelGPUArchName(llvm::StringRef ArchName); -llvm::SmallString<64> getGenDeviceMacro(llvm::StringRef DeviceName); - -} // namespace clang - -#endif // LLVM_CLANG_BASIC_SYCL_H diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index be6d915e01b0a..331dfbb3f4b67 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -90,7 +90,6 @@ add_clang_library(clangBasic SourceMgrAdapter.cpp Stack.cpp StackExhaustionHandler.cpp - SYCL.cpp TargetID.cpp TargetInfo.cpp Targets.cpp diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 68d042eca2492..f1015c47f314f 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -84,6 +84,7 @@ struct OffloadArchToStringMap { #define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca} #define SM(sm) SM2(sm, "compute_" #sm) #define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"} +#define INTEL(name, value) {OffloadArch::value, #name, ""} static const OffloadArchToStringMap arch_names[] = { // clang-format off {OffloadArch::UNUSED, "", ""}, @@ -156,12 +157,70 @@ static const OffloadArchToStringMap arch_names[] = { GFX(1200), // gfx1200 GFX(1201), // gfx1201 {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"}, + // Intel CPUs + INTEL(skylake-avx512, SKYLAKEAVX512), + INTEL(core-avx2, COREAVX2), + INTEL(corei7-avx, COREI7AVX), + INTEL(corei7, COREI7), + INTEL(westmere, WESTMERE), + INTEL(sandybridge, SANDYBRIDGE), + INTEL(ivybridge, IVYBRIDGE), + INTEL(broadwell, BROADWELL), + INTEL(coffeelake, COFFEELAKE), + INTEL(alderlake, ALDERLAKE), + INTEL(skylake, SKYLAKE), + INTEL(skx, SKX), + INTEL(cascadelake, CASCADELAKE), + INTEL(icelake-client, ICELAKECLIENT), + INTEL(icelakeserver, ICELAKESERVER), + INTEL(sapphirerapids, SAPPHIRERAPIDS), + INTEL(graniterapids, GRANITERAPIDS), + // Intel GPUs + INTEL(bdw, BDW), + INTEL(skl, SKL), + INTEL(kbl, KBL), + INTEL(cfl, CFL), + INTEL(apl, APL), + INTEL(bxt, BXT), + INTEL(glk, GLK), + INTEL(whl, WHL), + INTEL(aml, AML), + INTEL(cml, CML), + INTEL(icllp, ICLLP), + INTEL(icl, ICL), + INTEL(ehl, EHL), + INTEL(jsl, JSL), + INTEL(tgllp, TGLLP), + INTEL(tgl, TGL), + INTEL(rkl, RKL), + INTEL(adl_s, ADL_S), + INTEL(rpl_s, RPL_S), + INTEL(adl_p, ADL_P), + INTEL(adl_n, ADL_N), + INTEL(dg1, DG1), + INTEL(acm_g10, ACM_G10), + INTEL(dg2_g10, DG2_G10), + INTEL(acm_g11, ACM_G11), + INTEL(dg2_g11, DG2_G11), + INTEL(acm_g12, ACM_G12), + INTEL(dg2_g12, DG2_G12), + INTEL(pvc, PVC), + INTEL(pvc_vg, PVC_VG), + INTEL(mtl_u, MTL_U), + INTEL(mtl_s, MTL_S), + INTEL(arl_u, ARL_U), + INTEL(arl_s, ARL_S), + INTEL(mtl_h, MTL_H), + INTEL(arl_h, ARL_H), + INTEL(bmg_g21, BMG_G21), + INTEL(lnl_m, LNL_M), {OffloadArch::Generic, "generic", ""}, // clang-format on }; #undef SM #undef SM2 #undef GFX +#undef INTEL const char *OffloadArchToString(OffloadArch A) { auto result = std::find_if( diff --git a/clang/lib/Basic/SYCL.cpp b/clang/lib/Basic/SYCL.cpp deleted file mode 100644 index 9ac5470cdbe5a..0000000000000 --- a/clang/lib/Basic/SYCL.cpp +++ /dev/null @@ -1,226 +0,0 @@ -#include "clang/Basic/SYCL.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" - -using namespace llvm; - -namespace clang { - -// Struct that relates an AOT target value with -// Intel CPUs and Intel GPUs. -struct StringToOffloadArchSYCLMap { - const char *ArchName; - SYCLSupportedIntelArchs IntelArch; -}; - -// Mapping of supported SYCL offloading architectures. -static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = { - // Intel CPU mapping. - {"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512}, - {"core-avx2", SYCLSupportedIntelArchs::COREAVX2}, - {"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX}, - {"corei7", SYCLSupportedIntelArchs::COREI7}, - {"westmere", SYCLSupportedIntelArchs::WESTMERE}, - {"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE}, - {"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE}, - {"broadwell", SYCLSupportedIntelArchs::BROADWELL}, - {"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE}, - {"alderlake", SYCLSupportedIntelArchs::ALDERLAKE}, - {"skylake", SYCLSupportedIntelArchs::SKYLAKE}, - {"skx", SYCLSupportedIntelArchs::SKX}, - {"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE}, - {"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT}, - {"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER}, - {"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS}, - {"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS}, - // Intel GPU mapping. - {"bdw", SYCLSupportedIntelArchs::BDW}, - {"skl", SYCLSupportedIntelArchs::SKL}, - {"kbl", SYCLSupportedIntelArchs::KBL}, - {"cfl", SYCLSupportedIntelArchs::CFL}, - {"apl", SYCLSupportedIntelArchs::APL}, - {"bxt", SYCLSupportedIntelArchs::BXT}, - {"glk", SYCLSupportedIntelArchs::GLK}, - {"whl", SYCLSupportedIntelArchs::WHL}, - {"aml", SYCLSupportedIntelArchs::AML}, - {"cml", SYCLSupportedIntelArchs::CML}, - {"icllp", SYCLSupportedIntelArchs::ICLLP}, - {"icl", SYCLSupportedIntelArchs::ICL}, - {"ehl", SYCLSupportedIntelArchs::EHL}, - {"jsl", SYCLSupportedIntelArchs::JSL}, - {"tgllp", SYCLSupportedIntelArchs::TGLLP}, - {"tgl", SYCLSupportedIntelArchs::TGL}, - {"rkl", SYCLSupportedIntelArchs::RKL}, - {"adl_s", SYCLSupportedIntelArchs::ADL_S}, - {"rpl_s", SYCLSupportedIntelArchs::RPL_S}, - {"adl_p", SYCLSupportedIntelArchs::ADL_P}, - {"adl_n", SYCLSupportedIntelArchs::ADL_N}, - {"dg1", SYCLSupportedIntelArchs::DG1}, - {"acm_g10", SYCLSupportedIntelArchs::ACM_G10}, - {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10}, - {"acm_g11", SYCLSupportedIntelArchs::ACM_G11}, - {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10}, - {"dg2_g11", SYCLSupportedIntelArchs::DG2_G11}, - {"acm_g12", SYCLSupportedIntelArchs::ACM_G12}, - {"dg2_g12", SYCLSupportedIntelArchs::DG2_G12}, - {"pvc", SYCLSupportedIntelArchs::PVC}, - {"pvc_vg", SYCLSupportedIntelArchs::PVC_VG}, - {"mtl_u", SYCLSupportedIntelArchs::MTL_U}, - {"mtl_s", SYCLSupportedIntelArchs::MTL_S}, - {"arl_u", SYCLSupportedIntelArchs::ARL_U}, - {"arl_s", SYCLSupportedIntelArchs::ARL_S}, - {"mtl_h", SYCLSupportedIntelArchs::MTL_H}, - {"arl_h", SYCLSupportedIntelArchs::ARL_H}, - {"bmg_g21", SYCLSupportedIntelArchs::BMG_G21}, - {"lnl_m", SYCLSupportedIntelArchs::LNL_M}}; - -// Check if the user provided value for --offload-arch is a valid -// SYCL supported Intel AOT target. -SYCLSupportedIntelArchs StringToOffloadArchSYCL(StringRef ArchNameAsString) { - auto result = - llvm::find_if(StringToArchNamesMap, - [ArchNameAsString](const StringToOffloadArchSYCLMap &map) { - return ArchNameAsString == map.ArchName; - }); - if (result == std::end(StringToArchNamesMap)) - return SYCLSupportedIntelArchs::UNKNOWN; - return result->IntelArch; -} - -// This is a mapping between the user provided --offload-arch value for Intel -// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU -// AOT compiler). -StringRef mapIntelGPUArchName(StringRef ArchName) { - StringRef Arch; - Arch = llvm::StringSwitch<StringRef>(ArchName) - .Case("bdw", "bdw") - .Case("skl", "skl") - .Case("kbl", "kbl") - .Case("cfl", "cfl") - .Cases("apl", "bxt", "apl") - .Case("glk", "glk") - .Case("whl", "whl") - .Case("aml", "aml") - .Case("cml", "cml") - .Cases("icllp", "icl", "icllp") - .Cases("ehl", "jsl", "ehl") - .Cases("tgllp", "tgl", "tgllp") - .Case("rkl", "rkl") - .Cases("adl_s", "rpl_s", "adl_s") - .Case("adl_p", "adl_p") - .Case("adl_n", "adl_n") - .Case("dg1", "dg1") - .Cases("acm_g10", "dg2_g10", "acm_g10") - .Cases("acm_g11", "dg2_g11", "acm_g11") - .Cases("acm_g12", "dg2_g12", "acm_g12") - .Case("pvc", "pvc") - .Case("pvc_vg", "pvc_vg") - .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u") - .Case("mtl_h", "mtl_h") - .Case("arl_h", "arl_h") - .Case("bmg_g21", "bmg_g21") - .Case("lnl_m", "lnl_m") - .Default(""); - return Arch; -} - -SmallString<64> getGenDeviceMacro(StringRef DeviceName) { - SmallString<64> Macro; - StringRef Ext = llvm::StringSwitch<StringRef>(DeviceName) - .Case("bdw", "INTEL_GPU_BDW") - .Case("skl", "INTEL_GPU_SKL") - .Case("kbl", "INTEL_GPU_KBL") - .Case("cfl", "INTEL_GPU_CFL") - .Case("apl", "INTEL_GPU_APL") - .Case("glk", "INTEL_GPU_GLK") - .Case("whl", "INTEL_GPU_WHL") - .Case("aml", "INTEL_GPU_AML") - .Case("cml", "INTEL_GPU_CML") - .Case("icllp", "INTEL_GPU_ICLLP") - .Case("ehl", "INTEL_GPU_EHL") - .Case("tgllp", "INTEL_GPU_TGLLP") - .Case("rkl", "INTEL_GPU_RKL") - .Case("adl_s", "INTEL_GPU_ADL_S") - .Case("adl_p", "INTEL_GPU_ADL_P") - .Case("adl_n", "INTEL_GPU_ADL_N") - .Case("dg1", "INTEL_GPU_DG1") - .Case("acm_g10", "INTEL_GPU_ACM_G10") - .Case("acm_g11", "INTEL_GPU_ACM_G11") - .Case("acm_g12", "INTEL_GPU_ACM_G12") - .Case("pvc", "INTEL_GPU_PVC") - .Case("pvc_vg", "INTEL_GPU_PVC_VG") - .Case("mtl_u", "INTEL_GPU_MTL_U") - .Case("mtl_h", "INTEL_GPU_MTL_H") - .Case("arl_h", "INTEL_GPU_ARL_H") - .Case("bmg_g21", "INTEL_GPU_BMG_G21") - .Case("lnl_m", "INTEL_GPU_LNL_M") - .Case("ptl_h", "INTEL_GPU_PTL_H") - .Case("ptl_u", "INTEL_GPU_PTL_U") - .Case("sm_50", "NVIDIA_GPU_SM_50") - .Case("sm_52", "NVIDIA_GPU_SM_52") - .Case("sm_53", "NVIDIA_GPU_SM_53") - .Case("sm_60", "NVIDIA_GPU_SM_60") - .Case("sm_61", "NVIDIA_GPU_SM_61") - .Case("sm_62", "NVIDIA_GPU_SM_62") - .Case("sm_70", "NVIDIA_GPU_SM_70") - .Case("sm_72", "NVIDIA_GPU_SM_72") - .Case("sm_75", "NVIDIA_GPU_SM_75") - .Case("sm_80", "NVIDIA_GPU_SM_80") - .Case("sm_86", "NVIDIA_GPU_SM_86") - .Case("sm_87", "NVIDIA_GPU_SM_87") - .Case("sm_89", "NVIDIA_GPU_SM_89") - .Case("sm_90", "NVIDIA_GPU_SM_90") - .Case("sm_90a", "NVIDIA_GPU_SM_90A") - .Case("gfx700", "AMD_GPU_GFX700") - .Case("gfx701", "AMD_GPU_GFX701") - .Case("gfx702", "AMD_GPU_GFX702") - .Case("gfx703", "AMD_GPU_GFX703") - .Case("gfx704", "AMD_GPU_GFX704") - .Case("gfx705", "AMD_GPU_GFX705") - .Case("gfx801", "AMD_GPU_GFX801") - .Case("gfx802", "AMD_GPU_GFX802") - .Case("gfx803", "AMD_GPU_GFX803") - .Case("gfx805", "AMD_GPU_GFX805") - .Case("gfx810", "AMD_GPU_GFX810") - .Case("gfx900", "AMD_GPU_GFX900") - .Case("gfx902", "AMD_GPU_GFX902") - .Case("gfx904", "AMD_GPU_GFX904") - .Case("gfx906", "AMD_GPU_GFX906") - .Case("gfx908", "AMD_GPU_GFX908") - .Case("gfx909", "AMD_GPU_GFX909") - .Case("gfx90a", "AMD_GPU_GFX90A") - .Case("gfx90c", "AMD_GPU_GFX90C") - .Case("gfx940", "AMD_GPU_GFX940") - .Case("gfx941", "AMD_GPU_GFX941") - .Case("gfx942", "AMD_GPU_GFX942") - .Case("gfx1010", "AMD_GPU_GFX1010") - .Case("gfx1011", "AMD_GPU_GFX1011") - .Case("gfx1012", "AMD_GPU_GFX1012") - .Case("gfx1013", "AMD_GPU_GFX1013") - .Case("gfx1030", "AMD_GPU_GFX1030") - .Case("gfx1031", "AMD_GPU_GFX1031") - .Case("gfx1032", "AMD_GPU_GFX1032") - .Case("gfx1033", "AMD_GPU_GFX1033") - .Case("gfx1034", "AMD_GPU_GFX1034") - .Case("gfx1035", "AMD_GPU_GFX1035") - .Case("gfx1036", "AMD_GPU_GFX1036") - .Case("gfx1100", "AMD_GPU_GFX1100") - .Case("gfx1101", "AMD_GPU_GFX1101") - .Case("gfx1102", "AMD_GPU_GFX1102") - .Case("gfx1103", "AMD_GPU_GFX1103") - .Case("gfx1150", "AMD_GPU_GFX1150") - .Case("gfx1151", "AMD_GPU_GFX1151") - .Case("gfx1200", "AMD_GPU_GFX1200") - .Case("gfx1201", "AMD_GPU_GFX1201") - .Default(""); - if (!Ext.empty()) { - Macro = "__SYCL_TARGET_"; - Macro += Ext; - Macro += "__"; - } - return Macro; -} - -} // namespace clang diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 5931a77a85fec..4f04d83c9c068 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -240,6 +240,61 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case OffloadArch::GFX1201: case OffloadArch::AMDGCNSPIRV: case OffloadArch::Generic: + case OffloadArch::SKYLAKEAVX512: + case OffloadArch::COREAVX2: + case OffloadArch::COREI7AVX: + case OffloadArch::COREI7: + case OffloadArch::WESTMERE: + case OffloadArch::SANDYBRIDGE: + case OffloadArch::IVYBRIDGE: + case OffloadArch::BROADWELL: + case OffloadArch::COFFEELAKE: + case OffloadArch::ALDERLAKE: + case OffloadArch::SKYLAKE: + case OffloadArch::SKX: + case OffloadArch::CASCADELAKE: + case OffloadArch::ICELAKECLIENT: + case OffloadArch::ICELAKESERVER: + case OffloadArch::SAPPHIRERAPIDS: + case OffloadArch::GRANITERAPIDS: + case OffloadArch::BDW: + case OffloadArch::SKL: + case OffloadArch::KBL: + case OffloadArch::CFL: + case OffloadArch::APL: + case OffloadArch::BXT: + case OffloadArch::GLK: + case OffloadArch::WHL: + case OffloadArch::AML: + case OffloadArch::CML: + case OffloadArch::ICLLP: + case OffloadArch::ICL: + case OffloadArch::EHL: + case OffloadArch::JSL: + case OffloadArch::TGLLP: + case OffloadArch::TGL: + case OffloadArch::RKL: + case OffloadArch::ADL_S: + case OffloadArch::RPL_S: + case OffloadArch::ADL_P: + case OffloadArch::ADL_N: + case OffloadArch::DG1: + case OffloadArch::ACM_G10: + case OffloadArch::DG2_G10: + case OffloadArch::ACM_G11: + case OffloadArch::DG2_G11: + case OffloadArch::ACM_G12: + case OffloadArch::DG2_G12: + case OffloadArch::PVC: + case OffloadArch::PVC_VG: + case OffloadArch::MTL_U: + case OffloadArch::MTL_S: + case OffloadArch::ARL_U: + case OffloadArch::ARL_S: + case OffloadArch::MTL_H: + case OffloadArch::ARL_H: + case OffloadArch::BMG_G21: + case OffloadArch::LNL_M: case OffloadArch::LAST: break; case OffloadArch::UNKNOWN: diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index feb2448297542..80990eeed7511 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2335,6 +2335,61 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) { case OffloadArch::GFX1201: case OffloadArch::AMDGCNSPIRV: case OffloadArch::Generic: + case OffloadArch::SKYLAKEAVX512: + case OffloadArch::COREAVX2: + case OffloadArch::COREI7AVX: + case OffloadArch::COREI7: + case OffloadArch::WESTMERE: + case OffloadArch::SANDYBRIDGE: + case OffloadArch::IVYBRIDGE: + case OffloadArch::BROADWELL: + case OffloadArch::COFFEELAKE: + case OffloadArch::ALDERLAKE: + case OffloadArch::SKYLAKE: + case OffloadArch::SKX: + case OffloadArch::CASCADELAKE: + case OffloadArch::ICELAKECLIENT: + case OffloadArch::ICELAKESERVER: + case OffloadArch::SAPPHIRERAPIDS: + case OffloadArch::GRANITERAPIDS: + case OffloadArch::BDW: + case OffloadArch::SKL: + case OffloadArch::KBL: + case OffloadArch::CFL: + case OffloadArch::APL: + case OffloadArch::BXT: + case OffloadArch::GLK: + case OffloadArch::WHL: + case OffloadArch::AML: + case OffloadArch::CML: + case OffloadArch::ICLLP: + case OffloadArch::ICL: + case OffloadArch::EHL: + case OffloadArch::JSL: + case OffloadArch::TGLLP: + case OffloadArch::TGL: + case OffloadArch::RKL: + case OffloadArch::ADL_S: + case OffloadArch::RPL_S: + case OffloadArch::ADL_P: + case OffloadArch::ADL_N: + case OffloadArch::DG1: + case OffloadArch::ACM_G10: + case OffloadArch::DG2_G10: + case OffloadArch::ACM_G11: + case OffloadArch::DG2_G11: + case OffloadArch::ACM_G12: + case OffloadArch::DG2_G12: + case OffloadArch::PVC: + case OffloadArch::PVC_VG: + case OffloadArch::MTL_U: + case OffloadArch::MTL_S: + case OffloadArch::ARL_U: + case OffloadArch::ARL_S: + case OffloadArch::MTL_H: + case OffloadArch::ARL_H: + case OffloadArch::BMG_G21: + case OffloadArch::LNL_M: case OffloadArch::UNUSED: case OffloadArch::UNKNOWN: break; diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index 23c14c8f07200..20e677e6f84f1 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -14,7 +14,7 @@ // target-specific device code. //===---------------------------------------------------------------------===// -#include "clang/Basic/SYCL.h" +#include "clang/Basic/Cuda.h" #include "clang/Basic/Version.h" #include "llvm/ADT/StringExtras.h" @@ -455,10 +455,10 @@ static Error runAOTCompileIntelGPU(StringRef InputFile, const ArgList &Args) { /// SYCL AOT compilation step. static Error runAOTCompile(StringRef InputFile, const ArgList &Args) { StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); - SYCLSupportedIntelArchs OffloadArch = StringToOffloadArchSYCL(Arch); - if (IsSYCLSupportedIntelGPUArch(OffloadArch)) + OffloadArch OffloadArch = StringToOffloadArch(Arch); + if (IsIntelGPUArch(OffloadArch)) return runAOTCompileIntelGPU(InputFile, Args); - if (IsSYCLSupportedIntelCPUArch(OffloadArch)) + if (IsIntelCPUArch(OffloadArch)) return runAOTCompileIntelCPU(InputFile, Args); return createStringError(inconvertibleErrorCode(), "Unsupported arch"); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits