pdhaliwal updated this revision to Diff 317810. pdhaliwal added a comment. Herald added a subscriber: mgorny.
> Won't this just prevent us from building clang due to the missing cmake changes? It compiles and builds fine; however, I wasn't aware that such sanity checking existed. It turns out that unknown files inside llvm/ cause CMake to report an error, but no such error is reported inside clang. Maybe such checks are not enabled inside clang. Anyway, thanks for pointing that out; I will keep it in mind in the future. The idea for this patch was to introduce the AMDGPUToolChain classes without much of their functionality, in order to keep the patch size in check. A second patch would then have integrated the toolchain with the driver, along with testing. But in the interval between the two patches, bare files would have existed in the tree (never built or tested). I have now updated this patch to include a somewhat functional driver along with tests. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D94961/new/ https://reviews.llvm.org/D94961 Files: clang/lib/Driver/CMakeLists.txt clang/lib/Driver/Driver.cpp clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/lib/Driver/ToolChains/AMDGPUOpenMP.h clang/test/Driver/amdgpu-openmp-toolchain.c
Index: clang/test/Driver/amdgpu-openmp-toolchain.c =================================================================== --- /dev/null +++ clang/test/Driver/amdgpu-openmp-toolchain.c @@ -0,0 +1,35 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s 2>&1 \ +// RUN: | FileCheck %s + +// verify the tools invocations +// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}} +// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}} +// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc" "-emit-llvm-uselists"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device"{{.*}}"-fopenmp" "-fopenmp-cuda-parallel-target-regions"{{.*}}"-fopenmp-is-device"{{.*}}"-o" {{.*}}amdgpu-openmp-toolchain-{{.*}}.bc{{.*}}"-x" "c"{{.*}}amdgpu-openmp-toolchain.c{{.*}} +// CHECK: llvm-link{{.*}}amdgpu-openmp-toolchain-{{.*}}.bc" "-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" +// CHECK: opt{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}} "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-optimized-{{.*}}.bc" +// CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-optimized-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o" +// CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o" +// CHECK: clang-offload-wrapper{{.*}}"-target" "x86_64-unknown-linux-gnu" "-o" "{{.*}}a-{{.*}}.bc" {{.*}}amdgpu-openmp-toolchain-{{.*}}.out" +// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-o" "{{.*}}a-{{.*}}.o" "-x" "ir" "{{.*}}a-{{.*}}.bc" +// CHECK: ld{{.*}}"-o" "a.out"{{.*}}"{{.*}}amdgpu-openmp-toolchain-{{.*}}.o" "{{.*}}a-{{.*}}.o" "-lomp" 
"-lomptarget" + +// RUN: %clang -ccc-print-phases -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-PHASES %s +// phases +// CHECK-PHASES: 0: input, "{{.*}}amdgpu-openmp-toolchain.c", c, (host-openmp) +// CHECK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp) +// CHECK-PHASES: 2: compiler, {1}, ir, (host-openmp) +// CHECK-PHASES: 3: backend, {2}, assembler, (host-openmp) +// CHECK-PHASES: 4: assembler, {3}, object, (host-openmp) +// CHECK-PHASES: 5: input, "{{.*}}amdgpu-openmp-toolchain.c", c, (device-openmp) +// CHECK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp) +// CHECK-PHASES: 7: compiler, {6}, ir, (device-openmp) +// CHECK-PHASES: 8: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (amdgcn-amd-amdhsa)" {7}, ir +// CHECK-PHASES: 9: linker, {8}, image, (device-openmp) +// CHECK-PHASES: 10: offload, "device-openmp (amdgcn-amd-amdhsa)" {9}, image +// CHECK-PHASES: 11: clang-offload-wrapper, {10}, ir, (host-openmp) +// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp) +// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp) +// CHECK-PHASES: 14: linker, {4, 13}, image, (host-openmp) + Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.h =================================================================== --- /dev/null +++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.h @@ -0,0 +1,123 @@ +//===- AMDGPUOpenMP.h - AMDGPUOpenMP ToolChain Implementation -*- C++ -*---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPUOPENMP_H +#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPUOPENMP_H + +#include "AMDGPU.h" +#include "clang/Driver/Tool.h" +#include "clang/Driver/ToolChain.h" + +namespace clang { +namespace driver { + +namespace tools { + +namespace AMDGCN { +// Runs llvm-link/opt/llc/lld, which links multiple LLVM bitcode, together with +// device library, then compiles it to ISA in a shared object. +class LLVM_LIBRARY_VISIBILITY OpenMPLinker : public Tool { +public: + OpenMPLinker(const ToolChain &TC) + : Tool("AMDGCN::OpenMPLinker", "amdgcn-link", TC) {} + + bool hasIntegratedCPP() const override { return false; } + + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; + +private: + /// \return llvm-link output file name. + const char *constructLLVMLinkCommand(Compilation &C, const JobAction &JA, + const InputInfoList &Inputs, + const llvm::opt::ArgList &Args, + llvm::StringRef SubArchName, + llvm::StringRef OutputFilePrefix) const; + + /// \return opt output file name. + const char *constructOptCommand(Compilation &C, const JobAction &JA, + const InputInfoList &Inputs, + const llvm::opt::ArgList &Args, + llvm::StringRef SubArchName, + llvm::StringRef OutputFilePrefix, + const char *InputFileName) const; + + /// \return llc output file name. 
+ const char *constructLlcCommand(Compilation &C, const JobAction &JA, + const InputInfoList &Inputs, + const llvm::opt::ArgList &Args, + llvm::StringRef SubArchName, + llvm::StringRef OutputFilePrefix, + const char *InputFileName, + bool OutputIsAsm = false) const; + + void constructLldCommand(Compilation &C, const JobAction &JA, + const InputInfoList &Inputs, const InputInfo &Output, + const llvm::opt::ArgList &Args, + const char *InputFileName) const; +}; + +} // end namespace AMDGCN +} // end namespace tools + +namespace toolchains { + +class LLVM_LIBRARY_VISIBILITY AMDGPUOpenMPToolChain final + : public ROCMToolChain { +public: + AMDGPUOpenMPToolChain(const Driver &D, const llvm::Triple &Triple, + const ToolChain &HostTC, + const llvm::opt::ArgList &Args); + + const llvm::Triple *getAuxTriple() const override { + return &HostTC.getTriple(); + } + + llvm::opt::DerivedArgList * + TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const override; + void + addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args, + Action::OffloadKind DeviceOffloadKind) const override; + + bool useIntegratedAs() const override { return true; } + bool isCrossCompiling() const override { return true; } + bool isPICDefault() const override { return false; } + bool isPIEDefault() const override { return false; } + bool isPICDefaultForced() const override { return false; } + bool SupportsProfiling() const override { return false; } + bool IsMathErrnoDefault() const override { return false; } + + void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override; + CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override; + void + AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; + void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const 
override; + + SanitizerMask getSupportedSanitizers() const override; + + VersionTuple + computeMSVCVersion(const Driver *D, + const llvm::opt::ArgList &Args) const override; + + const ToolChain &HostTC; + +protected: + Tool *buildLinker() const override; +}; + +} // end namespace toolchains +} // end namespace driver +} // end namespace clang + +#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPUOPENMP_H Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp =================================================================== --- /dev/null +++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -0,0 +1,302 @@ +//===- AMDGPUOpenMP.cpp - AMDGPUOpenMP ToolChain Implementation -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUOpenMP.h" +#include "AMDGPU.h" +#include "CommonArgs.h" +#include "InputInfo.h" +#include "clang/Driver/Compilation.h" +#include "clang/Driver/Driver.h" +#include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Options.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" + +using namespace clang::driver; +using namespace clang::driver::toolchains; +using namespace clang::driver::tools; +using namespace clang; +using namespace llvm::opt; + +namespace { + +static const char *getOutputFileName(Compilation &C, StringRef Base, + const char *Postfix, + const char *Extension) { + const char *OutputFileName; + if (C.getDriver().isSaveTempsEnabled()) { + OutputFileName = + C.getArgs().MakeArgString(Base.str() + Postfix + "." 
+ Extension); + } else { + std::string TmpName = + C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension); + OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName)); + } + return OutputFileName; +} + +static void addOptLevelArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs, + bool IsLlc = false) { + if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { + StringRef OOpt = "3"; + if (A->getOption().matches(options::OPT_O4) || + A->getOption().matches(options::OPT_Ofast)) + OOpt = "3"; + else if (A->getOption().matches(options::OPT_O0)) + OOpt = "0"; + else if (A->getOption().matches(options::OPT_O)) { + // Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3 + // so we map -Os/-Oz to -O2. + // Only clang supports -Og, and maps it to -O1. + // We map anything else to -O2. + OOpt = llvm::StringSwitch<const char *>(A->getValue()) + .Case("1", "1") + .Case("2", "2") + .Case("3", "3") + .Case("s", IsLlc ? "2" : "s") + .Case("z", IsLlc ? "2" : "z") + .Case("g", "1") + .Default("2"); + } + CmdArgs.push_back(Args.MakeArgString("-O" + OOpt)); + } +} +} // namespace + +const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand( + Compilation &C, const JobAction &JA, const InputInfoList &Inputs, + const ArgList &Args, StringRef SubArchName, + StringRef OutputFilePrefix) const { + ArgStringList CmdArgs; + + for (const auto &II : Inputs) + if (II.isFilename()) + CmdArgs.push_back(II.getFilename()); + // Add an intermediate output file. 
+ CmdArgs.push_back("-o"); + const char *OutputFileName = + getOutputFileName(C, OutputFilePrefix, "-linked", "bc"); + CmdArgs.push_back(OutputFileName); + const char *Exec = + Args.MakeArgString(getToolChain().GetProgramPath("llvm-link")); + C.addCommand(std::make_unique<Command>( + JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs, + InputInfo(&JA, Args.MakeArgString(OutputFileName)))); + return OutputFileName; +} + +const char *AMDGCN::OpenMPLinker::constructOptCommand( + Compilation &C, const JobAction &JA, const InputInfoList &Inputs, + const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, + llvm::StringRef OutputFilePrefix, const char *InputFileName) const { + // Construct opt command. + ArgStringList OptArgs; + // The input to opt is the output from llvm-link. + OptArgs.push_back(InputFileName); + // Pass optimization arg to opt. + addOptLevelArgs(Args, OptArgs); + OptArgs.push_back("-mtriple=amdgcn-amd-amdhsa"); + OptArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName)); + + for (const Arg *A : Args.filtered(options::OPT_mllvm)) { + OptArgs.push_back(A->getValue(0)); + } + + OptArgs.push_back("-o"); + const char *OutputFileName = + getOutputFileName(C, OutputFilePrefix, "-optimized", "bc"); + OptArgs.push_back(OutputFileName); + const char *OptExec = + Args.MakeArgString(getToolChain().GetProgramPath("opt")); + C.addCommand(std::make_unique<Command>( + JA, *this, ResponseFileSupport::AtFileCurCP(), OptExec, OptArgs, Inputs, + InputInfo(&JA, Args.MakeArgString(OutputFileName)))); + return OutputFileName; +} + +const char *AMDGCN::OpenMPLinker::constructLlcCommand( + Compilation &C, const JobAction &JA, const InputInfoList &Inputs, + const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, + llvm::StringRef OutputFilePrefix, const char *InputFileName, + bool OutputIsAsm) const { + // Construct llc command. + ArgStringList LlcArgs; + // The input to llc is the output from opt. 
+ LlcArgs.push_back(InputFileName); + // Pass optimization arg to llc. + addOptLevelArgs(Args, LlcArgs, /*IsLlc=*/true); + LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa"); + LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName)); + LlcArgs.push_back( + Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj"))); + + for (const Arg *A : Args.filtered(options::OPT_mllvm)) { + LlcArgs.push_back(A->getValue(0)); + } + + // Add output filename + LlcArgs.push_back("-o"); + const char *LlcOutputFile = + getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o"); + LlcArgs.push_back(LlcOutputFile); + const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc")); + C.addCommand(std::make_unique<Command>( + JA, *this, ResponseFileSupport::AtFileCurCP(), Llc, LlcArgs, Inputs, + InputInfo(&JA, Args.MakeArgString(LlcOutputFile)))); + return LlcOutputFile; +} + +void AMDGCN::OpenMPLinker::constructLldCommand( + Compilation &C, const JobAction &JA, const InputInfoList &Inputs, + const InputInfo &Output, const llvm::opt::ArgList &Args, + const char *InputFileName) const { + // Construct lld command. + // The output from ld.lld is an HSA code object file. + ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", + "-shared", "-o", Output.getFilename(), + InputFileName}; + + const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld")); + C.addCommand(std::make_unique<Command>( + JA, *this, ResponseFileSupport::AtFileCurCP(), Lld, LldArgs, Inputs, + InputInfo(&JA, Args.MakeArgString(Output.getFilename())))); +} + +// For amdgcn the inputs of the linker job are device bitcode and output is +// object file. It calls llvm-link, opt, llc, then lld steps. 
+void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target"); + + StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ); + assert(GPUArch.startswith("gfx") && "Unsupported sub arch"); + + // Prefix for temporary file name. + std::string Prefix; + for (const auto &II : Inputs) + if (II.isFilename()) + Prefix = + llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch.str(); + assert(Prefix.length() && "no linker inputs are files "); + + // Each command outputs different files. + const char *LLVMLinkCommand = + constructLLVMLinkCommand(C, JA, Inputs, Args, GPUArch, Prefix); + const char *OptCommand = constructOptCommand(C, JA, Inputs, Args, GPUArch, + Prefix, LLVMLinkCommand); + const char *LlcCommand = + constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, OptCommand); + constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand); +} + +AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D, + const llvm::Triple &Triple, + const ToolChain &HostTC, + const ArgList &Args) + : ROCMToolChain(D, Triple, Args), HostTC(HostTC) { + // Lookup binaries into the driver directory, this is used to + // discover the clang-offload-bundler executable. 
+ getProgramPaths().push_back(getDriver().Dir); +} + +void AMDGPUOpenMPToolChain::addClangTargetOptions( + const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, + Action::OffloadKind DeviceOffloadingKind) const { + HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); + + StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); + assert(!GpuArch.empty() && "Must have an explicit GPU arch."); + assert(DeviceOffloadingKind == Action::OFK_OpenMP && + "Only OpenMP offloading kinds are supported."); + + CC1Args.push_back("-target-cpu"); + CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch)); + CC1Args.push_back("-fcuda-is-device"); + + // Default to "hidden" visibility, as object level linking will not be + // supported for the foreseeable future. + if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, + options::OPT_fvisibility_ms_compat) && + DeviceOffloadingKind != Action::OFK_OpenMP) { + CC1Args.append({"-fvisibility", "hidden"}); + CC1Args.push_back("-fapply-global-visibility-to-externs"); + } +} + +llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs( + const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) const { + DerivedArgList *DAL = + HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); + if (!DAL) + DAL = new DerivedArgList(Args.getBaseArgs()); + + const OptTable &Opts = getDriver().getOpts(); + + if (DeviceOffloadKind != Action::OFK_OpenMP) { + for (Arg *A : Args) { + DAL->append(A); + } + } + + if (!BoundArch.empty()) { + DAL->eraseArg(options::OPT_march_EQ); + DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), + BoundArch); + } + + return DAL; +} + +Tool *AMDGPUOpenMPToolChain::buildLinker() const { + assert(getTriple().isAMDGCN()); + return new tools::AMDGCN::OpenMPLinker(*this); +} + +void AMDGPUOpenMPToolChain::addClangWarningOptions( + ArgStringList &CC1Args) const { + HostTC.addClangWarningOptions(CC1Args); +} + 
+ToolChain::CXXStdlibType +AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const { + return HostTC.GetCXXStdlibType(Args); +} + +void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs( + const ArgList &DriverArgs, ArgStringList &CC1Args) const { + HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); +} + +void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args, + ArgStringList &CC1Args) const { + HostTC.AddIAMCUIncludeArgs(Args, CC1Args); +} + +SanitizerMask AMDGPUOpenMPToolChain::getSupportedSanitizers() const { + // The AMDGPUOpenMPToolChain only supports sanitizers in the sense that it + // allows sanitizer arguments on the command line if they are supported by the + // host toolchain. The AMDGPUOpenMPToolChain will actually ignore any command + // line arguments for any of these "supported" sanitizers. That means that no + // sanitization of device code is actually supported at this time. + // + // This behavior is necessary because the host and device toolchains + // invocations often share the command line, so the device toolchain must + // tolerate flags meant only for the host toolchain. 
+ return HostTC.getSupportedSanitizers(); +} + +VersionTuple +AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D, + const ArgList &Args) const { + return HostTC.computeMSVCVersion(D, Args); +} Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -10,6 +10,7 @@ #include "InputInfo.h" #include "ToolChains/AIX.h" #include "ToolChains/AMDGPU.h" +#include "ToolChains/AMDGPUOpenMP.h" #include "ToolChains/AVR.h" #include "ToolChains/Ananas.h" #include "ToolChains/BareMetal.h" @@ -751,6 +752,19 @@ CudaTC = std::make_unique<toolchains::CudaToolChain>( *this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP); TC = CudaTC.get(); + } else if (TT.isAMDGCN()) { + const ToolChain *HostTC = + C.getSingleOffloadToolChain<Action::OFK_Host>(); + const llvm::Triple &HostTriple = HostTC->getTriple(); + llvm::Triple AMDGPUTriple("amdgcn-amd-amdhsa"); + auto &AMDGPUOpenMPTC = + ToolChains[AMDGPUTriple.str() + "/" + HostTriple.str()]; + if (!AMDGPUOpenMPTC) { + AMDGPUOpenMPTC = + std::make_unique<toolchains::AMDGPUOpenMPToolChain>( + *this, AMDGPUTriple, *HostTC, C.getInputArgs()); + } + TC = AMDGPUOpenMPTC.get(); } else TC = &getToolChain(C.getInputArgs(), TT); C.addOffloadDeviceToolChain(TC, Action::OFK_OpenMP); @@ -2976,6 +2990,15 @@ assert(OpenMPDeviceActions.size() == ToolChains.size() && "Number of OpenMP actions and toolchains do not match."); + const ToolChain *OpenMPTC = + C.getSingleOffloadToolChain<Action::OFK_OpenMP>(); + + // amdgcn does not support linking of object files, therefore we skip + // backend and assemble phases to output LLVM IR. + if (OpenMPTC->getTriple().isAMDGCN() && + (CurPhase == phases::Backend || CurPhase == phases::Assemble)) + return ABRT_Success; + // The host only depends on device action in the linking phase, when all // the device images have to be embedded in the host image. 
if (CurPhase == phases::Link) { Index: clang/lib/Driver/CMakeLists.txt =================================================================== --- clang/lib/Driver/CMakeLists.txt +++ clang/lib/Driver/CMakeLists.txt @@ -36,6 +36,7 @@ ToolChains/AIX.cpp ToolChains/Ananas.cpp ToolChains/AMDGPU.cpp + ToolChains/AMDGPUOpenMP.cpp ToolChains/AVR.cpp ToolChains/BareMetal.cpp ToolChains/Clang.cpp
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits