jhuber6 created this revision. jhuber6 added reviewers: jdoerfert, tianshilei1992, JonChesterfield, tra, yaxunl. Herald added a subscriber: guansong. Herald added a project: All. jhuber6 requested review of this revision. Herald added subscribers: cfe-commits, sstefan1, MaskRay. Herald added a project: clang.
This patch adds support for '--offload-arch=native' to OpenMP offloading. This will automatically generate the toolchains required to target whatever GPUs the user has installed. Getting this to work requires a bit of a hack: we need the ToolChain in order to launch the program that searches for the installed GPUs, but that ToolChain has not been built yet at the point where the offloading triples are deduced. I had to temporarily create the ToolChain and also add some logic (the new `Query` flag) to suppress the regular warnings & errors while querying. Depends on D141078 <https://reviews.llvm.org/D141078> Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D141105 Files: clang/include/clang/Driver/Driver.h clang/lib/Driver/Driver.cpp
Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -848,9 +848,29 @@ HostTC->getTriple()); // Attempt to deduce the offloading triple from the set of architectures. - // We can only correctly deduce NVPTX / AMDGPU triples currently. - llvm::DenseSet<StringRef> Archs = - getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, nullptr); + // We can only correctly deduce NVPTX / AMDGPU triples currently. We need + // to temporarily create these toolchains so that we can access tools for + // inferring architectures. + llvm::DenseSet<StringRef> Archs; + if (NVPTXTriple) { + auto TempTC = std::make_unique<toolchains::CudaToolChain>( + *this, *NVPTXTriple, *HostTC, C.getInputArgs()); + for (StringRef Arch : getOffloadArchs( + C, C.getArgs(), Action::OFK_OpenMP, &*TempTC, true)) + Archs.insert(Arch); + } + if (AMDTriple) { + auto TempTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>( + *this, *AMDTriple, *HostTC, C.getInputArgs()); + for (StringRef Arch : getOffloadArchs( + C, C.getArgs(), Action::OFK_OpenMP, &*TempTC, true)) + Archs.insert(Arch); + } + if (!AMDTriple && !NVPTXTriple) { + for (StringRef Arch : + getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, nullptr, true)) + Archs.insert(Arch); + } for (StringRef Arch : Archs) { if (NVPTXTriple && IsNVIDIAGpuArch(StringToCudaArch( getProcessorFromTargetID(*NVPTXTriple, Arch)))) { @@ -4182,16 +4202,17 @@ static StringRef getCanonicalArchString(Compilation &C, const llvm::opt::DerivedArgList &Args, StringRef ArchStr, - const llvm::Triple &Triple) { + const llvm::Triple &Triple, + bool Query = false) { // Lookup the CUDA / HIP architecture string. Only report an error if we were // expecting the triple to be only NVPTX / AMDGPU. 
CudaArch Arch = StringToCudaArch(getProcessorFromTargetID(Triple, ArchStr)); - if (Triple.isNVPTX() && + if (!Query && Triple.isNVPTX() && (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch))) { C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) << "CUDA" << ArchStr; return StringRef(); - } else if (Triple.isAMDGPU() && + } else if (!Query && Triple.isAMDGPU() && (Arch == CudaArch::UNKNOWN || !IsAMDGpuArch(Arch))) { C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) << "HIP" << ArchStr; @@ -4234,7 +4255,8 @@ llvm::DenseSet<StringRef> Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind, const ToolChain *TC) const { + Action::OffloadKind Kind, const ToolChain *TC, + bool Query) const { if (!TC) TC = &C.getDefaultToolChain(); @@ -4271,18 +4293,22 @@ if (Arch == "native") { auto GPUsOrErr = TC->getSystemGPUArchs(Args); if (!GPUsOrErr) { - TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch) - << llvm::Triple::getArchTypeName(TC->getArch()) - << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; + if (Query) + llvm::consumeError(GPUsOrErr.takeError()); + else + TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << llvm::Triple::getArchTypeName(TC->getArch()) + << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; continue; } - for (auto ArchStr : *GPUsOrErr) - Archs.insert( - getCanonicalArchString(C, Args, ArchStr, TC->getTriple())); + for (auto ArchStr : *GPUsOrErr) { + Archs.insert(getCanonicalArchString( + C, Args, Args.MakeArgString(ArchStr), TC->getTriple(), Query)); + } } else { StringRef ArchStr = - getCanonicalArchString(C, Args, Arch, TC->getTriple()); + getCanonicalArchString(C, Args, Arch, TC->getTriple(), Query); if (ArchStr.empty()) return Archs; Archs.insert(ArchStr); @@ -4294,7 +4320,7 @@ Archs.clear(); } else { StringRef ArchStr = - getCanonicalArchString(C, Args, Arch, TC->getTriple()); + getCanonicalArchString(C, Args, Arch, TC->getTriple(), 
Query); if (ArchStr.empty()) return Archs; Archs.erase(ArchStr); @@ -4309,6 +4335,10 @@ C.setContainsError(); } + // Skip filling defaults if we're just querying what is availible. + if (Query) + return Archs; + if (Archs.empty()) { if (Kind == Action::OFK_Cuda) Archs.insert(CudaArchToString(CudaArch::CudaDefault)); Index: clang/include/clang/Driver/Driver.h =================================================================== --- clang/include/clang/Driver/Driver.h +++ clang/include/clang/Driver/Driver.h @@ -481,10 +481,11 @@ /// Returns the set of bound architectures active for this offload kind. /// If there are no bound architctures we return a set containing only the - /// empty string. + /// empty string. The \p Query option is used to suppress errors on failure. llvm::DenseSet<StringRef> getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind, const ToolChain *TC) const; + Action::OffloadKind Kind, const ToolChain *TC, + bool Query = false) const; /// Check that the file referenced by Value exists. If it doesn't, /// issue a diagnostic and return false.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits