[clang] 10c779d - [AMDGPU][OpenMP] Emit textual IR for -emit-llvm -S
Author: Pushpinder Singh Date: 2021-05-13T01:34:03Z New Revision: 10c779d2065f7e216660f1687244269afcee13b1 URL: https://github.com/llvm/llvm-project/commit/10c779d2065f7e216660f1687244269afcee13b1 DIFF: https://github.com/llvm/llvm-project/commit/10c779d2065f7e216660f1687244269afcee13b1.diff LOG: [AMDGPU][OpenMP] Emit textual IR for -emit-llvm -S Previously clang would print a binary blob into the bundled file for amdgcn. With this patch, it will instead print textual IR as expected. Reviewed By: JonChesterfield, ronlieb Differential Revision: https://reviews.llvm.org/D102065 Change-Id: I10c0127ab7357787769fdf9a2edd4b3071e790a1 Added: Modified: clang/lib/Driver/ToolChains/Clang.cpp clang/test/Driver/amdgpu-openmp-toolchain.c Removed: diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1b2c5b0de73da..061b578cc6ff7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4394,7 +4394,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-emit-llvm"); } else if (JA.getType() == types::TY_LLVM_BC || JA.getType() == types::TY_LTO_BC) { - CmdArgs.push_back("-emit-llvm-bc"); + // Emit textual llvm IR for AMDGPU offloading for -emit-llvm -S + if (Triple.isAMDGCN() && IsOpenMPDevice && Args.hasArg(options::OPT_S) && + Args.hasArg(options::OPT_emit_llvm)) { +CmdArgs.push_back("-emit-llvm"); + } else { +CmdArgs.push_back("-emit-llvm-bc"); + } } else if (JA.getType() == types::TY_IFS || JA.getType() == types::TY_IFS_CPP) { StringRef ArgStr = diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c b/clang/test/Driver/amdgpu-openmp-toolchain.c index 5f2bdff549607..cee9797af281a 100644 --- a/clang/test/Driver/amdgpu-openmp-toolchain.c +++ b/clang/test/Driver/amdgpu-openmp-toolchain.c @@ -6,7 +6,7 @@ // verify the tools invocations // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}} // CHECK: clang{{.*}}"-cc1" "-triple" 
"x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}} -// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}} +// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm-bc"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device"{{.*}}"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}} // CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" // CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o" // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o" @@ -71,3 +71,6 @@ // CHECK-C: "x86_64-unknown-linux-gnu" - "clang" // CHECK-C: "x86_64-unknown-linux-gnu" - "clang::as" // CHECK-C: "x86_64-unknown-linux-gnu" - "offload bundler" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR +// CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm" ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] fcf03e7 - [OpenMP] Add OpenMP offloading toolchain for AMDGPU
Author: Pushpinder Singh Date: 2021-02-03T00:42:52-05:00 New Revision: fcf03e728007fa4aa1f64985a3a9bf91fd476d78 URL: https://github.com/llvm/llvm-project/commit/fcf03e728007fa4aa1f64985a3a9bf91fd476d78 DIFF: https://github.com/llvm/llvm-project/commit/fcf03e728007fa4aa1f64985a3a9bf91fd476d78.diff LOG: [OpenMP] Add OpenMP offloading toolchain for AMDGPU This patch adds AMDGPUOpenMPToolChain for supporting OpenMP offloading to AMD GPU's. Originally authored by Greg Rodgers Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D94961 Added: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/lib/Driver/ToolChains/AMDGPUOpenMP.h clang/test/Driver/amdgpu-openmp-toolchain.c Modified: clang/lib/Driver/CMakeLists.txt clang/lib/Driver/Driver.cpp clang/lib/Driver/ToolChains/AMDGPU.h clang/lib/Driver/ToolChains/HIP.h Removed: diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt index aeffcf0bb43a..7542daf3b8f7 100644 --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -36,6 +36,7 @@ add_clang_library(clangDriver ToolChains/AIX.cpp ToolChains/Ananas.cpp ToolChains/AMDGPU.cpp + ToolChains/AMDGPUOpenMP.cpp ToolChains/AVR.cpp ToolChains/BareMetal.cpp ToolChains/Clang.cpp diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 418e1d3e8ec9..21e602436549 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -10,6 +10,7 @@ #include "InputInfo.h" #include "ToolChains/AIX.h" #include "ToolChains/AMDGPU.h" +#include "ToolChains/AMDGPUOpenMP.h" #include "ToolChains/AVR.h" #include "ToolChains/Ananas.h" #include "ToolChains/BareMetal.h" @@ -739,18 +740,27 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, Diag(clang::diag::err_drv_invalid_omp_target) << Val; else { const ToolChain *TC; -// CUDA toolchains have to be selected differently. They pair host +// Device toolchains have to be selected differently. They pair host // and device in their implementation. 
-if (TT.isNVPTX()) { +if (TT.isNVPTX() || TT.isAMDGCN()) { const ToolChain *HostTC = C.getSingleOffloadToolChain(); assert(HostTC && "Host toolchain should be always defined."); - auto &CudaTC = + auto &DeviceTC = ToolChains[TT.str() + "/" + HostTC->getTriple().normalize()]; - if (!CudaTC) -CudaTC = std::make_unique( -*this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP); - TC = CudaTC.get(); + if (!DeviceTC) { +if (TT.isNVPTX()) + DeviceTC = std::make_unique( + *this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP); +else if (TT.isAMDGCN()) + DeviceTC = + std::make_unique( + *this, TT, *HostTC, C.getInputArgs()); +else + assert(DeviceTC && "Device toolchain not defined."); + } + + TC = DeviceTC.get(); } else TC = &getToolChain(C.getInputArgs(), TT); C.addOffloadDeviceToolChain(TC, Action::OFK_OpenMP); diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h index 55ef6e01967e..1aa0849ee922 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -64,6 +64,13 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF { bool IsIntegratedAssemblerDefault() const override { return true; } bool IsMathErrnoDefault() const override { return false; } + bool useIntegratedAs() const override { return true; } + bool isCrossCompiling() const override { return true; } + bool isPICDefault() const override { return false; } + bool isPIEDefault() const override { return false; } + bool isPICDefaultForced() const override { return false; } + bool SupportsProfiling() const override { return false; } + llvm::opt::DerivedArgList * TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const override; diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp new file mode 100644 index ..43b07360625f --- /dev/null +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -0,0 +1,262 @@ +//===- 
AMDGPUOpenMP.cpp - AMDGPUOpenMP ToolChain Implementation -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//
[clang] 79401b4 - [OpenMP][AMDGPU] Add support for linking libomptarget bitcode
Author: Pushpinder Singh Date: 2021-02-12T00:42:41-05:00 New Revision: 79401b43ce4e3aa856c50e78b38327e3ff4ae9eb URL: https://github.com/llvm/llvm-project/commit/79401b43ce4e3aa856c50e78b38327e3ff4ae9eb DIFF: https://github.com/llvm/llvm-project/commit/79401b43ce4e3aa856c50e78b38327e3ff4ae9eb.diff LOG: [OpenMP][AMDGPU] Add support for linking libomptarget bitcode This patch uses the existing logic of CUDA for searching libomptarget and extracts it to a common method. Reviewed By: JonChesterfield, tianshilei1992 Differential Revision: https://reviews.llvm.org/D96248 Added: clang/test/Driver/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx803.bc clang/test/Driver/Inputs/hip_dev_lib/libomptarget-amdgcn-gfx906.bc Modified: clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/lib/Driver/ToolChains/CommonArgs.cpp clang/lib/Driver/ToolChains/CommonArgs.h clang/lib/Driver/ToolChains/Cuda.cpp clang/test/Driver/amdgpu-openmp-toolchain.c Removed: diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 00a2cae52e9b..6a37a15c6564 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -261,7 +261,7 @@ def err_drv_omp_host_target_not_supported : Error< def err_drv_expecting_fopenmp_with_fopenmp_targets : Error< "The option -fopenmp-targets must be used in conjunction with a -fopenmp option compatible with offloading, please use -fopenmp=libomp or -fopenmp=libiomp5.">; def err_drv_omp_offload_target_missingbcruntime : Error< - "No library '%0' found in the default clang lib directory or in LIBRARY_PATH. Please use --libomptarget-nvptx-bc-path to specify nvptx bitcode library.">; + "No library '%0' found in the default clang lib directory or in LIBRARY_PATH. 
Please use --libomptarget-%1-bc-path to specify %1 bitcode library.">; def err_drv_omp_offload_target_bcruntime_not_found : Error<"Bitcode library '%0' does not exist.">; def warn_drv_omp_offload_target_duplicate : Warning< "The OpenMP offloading target '%0' is similar to target '%1' already specified - will be ignored.">, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e2a955ea0243..bcb9916a4abd 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -942,6 +942,8 @@ def fuse_cuid_EQ : Joined<["-"], "fuse-cuid=">, "file path and command line options) | 'random' (ID's generated as " "random numbers) | 'none' (disabled). Default is 'hash'. This option " "will be overriden by option '-cuid=[ID]' if it is specified." >; +def libomptarget_amdgcn_bc_path_EQ : Joined<["--"], "libomptarget-amdgcn-bc-path=">, Group, + HelpText<"Path to libomptarget-amdgcn bitcode library">; def libomptarget_nvptx_bc_path_EQ : Joined<["--"], "libomptarget-nvptx-bc-path=">, Group, HelpText<"Path to libomptarget-nvptx bitcode library">; def dD : Flag<["-"], "dD">, Group, Flags<[CC1Option]>, diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index 43b07360625f..53d25c05b623 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -190,6 +190,12 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions( CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch)); CC1Args.push_back("-fcuda-is-device"); CC1Args.push_back("-emit-llvm-bc"); + + if (DriverArgs.hasArg(options::OPT_nogpulib)) +return; + std::string BitcodeSuffix = "amdgcn-" + GpuArch.str(); + addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix, + getTriple()); } llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs( diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 
bcaea71dca94..1cbf5e5dfb4e 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1627,3 +1627,63 @@ void tools::addMachineOutlinerArgs(const Driver &D, } } } + +void tools::addOpenMPDeviceRTL(const Driver &D, + const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args, + StringRef BitcodeSuffix, + const llvm::Triple &Triple) { + SmallVector LibraryPaths; + // Add user defined library paths from LIBRARY_PATH. + llvm::Optional LibPath = + llvm::sys::Process::GetEnv("LIBRARY_PATH"); + if (LibPath) { +SmallVector Frags; +const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'}; +llvm::SplitString(*LibPath, Frags, EnvPathSe
[clang] 99951aa - OpenMP: Fix object clobbering issue when using save-temps
Author: Pushpinder Singh Date: 2021-02-25T00:50:51-05:00 New Revision: 99951aa68da3c85ba03edf977cd9b22458aae6ca URL: https://github.com/llvm/llvm-project/commit/99951aa68da3c85ba03edf977cd9b22458aae6ca DIFF: https://github.com/llvm/llvm-project/commit/99951aa68da3c85ba03edf977cd9b22458aae6ca.diff LOG: OpenMP: Fix object clobbering issue when using save-temps There are two preconditions to reproduce the issue, 1. Use -save-temps option 2. Provide the -o option with name equal to the input file name without the file extension. For e.g. clang a.c -o a With the -o specified, the AssembleJobAction after OffloadWrapperJobAction will produce the object file with same name as host code object file. Due to this clash, the OffloadWrapperAction overwrites the initial host object file, which results in lld error. This also fixes the `multiple definition of __dummy.omp_offloading.entry'` issue in D96769 . Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D97273 Added: Modified: clang/lib/Driver/Driver.cpp clang/test/Driver/openmp-offload-gpu.c Removed: diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 566fd63e8478..8c180140ae92 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4675,11 +4675,12 @@ InputInfo Driver::BuildJobsForActionNoCache( /*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() && !AtTopLevel); if (isa(JA)) { - OffloadingPrefix += "-wrapper"; if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o)) BaseInput = FinalOutput->getValue(); else BaseInput = getDefaultImageName(); + BaseInput = + C.getArgs().MakeArgString(std::string(BaseInput) + "-wrapper"); } Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch, AtTopLevel, MultipleArchs, diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index 37de504bfe73..f8f063503a9e 100644 --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ 
-310,3 +310,9 @@ // RUN: | FileCheck -check-prefix=OPENMP_NVPTX_WRAPPERS %s // OPENMP_NVPTX_WRAPPERS: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda" // OPENMP_NVPTX_WRAPPERS-SAME: "-internal-isystem" "{{.*}}openmp_wrappers" + +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: -save-temps -no-canonical-prefixes -ccc-print-bindings %s -o openmp-offload-gpu 2>&1 \ +// RUN: | FileCheck -check-prefix=SAVE_TEMPS_NAMES %s + +// SAVE_TEMPS_NAMES-NOT: "GNU::Linker"{{.*}}["[[SAVE_TEMPS_INPUT1:.*\.o]]", "[[SAVE_TEMPS_INPUT1]]"] ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 9830f90 - [AMDGPU][OpenMP] Support linking of math libraries
Author: Pushpinder Singh Date: 2021-07-30T13:53:44Z New Revision: 9830f902e4d087ecb1706912b730c046f20600ee URL: https://github.com/llvm/llvm-project/commit/9830f902e4d087ecb1706912b730c046f20600ee DIFF: https://github.com/llvm/llvm-project/commit/9830f902e4d087ecb1706912b730c046f20600ee.diff LOG: [AMDGPU][OpenMP] Support linking of math libraries Math libraries are linked only when -lm is specified. This is because host system could be missing rocm-device-libs. Reviewed By: JonChesterfield, yaxunl Differential Revision: https://reviews.llvm.org/D105981 Added: Modified: clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/AMDGPU.h clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/lib/Driver/ToolChains/HIP.cpp clang/test/Driver/amdgpu-openmp-toolchain.c Removed: diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index d63c5e12c4af4..4a7413112b55d 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -893,3 +893,38 @@ bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const { return true; return false; } + +llvm::SmallVector +ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, + const std::string &GPUArch) const { + auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch); + const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); + + std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch); + if (LibDeviceFile.empty()) { +getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GPUArch; +return {}; + } + + // If --hip-device-lib is not set, add the default bitcode libraries. + // TODO: There are way too many flags that change this. Do we need to check + // them all? 
+ bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, +options::OPT_fno_gpu_flush_denormals_to_zero, +getDefaultDenormsAreZeroForTarget(Kind)); + bool FiniteOnly = DriverArgs.hasFlag( + options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false); + bool UnsafeMathOpt = + DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations, + options::OPT_fno_unsafe_math_optimizations, false); + bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math, +options::OPT_fno_fast_math, false); + bool CorrectSqrt = DriverArgs.hasFlag( + options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, + options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt); + bool Wave64 = isWave64(DriverArgs, Kind); + + return RocmInstallation.getCommonBitcodeLibs( + DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, + FastRelaxedMath, CorrectSqrt); +} \ No newline at end of file diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h index 50ed3b3ded9a3..a4bcf315ca765 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -136,6 +136,11 @@ class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain { addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadKind) const override; + + // Returns a list of device library names shared by different languages + llvm::SmallVector + getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, + const std::string &GPUArch) const; }; } // end namespace toolchains diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index fe1d19c2dd676..7b335f33aa824 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -9,12 +9,14 @@ #include "AMDGPUOpenMP.h" #include "AMDGPU.h" #include "CommonArgs.h" +#include "ToolChains/ROCm.h" #include 
"clang/Basic/DiagnosticDriver.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/InputInfo.h" #include "clang/Driver/Options.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" @@ -232,6 +234,27 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions( addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix, getTriple()); + + if (!DriverArgs.hasArg(options::OPT_l)) +return; + + auto Lm = DriverArgs.getAllArgValues(options::OPT_l); + bool HasLibm = false; + for (auto &Lib : Lm) { +if (Lib == "m") { + HasLibm = true; + break; +} + } + + if
[clang] 12da97e - [OpenMP][AMDGCN] Initial math headers support
Author: Pushpinder Singh Date: 2021-07-30T14:52:41Z New Revision: 12da97ea10a941f0123340831300d09a2121e173 URL: https://github.com/llvm/llvm-project/commit/12da97ea10a941f0123340831300d09a2121e173 DIFF: https://github.com/llvm/llvm-project/commit/12da97ea10a941f0123340831300d09a2121e173.diff LOG: [OpenMP][AMDGCN] Initial math headers support With this patch, OpenMP on AMDGCN will use the math functions provided by ROCm ocml library. Linking device code to the ocml will be done in the next patch. Reviewed By: JonChesterfield, jdoerfert, scchan Differential Revision: https://reviews.llvm.org/D104904 Added: clang/test/Headers/Inputs/include/algorithm clang/test/Headers/Inputs/include/utility clang/test/Headers/amdgcn_openmp_device_math.c Modified: clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Headers/__clang_hip_cmath.h clang/lib/Headers/__clang_hip_math.h clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h clang/lib/Headers/openmp_wrappers/cmath clang/lib/Headers/openmp_wrappers/math.h clang/test/Headers/Inputs/include/cstdlib clang/test/Headers/openmp_device_math_isnan.cpp Removed: diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index e13302528cbd1..278ae118563d6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1256,7 +1256,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, // If we are offloading to a target via OpenMP we need to include the // openmp_wrappers folder which contains alternative system headers. if (JA.isDeviceOffloading(Action::OFK_OpenMP) && - getToolChain().getTriple().isNVPTX()){ + (getToolChain().getTriple().isNVPTX() || + getToolChain().getTriple().isAMDGCN())) { if (!Args.hasArg(options::OPT_nobuiltininc)) { // Add openmp_wrappers/* to our system include path. This lets us wrap // standard library headers. 
diff --git a/clang/lib/Headers/__clang_hip_cmath.h b/clang/lib/Headers/__clang_hip_cmath.h index 7342705434e6b..d488db0a94d9d 100644 --- a/clang/lib/Headers/__clang_hip_cmath.h +++ b/clang/lib/Headers/__clang_hip_cmath.h @@ -10,7 +10,7 @@ #ifndef __CLANG_HIP_CMATH_H__ #define __CLANG_HIP_CMATH_H__ -#if !defined(__HIP__) +#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__) #error "This file is for HIP and OpenMP AMDGCN device compilation only." #endif @@ -25,31 +25,43 @@ #endif // !defined(__HIPCC_RTC__) #pragma push_macro("__DEVICE__") +#pragma push_macro("__CONSTEXPR__") +#ifdef __OPENMP_AMDGCN__ +#define __DEVICE__ static __attribute__((always_inline, nothrow)) +#define __CONSTEXPR__ constexpr +#else #define __DEVICE__ static __device__ inline __attribute__((always_inline)) +#define __CONSTEXPR__ +#endif // __OPENMP_AMDGCN__ // Start with functions that cannot be defined by DEF macros below. #if defined(__cplusplus) -__DEVICE__ double abs(double __x) { return ::fabs(__x); } -__DEVICE__ float abs(float __x) { return ::fabsf(__x); } -__DEVICE__ long long abs(long long __n) { return ::llabs(__n); } -__DEVICE__ long abs(long __n) { return ::labs(__n); } -__DEVICE__ float fma(float __x, float __y, float __z) { +#if defined __OPENMP_AMDGCN__ +__DEVICE__ __CONSTEXPR__ float fabs(float __x) { return ::fabsf(__x); } +__DEVICE__ __CONSTEXPR__ float sin(float __x) { return ::sinf(__x); } +__DEVICE__ __CONSTEXPR__ float cos(float __x) { return ::cosf(__x); } +#endif +__DEVICE__ __CONSTEXPR__ double abs(double __x) { return ::fabs(__x); } +__DEVICE__ __CONSTEXPR__ float abs(float __x) { return ::fabsf(__x); } +__DEVICE__ __CONSTEXPR__ long long abs(long long __n) { return ::llabs(__n); } +__DEVICE__ __CONSTEXPR__ long abs(long __n) { return ::labs(__n); } +__DEVICE__ __CONSTEXPR__ float fma(float __x, float __y, float __z) { return ::fmaf(__x, __y, __z); } #if !defined(__HIPCC_RTC__) // The value returned by fpclassify is platform dependent, therefore it is not // supported 
by hipRTC. -__DEVICE__ int fpclassify(float __x) { +__DEVICE__ __CONSTEXPR__ int fpclassify(float __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } -__DEVICE__ int fpclassify(double __x) { +__DEVICE__ __CONSTEXPR__ int fpclassify(double __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } #endif // !defined(__HIPCC_RTC__) -__DEVICE__ float frexp(float __arg, int *__exp) { +__DEVICE__ __CONSTEXPR__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } @@ -71,93 +83,101 @@ __DEVICE__ float frexp(float __arg, int *__exp) { //of the variants inside the inner region and avoid the clash. #pragma omp begin declare variant match(implementation = {vendor(llvm)}) -__DEVICE__ int isin
[clang] 713a5d1 - [OpenMP][AMDGCN] Initial math headers support
Author: Pushpinder Singh Date: 2021-08-02T14:38:52Z New Revision: 713a5d12cde58a5dff90cc3e2d1e67c2a78fe52f URL: https://github.com/llvm/llvm-project/commit/713a5d12cde58a5dff90cc3e2d1e67c2a78fe52f DIFF: https://github.com/llvm/llvm-project/commit/713a5d12cde58a5dff90cc3e2d1e67c2a78fe52f.diff LOG: [OpenMP][AMDGCN] Initial math headers support With this patch, OpenMP on AMDGCN will use the math functions provided by ROCm ocml library. Linking device code to the ocml will be done in the next patch. Reviewed By: JonChesterfield, jdoerfert, scchan Differential Revision: https://reviews.llvm.org/D104904 Added: clang/test/Headers/Inputs/include/algorithm clang/test/Headers/Inputs/include/utility clang/test/Headers/amdgcn_openmp_device_math.c Modified: clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Headers/__clang_hip_cmath.h clang/lib/Headers/__clang_hip_math.h clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h clang/lib/Headers/openmp_wrappers/cmath clang/lib/Headers/openmp_wrappers/math.h clang/test/Headers/Inputs/include/cstdlib clang/test/Headers/openmp_device_math_isnan.cpp Removed: diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 7042bf5fc9e2a..863e5a17bc47f 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1256,7 +1256,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, // If we are offloading to a target via OpenMP we need to include the // openmp_wrappers folder which contains alternative system headers. if (JA.isDeviceOffloading(Action::OFK_OpenMP) && - getToolChain().getTriple().isNVPTX()){ + (getToolChain().getTriple().isNVPTX() || + getToolChain().getTriple().isAMDGCN())) { if (!Args.hasArg(options::OPT_nobuiltininc)) { // Add openmp_wrappers/* to our system include path. This lets us wrap // standard library headers. 
diff --git a/clang/lib/Headers/__clang_hip_cmath.h b/clang/lib/Headers/__clang_hip_cmath.h index 7342705434e6b..d488db0a94d9d 100644 --- a/clang/lib/Headers/__clang_hip_cmath.h +++ b/clang/lib/Headers/__clang_hip_cmath.h @@ -10,7 +10,7 @@ #ifndef __CLANG_HIP_CMATH_H__ #define __CLANG_HIP_CMATH_H__ -#if !defined(__HIP__) +#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__) #error "This file is for HIP and OpenMP AMDGCN device compilation only." #endif @@ -25,31 +25,43 @@ #endif // !defined(__HIPCC_RTC__) #pragma push_macro("__DEVICE__") +#pragma push_macro("__CONSTEXPR__") +#ifdef __OPENMP_AMDGCN__ +#define __DEVICE__ static __attribute__((always_inline, nothrow)) +#define __CONSTEXPR__ constexpr +#else #define __DEVICE__ static __device__ inline __attribute__((always_inline)) +#define __CONSTEXPR__ +#endif // __OPENMP_AMDGCN__ // Start with functions that cannot be defined by DEF macros below. #if defined(__cplusplus) -__DEVICE__ double abs(double __x) { return ::fabs(__x); } -__DEVICE__ float abs(float __x) { return ::fabsf(__x); } -__DEVICE__ long long abs(long long __n) { return ::llabs(__n); } -__DEVICE__ long abs(long __n) { return ::labs(__n); } -__DEVICE__ float fma(float __x, float __y, float __z) { +#if defined __OPENMP_AMDGCN__ +__DEVICE__ __CONSTEXPR__ float fabs(float __x) { return ::fabsf(__x); } +__DEVICE__ __CONSTEXPR__ float sin(float __x) { return ::sinf(__x); } +__DEVICE__ __CONSTEXPR__ float cos(float __x) { return ::cosf(__x); } +#endif +__DEVICE__ __CONSTEXPR__ double abs(double __x) { return ::fabs(__x); } +__DEVICE__ __CONSTEXPR__ float abs(float __x) { return ::fabsf(__x); } +__DEVICE__ __CONSTEXPR__ long long abs(long long __n) { return ::llabs(__n); } +__DEVICE__ __CONSTEXPR__ long abs(long __n) { return ::labs(__n); } +__DEVICE__ __CONSTEXPR__ float fma(float __x, float __y, float __z) { return ::fmaf(__x, __y, __z); } #if !defined(__HIPCC_RTC__) // The value returned by fpclassify is platform dependent, therefore it is not // supported 
by hipRTC. -__DEVICE__ int fpclassify(float __x) { +__DEVICE__ __CONSTEXPR__ int fpclassify(float __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } -__DEVICE__ int fpclassify(double __x) { +__DEVICE__ __CONSTEXPR__ int fpclassify(double __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } #endif // !defined(__HIPCC_RTC__) -__DEVICE__ float frexp(float __arg, int *__exp) { +__DEVICE__ __CONSTEXPR__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } @@ -71,93 +83,101 @@ __DEVICE__ float frexp(float __arg, int *__exp) { //of the variants inside the inner region and avoid the clash. #pragma omp begin declare variant match(implementation = {vendor(llvm)}) -__DEVICE__ int isin
[clang] f3eb5f9 - [AMDGPU][OpenMP] Wrap amdgcn declare variant inside ifdef
Author: Pushpinder Singh Date: 2021-08-04T15:24:46Z New Revision: f3eb5f900d2ae6c8e1c03d1b250415a7b7aa39b1 URL: https://github.com/llvm/llvm-project/commit/f3eb5f900d2ae6c8e1c03d1b250415a7b7aa39b1 DIFF: https://github.com/llvm/llvm-project/commit/f3eb5f900d2ae6c8e1c03d1b250415a7b7aa39b1.diff LOG: [AMDGPU][OpenMP] Wrap amdgcn declare variant inside ifdef This fixes the issue https://bugs.llvm.org/show_bug.cgi?id=51337 Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D107468 Added: Modified: clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h Removed: diff --git a/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h b/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h index 99cf2483e7343..279fb26fbaf78 100644 --- a/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h +++ b/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h @@ -35,6 +35,7 @@ extern "C" { #pragma omp end declare variant +#ifdef __AMDGCN__ #pragma omp begin declare variant match(device = {arch(amdgcn)}) // Import types which will be used by __clang_hip_libdevice_declares.h @@ -54,6 +55,7 @@ extern "C" { #undef __OPENMP_AMDGCN__ #pragma omp end declare variant +#endif #ifdef __cplusplus } // extern "C" ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 60e07a9 - [AMDGPU][OpenMP] Use llvm-link to link ocml libraries
Author: Pushpinder Singh Date: 2021-08-13T13:36:57+05:30 New Revision: 60e07a9568625a196f1ed8ed9e502c8c4d56da7f URL: https://github.com/llvm/llvm-project/commit/60e07a9568625a196f1ed8ed9e502c8c4d56da7f DIFF: https://github.com/llvm/llvm-project/commit/60e07a9568625a196f1ed8ed9e502c8c4d56da7f.diff LOG: [AMDGPU][OpenMP] Use llvm-link to link ocml libraries This fixes the 'unused linker option: -lm' warning when compiling program with -c. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D107952 Added: Modified: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/lib/Driver/ToolChains/AMDGPUOpenMP.h clang/test/Driver/amdgpu-openmp-toolchain.c Removed: diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index 7b335f33aa82..135e3694434d 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -86,14 +86,34 @@ static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC, } // namespace const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand( -Compilation &C, const JobAction &JA, const InputInfoList &Inputs, -const ArgList &Args, StringRef SubArchName, -StringRef OutputFilePrefix) const { +const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C, +const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args, +StringRef SubArchName, StringRef OutputFilePrefix) const { ArgStringList CmdArgs; for (const auto &II : Inputs) if (II.isFilename()) CmdArgs.push_back(II.getFilename()); + + if (Args.hasArg(options::OPT_l)) { +auto Lm = Args.getAllArgValues(options::OPT_l); +bool HasLibm = false; +for (auto &Lib : Lm) { + if (Lib == "m") { +HasLibm = true; +break; + } +} + +if (HasLibm) { + SmallVector BCLibs = + AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str()); + llvm::for_each(BCLibs, [&](StringRef BCFile) { +CmdArgs.push_back(Args.MakeArgString(BCFile)); + }); +} + } + // Add an 
intermediate output file. CmdArgs.push_back("-o"); const char *OutputFileName = @@ -182,8 +202,8 @@ void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, assert(Prefix.length() && "no linker inputs are files "); // Each command outputs diff erent files. - const char *LLVMLinkCommand = - constructLLVMLinkCommand(C, JA, Inputs, Args, GPUArch, Prefix); + const char *LLVMLinkCommand = constructLLVMLinkCommand( + AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix); // Produce readable assembly if save-temps is enabled. if (C.getDriver().isSaveTempsEnabled()) @@ -234,27 +254,6 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions( addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix, getTriple()); - - if (!DriverArgs.hasArg(options::OPT_l)) -return; - - auto Lm = DriverArgs.getAllArgValues(options::OPT_l); - bool HasLibm = false; - for (auto &Lib : Lm) { -if (Lib == "m") { - HasLibm = true; - break; -} - } - - if (HasLibm) { -SmallVector BCLibs = -getCommonDeviceLibNames(DriverArgs, GPUArch); -llvm::for_each(BCLibs, [&](StringRef BCFile) { - CC1Args.push_back("-mlink-builtin-bitcode"); - CC1Args.push_back(DriverArgs.MakeArgString(BCFile)); -}); - } } llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs( diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h index effca7e212cc..233256bf7378 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h @@ -16,6 +16,10 @@ namespace clang { namespace driver { +namespace toolchains { +class AMDGPUOpenMPToolChain; +} + namespace tools { namespace AMDGCN { @@ -35,11 +39,11 @@ class LLVM_LIBRARY_VISIBILITY OpenMPLinker : public Tool { private: /// \return llvm-link output file name. 
- const char *constructLLVMLinkCommand(Compilation &C, const JobAction &JA, - const InputInfoList &Inputs, - const llvm::opt::ArgList &Args, - llvm::StringRef SubArchName, - llvm::StringRef OutputFilePrefix) const; + const char *constructLLVMLinkCommand( + const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C, + const JobAction &JA, const InputInfoList &Inputs, + const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, + llvm::StringRef OutputFilePrefix) const; /// \return llc output file name. const char *constructLlcCommand(Compilation &C, const JobAction &JA, diff --git a/clang/test/Driver/amdgpu-openmp-toolc
[clang] 9360430 - Revert "Reapply "[AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed""
Author: Pushpinder Singh Date: 2021-04-27T02:23:44Z New Revision: 93604305bb72201641f31cc50a6e7b2fe65d3af3 URL: https://github.com/llvm/llvm-project/commit/93604305bb72201641f31cc50a6e7b2fe65d3af3 DIFF: https://github.com/llvm/llvm-project/commit/93604305bb72201641f31cc50a6e7b2fe65d3af3.diff LOG: Revert "Reapply "[AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed"" This reverts commit 15be0c41d2e59fb4599c9aebf21ede498c61f51d. Added: Modified: clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/AMDGPU.h clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/tools/CMakeLists.txt Removed: clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906 clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908 clang/test/Driver/amdgpu-openmp-system-arch-fail.c clang/test/Driver/amdgpu-openmp-system-arch.c clang/tools/amdgpu-arch/AMDGPUArch.cpp clang/tools/amdgpu-arch/CMakeLists.txt diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index a2ffe1378cb6d..5e580cc4fbb7a 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -67,8 +67,6 @@ def err_drv_no_hip_runtime : Error< "cannot find HIP runtime. Provide its path via --rocm-path, or pass " "-nogpuinc to build without HIP runtime.">; -def err_drv_undetermined_amdgpu_arch : Error< - "Cannot determine AMDGPU architecture: %0. Consider passing it via --march.">; def err_drv_cuda_version_unsupported : Error< "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), " "but installation at %3 is %4. 
Use --cuda-path to specify a diff erent CUDA " diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index df3049fe40326..04a05207cc74b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -924,8 +924,6 @@ def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group, HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">; def hip_path_EQ : Joined<["--"], "hip-path=">, Group, HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">; -def amdgpu_arch_tool_EQ : Joined<["--"], "amdgpu-arch-tool=">, Group, - HelpText<"Tool used for detecting AMD GPU arch in the system.">; def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group, HelpText<"ROCm device library path. Alternative to rocm-path.">; def : Joined<["--"], "hip-device-lib-path=">, Alias; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 4da1239dce84e..c0b2b78a1b4b2 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -12,16 +12,9 @@ #include "clang/Basic/TargetID.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" #include "llvm/Option/ArgList.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/FileUtilities.h" -#include "llvm/Support/LineIterator.h" #include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" -#include - -#define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch" using namespace clang::driver; using namespace clang::driver::tools; @@ -722,78 +715,6 @@ void AMDGPUToolChain::checkTargetID( } } -llvm::Error -AMDGPUToolChain::detectSystemGPUs(const ArgList &Args, - SmallVector &GPUArchs) const { - std::string Program; - if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ)) -Program = A->getValue(); - else -Program = 
GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME); - llvm::SmallString<64> OutputFile; - llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */, - OutputFile); - llvm::FileRemover OutputRemover(OutputFile.c_str()); - llvm::Optional Redirects[] = { - {""}, - StringRef(OutputFile), - {""}, - }; - - std::string ErrorMessage; - if (int Result = llvm::sys::ExecuteAndWait( - Program.c_str(), {}, {}, Redirects, /* SecondsToWait */ 0, - /*MemoryLimit*/ 0, &ErrorMessage)) { -if (Result > 0) { - ErrorMessage = "Exited with error code " + std::to_string(Result); -} else if (Result == -1) { - ErrorMessage = "Execute failed: " + ErrorMessage; -} else { - ErrorMessage = "Crashed: " + ErrorMessage; -} - -return llvm::createStringError(std::error_code(), - Program +
[clang] 59ad4e0 - Reapply "[AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed"
Author: Pushpinder Singh Date: 2021-04-27T10:47:05Z New Revision: 59ad4e0f01a8402016a690b3915bdd083285561e URL: https://github.com/llvm/llvm-project/commit/59ad4e0f01a8402016a690b3915bdd083285561e DIFF: https://github.com/llvm/llvm-project/commit/59ad4e0f01a8402016a690b3915bdd083285561e.diff LOG: Reapply "[AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed" This reverts commit 93604305bb72201641f31cc50a6e7b2fe65d3af3. Added: clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906 clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908 clang/test/Driver/amdgpu-openmp-system-arch-fail.c clang/test/Driver/amdgpu-openmp-system-arch.c clang/tools/amdgpu-arch/AMDGPUArch.cpp clang/tools/amdgpu-arch/CMakeLists.txt Modified: clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/AMDGPU.h clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/tools/CMakeLists.txt Removed: diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 5e580cc4fbb7a..a2ffe1378cb6d 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -67,6 +67,8 @@ def err_drv_no_hip_runtime : Error< "cannot find HIP runtime. Provide its path via --rocm-path, or pass " "-nogpuinc to build without HIP runtime.">; +def err_drv_undetermined_amdgpu_arch : Error< + "Cannot determine AMDGPU architecture: %0. Consider passing it via --march.">; def err_drv_cuda_version_unsupported : Error< "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), " "but installation at %3 is %4. 
Use --cuda-path to specify a diff erent CUDA " diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 04a05207cc74b..df3049fe40326 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -924,6 +924,8 @@ def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group, HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">; def hip_path_EQ : Joined<["--"], "hip-path=">, Group, HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">; +def amdgpu_arch_tool_EQ : Joined<["--"], "amdgpu-arch-tool=">, Group, + HelpText<"Tool used for detecting AMD GPU arch in the system.">; def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group, HelpText<"ROCm device library path. Alternative to rocm-path.">; def : Joined<["--"], "hip-device-lib-path=">, Alias; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index c0b2b78a1b4b2..4da1239dce84e 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -12,9 +12,16 @@ #include "clang/Basic/TargetID.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Options.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/LineIterator.h" #include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" +#include + +#define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch" using namespace clang::driver; using namespace clang::driver::tools; @@ -715,6 +722,78 @@ void AMDGPUToolChain::checkTargetID( } } +llvm::Error +AMDGPUToolChain::detectSystemGPUs(const ArgList &Args, + SmallVector &GPUArchs) const { + std::string Program; + if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ)) +Program = A->getValue(); + else +Program = 
GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME); + llvm::SmallString<64> OutputFile; + llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */, + OutputFile); + llvm::FileRemover OutputRemover(OutputFile.c_str()); + llvm::Optional Redirects[] = { + {""}, + StringRef(OutputFile), + {""}, + }; + + std::string ErrorMessage; + if (int Result = llvm::sys::ExecuteAndWait( + Program.c_str(), {}, {}, Redirects, /* SecondsToWait */ 0, + /*MemoryLimit*/ 0, &ErrorMessage)) { +if (Result > 0) { + ErrorMessage = "Exited with error code " + std::to_string(Result); +} else if (Result == -1) { + ErrorMessage = "Execute failed: " + ErrorMessage; +} else { + ErrorMessage = "Crashed: " + ErrorMessage; +} + +return llvm::createStringError(std::error_code(), + Program + ": " + Er
[clang] 1f5cacf - [AMDGPU][OpenMP] Fix clang driver crash when provided -c
Author: Pushpinder Singh Date: 2021-05-05T14:26:58Z New Revision: 1f5cacfcb845fd4163dec5a8c7991934c53d6cb3 URL: https://github.com/llvm/llvm-project/commit/1f5cacfcb845fd4163dec5a8c7991934c53d6cb3 DIFF: https://github.com/llvm/llvm-project/commit/1f5cacfcb845fd4163dec5a8c7991934c53d6cb3.diff LOG: [AMDGPU][OpenMP] Fix clang driver crash when provided -c The offload action is used in four different ways as explained in Driver.cpp:4495. When -c is present, the final phase will be assemble (linker when -c is not present). However, this phase is skipped according to D96769 for amdgcn. So the offload action ends up in the following situation: compile (device) ---> offload ---> offload Without -c, the chain looks like: compile (device) ---> offload ---> linker (device) ---> offload The former situation creates an unhandled case which causes a problem. The solution presented in this patch delays the D96769 logic until job creation time. This keeps the offload action in one of the four specified situations. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D101901 Added: Modified: clang/lib/Driver/Driver.cpp clang/test/Driver/amdgpu-openmp-toolchain.c Removed: diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 5008b506a883b..1086887a8de56 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3100,16 +3100,8 @@ class OffloadingActionBuilder final { } // By default, we produce an action for each device arch. - for (unsigned I = 0; I < ToolChains.size(); ++I) { -Action *&A = OpenMPDeviceActions[I]; -// AMDGPU does not support linking of object files, so we skip -// assemble and backend actions to produce LLVM IR. 
-if (ToolChains[I]->getTriple().isAMDGCN() && -(CurPhase == phases::Assemble || CurPhase == phases::Backend)) - continue; - + for (Action *&A : OpenMPDeviceActions) A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A); - } return ABRT_Success; } @@ -4594,6 +4586,25 @@ InputInfo Driver::BuildJobsForActionNoCache( if (!T) return InputInfo(); + if (BuildingForOffloadDevice && + A->getOffloadingDeviceKind() == Action::OFK_OpenMP) { +if (TC->getTriple().isAMDGCN()) { + // AMDGCN treats backend and assemble actions as no-op because + // linker does not support object files. + if (const BackendJobAction *BA = dyn_cast(A)) { +return BuildJobsForAction(C, *BA->input_begin(), TC, BoundArch, + AtTopLevel, MultipleArchs, LinkingOutput, + CachedResults, TargetDeviceOffloadKind); + } + + if (const AssembleJobAction *AA = dyn_cast(A)) { +return BuildJobsForAction(C, *AA->input_begin(), TC, BoundArch, + AtTopLevel, MultipleArchs, LinkingOutput, + CachedResults, TargetDeviceOffloadKind); + } +} + } + // If we've collapsed action list that contained OffloadAction we // need to build jobs for host/device-side inputs it may have held. 
for (const auto *OA : CollapsedOffloadActions) diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c b/clang/test/Driver/amdgpu-openmp-toolchain.c index f2350a51817a9..5f2bdff549607 100644 --- a/clang/test/Driver/amdgpu-openmp-toolchain.c +++ b/clang/test/Driver/amdgpu-openmp-toolchain.c @@ -26,12 +26,14 @@ // CHECK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp) // CHECK-PHASES: 7: compiler, {6}, ir, (device-openmp) // CHECK-PHASES: 8: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (amdgcn-amd-amdhsa)" {7}, ir -// CHECK-PHASES: 9: linker, {8}, image, (device-openmp) -// CHECK-PHASES: 10: offload, "device-openmp (amdgcn-amd-amdhsa)" {9}, image -// CHECK-PHASES: 11: clang-offload-wrapper, {10}, ir, (host-openmp) -// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp) -// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp) -// CHECK-PHASES: 14: linker, {4, 13}, image, (host-openmp) +// CHECK-PHASES: 9: backend, {8}, assembler, (device-openmp) +// CHECK-PHASES: 10: assembler, {9}, object, (device-openmp) +// CHECK-PHASES: 11: linker, {10}, image, (device-openmp) +// CHECK-PHASES: 12: offload, "device-openmp (amdgcn-amd-amdhsa)" {11}, image +// CHECK-PHASES: 13: clang-offload-wrapper, {12}, ir, (host-openmp) +// CHECK-PHASES: 14: backend, {13}, assembler, (host-openmp) +// CHECK-PHASES: 15: assembler, {14}, object, (host-openmp) +// CHECK-PHASES: 16: linker, {4, 15}, image, (host-openmp) // handling of --libomptarget-amdgcn-bc-path // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 --libomptarget-amdgcn-bc-pa
[clang] c711aa0 - [amdgpu-arch] Guard hsa.h with __has_include
Author: Pushpinder Singh Date: 2021-05-10T07:33:30Z New Revision: c711aa0f6f9d9400fbe619c7f0d6d4aa723b3a64 URL: https://github.com/llvm/llvm-project/commit/c711aa0f6f9d9400fbe619c7f0d6d4aa723b3a64 DIFF: https://github.com/llvm/llvm-project/commit/c711aa0f6f9d9400fbe619c7f0d6d4aa723b3a64.diff LOG: [amdgpu-arch] Guard hsa.h with __has_include This patch is supposed to fix the issue of hsa.h not being found. The issue was reported in D99949. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D102067 Added: Modified: clang/tools/amdgpu-arch/AMDGPUArch.cpp Removed: diff --git a/clang/tools/amdgpu-arch/AMDGPUArch.cpp b/clang/tools/amdgpu-arch/AMDGPUArch.cpp index 29f9c8bc23974..4fae78b4f121c 100644 --- a/clang/tools/amdgpu-arch/AMDGPUArch.cpp +++ b/clang/tools/amdgpu-arch/AMDGPUArch.cpp @@ -11,7 +11,24 @@ // //===--===// -#include +#if defined(__has_include) +#if __has_include("hsa.h") +#define HSA_HEADER_FOUND 1 +#include "hsa.h" +#elif __has_include("hsa/hsa.h") +#define HSA_HEADER_FOUND 1 +#include "hsa/hsa.h" +#else +#define HSA_HEADER_FOUND 0 +#endif +#else +#define HSA_HEADER_FOUND 0 +#endif + +#if !HSA_HEADER_FOUND +int main() { return 1; } +#else + #include #include @@ -57,3 +74,5 @@ int main() { hsa_shut_down(); return 0; } + +#endif ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 7f78e40 - [AMDGPU][OpenMP] Emit textual IR for -emit-llvm -S
Author: Pushpinder Singh Date: 2021-05-10T07:54:23Z New Revision: 7f78e409d0280c62209e1a7dc8c6d1409acc9184 URL: https://github.com/llvm/llvm-project/commit/7f78e409d0280c62209e1a7dc8c6d1409acc9184 DIFF: https://github.com/llvm/llvm-project/commit/7f78e409d0280c62209e1a7dc8c6d1409acc9184.diff LOG: [AMDGPU][OpenMP] Emit textual IR for -emit-llvm -S Previously clang would print a binary blob into the bundled file for amdgcn. With this patch, it will instead print textual IR as expected. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D102065 Added: Modified: clang/lib/Driver/ToolChains/Clang.cpp clang/test/Driver/amdgpu-openmp-toolchain.c Removed: diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 2b3934fc25418..97a92e69419fa 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4394,7 +4394,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-emit-llvm"); } else if (JA.getType() == types::TY_LLVM_BC || JA.getType() == types::TY_LTO_BC) { - CmdArgs.push_back("-emit-llvm-bc"); + // Emit textual llvm IR for AMDGPU offloading for -emit-llvm -S + if (Triple.isAMDGCN() && IsOpenMPDevice) { +if (Args.hasArg(options::OPT_S) && Args.hasArg(options::OPT_emit_llvm)) + CmdArgs.push_back("-emit-llvm"); + } else { +CmdArgs.push_back("-emit-llvm-bc"); + } } else if (JA.getType() == types::TY_IFS || JA.getType() == types::TY_IFS_CPP) { StringRef ArgStr = diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c b/clang/test/Driver/amdgpu-openmp-toolchain.c index 5f2bdff549607..12067c4c0739d 100644 --- a/clang/test/Driver/amdgpu-openmp-toolchain.c +++ b/clang/test/Driver/amdgpu-openmp-toolchain.c @@ -71,3 +71,6 @@ // CHECK-C: "x86_64-unknown-linux-gnu" - "clang" // CHECK-C: "x86_64-unknown-linux-gnu" - "clang::as" // CHECK-C: "x86_64-unknown-linux-gnu" - "offload bundler" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu 
-emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR +// CHECK-EMIT-LLVM-IR: {{.*}}clang-13" "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm" ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] eca3d68 - Revert "[AMDGPU][OpenMP] Emit textual IR for -emit-llvm -S"
Author: Pushpinder Singh Date: 2021-05-11T10:07:13-05:00 New Revision: eca3d68399246765bc6e8c94ffb4d5927b1add12 URL: https://github.com/llvm/llvm-project/commit/eca3d68399246765bc6e8c94ffb4d5927b1add12 DIFF: https://github.com/llvm/llvm-project/commit/eca3d68399246765bc6e8c94ffb4d5927b1add12.diff LOG: Revert "[AMDGPU][OpenMP] Emit textual IR for -emit-llvm -S" This reverts commit 7f78e409d0280c62209e1a7dc8c6d1409acc9184. Added: Modified: clang/lib/Driver/ToolChains/Clang.cpp clang/test/Driver/amdgpu-openmp-toolchain.c Removed: diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c4ea67ea93660..0cad688873b7c 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4394,13 +4394,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-emit-llvm"); } else if (JA.getType() == types::TY_LLVM_BC || JA.getType() == types::TY_LTO_BC) { - // Emit textual llvm IR for AMDGPU offloading for -emit-llvm -S - if (Triple.isAMDGCN() && IsOpenMPDevice) { -if (Args.hasArg(options::OPT_S) && Args.hasArg(options::OPT_emit_llvm)) - CmdArgs.push_back("-emit-llvm"); - } else { -CmdArgs.push_back("-emit-llvm-bc"); - } + CmdArgs.push_back("-emit-llvm-bc"); } else if (JA.getType() == types::TY_IFS || JA.getType() == types::TY_IFS_CPP) { StringRef ArgStr = diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c b/clang/test/Driver/amdgpu-openmp-toolchain.c index e4b89dcedf01f..5f2bdff549607 100644 --- a/clang/test/Driver/amdgpu-openmp-toolchain.c +++ b/clang/test/Driver/amdgpu-openmp-toolchain.c @@ -71,6 +71,3 @@ // CHECK-C: "x86_64-unknown-linux-gnu" - "clang" // CHECK-C: "x86_64-unknown-linux-gnu" - "clang::as" // CHECK-C: "x86_64-unknown-linux-gnu" - "offload bundler" - -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-EMIT-LLVM-IR -// CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm" ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] fc12a64 - [OpenMP][AMDGPU] Skip backend and assemble phases for amdgcn
Author: Pushpinder Singh Date: 2021-03-16T04:58:14Z New Revision: fc12a64ecc71bf6789b3ca67d3cda77acc8785f9 URL: https://github.com/llvm/llvm-project/commit/fc12a64ecc71bf6789b3ca67d3cda77acc8785f9 DIFF: https://github.com/llvm/llvm-project/commit/fc12a64ecc71bf6789b3ca67d3cda77acc8785f9.diff LOG: [OpenMP][AMDGPU] Skip backend and assemble phases for amdgcn Remove emit-llvm-bc from addClangTargetOptions as it conflicts with -E for save-temps. AMDGCN does not yet support linking object files so backend and assemble actions are skipped, leaving LLVM IR as the output format. Reviewed By: JonChesterfield, ronlieb Differential Revision: https://reviews.llvm.org/D96769 Added: Modified: clang/lib/Driver/Driver.cpp clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/test/Driver/amdgpu-openmp-toolchain.c Removed: diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ceaeb16df6f1..dbd365e7c9bc 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3086,8 +3086,16 @@ class OffloadingActionBuilder final { } // By default, we produce an action for each device arch. - for (Action *&A : OpenMPDeviceActions) + for (unsigned I = 0; I < ToolChains.size(); ++I) { +Action *&A = OpenMPDeviceActions[I]; +// AMDGPU does not support linking of object files, so we skip +// assemble and backend actions to produce LLVM IR. +if (ToolChains[I]->getTriple().isAMDGCN() && +(CurPhase == phases::Assemble || CurPhase == phases::Backend)) + continue; + A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A); + } return ABRT_Success; } diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index 53d25c05b623..38abd2f48368 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -161,6 +161,11 @@ void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, // Each command outputs diff erent files. 
const char *LLVMLinkCommand = constructLLVMLinkCommand(C, JA, Inputs, Args, GPUArch, Prefix); + + // Produce readable assembly if save-temps is enabled. + if (C.getDriver().isSaveTempsEnabled()) +constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, LLVMLinkCommand, +/*OutputIsAsm=*/true); const char *LlcCommand = constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, LLVMLinkCommand); constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand); @@ -189,7 +194,6 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions( CC1Args.push_back("-target-cpu"); CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch)); CC1Args.push_back("-fcuda-is-device"); - CC1Args.push_back("-emit-llvm-bc"); if (DriverArgs.hasArg(options::OPT_nogpulib)) return; diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c b/clang/test/Driver/amdgpu-openmp-toolchain.c index 0645177f00e2..f2350a51817a 100644 --- a/clang/test/Driver/amdgpu-openmp-toolchain.c +++ b/clang/test/Driver/amdgpu-openmp-toolchain.c @@ -1,3 +1,4 @@ +// REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target // RUN: env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \ // RUN: | FileCheck %s @@ -5,7 +6,7 @@ // verify the tools invocations // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"{{.*}} // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"{{.*}} -// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-emit-llvm-bc" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}} +// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgcn-gfx906.bc"{{.*}} // CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" // CHECK: 
llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o" // CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o" @@ -25,18 +26,35 @@ // CHECK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp) // CHECK-PHASES: 7: compiler, {6}, ir, (device-openmp) // CHECK-PHASES: 8: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (amdgcn-amd-amdhsa)" {7}, ir -// CHECK-PHASES: 9: backend, {8}, assembler, (
[clang] 7029cff - [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed
Author: Pushpinder Singh Date: 2021-04-16T05:26:20Z New Revision: 7029cffc4e78556cfe820791c612968bb15b2ffb URL: https://github.com/llvm/llvm-project/commit/7029cffc4e78556cfe820791c612968bb15b2ffb DIFF: https://github.com/llvm/llvm-project/commit/7029cffc4e78556cfe820791c612968bb15b2ffb.diff LOG: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed This patch adds new clang tool named amdgpu-arch which uses HSA to detect installed AMDGPU and report back latter's march. This tool is built only if system has HSA installed. The value printed by amdgpu-arch is used to fill -march when latter is not explicitly provided in -Xopenmp-target. Reviewed By: JonChesterfield, gregrodgers Differential Revision: https://reviews.llvm.org/D99949 Added: clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906 clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908 clang/test/Driver/amdgpu-openmp-system-arch-fail.c clang/test/Driver/amdgpu-openmp-system-arch.c clang/tools/amdgpu-arch/AMDGPUArch.cpp clang/tools/amdgpu-arch/CMakeLists.txt Modified: clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/AMDGPU.h clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/tools/CMakeLists.txt Removed: diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 5e580cc4fbb7a..aa3b00c231cbe 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -67,6 +67,8 @@ def err_drv_no_hip_runtime : Error< "cannot find HIP runtime. Provide its path via --rocm-path, or pass " "-nogpuinc to build without HIP runtime.">; +def err_drv_undetermined_amdgpu_arch : Error< + "Cannot determine AMDGPU architecture. 
Consider passing it via -march">; def err_drv_cuda_version_unsupported : Error< "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), " "but installation at %3 is %4. Use --cuda-path to specify a diff erent CUDA " diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 9e15712eb2d51..5fbcd64b69376 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -918,6 +918,8 @@ def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group, HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">; def hip_path_EQ : Joined<["--"], "hip-path=">, Group, HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">; +def amdgpu_arch_tool_EQ : Joined<["--"], "amdgpu-arch-tool=">, Group, + HelpText<"Tool used for detecting AMD GPU arch in the system.">; def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group, HelpText<"ROCm device library path. 
Alternative to rocm-path.">; def : Joined<["--"], "hip-device-lib-path=">, Alias; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index dc9c9751c851d..37da2c05dcf67 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -12,10 +12,15 @@ #include "clang/Basic/TargetID.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Options.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/LineIterator.h" #include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" +#define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch" + using namespace clang::driver; using namespace clang::driver::tools; using namespace clang::driver::toolchains; @@ -715,6 +720,57 @@ void AMDGPUToolChain::checkTargetID( } } +llvm::SmallVector, 1> +AMDGPUToolChain::detectSystemGPUs(const ArgList &Args) const { + std::string Program; + if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ)) +Program = A->getValue(); + else +Program = GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME); + llvm::SmallString<64> OutputFile; + llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */, + OutputFile); + llvm::FileRemover OutputRemover(OutputFile.c_str()); + llvm::Optional Redirects[] = { + {""}, + StringRef(OutputFile), + {""}, + }; + + if (llvm::sys::ExecuteAndWait(Program.c_str(), {}, {}, Redirects)) { +return {}; + } + + llvm::ErrorOr> OutputBuf = + llvm::MemoryBuffer::getFile(OutputFile.c_str()); + if (!OutputBuf) +return {}; + + llvm::SmallVector, 1> GPUArchs; + for (llvm::line_iterator LineIt(**OutputBuf)
[clang] efc013e - Revert "[AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed"
Author: Pushpinder Singh Date: 2021-04-16T09:16:58Z New Revision: efc013ec4d950a68e6f80dd98cda35e1a96a6fc8 URL: https://github.com/llvm/llvm-project/commit/efc013ec4d950a68e6f80dd98cda35e1a96a6fc8 DIFF: https://github.com/llvm/llvm-project/commit/efc013ec4d950a68e6f80dd98cda35e1a96a6fc8.diff LOG: Revert "[AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed" This reverts commit 7029cffc4e78556cfe820791c612968bb15b2ffb. Added: Modified: clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/AMDGPU.h clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/tools/CMakeLists.txt Removed: clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906 clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908 clang/test/Driver/amdgpu-openmp-system-arch-fail.c clang/test/Driver/amdgpu-openmp-system-arch.c clang/tools/amdgpu-arch/AMDGPUArch.cpp clang/tools/amdgpu-arch/CMakeLists.txt diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index aa3b00c231cbe..5e580cc4fbb7a 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -67,8 +67,6 @@ def err_drv_no_hip_runtime : Error< "cannot find HIP runtime. Provide its path via --rocm-path, or pass " "-nogpuinc to build without HIP runtime.">; -def err_drv_undetermined_amdgpu_arch : Error< - "Cannot determine AMDGPU architecture. Consider passing it via -march">; def err_drv_cuda_version_unsupported : Error< "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), " "but installation at %3 is %4. 
Use --cuda-path to specify a different CUDA " diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5fbcd64b69376..9e15712eb2d51 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -918,8 +918,6 @@ def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group, HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">; def hip_path_EQ : Joined<["--"], "hip-path=">, Group, HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">; -def amdgpu_arch_tool_EQ : Joined<["--"], "amdgpu-arch-tool=">, Group, - HelpText<"Tool used for detecting AMD GPU arch in the system.">; def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group, HelpText<"ROCm device library path. Alternative to rocm-path.">; def : Joined<["--"], "hip-device-lib-path=">, Alias; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 37da2c05dcf67..dc9c9751c851d 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -12,15 +12,10 @@ #include "clang/Basic/TargetID.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" #include "llvm/Option/ArgList.h" -#include "llvm/Support/FileUtilities.h" -#include "llvm/Support/LineIterator.h" #include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" -#define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch" - using namespace clang::driver; using namespace clang::driver::tools; using namespace clang::driver::toolchains; @@ -720,57 +715,6 @@ void AMDGPUToolChain::checkTargetID( } } -llvm::SmallVector, 1> -AMDGPUToolChain::detectSystemGPUs(const ArgList &Args) const { - std::string Program; - if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ)) -Program = A->getValue(); - else -Program = 
GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME); - llvm::SmallString<64> OutputFile; - llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */, - OutputFile); - llvm::FileRemover OutputRemover(OutputFile.c_str()); - llvm::Optional Redirects[] = { - {""}, - StringRef(OutputFile), - {""}, - }; - - if (llvm::sys::ExecuteAndWait(Program.c_str(), {}, {}, Redirects)) { -return {}; - } - - llvm::ErrorOr> OutputBuf = - llvm::MemoryBuffer::getFile(OutputFile.c_str()); - if (!OutputBuf) -return {}; - - llvm::SmallVector, 1> GPUArchs; - for (llvm::line_iterator LineIt(**OutputBuf); !LineIt.is_at_end(); ++LineIt) { -GPUArchs.push_back(*LineIt); - } - return GPUArchs; -} - -SmallString<8> AMDGPUToolChain::getSystemGPUArch(const ArgList &Args) const { - // detect the AMDGPU installed in system - auto GPUArchs = detectSystemGPUs(Args); - if (GPUArchs.empty()) { -return SmallString<8>(
[clang] 3194761 - [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed
Author: Pushpinder Singh Date: 2021-04-21T05:05:49Z New Revision: 3194761d2763a471dc6426a3e77c1445cb9ded3b URL: https://github.com/llvm/llvm-project/commit/3194761d2763a471dc6426a3e77c1445cb9ded3b DIFF: https://github.com/llvm/llvm-project/commit/3194761d2763a471dc6426a3e77c1445cb9ded3b.diff LOG: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed This patch adds new clang tool named amdgpu-arch which uses HSA to detect installed AMDGPU and report back latter's march. This tool is built only if system has HSA installed. The value printed by amdgpu-arch is used to fill -march when latter is not explicitly provided in -Xopenmp-target. Reviewed By: JonChesterfield, gregrodgers Differential Revision: https://reviews.llvm.org/D99949 Added: clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906 clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908 clang/test/Driver/amdgpu-openmp-system-arch-fail.c clang/test/Driver/amdgpu-openmp-system-arch.c clang/tools/amdgpu-arch/AMDGPUArch.cpp clang/tools/amdgpu-arch/CMakeLists.txt Modified: clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/AMDGPU.h clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/tools/CMakeLists.txt Removed: diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 5e580cc4fbb7a..a2ffe1378cb6d 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -67,6 +67,8 @@ def err_drv_no_hip_runtime : Error< "cannot find HIP runtime. Provide its path via --rocm-path, or pass " "-nogpuinc to build without HIP runtime.">; +def err_drv_undetermined_amdgpu_arch : Error< + "Cannot determine AMDGPU architecture: %0. 
Consider passing it via --march.">; def err_drv_cuda_version_unsupported : Error< "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), " "but installation at %3 is %4. Use --cuda-path to specify a different CUDA " diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f93b8a2496e07..dbdb6c6dab3ac 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -924,6 +924,8 @@ def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group, HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">; def hip_path_EQ : Joined<["--"], "hip-path=">, Group, HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">; +def amdgpu_arch_tool_EQ : Joined<["--"], "amdgpu-arch-tool=">, Group, + HelpText<"Tool used for detecting AMD GPU arch in the system.">; def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group, HelpText<"ROCm device library path. 
Alternative to rocm-path.">; def : Joined<["--"], "hip-device-lib-path=">, Alias; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index dc9c9751c851d..328753b21f8ea 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -12,9 +12,16 @@ #include "clang/Basic/TargetID.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Options.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/LineIterator.h" #include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" +#include + +#define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch" using namespace clang::driver; using namespace clang::driver::tools; @@ -715,6 +722,78 @@ void AMDGPUToolChain::checkTargetID( } } +llvm::Error +AMDGPUToolChain::detectSystemGPUs(const ArgList &Args, + SmallVector &GPUArchs) const { + std::string Program; + if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ)) +Program = A->getValue(); + else +Program = GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME); + llvm::SmallString<64> OutputFile; + llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */, + OutputFile); + llvm::FileRemover OutputRemover(OutputFile.c_str()); + llvm::Optional Redirects[] = { + {""}, + StringRef(OutputFile), + {""}, + }; + + std::string ErrorMessage; + if (int Result = llvm::sys::ExecuteAndWait( + Program.c_str(), {}, {}, Redirects, /* SecondsToWait */ 0, + /*MemoryLimit*/ 0, &ErrorMessage)) { +if (Result > 0) { + ErrorMessage = "Exite
[clang] 0ad50bf - Revert "[AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed"
Author: Pushpinder Singh Date: 2021-04-21T08:05:38Z New Revision: 0ad50bf27f892873427bd372a8e7d2e9b234586d URL: https://github.com/llvm/llvm-project/commit/0ad50bf27f892873427bd372a8e7d2e9b234586d DIFF: https://github.com/llvm/llvm-project/commit/0ad50bf27f892873427bd372a8e7d2e9b234586d.diff LOG: Revert "[AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed" This reverts commit 3194761d2763a471dc6426a3e77c1445cb9ded3b. Added: Modified: clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/AMDGPU.h clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/tools/CMakeLists.txt Removed: clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906 clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908 clang/test/Driver/amdgpu-openmp-system-arch-fail.c clang/test/Driver/amdgpu-openmp-system-arch.c clang/tools/amdgpu-arch/AMDGPUArch.cpp clang/tools/amdgpu-arch/CMakeLists.txt diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index a2ffe1378cb6d..5e580cc4fbb7a 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -67,8 +67,6 @@ def err_drv_no_hip_runtime : Error< "cannot find HIP runtime. Provide its path via --rocm-path, or pass " "-nogpuinc to build without HIP runtime.">; -def err_drv_undetermined_amdgpu_arch : Error< - "Cannot determine AMDGPU architecture: %0. Consider passing it via --march.">; def err_drv_cuda_version_unsupported : Error< "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), " "but installation at %3 is %4. 
Use --cuda-path to specify a different CUDA " diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index dbdb6c6dab3ac..f93b8a2496e07 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -924,8 +924,6 @@ def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group, HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">; def hip_path_EQ : Joined<["--"], "hip-path=">, Group, HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">; -def amdgpu_arch_tool_EQ : Joined<["--"], "amdgpu-arch-tool=">, Group, - HelpText<"Tool used for detecting AMD GPU arch in the system.">; def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group, HelpText<"ROCm device library path. Alternative to rocm-path.">; def : Joined<["--"], "hip-device-lib-path=">, Alias; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 328753b21f8ea..dc9c9751c851d 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -12,16 +12,9 @@ #include "clang/Basic/TargetID.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/Options.h" #include "llvm/Option/ArgList.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/FileUtilities.h" -#include "llvm/Support/LineIterator.h" #include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" -#include - -#define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch" using namespace clang::driver; using namespace clang::driver::tools; @@ -722,78 +715,6 @@ void AMDGPUToolChain::checkTargetID( } } -llvm::Error -AMDGPUToolChain::detectSystemGPUs(const ArgList &Args, - SmallVector &GPUArchs) const { - std::string Program; - if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ)) -Program = A->getValue(); - else -Program = 
GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME); - llvm::SmallString<64> OutputFile; - llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */, - OutputFile); - llvm::FileRemover OutputRemover(OutputFile.c_str()); - llvm::Optional Redirects[] = { - {""}, - StringRef(OutputFile), - {""}, - }; - - std::string ErrorMessage; - if (int Result = llvm::sys::ExecuteAndWait( - Program.c_str(), {}, {}, Redirects, /* SecondsToWait */ 0, - /*MemoryLimit*/ 0, &ErrorMessage)) { -if (Result > 0) { - ErrorMessage = "Exited with error code " + std::to_string(Result); -} else if (Result == -1) { - ErrorMessage = "Execute failed: " + ErrorMessage; -} else { - ErrorMessage = "Crashed: " + ErrorMessage; -} - -return llvm::createStringError(std::error_code(), - Program + ": " + Erro
[clang] 722d4d8 - [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed
Author: Pushpinder Singh Date: 2021-04-22T05:20:28Z New Revision: 722d4d8e7585457d407d0639a4ae2610157e06a8 URL: https://github.com/llvm/llvm-project/commit/722d4d8e7585457d407d0639a4ae2610157e06a8 DIFF: https://github.com/llvm/llvm-project/commit/722d4d8e7585457d407d0639a4ae2610157e06a8.diff LOG: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed This patch adds new clang tool named amdgpu-arch which uses HSA to detect installed AMDGPU and report back latter's march. This tool is built only if system has HSA installed. The value printed by amdgpu-arch is used to fill -march when latter is not explicitly provided in -Xopenmp-target. Reviewed By: JonChesterfield, gregrodgers Differential Revision: https://reviews.llvm.org/D99949 Added: clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906 clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908 clang/test/Driver/amdgpu-openmp-system-arch-fail.c clang/test/Driver/amdgpu-openmp-system-arch.c clang/tools/amdgpu-arch/AMDGPUArch.cpp clang/tools/amdgpu-arch/CMakeLists.txt Modified: clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/AMDGPU.h clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/tools/CMakeLists.txt Removed: diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 5e580cc4fbb7a..a2ffe1378cb6d 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -67,6 +67,8 @@ def err_drv_no_hip_runtime : Error< "cannot find HIP runtime. Provide its path via --rocm-path, or pass " "-nogpuinc to build without HIP runtime.">; +def err_drv_undetermined_amdgpu_arch : Error< + "Cannot determine AMDGPU architecture: %0. 
Consider passing it via --march.">; def err_drv_cuda_version_unsupported : Error< "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), " "but installation at %3 is %4. Use --cuda-path to specify a different CUDA " diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 04a05207cc74b..df3049fe40326 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -924,6 +924,8 @@ def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group, HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">; def hip_path_EQ : Joined<["--"], "hip-path=">, Group, HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">; +def amdgpu_arch_tool_EQ : Joined<["--"], "amdgpu-arch-tool=">, Group, + HelpText<"Tool used for detecting AMD GPU arch in the system.">; def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group, HelpText<"ROCm device library path. 
Alternative to rocm-path.">; def : Joined<["--"], "hip-device-lib-path=">, Alias; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index dc9c9751c851d..328753b21f8ea 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -12,9 +12,16 @@ #include "clang/Basic/TargetID.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Options.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/LineIterator.h" #include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" +#include + +#define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch" using namespace clang::driver; using namespace clang::driver::tools; @@ -715,6 +722,78 @@ void AMDGPUToolChain::checkTargetID( } } +llvm::Error +AMDGPUToolChain::detectSystemGPUs(const ArgList &Args, + SmallVector &GPUArchs) const { + std::string Program; + if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ)) +Program = A->getValue(); + else +Program = GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME); + llvm::SmallString<64> OutputFile; + llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */, + OutputFile); + llvm::FileRemover OutputRemover(OutputFile.c_str()); + llvm::Optional Redirects[] = { + {""}, + StringRef(OutputFile), + {""}, + }; + + std::string ErrorMessage; + if (int Result = llvm::sys::ExecuteAndWait( + Program.c_str(), {}, {}, Redirects, /* SecondsToWait */ 0, + /*MemoryLimit*/ 0, &ErrorMessage)) { +if (Result > 0) { + ErrorMessage = "Exite
[clang] 4909cb1 - [OpenMP][AMDGPU] Use AMDGPU_KERNEL calling convention for entry function
Author: Pushpinder Singh Date: 2021-01-06T02:03:30-05:00 New Revision: 4909cb1a0fe9f2494ccbadc2856b6ddfc70051b5 URL: https://github.com/llvm/llvm-project/commit/4909cb1a0fe9f2494ccbadc2856b6ddfc70051b5 DIFF: https://github.com/llvm/llvm-project/commit/4909cb1a0fe9f2494ccbadc2856b6ddfc70051b5.diff LOG: [OpenMP][AMDGPU] Use AMDGPU_KERNEL calling convention for entry function AMDGPU backend requires entry functions/kernels to have AMDGPU_KERNEL calling convention for proper linking. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D94060 Added: Modified: clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/test/OpenMP/amdgcn_target_codegen.cpp Removed: diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index c15f6350b95e..a3b24039365b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6471,6 +6471,8 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); OutlinedFn->setDSOLocal(false); +if (CGM.getTriple().isAMDGCN()) + OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); } else { std::string Name = getName({EntryFnName, "region_id"}); OutlinedFnID = new llvm::GlobalVariable( diff --git a/clang/test/OpenMP/amdgcn_target_codegen.cpp b/clang/test/OpenMP/amdgcn_target_codegen.cpp index 416ed06083b0..701211d449ca 100644 --- a/clang/test/OpenMP/amdgcn_target_codegen.cpp +++ b/clang/test/OpenMP/amdgcn_target_codegen.cpp @@ -9,7 +9,7 @@ #define N 1000 int test_amdgcn_target_tid_threads() { -// CHECK-LABEL: define weak void @{{.*}}test_amdgcn_target_tid_threads +// CHECK-LABEL: define weak amdgpu_kernel void @{{.*}}test_amdgcn_target_tid_threads int arr[N]; @@ -25,7 +25,7 @@ int test_amdgcn_target_tid_threads() { } int test_amdgcn_target_tid_threads_simd() { -// CHECK-LABEL: define weak void 
@{{.*}}test_amdgcn_target_tid_threads_simd +// CHECK-LABEL: define weak amdgpu_kernel void @{{.*}}test_amdgcn_target_tid_threads_simd int arr[N]; ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 07e8582 - [OpenMP][AMDGCN] Enable complex functions
Author: Pushpinder Singh Date: 2021-08-24T12:40:41+05:30 New Revision: 07e85823aa75293888bba98868165f5acc3b2554 URL: https://github.com/llvm/llvm-project/commit/07e85823aa75293888bba98868165f5acc3b2554 DIFF: https://github.com/llvm/llvm-project/commit/07e85823aa75293888bba98868165f5acc3b2554.diff LOG: [OpenMP][AMDGCN] Enable complex functions This patch enables basic complex functionality using the ocml builtins. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D108552 Added: clang/test/Headers/amdgcn-openmp-device-math-complex.c Modified: clang/lib/Headers/__clang_cuda_complex_builtins.h clang/lib/Headers/openmp_wrappers/complex clang/lib/Headers/openmp_wrappers/complex.h Removed: diff --git a/clang/lib/Headers/__clang_cuda_complex_builtins.h b/clang/lib/Headers/__clang_cuda_complex_builtins.h index 2b701fef0ea2a..7bc7bc2ce63e1 100644 --- a/clang/lib/Headers/__clang_cuda_complex_builtins.h +++ b/clang/lib/Headers/__clang_cuda_complex_builtins.h @@ -16,7 +16,7 @@ // to work with CUDA and OpenMP target offloading [in C and C++ mode].) #pragma push_macro("__DEVICE__") -#ifdef __OPENMP_NVPTX__ +#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) #pragma omp declare target #define __DEVICE__ __attribute__((noinline, nothrow, cold, weak)) #else @@ -26,7 +26,7 @@ // To make the algorithms available for C and C++ in CUDA and OpenMP we select // different but equivalent function versions. TODO: For OpenMP we currently // select the native builtins as the overload support for templates is lacking. 
-#if !defined(__OPENMP_NVPTX__) +#if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__) #define _ISNANd std::isnan #define _ISNANf std::isnan #define _ISINFd std::isinf @@ -276,7 +276,7 @@ __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { #undef _fmaxd #undef _fmaxf -#ifdef __OPENMP_NVPTX__ +#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) #pragma omp end declare target #endif diff --git a/clang/lib/Headers/openmp_wrappers/complex b/clang/lib/Headers/openmp_wrappers/complex index dfd6193c97cbd..d6e740df42fb3 100644 --- a/clang/lib/Headers/openmp_wrappers/complex +++ b/clang/lib/Headers/openmp_wrappers/complex @@ -17,9 +17,18 @@ // We require std::math functions in the complex builtins below. #include +#ifdef __NVPTX__ #define __OPENMP_NVPTX__ #include <__clang_cuda_complex_builtins.h> #undef __OPENMP_NVPTX__ +#endif // __NVPTX__ + +#ifdef __AMDGCN__ +#define __OPENMP_AMDGCN__ +#include <__clang_cuda_complex_builtins.h> +#undef __OPENMP_AMDGCN__ +#endif // __AMDGCN__ + #endif // Grab the host header too. @@ -43,4 +52,4 @@ #pragma omp end declare variant -#endif +#endif // _LIBCPP_STD_VER diff --git a/clang/lib/Headers/openmp_wrappers/complex.h b/clang/lib/Headers/openmp_wrappers/complex.h index 15dc415b8126d..7e7c0866426bc 100644 --- a/clang/lib/Headers/openmp_wrappers/complex.h +++ b/clang/lib/Headers/openmp_wrappers/complex.h @@ -17,10 +17,19 @@ // We require math functions in the complex builtins below. #include +#ifdef __NVPTX__ #define __OPENMP_NVPTX__ #include <__clang_cuda_complex_builtins.h> #undef __OPENMP_NVPTX__ #endif +#ifdef __AMDGCN__ +#define __OPENMP_AMDGCN__ +#include <__clang_cuda_complex_builtins.h> +#undef __OPENMP_AMDGCN__ +#endif + +#endif + // Grab the host header too. 
#include_next diff --git a/clang/test/Headers/amdgcn-openmp-device-math-complex.c b/clang/test/Headers/amdgcn-openmp-device-math-complex.c new file mode 100644 index 0..74d4b2485fabc --- /dev/null +++ b/clang/test/Headers/amdgcn-openmp-device-math-complex.c @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK + +#include + +void test_complex_f64(double _Complex a) { +// CHECK-LABEL: define {{.*}}test_complex_f64 +#pragma omp target + { +// CHECK: call { double, double } @__divdc3 +// CHECK: call { double, double } @__muldc3 +(void)(a * (a / a)); + } +} + +// CHECK: define weak {{.*}} @__divdc3 +// CHECK-DAG: call double @__ocml_fabs_f64( +// CHECK-DAG: call i32 @__ocml_isnan_f64( +// CHECK-DAG: call i32 @__ocml_isfinite_f64( +// CHECK-DAG: call double @__ocml_copysign_f64( +// CHECK-DAG: call double @__ocml_scalbn_f64( +// CHECK-DAG: call double @__ocml_logb_f64( + +// CHECK: define weak {{.
[clang] 12dcbf9 - [AMDGPU][OpenMP] Use complex definitions from complex_cmath.h
Author: Pushpinder Singh Date: 2021-09-09T10:55:17+05:30 New Revision: 12dcbf913c49db839b3669db0dcacd5de25facde URL: https://github.com/llvm/llvm-project/commit/12dcbf913c49db839b3669db0dcacd5de25facde DIFF: https://github.com/llvm/llvm-project/commit/12dcbf913c49db839b3669db0dcacd5de25facde.diff LOG: [AMDGPU][OpenMP] Use complex definitions from complex_cmath.h Following nvptx approach, this patch uses complex function definitions from complex_cmath.h. With this patch, ovo passes 23/34 complex mathematical test cases. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D109344 Added: clang/test/Headers/amdgcn-openmp-device-math-complex.cpp Modified: clang/lib/Headers/openmp_wrappers/complex Removed: diff --git a/clang/lib/Headers/openmp_wrappers/complex b/clang/lib/Headers/openmp_wrappers/complex index d6e740df42fb3..1ceecc1af8aec 100644 --- a/clang/lib/Headers/openmp_wrappers/complex +++ b/clang/lib/Headers/openmp_wrappers/complex @@ -45,7 +45,7 @@ #ifndef _LIBCPP_STD_VER #pragma omp begin declare variant match( \ -device = {arch(nvptx, nvptx64)}, \ +device = {arch(amdgcn, nvptx, nvptx64)}, \ implementation = {extension(match_any, allow_templates)}) #include diff --git a/clang/test/Headers/amdgcn-openmp-device-math-complex.cpp b/clang/test/Headers/amdgcn-openmp-device-math-complex.cpp new file mode 100644 index 0..d1a2cf31fabae --- /dev/null +++ b/clang/test/Headers/amdgcn-openmp-device-math-complex.cpp @@ -0,0 +1,85 @@ +// RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -aux-triple 
x86_64-unknown-unknown -o - | FileCheck %s +// expected-no-diagnostics + +#include +#include + +// CHECK: define weak {{.*}} @__muldc3 +// CHECK-DAG: call i32 @__ocml_isnan_f64( +// CHECK-DAG: call i32 @__ocml_isinf_f64( + +// CHECK: define weak {{.*}} @__mulsc3 +// CHECK-DAG: call i32 @__ocml_isnan_f32( +// CHECK-DAG: call i32 @__ocml_isinf_f32( +// CHECK-DAG: call float @__ocml_copysign_f32( + +// CHECK: define weak {{.*}} @__divdc3 +// CHECK-DAG: call i32 @__ocml_isnan_f64( +// CHECK-DAG: call i32 @__ocml_isinf_f64( +// CHECK-DAG: call i32 @__ocml_isfinite_f64( +// CHECK-DAG: call double @__ocml_copysign_f64( +// CHECK-DAG: call double @__ocml_scalbn_f64( +// CHECK-DAG: call double @__ocml_fabs_f64( +// CHECK-DAG: call double @__ocml_logb_f64( + +// CHECK: define weak {{.*}} @__divsc3 +// CHECK-DAG: call i32 @__ocml_isnan_f32( +// CHECK-DAG: call i32 @__ocml_isinf_f32( +// CHECK-DAG: call i32 @__ocml_isfinite_f32( +// CHECK-DAG: call float @__ocml_copysign_f32( +// CHECK-DAG: call float @__ocml_scalbn_f32( +// CHECK-DAG: call float @__ocml_fabs_f32( +// CHECK-DAG: call float @__ocml_logb_f32( + +// We actually check that there are no declarations of non-OpenMP functions. 
+// That is, as long as we don't call an unkown function with a name that +// doesn't start with '__' we are good :) + +// CHECK-NOT: declare.*@[^_] + +void test_scmplx(std::complex a) { +#pragma omp target + { +(void)(a * (a / a)); + } +} + +void test_dcmplx(std::complex a) { +#pragma omp target + { +(void)(a * (a / a)); + } +} + +template +std::complex test_template_math_calls(std::complex a) { + decltype(a) r = a; +#pragma omp target + { +r = std::sin(r); +r = std::cos(r); +r = std::exp(r); +r = std::atan(r); +r = std::acos(r); + } + return r; +} + +std::complex test_scall(std::complex a) { + decltype(a) r; +#pragma omp target + { +r = std::sin(a); + } + return test_template_math_calls(r); +} + +std::complex test_dcall(std::complex a) { + decltype(a) r; +#pragma omp target + { +r = std::exp(a); + } + return test_template_math_calls(r); +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits