jhuber6 updated this revision to Diff 426521.
jhuber6 added a comment.
Changing slightly, I'm using `getArgsForToolChain` to only get the
`--offload-arch` options for that toolchain. This lets us qualify it with
options like `-Xopenmp-target=` so we can now specify architectures
per-toolchain without it causing an error. For example, the following should
work:
clang input.c -fopenmp -fopenmp-targets=nvptx64,amdgcn
-Xopenmp-target=amdgcn --offload-arch=gfx803 -Xopenmp-target=nvptx64
--offload-arch=sm_70 -c
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D124721/new/
https://reviews.llvm.org/D124721
Files:
clang/lib/Driver/Driver.cpp
clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
clang/lib/Driver/ToolChains/Cuda.cpp
clang/test/Driver/amdgpu-openmp-toolchain-new.c
clang/test/Driver/openmp-offload-gpu-new.c
Index: clang/test/Driver/openmp-offload-gpu-new.c
===================================================================
--- clang/test/Driver/openmp-offload-gpu-new.c
+++ clang/test/Driver/openmp-offload-gpu-new.c
@@ -10,6 +10,10 @@
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
// RUN: | FileCheck %s
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN: --offload-arch=sm_52 \
+// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
+// RUN: | FileCheck %s
// verify the tools invocations
// CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
@@ -40,6 +44,27 @@
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ]]"], output: "[[HOST_OBJ:.*]]"
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]"
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[HOST_OBJ:.*]]"
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70 \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908 \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NVIDIA-AMDGPU
+
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
+// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[NVIDIA_PTX:.+]]"
+// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[NVIDIA_PTX]]"], output: "[[NVIDIA_CUBIN:.+]]"
+// CHECK-NVIDIA-AMDGPU: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[AMD_BC:.+]]"
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[NVIDIA_CUBIN]]", "[[AMD_BC]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
// CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm"
Index: clang/test/Driver/amdgpu-openmp-toolchain-new.c
===================================================================
--- clang/test/Driver/amdgpu-openmp-toolchain-new.c
+++ clang/test/Driver/amdgpu-openmp-toolchain-new.c
@@ -3,6 +3,9 @@
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \
// RUN: | FileCheck %s
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN: --offload-arch=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \
+// RUN: | FileCheck %s
// verify the tools invocations
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
@@ -34,6 +37,7 @@
// CHECK-NOGPULIB-NOT: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgpu-gfx803.bc"{{.*}}
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa --offload-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
// CHECK-BINDINGS: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.*]]"
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_BC]]"], output: "[[HOST_OBJ:.*]]"
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -847,10 +847,10 @@
if (!llvm::is_contained(*DAL, A))
DAL->append(A);
- StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
- if (Arch.empty())
+ if (!DAL->hasArg(options::OPT_march_EQ))
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
- CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
+ !BoundArch.empty() ? BoundArch
+ : CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
return DAL;
}
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -307,9 +307,10 @@
if (!llvm::is_contained(*DAL, A))
DAL->append(A);
- std::string Arch = DAL->getLastArgValue(options::OPT_march_EQ).str();
- if (Arch.empty()) {
- checkSystemForAMDGPU(Args, *this, Arch);
+ if (!DAL->hasArg(options::OPT_march_EQ)) {
+ std::string Arch = BoundArch.str();
+ if (BoundArch.empty())
+ checkSystemForAMDGPU(Args, *this, Arch);
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch);
}
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4213,17 +4213,20 @@
/// Returns the canonical name for the offloading architecture when using HIP or
/// CUDA.
static StringRef getCanonicalArchString(Compilation &C,
- llvm::opt::DerivedArgList &Args,
+ const llvm::opt::DerivedArgList &Args,
StringRef ArchStr,
- Action::OffloadKind Kind) {
- if (Kind == Action::OFK_Cuda) {
+ Action::OffloadKind Kind,
+ const ToolChain *TC) {
+ if (Kind == Action::OFK_Cuda ||
+ (Kind == Action::OFK_OpenMP && TC->getTriple().isNVPTX())) {
CudaArch Arch = StringToCudaArch(ArchStr);
if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
return StringRef();
}
return Args.MakeArgStringRef(CudaArchToString(Arch));
- } else if (Kind == Action::OFK_HIP) {
+ } else if (Kind == Action::OFK_HIP ||
+ (Kind == Action::OFK_OpenMP && TC->getTriple().isAMDGPU())) {
llvm::StringMap<bool> Features;
// getHIPOffloadTargetTriple() is known to return valid value as it has
// been called successfully in the CreateOffloadingDeviceToolChains().
@@ -4238,7 +4241,8 @@
return Args.MakeArgStringRef(
getCanonicalTargetID(Arch.getValue(), Features));
}
- return StringRef();
+ // If the input isn't CUDA or HIP just return the architecture.
+ return ArchStr;
}
/// Checks if the set offloading architectures does not conflict. Returns the
@@ -4258,12 +4262,8 @@
/// This function returns a set of bound architectures, if there are no bound
/// architctures we return a set containing only the empty string.
static llvm::DenseSet<StringRef>
-getOffloadArchs(Compilation &C, llvm::opt::DerivedArgList &Args,
- Action::OffloadKind Kind) {
-
- // If this is OpenMP offloading we don't use a bound architecture.
- if (Kind == Action::OFK_OpenMP)
- return llvm::DenseSet<StringRef>{StringRef()};
+getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
+ Action::OffloadKind Kind, const ToolChain *TC) {
// --offload and --offload-arch options are mutually exclusive.
if (Args.hasArgNoClaim(options::OPT_offload_EQ) &&
@@ -4279,12 +4279,12 @@
llvm::DenseSet<StringRef> Archs;
for (auto &Arg : Args) {
if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) {
- Archs.insert(getCanonicalArchString(C, Args, Arg->getValue(), Kind));
+ Archs.insert(getCanonicalArchString(C, Args, Arg->getValue(), Kind, TC));
} else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) {
if (Arg->getValue() == StringRef("all"))
Archs.clear();
else
- Archs.erase(getCanonicalArchString(C, Args, Arg->getValue(), Kind));
+ Archs.erase(getCanonicalArchString(C, Args, Arg->getValue(), Kind, TC));
}
}
@@ -4300,6 +4300,11 @@
Archs.insert(CudaArchToString(CudaArch::CudaDefault));
else if (Kind == Action::OFK_HIP)
Archs.insert(CudaArchToString(CudaArch::HIPDefault));
+ else if (Kind == Action::OFK_OpenMP)
+ Archs.insert(StringRef());
+ } else {
+ Args.ClaimAllArgs(options::OPT_offload_arch_EQ);
+ Args.ClaimAllArgs(options::OPT_no_offload_arch_EQ);
}
return Archs;
@@ -4345,7 +4350,8 @@
// Get the product of all bound architectures and toolchains.
SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
for (const ToolChain *TC : ToolChains)
- for (StringRef Arch : getOffloadArchs(C, Args, Kind))
+ for (StringRef Arch : getOffloadArchs(
+ C, C.getArgsForToolChain(TC, "generic", Kind), Kind, TC))
TCAndArchs.push_back(std::make_pair(TC, Arch));
for (unsigned I = 0, E = TCAndArchs.size(); I != E; ++I)
@@ -4374,9 +4380,9 @@
HostAction->setCannotBeCollapsedWithNextDependentAction();
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
- /*BoundArch=*/nullptr, Kind);
+ TCAndArch->second.data(), Kind);
OffloadAction::DeviceDependences DDep;
- DDep.add(*A, *TCAndArch->first, /*BoundArch=*/nullptr, Kind);
+ DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
A = C.MakeAction<OffloadAction>(HDep, DDep);
} else if (isa<AssembleJobAction>(A) && Kind == Action::OFK_Cuda) {
// The Cuda toolchain uses fatbinary as the linker phase to bundle the
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits