jhuber6 updated this revision to Diff 426049.
jhuber6 added a comment.
Rebase after landing the cuda support for the new driver.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D124220/new/
https://reviews.llvm.org/D124220
Files:
clang/include/clang/Driver/Options.td
clang/lib/Driver/Driver.cpp
clang/test/Driver/cuda-openmp-driver.cu
clang/test/Driver/openmp-offload-gpu-new.c
Index: clang/test/Driver/openmp-offload-gpu-new.c
===================================================================
--- clang/test/Driver/openmp-offload-gpu-new.c
+++ clang/test/Driver/openmp-offload-gpu-new.c
@@ -3,7 +3,6 @@
///
// REQUIRES: x86-registered-target
-// REQUIRES: powerpc-registered-target
// REQUIRES: nvptx-registered-target
// REQUIRES: amdgpu-registered-target
@@ -50,3 +49,18 @@
// RUN: | FileCheck -check-prefix=DRIVER_EMBEDDING %s
// DRIVER_EMBEDDING: -fembed-offload-object=[[CUBIN:.*\.cubin]],openmp,nvptx64-nvidia-cuda,sm_70
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN: --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY
+// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[OUTPUT:.*]]"
+// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OUTPUT]]"], output: "a.out"
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN: --offload-device-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY
+// CHECK-DEVICE-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
+// CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_ASM:.*]]"
+// CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_ASM]]"], output: "{{.*}}-openmp-nvptx64-nvidia-cuda.o"
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN: --offload-device-only -E -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY-PP
+// CHECK-DEVICE-ONLY-PP: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.*]]"], output: "-"
Index: clang/test/Driver/cuda-openmp-driver.cu
===================================================================
--- clang/test/Driver/cuda-openmp-driver.cu
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -16,3 +16,18 @@
// RUN: %clang -### -nocudalib --offload-new-driver %s 2>&1 | FileCheck -check-prefix RDC %s
// RDC: error: Using '--offload-new-driver' requires '-fgpu-rdc'
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN: --offload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix BINDINGS-HOST %s
+
+// BINDINGS-HOST: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[OUTPUT:.+]]"
+// BINDINGS-HOST: # "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OUTPUT]]"], output: "a.out"
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN: --offload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix BINDINGS-DEVICE %s
+
+// BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX:.+]]"
+// BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX]]"], output: "[[CUBIN:.+]]"
+// BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN]]", "[[PTX]]"], output: "{{.*}}.fatbin"
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -2868,14 +2868,14 @@
: C.getSingleOffloadToolChain<Action::OFK_HIP>());
Arg *PartialCompilationArg = Args.getLastArg(
- options::OPT_cuda_host_only, options::OPT_cuda_device_only,
- options::OPT_cuda_compile_host_device);
- CompileHostOnly = PartialCompilationArg &&
- PartialCompilationArg->getOption().matches(
- options::OPT_cuda_host_only);
- CompileDeviceOnly = PartialCompilationArg &&
- PartialCompilationArg->getOption().matches(
- options::OPT_cuda_device_only);
+ options::OPT_offload_host_only, options::OPT_offload_device_only,
+ options::OPT_offload_host_device);
+ CompileHostOnly =
+ PartialCompilationArg && PartialCompilationArg->getOption().matches(
+ options::OPT_offload_host_only);
+ CompileDeviceOnly =
+ PartialCompilationArg && PartialCompilationArg->getOption().matches(
+ options::OPT_offload_device_only);
EmitLLVM = Args.getLastArg(options::OPT_emit_llvm);
EmitAsm = Args.getLastArg(options::OPT_S);
FixedCUID = Args.getLastArgValue(options::OPT_cuid_EQ);
@@ -4055,11 +4055,6 @@
break;
}
- // Try to build the offloading actions and add the result as a dependency
- // to the host.
- if (UseNewOffloadingDriver)
- Current = BuildOffloadingActions(C, Args, I, Current);
-
// FIXME: Should we include any prior module file outputs as inputs of
// later actions in the same command line?
@@ -4083,6 +4078,11 @@
if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
break;
+ // Try to build the offloading actions and add the result as a dependency
+ // to the host.
+ if (UseNewOffloadingDriver)
+ Current = BuildOffloadingActions(C, Args, I, Current);
+
if (Current->getType() == types::TY_Nothing)
break;
}
@@ -4204,10 +4204,10 @@
// Claim ignored clang-cl options.
Args.ClaimAllArgs(options::OPT_cl_ignored_Group);
- // Claim --cuda-host-only and --cuda-compile-host-device, which may be passed
- // to non-CUDA compilations and should not trigger warnings there.
- Args.ClaimAllArgs(options::OPT_cuda_host_only);
- Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
+ // Claim --offload-host-only and --offload-host-device, which may be passed
+ // to non-CUDA compilations and should not trigger warnings there.
+ Args.ClaimAllArgs(options::OPT_offload_host_only);
+ Args.ClaimAllArgs(options::OPT_offload_host_device);
}
/// Returns the canonical name for the offloading architecture when using HIP or
@@ -4309,14 +4309,22 @@
llvm::opt::DerivedArgList &Args,
const InputTy &Input,
Action *HostAction) const {
- if (!isa<CompileJobAction>(HostAction))
+ const Arg *Mode = Args.getLastArg(options::OPT_offload_host_only,
+ options::OPT_offload_device_only,
+ options::OPT_offload_host_device);
+ const bool HostOnly =
+ Mode && Mode->getOption().matches(options::OPT_offload_host_only);
+ const bool DeviceOnly =
+ Mode && Mode->getOption().matches(options::OPT_offload_device_only);
+
+ // Don't build offloading actions if explicitly disabled or we do not have a
+ // compile action to embed it in. If preprocessing only, ignore embedding.
+ if (HostOnly || !(isa<CompileJobAction>(HostAction) ||
+ getFinalPhase(Args) == phases::Preprocess))
return HostAction;
OffloadAction::DeviceDependences DDeps;
- types::ID InputType = Input.first;
- const Arg *InputArg = Input.second;
-
const Action::OffloadKind OffloadKinds[] = {
Action::OFK_OpenMP, Action::OFK_Cuda, Action::OFK_HIP};
@@ -4331,6 +4339,9 @@
if (ToolChains.empty())
continue;
+ types::ID InputType = Input.first;
+ const Arg *InputArg = Input.second;
+
// Get the product of all bound architectures and toolchains.
SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
for (const ToolChain *TC : ToolChains)
@@ -4355,7 +4366,8 @@
for (Action *&A : DeviceActions) {
A = ConstructPhaseAction(C, Args, Phase, A, Kind);
- if (isa<CompileJobAction>(A) && Kind == Action::OFK_OpenMP) {
+ if (isa<CompileJobAction>(A) && isa<CompileJobAction>(HostAction) &&
+ Kind == Action::OFK_OpenMP) {
// OpenMP offloading has a dependency on the host compile action to
// identify which declarations need to be emitted. This shouldn't be
// collapsed with any other actions so we can use it in the device.
@@ -4389,6 +4401,9 @@
}
}
+ if (DeviceOnly)
+ return C.MakeAction<OffloadAction>(DDeps, types::TY_Nothing);
+
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch=*/nullptr, DDeps);
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -906,14 +906,6 @@
def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">,
InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating "
"'-aux-target-cpu' and '-aux-target-feature'.">;
-def cuda_device_only : Flag<["--"], "cuda-device-only">,
- HelpText<"Compile CUDA code for device only">;
-def cuda_host_only : Flag<["--"], "cuda-host-only">,
- HelpText<"Compile CUDA code for host only. Has no effect on non-CUDA "
- "compilations.">;
-def cuda_compile_host_device : Flag<["--"], "cuda-compile-host-device">,
- HelpText<"Compile CUDA code for both host and device (default). Has no "
- "effect on non-CUDA compilations.">;
def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[NoXarchOption]>,
HelpText<"Include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[NoXarchOption]>,
@@ -2538,6 +2530,19 @@
HelpText<"Use the new driver for offloading compilation.">;
def no_offload_new_driver : Flag<["--"], "no-offload-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
HelpText<"Don't Use the new driver for offloading compilation.">;
+def offload_device_only : Flag<["--"], "offload-device-only">,
+ HelpText<"Only compile for the offloading device.">;
+def offload_host_only : Flag<["--"], "offload-host-only">,
+ HelpText<"Only compile for the offloading host.">;
+def offload_host_device : Flag<["--"], "offload-host-device">,
+ HelpText<"Compile for both the offloading host and device (default).">;
+def cuda_device_only : Flag<["--"], "cuda-device-only">, Alias<offload_device_only>,
+ HelpText<"Compile CUDA code for device only">;
+def cuda_host_only : Flag<["--"], "cuda-host-only">, Alias<offload_host_only>,
+ HelpText<"Compile CUDA code for host only. Has no effect on non-CUDA compilations.">;
+def cuda_compile_host_device : Flag<["--"], "cuda-compile-host-device">, Alias<offload_host_device>,
+ HelpText<"Compile CUDA code for both host and device (default). Has no "
+ "effect on non-CUDA compilations.">;
def fopenmp_new_driver : Flag<["-"], "fopenmp-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
HelpText<"Use the new driver for OpenMP offloading.">;
def fno_openmp_new_driver : Flag<["-"], "fno-openmp-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits