Hahnfeld updated this revision to Diff 123649.
Hahnfeld marked 13 inline comments as done.
Hahnfeld added a comment.
Address reviewers' comments.
https://reviews.llvm.org/D40250
Files:
include/clang/Driver/ToolChain.h
lib/Driver/ToolChain.cpp
lib/Driver/ToolChains/Clang.cpp
lib/Driver/ToolChains/Cuda.cpp
lib/Driver/ToolChains/Cuda.h
test/Driver/openmp-offload-gpu.c
Index: test/Driver/openmp-offload-gpu.c
===================================================================
--- test/Driver/openmp-offload-gpu.c
+++ test/Driver/openmp-offload-gpu.c
@@ -28,43 +28,61 @@
/// ###########################################################################
/// Check cubin file generation and usage by nvlink
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-CUBIN %s
+// RUN: %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
+/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
+// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
+
+// CHK-CUBIN-NVLINK: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
+// CHK-CUBIN-NVLINK-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
+// CHK-CUBIN-NVLINK-NEXT: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
+
+/// ###########################################################################
-// CHK-CUBIN: clang{{.*}}" "-o" "{{.*}}.s"
-// CHK-CUBIN-NEXT: ptxas{{.*}}" "--output-file" {{.*}}.cubin" {{.*}}.s"
-// CHK-CUBIN-NEXT: nvlink" {{.*}}.cubin"
+/// Check unbundlink of assembly file, cubin file generation and usage by nvlink
+// RUN: touch %t.s
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %t.s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK %s
+/// Use DAG to ensure that assembly file has been unbundled.
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX:.*\.s]]"
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=s" {{.*}}"-outputs={{.*}}[[PTX]]{{.*}} "-unbundle"
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
/// ###########################################################################
-/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
-// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-CUBIN-DARWIN %s
+/// Check cubin file generation and bundling
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s -c 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-PTXAS-CUBIN-BUNDLING %s
-// CHK-CUBIN-DARWIN: clang{{.*}}" "-o" "{{.*}}.s"
-// CHK-CUBIN-DARWIN-NEXT: ptxas{{.*}}" "--output-file" {{.*}}.cubin" {{.*}}.s"
-// CHK-CUBIN-DARWIN-NEXT: nvlink" {{.*}}.cubin"
+// CHK-PTXAS-CUBIN-BUNDLING: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
+// CHK-PTXAS-CUBIN-BUNDLING-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
+// CHK-PTXAS-CUBIN-BUNDLING: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-inputs={{.*}}[[CUBIN]]
/// ###########################################################################
-/// Check cubin file generation and usage by nvlink
-// RUN: touch %t1.o
-// RUN: touch %t2.o
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
+/// Check cubin file unbundling and usage by nvlink
+// RUN: touch %t.o
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %t.o 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-CUBIN-UNBUNDLING-NVLINK %s
-// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
+/// Use DAG to ensure that cubin file has been unbundled.
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: nvlink{{.*}}" {{.*}}"[[CUBIN:.*\.cubin]]"
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-outputs={{.*}}[[CUBIN]]{{.*}} "-unbundle"
/// ###########################################################################
-/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
+/// Check cubin file generation and usage by nvlink
// RUN: touch %t1.o
// RUN: touch %t2.o
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
+/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN-DARWIN %s
+// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
-// CHK-TWOCUBIN-DARWIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
+// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
/// ###########################################################################
Index: lib/Driver/ToolChains/Cuda.h
===================================================================
--- lib/Driver/ToolChains/Cuda.h
+++ lib/Driver/ToolChains/Cuda.h
@@ -137,10 +137,12 @@
const ToolChain &HostTC, const llvm::opt::ArgList &Args,
const Action::OffloadKind OK);
- virtual const llvm::Triple *getAuxTriple() const override {
+ const llvm::Triple *getAuxTriple() const override {
return &HostTC.getTriple();
}
+ std::string getInputFilename(const InputInfo &Input) const override;
+
llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const override;
Index: lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -301,10 +301,7 @@
CmdArgs.push_back("--gpu-name");
CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
CmdArgs.push_back("--output-file");
- SmallString<256> OutputFileName(Output.getFilename());
- if (JA.isOffloading(Action::OFK_OpenMP))
- llvm::sys::path::replace_extension(OutputFileName, "cubin");
- CmdArgs.push_back(Args.MakeArgString(OutputFileName));
+ CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
for (const auto& II : Inputs)
CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
@@ -431,11 +428,8 @@
if (!II.isFilename())
continue;
- SmallString<256> Name(II.getFilename());
- llvm::sys::path::replace_extension(Name, "cubin");
-
- const char *CubinF =
- C.addTempFile(C.getArgs().MakeArgString(Name));
+ const char *CubinF = C.addTempFile(
+ C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
CmdArgs.push_back(CubinF);
}
@@ -463,6 +457,20 @@
getProgramPaths().push_back(getDriver().Dir);
}
+std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
+ // Only object files are changed, for example assembly files keep their .s
+ // extensions. CUDA also continues to use .o as they don't use nvlink but
+ // fatbinary.
+ if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
+ return ToolChain::getInputFilename(Input);
+
+ // Replace extension for object files with cubin because nvlink relies on
+ // these particular file names.
+ SmallString<256> Filename(ToolChain::getInputFilename(Input));
+ llvm::sys::path::replace_extension(Filename, "cubin");
+ return Filename.str();
+}
+
void CudaToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Index: lib/Driver/ToolChains/Clang.cpp
===================================================================
--- lib/Driver/ToolChains/Clang.cpp
+++ lib/Driver/ToolChains/Clang.cpp
@@ -5310,12 +5310,15 @@
if (I)
Triples += ',';
+ // Find ToolChain for this input.
Action::OffloadKind CurKind = Action::OFK_Host;
const ToolChain *CurTC = &getToolChain();
const Action *CurDep = JA.getInputs()[I];
if (const auto *OA = dyn_cast<OffloadAction>(CurDep)) {
+ CurTC = nullptr;
OA->doOnEachDependence([&](Action *A, const ToolChain *TC, const char *) {
+ assert(CurTC == nullptr && "Expected one dependence!");
CurKind = A->getOffloadingDeviceKind();
CurTC = TC;
});
@@ -5336,7 +5339,17 @@
for (unsigned I = 0; I < Inputs.size(); ++I) {
if (I)
UB += ',';
- UB += Inputs[I].getFilename();
+
+ // Find ToolChain for this input.
+ const ToolChain *CurTC = &getToolChain();
+ if (const auto *OA = dyn_cast<OffloadAction>(JA.getInputs()[I])) {
+ CurTC = nullptr;
+ OA->doOnEachDependence([&](Action *, const ToolChain *TC, const char *) {
+ assert(CurTC == nullptr && "Expected one dependence!");
+ CurTC = TC;
+ });
+ }
+ UB += CurTC->getInputFilename(Inputs[I]);
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
@@ -5396,13 +5409,7 @@
for (unsigned I = 0; I < Outputs.size(); ++I) {
if (I)
UB += ',';
- SmallString<256> OutputFileName(Outputs[I].getFilename());
- // Change extension of target files for OpenMP offloading
- // to NVIDIA GPUs.
- if (DepInfo[I].DependentToolChain->getTriple().isNVPTX() &&
- JA.isOffloading(Action::OFK_OpenMP))
- llvm::sys::path::replace_extension(OutputFileName, "cubin");
- UB += OutputFileName;
+ UB += DepInfo[I].DependentToolChain->getInputFilename(Outputs[I]);
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
CmdArgs.push_back("-unbundle");
Index: lib/Driver/ToolChain.cpp
===================================================================
--- lib/Driver/ToolChain.cpp
+++ lib/Driver/ToolChain.cpp
@@ -215,6 +215,10 @@
}
}
+std::string ToolChain::getInputFilename(const InputInfo &Input) const {
+ return Input.getFilename();
+}
+
bool ToolChain::IsUnwindTablesDefault(const ArgList &Args) const {
return false;
}
Index: include/clang/Driver/ToolChain.h
===================================================================
--- include/clang/Driver/ToolChain.h
+++ include/clang/Driver/ToolChain.h
@@ -40,6 +40,7 @@
class Compilation;
class CudaInstallationDetector;
class Driver;
+ class InputInfo;
class JobAction;
class RegisterEffectiveTriple;
class SanitizerArgs;
@@ -172,6 +173,11 @@
/// while the aux triple is the host (CPU) toolchain, e.g. x86-linux-gnu.
virtual const llvm::Triple *getAuxTriple() const { return nullptr; }
+ /// Some toolchains need to modify the file name, for example to replace the
+ /// extension for object files with .cubin for OpenMP offloading to Nvidia
+ /// GPUs.
+ virtual std::string getInputFilename(const InputInfo &Input) const;
+
llvm::Triple::ArchType getArch() const { return Triple.getArch(); }
StringRef getArchName() const { return Triple.getArchName(); }
StringRef getPlatform() const { return Triple.getVendorName(); }
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits