[PATCH] D153667: [HIP]: Add gpu-link-output to control link job creation

Jeffrey Byrnes via Phabricator via cfe-commits Mon, 26 Jun 2023 13:11:10 -0700

jrbyrnes updated this revision to Diff 534725.
jrbyrnes marked an inline comment as done.
jrbyrnes added a comment.


Fix tests + add tests. Add phase test for -fgpu-rdc --no-gpu-link-output (these 
are not intended to be used together)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153667/new/

https://reviews.llvm.org/D153667

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/Driver.cpp
  clang/test/Driver/hip-device-compile.hip
  clang/test/Driver/hip-phases.hip
  clang/test/Driver/hip-rdc-device-only.hip

Index: clang/test/Driver/hip-rdc-device-only.hip
===================================================================
--- clang/test/Driver/hip-rdc-device-only.hip
+++ clang/test/Driver/hip-rdc-device-only.hip
@@ -5,7 +5,7 @@
 // RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 // RUN:   -c -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN:   %S/Inputs/hip_multiple_inputs/b.hip --gpu-bundle-output \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip --gpu-bundle-output --gpu-link-output \
 // RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITBC %s
 
 // With `-emit-llvm`, the output should be the same as the aforementioned line
@@ -15,14 +15,14 @@
 // RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 // RUN:   -c -emit-llvm -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN:   %S/Inputs/hip_multiple_inputs/b.hip --gpu-bundle-output \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip --gpu-bundle-output --gpu-link-output \
 // RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITBC %s
 
 // RUN: %clang -### --target=x86_64-linux-gnu \
 // RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 // RUN:   -S -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN:   %S/Inputs/hip_multiple_inputs/b.hip --gpu-bundle-output \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip --gpu-bundle-output --gpu-link-output \
 // RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITLL %s
 
 // With `-emit-llvm`, the output should be the same as the aforementioned line
@@ -32,7 +32,7 @@
 // RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 // RUN:   -S -emit-llvm -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN:   %S/Inputs/hip_multiple_inputs/b.hip --gpu-bundle-output \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip --gpu-bundle-output --gpu-link-output \
 // RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITLL %s
 
 // With `-save-temps`, commane lines for each steps are dumped. For assembly
@@ -43,7 +43,7 @@
 // RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 // RUN:   -S -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN:   %S/Inputs/hip_multiple_inputs/b.hip --gpu-bundle-output \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip --gpu-bundle-output --gpu-link-output \
 // RUN: 2>&1 | FileCheck -check-prefix=SAVETEMP %s
 
 // Check output one file without bundling cause error.
@@ -54,6 +54,12 @@
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu -o %t.s --no-gpu-bundle-output \
 // RUN: 2>&1 | FileCheck -check-prefix=FAIL %s
 
+// RUN: %clang -### --target=x86_64-linux-gnu \
+// RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
+// RUN:   -S -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu -o %t.s --no-gpu-link-output \
+// RUN: 2>&1 | FileCheck -check-prefix=FAIL %s
+
 // COMMON: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // EMITBC-SAME: "-emit-llvm-bc"
Index: clang/test/Driver/hip-phases.hip
===================================================================
--- clang/test/Driver/hip-phases.hip
+++ clang/test/Driver/hip-phases.hip
@@ -244,6 +244,53 @@
 // DASM-NOT: clang-offload-bundler
 // DASM-NOT: host
 
+//
+// Test single gpu architecture with compile to relocatable in device-only
+// compilation mode.
+//
+// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only --no-gpu-link-output 2>&1 \
+// RUN: | FileCheck -check-prefixes=RELOC %s
+// RELOC-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
+// RELOC-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
+// RELOC-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
+// RELOC-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
+// RELOC-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
+// RELOC-NOT: linker
+// RELOC-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P4]]}, object
+
+// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only --no-gpu-link-output -fgpu-rdc 2>&1 \
+// RUN: | FileCheck -check-prefixes=RELOC3 %s
+// RELOC3-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
+// RELOC3-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
+// RELOC3-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
+// RELOC3-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, ir, (device-[[T]], [[ARCH]])
+// RELOC3-NOT: linker
+// RELOC3-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, ir
+
+//
+// Test two gpu architectures with compile to relocatable in device-only
+// compilation mode.
+//
+// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only --no-gpu-link-output 2>&1 \
+// RUN: | FileCheck -check-prefixes=RELOC2 %s
+// RELOC2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
+// RELOC2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
+// RELOC2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
+// RELOC2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
+// RELOC2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
+// RELOC2-NOT: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]])
+// RELOC2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P4]]}, object
+// RELOC2-DAG: [[P6:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH2:gfx900]])
+// RELOC2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
+// RELOC2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
+// RELOC2-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], [[ARCH2]])
+// RELOC2-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]])
+// RELOC2-NOT: linker
+// RELOC2-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P10]]}, object
+
 //
 // Test two gpu architectures with complete compilation in device-only
 // compilation mode.
Index: clang/test/Driver/hip-device-compile.hip
===================================================================
--- clang/test/Driver/hip-device-compile.hip
+++ clang/test/Driver/hip-device-compile.hip
@@ -45,6 +45,14 @@
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN: 2>&1 | FileCheck -check-prefixes=CHECK,ASM,NBUN %s
 
+// Output relocatable.
+// RUN: %clang -c --cuda-device-only -### --target=x86_64-linux-gnu \
+// RUN:   -o a.o -x hip --cuda-gpu-arch=gfx900 --no-gpu-link-output \
+// RUN:   --hip-device-lib=lib1.bc \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,NBUN,RELOC %s
+
 // Output bundled assembly.
 // RUN: %clang -c -S --cuda-device-only -### --target=x86_64-linux-gnu \
 // RUN:   -o a.s -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
@@ -68,6 +76,7 @@
 // LLBUN-SAME: "-o" "{{.*}}.ll"
 // ASM-SAME: "-o" "a.s"
 // ASMBUN-SAME: "-o" "{{.*}}.s"
+// RELOC-SAME: "-o" "a.o"
 // CHECK-SAME: {{".*a.cu"}}
 
 // CHECK-NOT: {{"*.llvm-link"}}
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -3322,16 +3322,23 @@
     // only compilation. Bundle other type of output files only if
     // --gpu-bundle-output is specified for device only compilation.
     std::optional<bool> BundleOutput;
+    std::optional<bool> LinkOutput;
 
   public:
     HIPActionBuilder(Compilation &C, DerivedArgList &Args,
                      const Driver::InputList &Inputs)
         : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {
       DefaultCudaArch = CudaArch::GFX906;
+      if (Args.hasArg(options::OPT_gpu_link_output,
+                      options::OPT_no_gpu_link_output))
+        LinkOutput = Args.hasFlag(options::OPT_gpu_link_output,
+                                  options::OPT_no_gpu_link_output, true);
+
       if (Args.hasArg(options::OPT_gpu_bundle_output,
                       options::OPT_no_gpu_bundle_output))
         BundleOutput = Args.hasFlag(options::OPT_gpu_bundle_output,
-                                    options::OPT_no_gpu_bundle_output, true);
+                                    options::OPT_no_gpu_bundle_output, true) &&
+                       (!LinkOutput || *LinkOutput);
     }
 
     bool canUseBundlerUnbundler() const override { return true; }
@@ -3378,8 +3385,10 @@
       assert(!CompileHostOnly &&
              "Not expecting HIP actions in host-only compilation.");
 
+      bool ShouldLink = !LinkOutput || *LinkOutput;
+
       if (!Relocatable && CurPhase == phases::Backend && !EmitLLVM &&
-          !EmitAsm) {
+          !EmitAsm && ShouldLink) {
         // If we are in backend phase, we attempt to generate the fat binary.
         // We compile each arch to IR and use a link action to generate code
         // object containing ISA. Then we use a special "link" action to create
@@ -3455,6 +3464,8 @@
 
         return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
       } else if (CurPhase == phases::Link) {
+        if (!ShouldLink)
+          return ABRT_Success;
         // Save CudaDeviceActions to DeviceLinkerInputs for each GPU subarch.
         // This happens to each device action originated from each input file.
         // Later on, device actions in DeviceLinkerInputs are used to create
@@ -3492,8 +3503,11 @@
         CudaDeviceActions.clear();
       }
 
-      return (CompileDeviceOnly && CurPhase == FinalPhase) ? ABRT_Ignore_Host
-                                                           : ABRT_Success;
+      return (CompileDeviceOnly &&
+              (CurPhase == FinalPhase ||
+               (!ShouldLink && CurPhase == phases::Assemble)))
+                 ? ABRT_Ignore_Host
+                 : ABRT_Success;
     }
 
     void appendLinkDeviceActions(ActionList &AL) override {
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -1124,6 +1124,10 @@
   Group<f_Group>, HelpText<"Bundle output files of HIP device compilation">;
 def no_gpu_bundle_output : Flag<["--"], "no-gpu-bundle-output">,
   Group<f_Group>, HelpText<"Do not bundle output files of HIP device compilation">;
+def gpu_link_output : Flag<["--"], "gpu-link-output">,
+  Group<f_Group>, HelpText<"Link output files of HIP device compilation">;
+def no_gpu_link_output : Flag<["--"], "no-gpu-link-output">,
+  Group<f_Group>, HelpText<"Do not link output files of HIP device compilation">;
 def cuid_EQ : Joined<["-"], "cuid=">, Flags<[CC1Option]>,
   HelpText<"An ID for compilation unit, which should be the same for the same "
            "compilation unit but different for different compilation units. "

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D153667: [HIP]: Add gpu-link-output to control link job creation

Reply via email to