This revision was automatically updated to reflect the committed changes.
Closed by commit rG77df5a8283ed: [HIP] Move HIP Linking Logic into HIP
ToolChain (authored by ashi1).
Herald added a subscriber: cfe-commits.
Changed prior to commit:
https://reviews.llvm.org/D81963?vs=271210&id=272525#toc
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D81963/new/
https://reviews.llvm.org/D81963
Files:
clang/lib/Driver/Driver.cpp
clang/lib/Driver/ToolChains/CommonArgs.cpp
clang/lib/Driver/ToolChains/CommonArgs.h
clang/lib/Driver/ToolChains/Gnu.cpp
clang/lib/Driver/ToolChains/HIP.cpp
clang/lib/Driver/ToolChains/HIP.h
clang/test/Driver/hip-binding.hip
clang/test/Driver/hip-link-save-temps.hip
clang/test/Driver/hip-link-shared-library.hip
clang/test/Driver/hip-phases.hip
clang/test/Driver/hip-save-temps.hip
clang/test/Driver/hip-toolchain-rdc-separate.hip
clang/test/Driver/hip-toolchain-rdc.hip
Index: clang/test/Driver/hip-toolchain-rdc.hip
===================================================================
--- clang/test/Driver/hip-toolchain-rdc.hip
+++ clang/test/Driver/hip-toolchain-rdc.hip
@@ -12,7 +12,23 @@
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s
-// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// emit objects for host side path
+// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"
+// CHECK-SAME: "-emit-obj"
+// CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
+// CHECK-SAME: {{.*}} "-o" [[A_OBJ_HOST:".*o"]] "-x" "hip"
+// CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]]
+
+// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"
+// CHECK-SAME: "-emit-obj"
+// CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
+// CHECK-SAME: {{.*}} "-o" [[B_OBJ_HOST:".*o"]] "-x" "hip"
+// CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]]
+
+// generate image for device side path on gfx803
+// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
// CHECK-SAME: "-emit-llvm-bc"
// CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
@@ -21,7 +37,7 @@
// CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
// CHECK-SAME: "-target-cpu" "gfx803"
// CHECK-SAME: {{.*}} "-o" [[A_BC1:".*bc"]] "-x" "hip"
-// CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]]
+// CHECK-SAME: {{.*}} [[A_SRC]]
// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
@@ -32,7 +48,7 @@
// CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
// CHECK-SAME: "-target-cpu" "gfx803"
// CHECK-SAME: {{.*}} "-o" [[B_BC1:".*bc"]] "-x" "hip"
-// CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]]
+// CHECK-SAME: {{.*}} [[B_SRC]]
// CHECK-NOT: "*.llvm-link"
// CHECK-NOT: ".*opt"
@@ -40,6 +56,7 @@
// CHECK: {{".*lld.*"}} {{.*}} "-mllvm" "-amdgpu-internalize-symbols"
// CHECK-SAME: "-o" "[[IMG_DEV1:.*.out]]" [[A_BC1]] [[B_BC1]]
+// generate image for device side path on gfx900
// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
// CHECK-SAME: "-emit-llvm-bc"
@@ -66,23 +83,13 @@
// CHECK: {{".*lld.*"}} {{.*}} "-mllvm" "-amdgpu-internalize-symbols"
// CHECK-SAME: "-o" "[[IMG_DEV2:.*.out]]" [[A_BC2]] [[B_BC2]]
-// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
-// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"
-// CHECK-SAME: "-emit-obj"
-// CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
-// CHECK-SAME: {{.*}} "-o" [[A_OBJ_HOST:".*o"]] "-x" "hip"
-// CHECK-SAME: {{.*}} [[A_SRC]]
-
-// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
-// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"
-// CHECK-SAME: "-emit-obj"
-// CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
-// CHECK-SAME: {{.*}} "-o" [[B_OBJ_HOST:".*o"]] "-x" "hip"
-// CHECK-SAME: {{.*}} [[B_SRC]]
-
+// combine images generated into hip fat binary object
// CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
// CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
// CHECK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*hipfb]]"
-// CHECK: [[LD:".*ld.*"]] {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]]
-// CHECK-SAME: {{.*}} "-T" "{{.*}}.lk"
+// CHECK: [[MC:".*llvm-mc"]] "-triple" "amdgcn-amd-amdhsa"
+// CHECK-SAME: "-o" [[OBJBUNDLE:".*o"]] "{{.*}}.mcin" "--filetype=obj"
+
+// output the executable
+// CHECK: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
Index: clang/test/Driver/hip-toolchain-rdc-separate.hip
===================================================================
--- clang/test/Driver/hip-toolchain-rdc-separate.hip
+++ clang/test/Driver/hip-toolchain-rdc-separate.hip
@@ -86,12 +86,22 @@
// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
-// LINK-SAME: "-inputs=[[A_O:.*a.o]]" "-outputs=[[A_OBJ_HOST:.*o]],[[A_BC1:.*o]],[[A_BC2:.*o]]"
+// LINK-SAME: "-inputs=[[A_O:.*a.o]]" "-outputs=[[A_OBJ_HOST:.*o]],{{.*o}},{{.*o}}"
// LINK: "-unbundle"
// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
-// LINK-SAME: "-inputs=[[B_O:.*b.o]]" "-outputs=[[B_OBJ_HOST:.*o]],[[B_BC1:.*o]],[[B_BC2:.*o]]"
+// LINK-SAME: "-inputs=[[B_O:.*b.o]]" "-outputs=[[B_OBJ_HOST:.*o]],{{.*o}},{{.*o}}"
+// LINK: "-unbundle"
+
+// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
+// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
+// LINK-SAME: "-inputs=[[A_O]]" "-outputs={{.*o}},[[A_BC1:.*o]],[[A_BC2:.*o]]"
+// LINK: "-unbundle"
+
+// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
+// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
+// LINK-SAME: "-inputs=[[B_O]]" "-outputs={{.*o}},[[B_BC1:.*o]],[[B_BC2:.*o]]"
// LINK: "-unbundle"
// LINK-NOT: "*.llvm-link"
@@ -110,5 +120,8 @@
// LINK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
// LINK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*hipfb]]"
-// LINK: [[LD:".*ld.*"]] {{.*}} "[[A_OBJ_HOST]]" "[[B_OBJ_HOST]]"
-// LINK-SAME: {{.*}} "-T" "{{.*}}.lk"
+// LINK: {{".*llvm-mc.*"}} "-triple" "amdgcn-amd-amdhsa" "-o"
+// LINK-SAME: "[[OBJBUNDLE:.*o]]" "{{.*}}.mcin" "--filetype=obj"
+
+// LINK: [[LD:".*ld.*"]] {{.*}} "-o" "a.out" {{.*}} "[[A_OBJ_HOST]]"
+// LINK-SAME: "[[B_OBJ_HOST]]" "[[OBJBUNDLE]]"
Index: clang/test/Driver/hip-save-temps.hip
===================================================================
--- clang/test/Driver/hip-save-temps.hip
+++ clang/test/Driver/hip-save-temps.hip
@@ -25,13 +25,20 @@
// -fgpu-rdc without -o
// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -save-temps \
// RUN: -fgpu-rdc --cuda-gpu-arch=gfx900 %s 2>&1 | \
-// RUN: FileCheck -check-prefixes=CHECK,RDC,RDCL,RDC-NOUT,NOUT %s
+// RUN: FileCheck -check-prefixes=CHECK,RDC,RDCL,NOUT %s
// -fgpu-rdc with -o
-// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -save-temps \
-// RUN: -o executable -fgpu-rdc --cuda-gpu-arch=gfx900 %s 2>&1 | \
-// RUN: FileCheck -check-prefixes=CHECK,RDC,RDCL,RDC-WOUT,WOUT %s
+// UN: %clang -### -target x86_64-linux-gnu -nogpulib -save-temps \
+// UN: -o executable -fgpu-rdc --cuda-gpu-arch=gfx900 %s 2>&1 | \
+// UN: FileCheck -check-prefixes=CHECK,RDC,RDCL,WOUT %s
+
+// -fgpu-rdc host object path
+// RDCL: "{{.*clang.*}}" "-cc1" {{.*}} "-E" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui"
+// RDCL: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc"
+// RDCL: "{{.*clang.*}}" "-cc1" {{.*}} "-S" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s"
+// RDCL: "{{.*clang.*}}" "-cc1as" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.o"
+// device object paths
// CHECK: {{".*clang.*"}} "-cc1" {{.*}} "-E" {{.*}} [[CPU:"-target-cpu" "gfx900"]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.cui"
// NORDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-disable-llvm-passes" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc"
// RDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-disable-llvm-passes" {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp.bc"
@@ -43,22 +50,26 @@
// RDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc"
// NORDC: {{".*clang.*"}} "-cc1as" {{.*}} "-filetype" "obj" {{.*}} [[CPU]] {{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.o"
-// CHECK-NOT: llvm-link
-// CHECK-NOT: opt
-// CHECK-NOT: llc
+// CHECK-NOT: "{{.*}}llvm-link"
+// CHECK-NOT: "{{.*}}opt"
+// CHECK-NOT: "{{.*}}llc"
// NORDC: {{.*lld.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.out"
-
// RDCL: "{{.*lld.*}}" {{.*}} "-mllvm" "-amdgpu-internalize-symbols"
// RDCL-SAME: "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900"
+// RDCC: "{{.*clang.*}}" "-cc1" {{.*}} "-E" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui"
+// RDCC: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc"
+// RDCC: "{{.*clang.*}}" "-cc1" {{.*}} "-S" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s"
+// RDCC: "{{.*clang.*}}" "-cc1as" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.o"
+// RDCC: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps.o"
+// RDCL: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps-hip-amdgcn-amd-amdhsa.hipfb"
+// RDCL: {{.*}}llvm-mc{{.*}}"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa.o" "hip-save-temps-hip-amdgcn-amd-amdhsa.mcin" "--filetype=obj"
-// NORDC: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps.hip-hip-amdgcn-amd-amdhsa.hipfb"
-// CHECK: "{{.*clang.*}}" "-cc1" {{.*}} "-E" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui"
+// -fno-gpu-rdc host object path
+// NORDC: "{{.*clang.*}}" "-cc1" {{.*}} "-E" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.cui"
// NORDC: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-fcuda-include-gpubinary" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc"
-// RDC: "{{.*clang.*}}" "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.bc"
-// CHECK: "{{.*clang.*}}" "-cc1" {{.*}} "-S" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s"
-// CHECK: "{{.*clang.*}}" "-cc1as" {{.*}} "-o" "hip-save-temps{{.*}}.o"
-// RDCC: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=hip-save-temps.o"
-// RDC-NOUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=a.out.hipfb"
-// RDC-WOUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=executable.hipfb"
-// NOUT: "{{.*ld.*}}" {{.*}} "-o" "a.out"
-// WOUT: "{{.*ld.*}}" {{.*}} "-o" "executable"
\ No newline at end of file
+// NORDC: "{{.*clang.*}}" "-cc1" {{.*}} "-S" {{.*}} "-o" "hip-save-temps-host-x86_64-unknown-linux-gnu.s"
+// NORDC: "{{.*clang.*}}" "-cc1as" {{.*}} "-o" "hip-save-temps{{.*}}.o"
+
+// output to default a.out or -o specified file name
+// NOUT: {{.*}}ld{{.*}}"-o" "a.out"
+// WOUT: {{.*}}ld{{.*}}"-o" "executable"
Index: clang/test/Driver/hip-phases.hip
===================================================================
--- clang/test/Driver/hip-phases.hip
+++ clang/test/Driver/hip-phases.hip
@@ -21,25 +21,27 @@
// BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
// BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
// BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
+// RDC-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
+// RDC-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
// BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH:gfx803]])
// BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
// BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]])
// NRD-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])
// NRD-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])
-// NRD-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]])
-// NRD-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image
+// RDC-DAG: [[P7:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]])
+// BIN-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]])
+// BIN-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image
// NRD-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-[[T]])
-// RDC-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]])
-// RDC-DAG: [[P10:[0-9]+]]: linker, {[[P6]]}, image, (device-[[T]], [[ARCH]])
+// RDC-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, object, (device-[[T]])
-// NRD-DAG: [[P12:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir
+// NRD-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir
+// RDC-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, object
+// NRD-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
+// NRD-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
+// NRD-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]])
+// RDC-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P11]]}, image, (host-[[T]])
-// NRD-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (host-[[T]])
-// RDC-DAG: [[P13:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
-// BIN-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (host-[[T]])
-// BIN-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (host-[[T]])
-// RDC-DAG: [[P16:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P15]]}, "device-[[T]] (amdgcn-amd-amdhsa:gfx803)" {[[P10]]}, image
//
// Test single gpu architecture up to the assemble phase.
//
@@ -56,59 +58,84 @@
// ASM-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (host-[[T]])
//
-// Test two gpu architectures with complete compilation.
+// Test two gpu architectures with complete compilation with -fno-gpu-rdc.
//
// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=BIN2,NRD2,CL2 %s
+// RUN: | FileCheck -check-prefixes=NRD2,NCL2 %s
// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -c 2>&1 \
-// RUN: | FileCheck -check-prefixes=BIN2,NRD2 %s
+// RUN: | FileCheck -check-prefixes=NRD2 %s
+// NRD2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
+// NRD2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
+// NRD2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
+
+// NRD2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH1:gfx803]])
+// NRD2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
+// NRD2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
+// NRD2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]])
+// NRD2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]])
+// NRD2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH1]])
+// NRD2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image
+
+// NRD2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
+// NRD2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
+// NRD2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]])
+// NRD2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]])
+// NRD2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]])
+// NRD2-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (device-[[T]], [[ARCH2]])
+// NRD2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image
+// NRD2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, hip-fatbin, (device-[[T]])
+// NRD2-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, ir
+// NRD2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]])
+// NRD2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
+// NCL2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]])
+
+//
+// Test two gpu architectures with complete compilation with -fgpu-rdc.
+//
// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -fgpu-rdc 2>&1 \
-// RUN: | FileCheck -check-prefixes=BIN2,RDC2,CL2,RCL2 %s
+// RUN: | FileCheck -check-prefixes=RDC2,CL2,RCL2 %s
// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -fgpu-rdc -c 2>&1 \
-// RUN: | FileCheck -check-prefixes=BIN2,RDC2,RC2 %s
+// RUN: | FileCheck -check-prefixes=RDC2,RC2 %s
-// BIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
-// BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
-// BIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
+// RCL2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
+// RCL2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
+// RCL2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
+// RCL2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
+// RCL2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
-// BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH1:gfx803]])
-// BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
-// BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
-// NRD2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]])
+// RDC2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]])
+// RDC2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
+// RDC2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
// RDC2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH1]])
-// NRD2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]])
-// NRD2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH1]])
// RCL2-DAG: [[P8:[0-9]+]]: linker, {[[P6]]}, image, (device-[[T]], [[ARCH1]])
-// NRD2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image
+// RCL2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image
// RC2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P6]]}, ir
-// BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
-// BIN2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
-// BIN2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]])
-// NRD2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]])
+// RDC2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
+// RDC2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
+// RDC2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]])
// RDC2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, ir, (device-[[T]], [[ARCH2]])
-// NRD2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]])
-// NRD2-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (device-[[T]], [[ARCH2]])
// RCL2-DAG: [[P15:[0-9]+]]: linker, {[[P13]]}, image, (device-[[T]], [[ARCH2]])
-// NRD2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image
+// RCL2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image
// RC2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P13]]}, ir
-// NRD2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, hip-fatbin, (device-[[T]])
+// RC2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
+// RC2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
+// RC2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
+// RC2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
+// RC2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
-// NRD2-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, ir
-// NRD2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]])
-// RDC2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
-// BIN2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
-// CL2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]])
-// RCL2-DAG: [[P22:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P21]]}, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image
-// RC2-DAG: [[P22:[0-9]+]]: clang-offload-bundler, {[[P9]], [[P16]], [[P20]]}, object, (host-[[T]])
+// RCL2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, object, (device-[[T]])
+// RCL2-DAG: [[P22:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, object
+// RCL2-DAG: [[P23:[0-9]+]]: linker, {[[P20]], [[P22]]}, image, (host-[[T]])
+// RC2-DAG: [[P23:[0-9]+]]: clang-offload-bundler, {[[P9]], [[P16]], [[P20]]}, object, (host-[[T]])
//
// Test two gpu architecturess up to the assemble phase.
@@ -253,8 +280,13 @@
// RL2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, object, (host-[[T]])
// L2-DAG: [[P2:[0-9]+]]: input, "{{.*}}obj2.o", object, (host-[[T]])
// RL2-DAG: [[P3:[0-9]+]]: clang-offload-unbundler, {[[P2]]}, object, (host-[[T]])
-// NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image, (host-[[T]])
-// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (host-[[T]])
-// RL2-DAG: [[P5:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH1:gfx803]])
+
+// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH1:gfx803]])
+// RL2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P4]]}, image
// RL2-DAG: [[P6:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH2:gfx900]])
-// RL2-DAG: [[P7:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P4]]}, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P5]]}, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, image
+// RL2-DAG: [[P7:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, image
+// RL2-DAG: [[P8:[0-9]+]]: linker, {[[P5]], [[P7]]}, object, (device-[[T]])
+// RL2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, object
+
+// NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image, (host-[[T]])
+// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]], [[P9]]}, image, (host-[[T]])
Index: clang/test/Driver/hip-link-shared-library.hip
===================================================================
--- clang/test/Driver/hip-link-shared-library.hip
+++ clang/test/Driver/hip-link-shared-library.hip
@@ -3,10 +3,13 @@
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o %S/Inputs/in.so \
// RUN: -fgpu-rdc 2>&1 | FileCheck %s
-// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"]
-// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], output: "[[IMG2:.*out]]"
+// CHECK: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[HOSTOBJ:.*o]]", "{{.*o}}", "{{.*o}}"]
+// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN]]"], outputs: ["{{.*o}}", "[[DOBJ1:.*o]]", "[[DOBJ2:.*o]]"]
+// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ1]]"], output: "[[IMG1:.*out]]"
// CHECK-NOT: offload bundler
-// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ3]]"], output: "[[IMG3:.*out]]"
+// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ2]]"], output: "[[IMG2:.*out]]"
// CHECK-NOT: offload bundler
-// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[OBJ1]]", "{{.*}}/Inputs/in.so", "[[IMG2]]", "[[IMG3]]"], output: "a.out"
+// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[IMG1]]", "[[IMG2]]"], output: "[[FATBINOBJ:.*o]]"
+// CHECK-NOT: offload bundler
+// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[HOSTOBJ]]", "{{.*}}/Inputs/in.so", "[[FATBINOBJ]]"], output: "a.out"
Index: clang/test/Driver/hip-link-save-temps.hip
===================================================================
--- clang/test/Driver/hip-link-save-temps.hip
+++ clang/test/Driver/hip-link-save-temps.hip
@@ -27,7 +27,7 @@
// CHECK-SAME: "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx900" "obj1-hip-amdgcn-amd-amdhsa-gfx900.o" "obj2-hip-amdgcn-amd-amdhsa-gfx900.o"
// CHECK: "{{.*lld.*}}" {{.*}} "-mllvm" "-amdgpu-internalize-symbols"
// CHECK-SAME: "-o" "a.out-hip-amdgcn-amd-amdhsa-gfx906" "obj1-hip-amdgcn-amd-amdhsa-gfx906.o" "obj2-hip-amdgcn-amd-amdhsa-gfx906.o"
-// OUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=executable.hipfb"
-// OUT: "{{.*ld.*}}" {{.*}} "-o" "executable" {{.*}} "-T" "executable.lk"
-// NOUT: "{{.*clang-offload-bundler.*}}" {{.*}} "-outputs=a.out.hipfb"
-// NOUT: "{{.*ld.*}}" {{.*}} "-o" "a.out" {{.*}} "-T" "a.out.lk"
+// CHECK: {{".*llvm-mc.*"}} "-triple" "amdgcn-amd-amdhsa" "-o"
+// CHECK-SAME: "[[OBJBUNDLE:.*.o]]" "{{.*}}.mcin" "--filetype=obj"
+// OUT: "{{.*ld.*}}" {{.*}} "-o" "executable" {{.*}} "[[OBJBUNDLE]]"
+// NOUT: "{{.*ld.*}}" {{.*}} "-o" "a.out" {{.*}} "[[OBJBUNDLE]]"
Index: clang/test/Driver/hip-binding.hip
===================================================================
--- clang/test/Driver/hip-binding.hip
+++ clang/test/Driver/hip-binding.hip
@@ -25,12 +25,15 @@
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\
// RUN: 2>&1 | FileCheck %s
-// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"]
-// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], output: "[[IMG2:.*out]]"
+// CHECK: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[HOSTOBJ:.*o]]", "{{.*o}}", "{{.*o}}"]
+// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN]]"], outputs: ["{{.*o}}", "[[DOBJ1:.*o]]", "[[DOBJ2:.*o]]"]
+// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ1]]"], output: "[[IMG1:.*out]]"
// CHECK-NOT: offload bundler
-// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ3]]"], output: "[[IMG3:.*out]]"
+// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ2]]"], output: "[[IMG2:.*out]]"
// CHECK-NOT: offload bundler
-// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[OBJ1]]", "[[IMG2]]", "[[IMG3]]"], output: "a.out"
+// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[IMG1]]", "[[IMG2]]"], output: "[[FATBINOBJ:.*o]]"
+// CHECK-NOT: offload bundler
+// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[HOSTOBJ]]", "[[FATBINOBJ]]"], output: "a.out"
// RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\
Index: clang/lib/Driver/ToolChains/HIP.h
===================================================================
--- clang/lib/Driver/ToolChains/HIP.h
+++ clang/lib/Driver/ToolChains/HIP.h
@@ -42,6 +42,13 @@
void constructLldCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs, const InputInfo &Output,
const llvm::opt::ArgList &Args) const;
+
+ // Construct command for creating Object from HIP fatbin.
+ void constructGenerateObjFileFromHIPFatBinary(Compilation &C,
+ const InputInfo &Output,
+ const InputInfoList &Inputs,
+ const llvm::opt::ArgList &Args,
+ const JobAction &JA) const;
};
} // end namespace AMDGCN
Index: clang/lib/Driver/ToolChains/HIP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/HIP.cpp
+++ clang/lib/Driver/ToolChains/HIP.cpp
@@ -104,6 +104,76 @@
C.addCommand(std::make_unique<Command>(JA, T, Bundler, BundlerArgs, Inputs));
}
+/// Generate an object file that embeds the HIP fat binary (the bundled
+/// device images) so it can be linked into the host. Using MC directives,
+/// embed the device code and also define the symbol required by the code
+/// generation so that the image can be retrieved at runtime.
+void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary(
+ Compilation &C, const InputInfo &Output,
+ const InputInfoList &Inputs, const ArgList &Args,
+ const JobAction &JA) const {
+ const ToolChain &TC = getToolChain();
+ std::string Name =
+ std::string(llvm::sys::path::stem(Output.getFilename()));
+
+ // Choose the names of the MC input file (.mcin) and of the bundled
+ // fat binary (.hipfb). They are registered as temporary files
+ // unless save-temps is enabled, in which case they are kept.
+ const char *McinFile;
+ const char *BundleFile;
+ if (C.getDriver().isSaveTempsEnabled()) {
+ McinFile = C.getArgs().MakeArgString(Name + ".mcin");
+ BundleFile = C.getArgs().MakeArgString(Name + ".hipfb");
+ } else {
+ auto TmpNameMcin = C.getDriver().GetTemporaryPath(Name, "mcin");
+ McinFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameMcin));
+ auto TmpNameFb = C.getDriver().GetTemporaryPath(Name, "hipfb");
+ BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameFb));
+ }
+ constructHIPFatbinCommand(C, JA, BundleFile, Inputs, Args, *this);
+
+ // Create a buffer to write the contents of the temp obj generator.
+ std::string ObjBuffer;
+ llvm::raw_string_ostream ObjStream(ObjBuffer);
+
+ // Add MC directives to embed target binaries. We ensure that the
+ // embedded image is 8-byte aligned (.p2align 3). This is not mandatory,
+ // but increases the likelihood of data to be aligned with a cache block
+ // in several main host machines.
+ ObjStream << "# HIP Object Generator\n";
+ ObjStream << "# *** Automatically generated by Clang ***\n";
+ ObjStream << " .type __hip_fatbin,@object\n";
+ ObjStream << " .section .hip_fatbin,\"aMS\",@progbits,1\n";
+ ObjStream << " .data\n";
+ ObjStream << " .globl __hip_fatbin\n";
+ ObjStream << " .p2align 3\n";
+ ObjStream << "__hip_fatbin:\n";
+ ObjStream << " .incbin \"" << BundleFile << "\"\n";
+ ObjStream.flush();
+
+ // Dump the contents of the temp object file gen if the user requested that.
+ // We support this option to enable testing of behavior with -###.
+ if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script))
+ llvm::errs() << ObjBuffer;
+
+ // Open the .mcin file and write the generated MC directives to it.
+ std::error_code EC;
+ llvm::raw_fd_ostream Objf(McinFile, EC, llvm::sys::fs::OF_None);
+
+ if (EC) {
+ C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
+ return;
+ }
+
+ Objf << ObjBuffer;
+
+ ArgStringList McArgs{"-triple", Args.MakeArgString(TC.getTripleString()),
+ "-o", Output.getFilename(),
+ McinFile, "--filetype=obj"};
+ const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc"));
+ C.addCommand(std::make_unique<Command>(JA, *this, Mc, McArgs, Inputs));
+}
+
// For amdgcn the inputs of the linker job are device bitcode and output is
// object file. It calls llvm-link, opt, llc, then lld steps.
void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
@@ -111,6 +181,10 @@
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
+ if (Inputs.size() > 0 &&
+ Inputs[0].getType() == types::TY_Image &&
+ JA.getType() == types::TY_Object)
+ return constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, Args, JA);
if (JA.getType() == types::TY_HIP_FATBIN)
return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this);
Index: clang/lib/Driver/ToolChains/Gnu.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Gnu.cpp
+++ clang/lib/Driver/ToolChains/Gnu.cpp
@@ -625,10 +625,6 @@
}
}
- // Add HIP offloading linker script args if required.
- AddHIPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA,
- *this);
-
Args.AddAllArgs(CmdArgs, options::OPT_T);
const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
Index: clang/lib/Driver/ToolChains/CommonArgs.h
===================================================================
--- clang/lib/Driver/ToolChains/CommonArgs.h
+++ clang/lib/Driver/ToolChains/CommonArgs.h
@@ -45,12 +45,6 @@
llvm::opt::ArgStringList &CmdArgs,
const llvm::opt::ArgList &Args);
-void AddHIPLinkerScript(const ToolChain &TC, Compilation &C,
- const InputInfo &Output, const InputInfoList &Inputs,
- const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs, const JobAction &JA,
- const Tool &T);
-
const char *SplitDebugName(const llvm::opt::ArgList &Args,
const InputInfo &Input, const InputInfo &Output);
Index: clang/lib/Driver/ToolChains/CommonArgs.cpp
===================================================================
--- clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -152,14 +152,12 @@
addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
for (const auto &II : Inputs) {
- // If the current tool chain refers to an OpenMP or HIP offloading host, we
- // should ignore inputs that refer to OpenMP or HIP offloading devices -
+ // If the current tool chain refers to an OpenMP offloading host, we
+ // should ignore inputs that refer to OpenMP offloading devices -
// they will be embedded according to a proper linker script.
if (auto *IA = II.getAction())
if ((JA.isHostOffloading(Action::OFK_OpenMP) &&
- IA->isDeviceOffloading(Action::OFK_OpenMP)) ||
- (JA.isHostOffloading(Action::OFK_HIP) &&
- IA->isDeviceOffloading(Action::OFK_HIP)))
+ IA->isDeviceOffloading(Action::OFK_OpenMP)))
continue;
if (!TC.HasNativeLLVMSupport() && types::isLLVMIR(II.getType()))
@@ -1298,115 +1296,6 @@
}
}
-/// Add HIP linker script arguments at the end of the argument list so that
-/// the fat binary is built by embedding the device images into the host. The
-/// linker script also defines a symbol required by the code generation so that
-/// the image can be retrieved at runtime. This should be used only in tool
-/// chains that support linker scripts.
-void tools::AddHIPLinkerScript(const ToolChain &TC, Compilation &C,
- const InputInfo &Output,
- const InputInfoList &Inputs, const ArgList &Args,
- ArgStringList &CmdArgs, const JobAction &JA,
- const Tool &T) {
-
- // If this is not a HIP host toolchain, we don't need to do anything.
- if (!JA.isHostOffloading(Action::OFK_HIP))
- return;
-
- InputInfoList DeviceInputs;
- for (const auto &II : Inputs) {
- const Action *A = II.getAction();
- // Is this a device linking action?
- if (A && isa<LinkJobAction>(A) && A->isDeviceOffloading(Action::OFK_HIP)) {
- DeviceInputs.push_back(II);
- }
- }
-
- if (DeviceInputs.empty())
- return;
-
- // Create temporary linker script. Keep it if save-temps is enabled.
- const char *LKS;
- std::string Name =
- std::string(llvm::sys::path::filename(Output.getFilename()));
- if (C.getDriver().isSaveTempsEnabled()) {
- LKS = C.getArgs().MakeArgString(Name + ".lk");
- } else {
- auto TmpName = C.getDriver().GetTemporaryPath(Name, "lk");
- LKS = C.addTempFile(C.getArgs().MakeArgString(TmpName));
- }
-
- // Add linker script option to the command.
- CmdArgs.push_back("-T");
- CmdArgs.push_back(LKS);
-
- // Create a buffer to write the contents of the linker script.
- std::string LksBuffer;
- llvm::raw_string_ostream LksStream(LksBuffer);
-
- // Get the HIP offload tool chain.
- auto *HIPTC = static_cast<const toolchains::HIPToolChain *>(
- C.getSingleOffloadToolChain<Action::OFK_HIP>());
- assert(HIPTC->getTriple().getArch() == llvm::Triple::amdgcn &&
- "Wrong platform");
- (void)HIPTC;
-
- const char *BundleFile;
- if (C.getDriver().isSaveTempsEnabled()) {
- BundleFile = C.getArgs().MakeArgString(Name + ".hipfb");
- } else {
- auto TmpName = C.getDriver().GetTemporaryPath(Name, "hipfb");
- BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpName));
- }
- AMDGCN::constructHIPFatbinCommand(C, JA, BundleFile, DeviceInputs, Args, T);
-
- // Add commands to embed target binaries. We ensure that each section and
- // image is 16-byte aligned. This is not mandatory, but increases the
- // likelihood of data to be aligned with a cache block in several main host
- // machines.
- LksStream << "/*\n";
- LksStream << " HIP Offload Linker Script\n";
- LksStream << " *** Automatically generated by Clang ***\n";
- LksStream << "*/\n";
- LksStream << "TARGET(binary)\n";
- LksStream << "INPUT(" << BundleFile << ")\n";
- LksStream << "SECTIONS\n";
- LksStream << "{\n";
- LksStream << " .hip_fatbin :\n";
- LksStream << " ALIGN(0x10)\n";
- LksStream << " {\n";
- LksStream << " PROVIDE_HIDDEN(__hip_fatbin = .);\n";
- LksStream << " " << BundleFile << "\n";
- LksStream << " }\n";
- LksStream << " /DISCARD/ :\n";
- LksStream << " {\n";
- LksStream << " * ( __CLANG_OFFLOAD_BUNDLE__* )\n";
- LksStream << " }\n";
- LksStream << "}\n";
- LksStream << "INSERT BEFORE .data\n";
- LksStream.flush();
-
- // Dump the contents of the linker script if the user requested that. We
- // support this option to enable testing of behavior with -###.
- if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script))
- llvm::errs() << LksBuffer;
-
- // If this is a dry run, do not create the linker script file.
- if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
- return;
-
- // Open script file and write the contents.
- std::error_code EC;
- llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::OF_None);
-
- if (EC) {
- C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
- return;
- }
-
- Lksf << LksBuffer;
-}
-
SmallString<128> tools::getStatsFileName(const llvm::opt::ArgList &Args,
const InputInfo &Output,
const InputInfo &Input,
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -2325,8 +2325,11 @@
/// Append top level actions generated by the builder.
virtual void appendTopLevelActions(ActionList &AL) {}
- /// Append linker actions generated by the builder.
- virtual void appendLinkActions(ActionList &AL) {}
+ /// Append linker device actions generated by the builder.
+ virtual void appendLinkDeviceActions(ActionList &AL) {}
+
+ /// Append linker host action generated by the builder.
+ virtual Action* appendLinkHostActions(ActionList &AL) { return nullptr; }
/// Append linker actions generated by the builder.
virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {}
@@ -2796,17 +2799,45 @@
: ABRT_Success;
}
- void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {
+ void appendLinkDeviceActions(ActionList &AL) override {
+ if (DeviceLinkerInputs.size() == 0)
+ return;
+
+ assert(DeviceLinkerInputs.size() == GpuArchList.size() &&
+ "Linker inputs and GPU arch list sizes do not match.");
+
// Append a new link action for each device.
unsigned I = 0;
for (auto &LI : DeviceLinkerInputs) {
+ // Each entry in DeviceLinkerInputs corresponds to a GPU arch.
auto *DeviceLinkAction =
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
- DA.add(*DeviceLinkAction, *ToolChains[0],
- CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
+ // Linking all inputs for the current GPU arch.
+ // LI contains all the inputs for the linker.
+ OffloadAction::DeviceDependences DeviceLinkDeps;
+ DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0],
+ CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
+ AL.push_back(C.MakeAction<OffloadAction>(DeviceLinkDeps,
+ DeviceLinkAction->getType()));
++I;
}
+ DeviceLinkerInputs.clear();
+
+ // Create a host object from all the device images by embedding them
+ // in a fat binary.
+ OffloadAction::DeviceDependences DDeps;
+ auto *TopDeviceLinkAction =
+ C.MakeAction<LinkJobAction>(AL, types::TY_Object);
+ DDeps.add(*TopDeviceLinkAction, *ToolChains[0],
+ nullptr, AssociatedOffloadKind);
+
+ // Offload the host object to the host linker.
+ AL.push_back(C.MakeAction<OffloadAction>(DDeps, TopDeviceLinkAction->getType()));
}
+
+ Action* appendLinkHostActions(ActionList &AL) override { return AL.back(); }
+
+ void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
};
/// OpenMP action builder. The host bitcode is passed to the device frontend
@@ -2934,7 +2965,7 @@
OpenMPDeviceActions.clear();
}
- void appendLinkActions(ActionList &AL) override {
+ void appendLinkDeviceActions(ActionList &AL) override {
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
"Toolchains and linker inputs sizes do not match.");
@@ -2953,6 +2984,14 @@
DeviceLinkerInputs.clear();
}
+ Action* appendLinkHostActions(ActionList &AL) override {
+ // Create wrapper bitcode from the result of device link actions and compile
+ // it to an object which will be added to the host link command.
+ auto *BC = C.MakeAction<OffloadWrapperJobAction>(AL, types::TY_LLVM_BC);
+ auto *ASM = C.MakeAction<BackendJobAction>(BC, types::TY_PP_Asm);
+ return C.MakeAction<AssembleJobAction>(ASM, types::TY_Object);
+ }
+
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
bool initialize() override {
@@ -3185,17 +3224,20 @@
for (DeviceActionBuilder *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
- SB->appendLinkActions(DeviceAL);
+ SB->appendLinkDeviceActions(DeviceAL);
}
if (DeviceAL.empty())
return nullptr;
- // Create wrapper bitcode from the result of device link actions and compile
- // it to an object which will be added to the host link command.
- auto *BC = C.MakeAction<OffloadWrapperJobAction>(DeviceAL, types::TY_LLVM_BC);
- auto *ASM = C.MakeAction<BackendJobAction>(BC, types::TY_PP_Asm);
- return C.MakeAction<AssembleJobAction>(ASM, types::TY_Object);
+ // Let builders add host linking actions.
+ Action* HA;
+ for (DeviceActionBuilder *SB : SpecializedBuilders) {
+ if (!SB->isValid())
+ continue;
+ HA = SB->appendLinkHostActions(DeviceAL);
+ }
+ return HA;
}
/// Processes the host linker action. This currently consists of replacing it
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits