yaxunl updated this revision to Diff 134107.
yaxunl added a comment.
Update with Greg's change.
https://reviews.llvm.org/D42800
Files:
include/clang/Basic/Cuda.h
include/clang/Driver/ToolChain.h
lib/Basic/Cuda.cpp
lib/Basic/Targets/AMDGPU.cpp
lib/Basic/Targets/AMDGPU.h
lib/Basic/Targets/NVPTX.cpp
lib/Driver/Driver.cpp
lib/Driver/SanitizerArgs.cpp
lib/Driver/ToolChain.cpp
lib/Driver/ToolChains/Clang.cpp
lib/Driver/ToolChains/Cuda.cpp
lib/Driver/ToolChains/Cuda.h
test/Driver/cuda-phases.cu
Index: test/Driver/cuda-phases.cu
===================================================================
--- test/Driver/cuda-phases.cu
+++ test/Driver/cuda-phases.cu
@@ -7,22 +7,25 @@
// REQUIRES: clang-driver
// REQUIRES: powerpc-registered-target
// REQUIRES: nvptx-registered-target
-
+// REQUIRES: amdgpu-registered-target
//
// Test single gpu architecture with complete compilation.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 %s 2>&1 \
-// RUN: | FileCheck -check-prefix=BIN %s
+// RUN: | FileCheck -check-prefixes=BIN,BIN_NV %s
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=gfx803 %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=BIN,BIN_AMD %s
// BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (host-cuda)
// BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (host-cuda)
// BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-cuda)
-// BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_30)
-// BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, cuda-cpp-output, (device-cuda, sm_30)
-// BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-cuda, sm_30)
-// BIN-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-cuda, sm_30)
-// BIN-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-cuda, sm_30)
-// BIN-DAG: [[P8:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {[[P7]]}, object
-// BIN-DAG: [[P9:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {[[P6]]}, assembler
+// BIN_NV-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, [[ARCH:sm_30]])
+// BIN_AMD-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, [[ARCH:gfx803]])
+// BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, cuda-cpp-output, (device-cuda, [[ARCH]])
+// BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-cuda, [[ARCH]])
+// BIN-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-cuda, [[ARCH]])
+// BIN-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-cuda, [[ARCH]])
+// BIN-DAG: [[P8:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:[[ARCH]])" {[[P7]]}, object
+// BIN-DAG: [[P9:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:[[ARCH]])" {[[P6]]}, assembler
// BIN-DAG: [[P10:[0-9]+]]: linker, {[[P8]], [[P9]]}, cuda-fatbin, (device-cuda)
// BIN-DAG: [[P11:[0-9]+]]: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-cuda (nvptx64-nvidia-cuda)" {[[P10]]}, ir
// BIN-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-cuda)
@@ -34,11 +37,13 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 %s -S 2>&1 \
// RUN: | FileCheck -check-prefix=ASM %s
-// ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_30)
-// ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (device-cuda, sm_30)
-// ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-cuda, sm_30)
-// ASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-cuda, sm_30)
-// ASM-DAG: [[P4:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {[[P3]]}, assembler
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=gfx803 %s -S 2>&1 \
+// RUN: | FileCheck -check-prefix=ASM %s
+// ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, [[ARCH:sm_30|gfx803]])
+// ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (device-cuda, [[ARCH]])
+// ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-cuda, [[ARCH]])
+// ASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-cuda, [[ARCH]])
+// ASM-DAG: [[P4:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:[[ARCH]])" {[[P3]]}, assembler
// ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (host-cuda)
// ASM-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, cuda-cpp-output, (host-cuda)
// ASM-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (host-cuda)
@@ -49,23 +54,25 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
// RUN: | FileCheck -check-prefix=BIN2 %s
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s 2>&1 \
+// RUN: | FileCheck -check-prefix=BIN2 %s
// BIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (host-cuda)
// BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (host-cuda)
// BIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-cuda)
-// BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_30)
-// BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, cuda-cpp-output, (device-cuda, sm_30)
-// BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-cuda, sm_30)
-// BIN2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-cuda, sm_30)
-// BIN2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-cuda, sm_30)
-// BIN2-DAG: [[P8:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {[[P7]]}, object
-// BIN2-DAG: [[P9:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {[[P6]]}, assembler
-// BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_35)
-// BIN2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, cuda-cpp-output, (device-cuda, sm_35)
-// BIN2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-cuda, sm_35)
-// BIN2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-cuda, sm_35)
-// BIN2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-cuda, sm_35)
-// BIN2-DAG: [[P15:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {[[P14]]}, object
-// BIN2-DAG: [[P16:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {[[P13]]}, assembler
+// BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, [[ARCH1:sm_30|gfx803]])
+// BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, cuda-cpp-output, (device-cuda, [[ARCH1]])
+// BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-cuda, [[ARCH1]])
+// BIN2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-cuda, [[ARCH1]])
+// BIN2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-cuda, [[ARCH1]])
+// BIN2-DAG: [[P8:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:[[ARCH1]])" {[[P7]]}, object
+// BIN2-DAG: [[P9:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:[[ARCH1]])" {[[P6]]}, assembler
+// BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, [[ARCH2:sm_35|gfx900]])
+// BIN2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, cuda-cpp-output, (device-cuda, [[ARCH2]])
+// BIN2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-cuda, [[ARCH2]])
+// BIN2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-cuda, [[ARCH2]])
+// BIN2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-cuda, [[ARCH2]])
+// BIN2-DAG: [[P15:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:[[ARCH2]])" {[[P14]]}, object
+// BIN2-DAG: [[P16:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:[[ARCH2]])" {[[P13]]}, assembler
// BIN2-DAG: [[P17:[0-9]+]]: linker, {[[P8]], [[P9]], [[P15]], [[P16]]}, cuda-fatbin, (device-cuda)
// BIN2-DAG: [[P18:[0-9]+]]: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-cuda (nvptx64-nvidia-cuda)" {[[P17]]}, ir
// BIN2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-cuda)
@@ -77,16 +84,18 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
// RUN: | FileCheck -check-prefix=ASM2 %s
-// ASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_30)
-// ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (device-cuda, sm_30)
-// ASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-cuda, sm_30)
-// ASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-cuda, sm_30)
-// ASM2-DAG: [[P4:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {[[P3]]}, assembler
-// ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_35)
-// ASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, cuda-cpp-output, (device-cuda, sm_35)
-// ASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-cuda, sm_35)
-// ASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-cuda, sm_35)
-// ASM2-DAG: [[P9:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {[[P8]]}, assembler
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -S 2>&1 \
+// RUN: | FileCheck -check-prefix=ASM2 %s
+// ASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, [[ARCH1:sm_30|gfx803]])
+// ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (device-cuda, [[ARCH1]])
+// ASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-cuda, [[ARCH1]])
+// ASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-cuda, [[ARCH1]])
+// ASM2-DAG: [[P4:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:[[ARCH1]])" {[[P3]]}, assembler
+// ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, [[ARCH2:sm_35|gfx900]])
+// ASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, cuda-cpp-output, (device-cuda, [[ARCH2]])
+// ASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-cuda, [[ARCH2]])
+// ASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-cuda, [[ARCH2]])
+// ASM2-DAG: [[P9:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:[[ARCH2]])" {[[P8]]}, assembler
// ASM2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (host-cuda)
// ASM2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, cuda-cpp-output, (host-cuda)
// ASM2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (host-cuda)
@@ -98,6 +107,8 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 %s --cuda-host-only 2>&1 \
// RUN: | FileCheck -check-prefix=HBIN %s
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=gfx803 %s --cuda-host-only 2>&1 \
+// RUN: | FileCheck -check-prefix=HBIN %s
// HBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (host-cuda)
// HBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (host-cuda)
// HBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-cuda)
@@ -110,6 +121,8 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 %s --cuda-host-only -S 2>&1 \
// RUN: | FileCheck -check-prefix=HASM %s
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=gfx803 %s --cuda-host-only -S 2>&1 \
+// RUN: | FileCheck -check-prefix=HASM %s
// HASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (host-cuda)
// HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (host-cuda)
// HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-cuda)
@@ -121,6 +134,8 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only 2>&1 \
// RUN: | FileCheck -check-prefix=HBIN2 %s
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only 2>&1 \
+// RUN: | FileCheck -check-prefix=HBIN2 %s
// HBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (host-cuda)
// HBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (host-cuda)
// HBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-cuda)
@@ -134,6 +149,8 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only -S 2>&1 \
// RUN: | FileCheck -check-prefix=HASM2 %s
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only -S 2>&1 \
+// RUN: | FileCheck -check-prefix=HASM2 %s
// HASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (host-cuda)
// HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (host-cuda)
// HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-cuda)
@@ -145,57 +162,65 @@
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 %s --cuda-device-only 2>&1 \
// RUN: | FileCheck -check-prefix=DBIN %s
-// DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_30)
-// DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (device-cuda, sm_30)
-// DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-cuda, sm_30)
-// DBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-cuda, sm_30)
-// DBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-cuda, sm_30)
-// DBIN-DAG: [[P5:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {[[P4]]}, object
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=gfx803 %s --cuda-device-only 2>&1 \
+// RUN: | FileCheck -check-prefix=DBIN %s
+// DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, [[ARCH:sm_30|gfx803]])
+// DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (device-cuda, [[ARCH]])
+// DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-cuda, [[ARCH]])
+// DBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-cuda, [[ARCH]])
+// DBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-cuda, [[ARCH]])
+// DBIN-DAG: [[P5:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:[[ARCH]])" {[[P4]]}, object
//
// Test single gpu architecture up to the assemble phase in device-only
// compilation mode.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 %s --cuda-device-only -S 2>&1 \
// RUN: | FileCheck -check-prefix=DASM %s
-// DASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_30)
-// DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (device-cuda, sm_30)
-// DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-cuda, sm_30)
-// DASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-cuda, sm_30)
-// DASM-DAG: [[P4:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {[[P3]]}, assembler
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=gfx803 %s --cuda-device-only -S 2>&1 \
+// RUN: | FileCheck -check-prefix=DASM %s
+// DASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, [[ARCH:sm_30|gfx803]])
+// DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (device-cuda, [[ARCH]])
+// DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-cuda, [[ARCH]])
+// DASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-cuda, [[ARCH]])
+// DASM-DAG: [[P4:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:[[ARCH]])" {[[P3]]}, assembler
//
// Test two gpu architectures with complete compilation in device-only
// compilation mode.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
// RUN: | FileCheck -check-prefix=DBIN2 %s
-// DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_30)
-// DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (device-cuda, sm_30)
-// DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-cuda, sm_30)
-// DBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-cuda, sm_30)
-// DBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-cuda, sm_30)
-// DBIN2-DAG: [[P5:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {[[P4]]}, object
-// DBIN2-DAG: [[P6:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_35)
-// DBIN2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, cuda-cpp-output, (device-cuda, sm_35)
-// DBIN2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-cuda, sm_35)
-// DBIN2-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-cuda, sm_35)
-// DBIN2-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-cuda, sm_35)
-// DBIN2-DAG: [[P11:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {[[P10]]}, object
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only 2>&1 \
+// RUN: | FileCheck -check-prefix=DBIN2 %s
+// DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, [[ARCH:sm_30|gfx803]])
+// DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (device-cuda, [[ARCH]])
+// DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-cuda, [[ARCH]])
+// DBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-cuda, [[ARCH]])
+// DBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-cuda, [[ARCH]])
+// DBIN2-DAG: [[P5:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:[[ARCH]])" {[[P4]]}, object
+// DBIN2-DAG: [[P6:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, [[ARCH2:sm_35|gfx900]])
+// DBIN2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, cuda-cpp-output, (device-cuda, [[ARCH2]])
+// DBIN2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-cuda, [[ARCH2]])
+// DBIN2-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-cuda, [[ARCH2]])
+// DBIN2-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-cuda, [[ARCH2]])
+// DBIN2-DAG: [[P11:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:[[ARCH2]])" {[[P10]]}, object
//
// Test two gpu architectures up to the assemble phase in device-only
// compilation mode.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \
// RUN: | FileCheck -check-prefix=DASM2 %s
-// DASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_30)
-// DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (device-cuda, sm_30)
-// DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-cuda, sm_30)
-// DASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-cuda, sm_30)
-// DASM2-DAG: [[P4:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {[[P3]]}, assembler
-// DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_35)
-// DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, cuda-cpp-output, (device-cuda, sm_35)
-// DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-cuda, sm_35)
-// DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-cuda, sm_35)
-// DASM2-DAG: [[P9:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {[[P8]]}, assembler
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only -S 2>&1 \
+// RUN: | FileCheck -check-prefix=DASM2 %s
+// DASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, [[ARCH:sm_30|gfx803]])
+// DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (device-cuda, [[ARCH]])
+// DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-cuda, [[ARCH]])
+// DASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-cuda, [[ARCH]])
+// DASM2-DAG: [[P4:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:[[ARCH]])" {[[P3]]}, assembler
+// DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, [[ARCH2:sm_35|gfx900]])
+// DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, cuda-cpp-output, (device-cuda, [[ARCH2]])
+// DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-cuda, [[ARCH2]])
+// DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-cuda, [[ARCH2]])
+// DASM2-DAG: [[P9:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:[[ARCH2]])" {[[P8]]}, assembler
Index: lib/Driver/ToolChains/Cuda.h
===================================================================
--- lib/Driver/ToolChains/Cuda.h
+++ lib/Driver/ToolChains/Cuda.h
@@ -12,6 +12,7 @@
#include "clang/Basic/Cuda.h"
#include "clang/Basic/VersionTuple.h"
+#include "clang/Basic/DebugInfoOptions.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Multilib.h"
#include "clang/Driver/ToolChain.h"
@@ -30,6 +31,7 @@
private:
const Driver &D;
bool IsValid = false;
+ bool UseOpenHeaders = false;
CudaVersion Version = CudaVersion::UNKNOWN;
std::string InstallPath;
std::string BinPath;
@@ -57,6 +59,7 @@
/// \brief Check whether we detected a valid Cuda install.
bool isValid() const { return IsValid; }
+ bool useOpenHeaders() const { return UseOpenHeaders; }
/// \brief Print information about the detected CUDA installation.
void print(raw_ostream &OS) const;
@@ -81,6 +84,20 @@
namespace tools {
namespace NVPTX {
+// For amdgcn, the backend step runs llvm-link followed by opt.
+class LLVM_LIBRARY_VISIBILITY Backend : public Tool {
+ public:
+ Backend(const ToolChain &TC)
+ : Tool("NVPTX::Backend", "GPU-backend", TC, RF_Full, llvm::sys::WEM_UTF8,
+ "--options-file") {}
+ virtual bool hasIntegratedCPP() const override { return false; }
+ virtual void ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output,
+ const InputInfoList &Inputs,
+ const llvm::opt::ArgList &TCArgs,
+ const char *LinkingOutput) const override;
+};
+
// Run ptxas, the NVPTX assembler.
class LLVM_LIBRARY_VISIBILITY Assembler : public Tool {
public:
@@ -127,6 +144,16 @@
};
} // end namespace NVPTX
+
+void addBCLib(Compilation &C, const llvm::opt::ArgList &Args,
+ llvm::opt::ArgStringList &CmdArgs,
+ llvm::opt::ArgStringList LibraryPaths,
+ const char *BCName) ;
+
+void addEnvListWithSpaces(const llvm::opt::ArgList &Args,
+ llvm::opt::ArgStringList &CmdArgs,
+ const char *EnvVar) ;
+
} // end namespace tools
namespace toolchains {
@@ -184,6 +211,7 @@
CudaInstallationDetector CudaInstallation;
protected:
+ Tool *buildBackend() const override; // for amdgcn, link and opt
Tool *buildAssembler() const override; // ptxas
Tool *buildLinker() const override; // fatbinary (ok, not really a linker)
Index: lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -221,6 +221,49 @@
IsValid = true;
break;
}
+
+ // Search for GCN Device Libraries
+ std::string GCNPath;
+ for (Arg *A : Args) {
+ if( A->getOption().matches(options::OPT_cuda_gpu_arch_EQ) &&
+ StringRef(A->getValue()).startswith("gfx") ) {
+ SmallVector<std::string, 4> GCNPathCandidates;
+ if (const char *libamdgcn = getenv("LIBAMDGCN"))
+ GCNPathCandidates.push_back(D.SysRoot + libamdgcn);
+ else
+ GCNPathCandidates.push_back(D.SysRoot + "/opt/rocm/libamdgcn");
+ for (const auto &CPath : GCNPathCandidates) {
+ if (CPath.empty() || !D.getVFS().exists(CPath))
+ continue;
+ GCNPath = CPath;
+ }
+ break;
+ }
+ }
+
+ // The directory names of GCN device libraries are the gfxnames.
+ // e.g. /opt/rocm/libamdgcn/gfx701
+ bool no_cuda_install = !IsValid;
+ if (! GCNPath.empty()) {
+ auto &FS = D.getVFS();
+ std::error_code EC;
+ for (llvm::sys::fs::directory_iterator LI(GCNPath, EC), LE;
+ !EC && LI != LE; LI = LI.increment(EC)) {
+ StringRef Dirname = LI->path();
+ StringRef GCNname = Dirname.rsplit('/').second;
+ if(GCNname.startswith("gfx")) {
+ std::string OCLFilePath = Dirname.str() + "/lib/opencl.amdgcn.bc";
+ if (FS.exists(OCLFilePath)) {
+ if (no_cuda_install)
+ UseOpenHeaders=true;
+ LibDeviceMap[GCNname.str()] = OCLFilePath;
+ IsValid = true;
+ }
+ }
+ continue;
+ }
+ }
+
}
void CudaInstallationDetector::AddCudaIncludeArgs(
@@ -246,7 +289,12 @@
CC1Args.push_back("-internal-isystem");
CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
CC1Args.push_back("-include");
- CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
+ if (useOpenHeaders()) {
+ CC1Args.push_back("__clang_cuda_runtime_wrapper_open.h");
+ CC1Args.push_back("-D__USE_OPEN_HEADERS__");
+ } else {
+ CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
+ }
}
void CudaInstallationDetector::CheckCudaVersionSupportsArch(
@@ -272,14 +320,157 @@
<< CudaVersionToString(Version) << "\n";
}
+void tools::addBCLib(Compilation &C, const ArgList &Args,
+ ArgStringList &CmdArgs, ArgStringList LibraryPaths, const char *BCName) {
+ std::string FullName;
+ bool FoundLibDevice = false;
+ for (std::string LibraryPath : LibraryPaths) {
+ LibraryPath = LibraryPath.substr(2, LibraryPath.length() - 2); // -L
+ FullName = Args.MakeArgString(LibraryPath + "/" + BCName );
+ if (llvm::sys::fs::exists( FullName.c_str())) {
+ FoundLibDevice = true;
+ break;
+ }
+ }
+ if (!FoundLibDevice)
+ C.getDriver().Diag(diag::err_drv_no_such_file) << BCName ;
+ CmdArgs.push_back(Args.MakeArgString(FullName));
+}
+
+void tools::addEnvListWithSpaces(const ArgList &Args, ArgStringList &CmdArgs,
+ const char *EnvVar) {
+ const char *DirList = ::getenv(EnvVar);
+ if (!DirList) return;
+ StringRef Dirs(DirList);
+ if (Dirs.empty()) return;
+ StringRef::size_type Delim;
+ while ((Delim = Dirs.find(" ")) != StringRef::npos) {
+ if (Delim != 0)
+ CmdArgs.push_back(Args.MakeArgString(Dirs.substr(0, Delim)));
+ Dirs = Dirs.substr(Delim + 1); // Trim front space
+ }
+ if (!Dirs.empty()) // Last arg may have no spaces
+ CmdArgs.push_back(Args.MakeArgString(Dirs));
+}
+
+void NVPTX::Backend::ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output,
+ const InputInfoList &Inputs,
+ const ArgList &Args,
+ const char *LinkingOutput) const {
+
+ assert(StringRef(JA.getOffloadingArch()).startswith("gfx") &&
+ " unless gfx processor, backend should be clang") ;
+
+ // For amdgcn the Backend Job will call 3.9 llvm-link & opt steps
+ ArgStringList CmdArgs;
+ // Add the input bc's created by compile step
+ for (InputInfoList::const_iterator
+ it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
+ const InputInfo &II = *it;
+ CmdArgs.push_back(II.getFilename());
+ }
+
+ std::string GFXNAME = JA.getOffloadingArch();
+
+ ArgStringList LibraryPaths;
+
+ // Always search for bc libs in libamdgcn first
+ const char * libamdgcn;
+ libamdgcn = getenv("LIBAMDGCN");
+ if (!libamdgcn) libamdgcn = "/opt/rocm/libamdgcn";
+ LibraryPaths.push_back(Args.MakeArgString(
+ "-L" + std::string(libamdgcn) + "/" + std::string(GFXNAME) + "/lib"));
+
+ // Find in -L<path> and LIBRARY_PATH.
+ for (auto Arg : Args) {
+ if (Arg->getSpelling() == "-L") {
+ std::string Current = "-L";
+ Current += Arg->getValue();
+ LibraryPaths.push_back(Args.MakeArgString(Current.c_str()));
+ }
+ }
+ addDirectoryList(Args, LibraryPaths, "-L", "LIBRARY_PATH");
+
+ LibraryPaths.push_back(
+ Args.MakeArgString("-L" + C.getDriver().Dir + "/../lib/libdevice"));
+ addBCLib(C, Args, CmdArgs, LibraryPaths,
+ Args.MakeArgString("libicuda2gcn-" + std::string(GFXNAME) + ".bc"));
+
+ addBCLib(C, Args, CmdArgs, LibraryPaths, "cuda2gcn.amdgcn.bc");
+ addBCLib(C, Args, CmdArgs, LibraryPaths, "opencl.amdgcn.bc");
+ addBCLib(C, Args, CmdArgs, LibraryPaths, "ockl.amdgcn.bc");
+ addBCLib(C, Args, CmdArgs, LibraryPaths, "irif.amdgcn.bc");
+ addBCLib(C, Args, CmdArgs, LibraryPaths, "ocml.amdgcn.bc");
+ addBCLib(C, Args, CmdArgs, LibraryPaths, "oclc_finite_only_off.amdgcn.bc");
+ addBCLib(C, Args, CmdArgs, LibraryPaths, "oclc_daz_opt_off.amdgcn.bc");
+ addBCLib(C, Args, CmdArgs, LibraryPaths,
+ "oclc_correctly_rounded_sqrt_on.amdgcn.bc");
+ addBCLib(C, Args, CmdArgs, LibraryPaths, "oclc_unsafe_math_off.amdgcn.bc");
+ addBCLib(C, Args, CmdArgs, LibraryPaths, "hc.amdgcn.bc");
+ addBCLib(C, Args, CmdArgs, LibraryPaths, "oclc_isa_version.amdgcn.bc");
+
+ addEnvListWithSpaces(Args, CmdArgs, "CLANG_TARGET_LINK_OPTS");
+ CmdArgs.push_back("-suppress-warnings");
+
+ // Add an intermediate output file which is input to opt
+ CmdArgs.push_back("-o");
+ std::string TmpName = C.getDriver().GetTemporaryPath("OPT_INPUT", "bc");
+ const char *ResultingBitcodeF =
+ C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str()));
+ CmdArgs.push_back(ResultingBitcodeF);
+ const char *Exec = Args.MakeArgString(C.getDriver().Dir + "/llvm-link");
+ C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
+
+ ArgStringList OptArgs;
+ // The input to opt is the output from llvm-link
+ OptArgs.push_back(ResultingBitcodeF);
+ // Add CLANG_TARGET_OPT_OPTS override options to opt
+ if (getenv("CLANG_TARGET_OPT_OPTS"))
+ addEnvListWithSpaces(Args, OptArgs, "CLANG_TARGET_OPT_OPTS");
+ else {
+ if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
+ StringRef OOpt = "3";
+ if (A->getOption().matches(options::OPT_O4) ||
+ A->getOption().matches(options::OPT_Ofast))
+ OOpt = "3";
+ else if (A->getOption().matches(options::OPT_O0))
+ OOpt = "0";
+ else if (A->getOption().matches(options::OPT_O)) {
+ // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
+ OOpt = llvm::StringSwitch<const char *>(A->getValue())
+ .Case("1", "1")
+ .Case("2", "2")
+ .Case("3", "3")
+ .Case("s", "2")
+ .Case("z", "2")
+ .Default("2");
+ }
+ OptArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
+ }
+ OptArgs.push_back("-S");
+ const char *mcpustr = Args.MakeArgString("-mcpu=" + GFXNAME);
+ OptArgs.push_back(mcpustr);
+ OptArgs.push_back("-dce");
+ OptArgs.push_back("-sroa");
+ OptArgs.push_back("-globaldce");
+ }
+ OptArgs.push_back("-o");
+ OptArgs.push_back(Output.getFilename());
+ const char *OptExec = Args.MakeArgString(C.getDriver().Dir + "/opt");
+ C.addCommand(llvm::make_unique<Command>(JA, *this, OptExec, OptArgs, Inputs));
+
+}
+
void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const auto &TC =
static_cast<const toolchains::CudaToolChain &>(getToolChain());
- assert(TC.getTriple().isNVPTX() && "Wrong platform");
+ assert((TC.getTriple().isNVPTX() ||
+ TC.getTriple().getArch()==llvm::Triple::amdgcn) && "Wrong platform");
StringRef GPUArchName;
// If this is an OpenMP action we need to extract the device architecture
@@ -296,6 +487,41 @@
assert(gpu_arch != CudaArch::UNKNOWN &&
"Device action expected to have an architecture.");
+ // For amdgcn this job will call llc (Lightning Compiler)
+ if (StringRef(JA.getOffloadingArch()).startswith("gfx")) {
+ ArgStringList CmdArgs;
+ for (InputInfoList::const_iterator
+ it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) {
+ const InputInfo &II = *it;
+ CmdArgs.push_back(II.getFilename());
+ }
+ CmdArgs.push_back("-mtriple=amdgcn--cuda");
+ CmdArgs.push_back("-filetype=obj");
+ addEnvListWithSpaces(Args, CmdArgs, "CLANG_TARGET_LLC_OPTS");
+ std::string GFXNAME = JA.getOffloadingArch();
+ CmdArgs.push_back(Args.MakeArgString("-mcpu=" + GFXNAME));
+ CmdArgs.push_back("-o");
+ std::string TmpName = C.getDriver().GetTemporaryPath("LC_OUTPUT", "o");
+ const char *llcOutputFile = C.addTempFile(
+ C.getArgs().MakeArgString(TmpName.c_str()));
+ CmdArgs.push_back(llcOutputFile);
+ const char *Exec = Args.MakeArgString(C.getDriver().Dir + "/llc");
+ C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
+
+ ArgStringList CmdArgs2;
+ CmdArgs2.push_back("-flavor");
+ CmdArgs2.push_back("gnu");
+ CmdArgs2.push_back("--no-undefined");
+ CmdArgs2.push_back("-shared");
+ // The output from ld.lld is an HSA code object file
+ CmdArgs2.push_back("-o");
+ CmdArgs2.push_back(Output.getFilename());
+ CmdArgs2.push_back(llcOutputFile);
+ const char *lld = Args.MakeArgString(C.getDriver().Dir + "/lld");
+ C.addCommand(llvm::make_unique<Command>(JA, *this, lld, CmdArgs2, Inputs));
+ return;
+ }
+
// Check that our installation's ptxas supports gpu_arch.
if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
@@ -386,7 +612,123 @@
const char *LinkingOutput) const {
const auto &TC =
static_cast<const toolchains::CudaToolChain &>(getToolChain());
- assert(TC.getTriple().isNVPTX() && "Wrong platform");
+ assert((TC.getTriple().isNVPTX() ||
+ TC.getTriple().getArch()==llvm::Triple::amdgcn) && "Wrong platform");
+
+ // This job builds a composite cubin file from each output of the assemble
+ // step. There are 3 scenarios, each needing different command(s):
+ // 1 mixed targets - run "fatbinary" and clang tool "clang-fixup-fatbin"
+ // 2 only amdgpu targets - run clang tool "clang-assemble-fatbin"
+ // 3 only nvptx targets - run cuda SDK "fatbinary" program
+ bool found_amdgcn=false;
+ bool found_nvptx=false;
+ for (const auto& II : Inputs) {
+ if(StringRef(II.getAction()->getOffloadingArch()).startswith("gfx"))
+ found_amdgcn=true;
+ else
+ found_nvptx=true;
+ }
+
+ // 1 Mixed targets, need fatbinary and clang-fixup-fatbin commands -----------
+ if (found_amdgcn && found_nvptx) {
+
+ const char *fbOutputFile ;
+ ArgStringList CmdArgs;
+ CmdArgs.push_back("--cuda");
+ CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
+ CmdArgs.push_back(Args.MakeArgString("--create"));
+ std::string TmpName = C.getDriver().GetTemporaryPath("FB_FIXUP", "fatbin");
+ fbOutputFile = C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str()));
+ CmdArgs.push_back(fbOutputFile);
+
+ for (const auto& II : Inputs) {
+ auto *A = II.getAction();
+ if(StringRef(A->getOffloadingArch()).startswith("gfx")) {
+ if (II.getType() != types::TY_PP_Asm) {
+ CmdArgs.push_back(Args.MakeArgString("--no-asm"));
+ // LIE to avoid unknown profile in fatbinary
+ CmdArgs.push_back(Args.MakeArgString(
+ llvm::Twine("--image=profile=sm_37,file=") + + II.getFilename()));
+ }
+ } else {
+ assert(A->getInputs().size() == 1 &&
+ "Device offload action is expected to have a single input");
+ const char *gpu_arch_str = A->getOffloadingArch();
+ assert(gpu_arch_str &&
+ "Device action expected to have associated a GPU architecture!");
+ CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
+
+ // We need to pass an Arch of the form "sm_XX" for cubin files and
+ // "compute_XX" for ptx.
+ const char *Arch =
+ (II.getType() == types::TY_PP_Asm)
+ ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
+ : gpu_arch_str;
+ CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
+ Arch + ",file=" + II.getFilename()));
+ }
+ }
+
+ for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
+ CmdArgs.push_back(Args.MakeArgString(A));
+
+ const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
+ C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
+
+ // Call clang-fixup-fatbin to correct the sm_37 labels to real gfx labels
+ std::string subarchs = "-offload-archs=";
+ bool first = true;
+ for (const auto& II : Inputs)
+ if (II.getType() != types::TY_PP_Asm) {
+ if (first) {
+ subarchs = subarchs +
+ StringRef(II.getAction()->getOffloadingArch()).str();
+ first = false ;
+ } else {
+ subarchs = subarchs + "," +
+ StringRef(II.getAction()->getOffloadingArch()).str();
+ }
+ }
+ ArgStringList CmdArgs2;
+ CmdArgs2.push_back(Args.MakeArgString(subarchs));
+ CmdArgs2.push_back(fbOutputFile);
+ CmdArgs2.push_back(Args.MakeArgString(Output.getFilename()));
+ const char *Exec2 = Args.MakeArgString(C.getDriver().Dir +
+ "/clang-fixup-fatbin");
+ C.addCommand(llvm::make_unique<Command>(JA, *this, Exec2, CmdArgs2,
+ Inputs));
+ return;
+ }
+
+ // 2 Only amdgcn targets, call clang-assemble-fatbin ------------------------
+ if (found_amdgcn) {
+
+ ArgStringList CmdArgs;
+ std::string subarchs = "-offload-archs=";
+ bool first = true;
+ for (const auto& II : Inputs) {
+ if (II.getType() != types::TY_PP_Asm) {
+ if (first) {
+ subarchs = subarchs +
+ StringRef(II.getAction()->getOffloadingArch()).str();
+ first = false ;
+ } else {
+ subarchs = subarchs + "," +
+ StringRef(II.getAction()->getOffloadingArch()).str();
+ }
+ CmdArgs.push_back(II.getFilename());
+ }
+ }
+ CmdArgs.push_back(Args.MakeArgString(subarchs));
+ CmdArgs.push_back("-o");
+ CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
+ const char *Exec = Args.MakeArgString(C.getDriver().Dir +
+ "/clang-assemble-fatbin");
+ C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
+ return;
+
+ }
+ // 3 Only nvptx targets found, so just call fatbinary ------------------------
ArgStringList CmdArgs;
CmdArgs.push_back("--cuda");
@@ -566,6 +908,10 @@
return;
}
+ // Do not add -mlink-cuda-bitcode or ptx42 features when the arch is gfx*
+ if (GpuArch.startswith("gfx"))
+ return;
+
CC1Args.push_back("-mlink-cuda-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
@@ -670,6 +1016,10 @@
return DAL;
}
+Tool *CudaToolChain::buildBackend() const {
+ return new tools::NVPTX::Backend(*this);
+}
+
Tool *CudaToolChain::buildAssembler() const {
return new tools::NVPTX::Assembler(*this);
}
Index: lib/Driver/ToolChains/Clang.cpp
===================================================================
--- lib/Driver/ToolChains/Clang.cpp
+++ lib/Driver/ToolChains/Clang.cpp
@@ -2322,9 +2322,10 @@
ArgStringList &CmdArgs, bool KernelOrKext) {
const llvm::Triple &EffectiveTriple = TC.getEffectiveTriple();
- // NVPTX doesn't support stack protectors; from the compiler's perspective, it
- // doesn't even have a stack!
- if (EffectiveTriple.isNVPTX())
+ // NVPTX and GCN don't support stack protectors; from the compiler's
+ // perspective, they don't even have a stack!
+ if (EffectiveTriple.isNVPTX() ||
+ EffectiveTriple.getArch()==llvm::Triple::amdgcn)
return;
// -stack-protector=0 is default.
@@ -3065,7 +3066,14 @@
const ArgList &Args, const char *LinkingOutput) const {
const llvm::Triple &RawTriple = getToolChain().getTriple();
const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
- const std::string &TripleStr = Triple.getTriple();
+
+ bool Is_amdgcn = StringRef(JA.getOffloadingArch()).startswith("gfx") ||
+ (getToolChain().getArch() == llvm::Triple::amdgcn) ;
+ // Currently the CUDA driver only supports offload triple nvptx64-nvidia-cuda.
+ // Switch this from nvptx to amdgcn iff the subarch is a gfx processor.
+ const std::string &TripleStr = Is_amdgcn &&
+ (JA.isOffloading(Action::OFK_Cuda) || JA.isOffloading(Action::OFK_OpenMP))
+ ? "amdgcn--cuda" : Triple.getTriple();
bool KernelOrKext =
Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext);
@@ -3477,7 +3485,8 @@
// Enable -mconstructor-aliases except on darwin, where we have to work around
// a linker bug (see <rdar://problem/7651567>), and CUDA device code, where
// aliases aren't supported.
- if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX())
+ if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX() &&
+ RawTriple.getArch()!=llvm::Triple::amdgcn)
CmdArgs.push_back("-mconstructor-aliases");
// Darwin's kernel doesn't support guard variables; just die if we
Index: lib/Driver/ToolChain.cpp
===================================================================
--- lib/Driver/ToolChain.cpp
+++ lib/Driver/ToolChain.cpp
@@ -241,6 +241,10 @@
return Clang.get();
}
+Tool *ToolChain::buildBackend() const {
+ return new tools::Clang(*this);
+}
+
Tool *ToolChain::buildAssembler() const {
return new tools::ClangAs(*this);
}
@@ -255,6 +259,12 @@
return Assemble.get();
}
+Tool *ToolChain::getBackend() const {
+ if(!Backend)
+ Backend.reset(buildBackend());
+ return Backend.get();
+}
+
Tool *ToolChain::getClangAs() const {
if (!Assemble)
Assemble.reset(new tools::ClangAs(*this));
@@ -295,8 +305,9 @@
case Action::AnalyzeJobClass:
case Action::MigrateJobClass:
case Action::VerifyPCHJobClass:
- case Action::BackendJobClass:
return getClang();
+ case Action::BackendJobClass:
+ return getBackend();
case Action::OffloadBundlingJobClass:
case Action::OffloadUnbundlingJobClass:
@@ -390,8 +401,21 @@
}
Tool *ToolChain::SelectTool(const JobAction &JA) const {
- if (getDriver().ShouldUseClangCompiler(JA)) return getClang();
Action::ActionClass AC = JA.getKind();
+ // The amdgcn Backend needs buildBackend()
+ //if ( StringRef(JA.getOffloadingArch()).startswith("gfx") &&
+ if((JA.isOffloading(Action::OFK_Cuda) ||
+ JA.isOffloading(Action::OFK_OpenMP)) &&
+ (StringRef(JA.getOffloadingArch()).startswith("gfx") ||
+ (getTriple().getArch() == llvm::Triple::amdgcn)) &&
+ (AC == Action::BackendJobClass) ) {
+ if ( (Args.hasArg(options::OPT_emit_llvm)) ||
+ (Args.hasArg(options::OPT_emit_llvm_bc)) )
+ return getClang(); // Dont run amdgcn backend if we just want LLVM IR
+ else
+ return getTool(AC);
+ };
+ if (getDriver().ShouldUseClangCompiler(JA)) return getClang();
if (AC == Action::AssembleJobClass && useIntegratedAs())
return getClangAs();
return getTool(AC);
Index: lib/Driver/SanitizerArgs.cpp
===================================================================
--- lib/Driver/SanitizerArgs.cpp
+++ lib/Driver/SanitizerArgs.cpp
@@ -720,7 +720,8 @@
// NVPTX doesn't currently support sanitizers. Bailing out here means that
// e.g. -fsanitize=address applies only to host code, which is what we want
// for now.
- if (TC.getTriple().isNVPTX())
+ if (TC.getTriple().isNVPTX() ||
+ TC.getTriple().getArch()==llvm::Triple::amdgcn)
return;
// Translate available CoverageFeatures to corresponding clang-cc1 flags.
Index: lib/Driver/Driver.cpp
===================================================================
--- lib/Driver/Driver.cpp
+++ lib/Driver/Driver.cpp
@@ -2301,7 +2301,8 @@
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
assert(HostTC && "No toolchain for host compilation.");
- if (HostTC->getTriple().isNVPTX()) {
+ if (HostTC->getTriple().isNVPTX() ||
+ HostTC->getTriple().getArch()==llvm::Triple::amdgcn) {
// We do not support targeting NVPTX for host compilation. Throw
// an error and abort pipeline construction early so we don't trip
// asserts that assume device-side compilation.
@@ -3198,6 +3199,9 @@
bool SaveTemps;
bool EmbedBitcode;
+ /// Type of the input file for the tool
+ types::ID InputType;
+
/// Get previous dependent action or null if that does not exist. If
/// \a CanBeCollapsed is false, that action must be legal to collapse or
/// null will be returned.
@@ -3255,6 +3259,8 @@
bool canCollapsePreprocessorAction() const {
return !C.getArgs().hasArg(options::OPT_no_integrated_cpp) &&
!C.getArgs().hasArg(options::OPT_traditional_cpp) && !SaveTemps &&
+ (InputType != types::TY_LLVM_IR) &&
+ (InputType != types::TY_LLVM_BC) &&
!C.getArgs().hasArg(options::OPT_rewrite_objc);
}
@@ -3300,6 +3306,14 @@
if (!AJ || !BJ || !CJ)
return nullptr;
+ // Cannot combine compilation with backend for amdgcn backend
+ if(( AJ->isOffloading(Action::OFK_Cuda) ||
+ AJ->isOffloading(Action::OFK_OpenMP)) &&
+ (StringRef(AJ->getOffloadingArch()).startswith("gfx") ||
+ TC.getTriple().getArch() == llvm::Triple::amdgcn)) {
+ return nullptr;
+ }
+
// Get compiler tool.
const Tool *T = TC.SelectTool(*CJ);
if (!T)
@@ -3331,6 +3345,14 @@
if (!AJ || !BJ)
return nullptr;
+ // Cannot combine assemble with backend for amdgcn backend
+ if(( AJ->isOffloading(Action::OFK_Cuda) ||
+ AJ->isOffloading(Action::OFK_OpenMP)) &&
+ (StringRef(AJ->getOffloadingArch()).startswith("gfx") ||
+ TC.getTriple().getArch() == llvm::Triple::amdgcn)) {
+ return nullptr;
+ }
+
// Retrieve the compile job, backend action must always be preceded by one.
ActionList CompileJobOffloadActions;
auto *CJ = getPrevDependentAction(BJ->getInputs(), CompileJobOffloadActions,
@@ -3364,6 +3386,20 @@
if (!BJ || !CJ)
return nullptr;
+ // Cannot combine compilation with backend for amdgcn backend
+ if((BJ->isOffloading(Action::OFK_Cuda) ||
+ BJ->isOffloading(Action::OFK_OpenMP)) &&
+ (StringRef(BJ->getOffloadingArch()).startswith("gfx") ||
+ TC.getTriple().getArch() == llvm::Triple::amdgcn)) {
+ // It is necessary to combine when generating IR for compile-only with
+ // flags "-c -S -emit-llvm". With only flags "-c -S", the gcn backend is
+ // needed to generate linked and opt IR for llc, so do not combine.
+ if( ! ( C.getArgs().hasArg(options::OPT_c) &&
+ C.getArgs().hasArg(options::OPT_S) &&
+ C.getArgs().hasArg(options::OPT_emit_llvm)) )
+ return nullptr;
+ }
+
// Get compiler tool.
const Tool *T = TC.SelectTool(*CJ);
if (!T)
@@ -3407,6 +3443,14 @@
EmbedBitcode(EmbedBitcode) {
assert(BaseAction && "Invalid base action.");
IsHostSelector = BaseAction->getOffloadingDeviceKind() == Action::OFK_None;
+ // Record the input file type; canCollapsePreprocessorAction() consults it.
+ for(Arg* A : C.getInputArgs()) {
+ if (A->getOption().getKind() == Option::InputClass) {
+ const char *Value = A->getValue();
+ if (const char *Ext = strrchr(Value, '.'))
+ InputType = TC.LookupTypeForExtension(Ext + 1);
+ }
+ }
}
/// Check if a chain of actions can be combined and return the tool that can
@@ -3907,7 +3951,10 @@
JA.getType() == types::TY_LLVM_BC)
Suffixed += ".tmp";
Suffixed += '.';
- Suffixed += Suffix;
+ if (((StringRef)BaseInput).endswith(".a"))
+ Suffixed += "a";
+ else
+ Suffixed += Suffix;
NamedOutput = C.getArgs().MakeArgString(Suffixed.c_str());
}
Index: lib/Basic/Targets/NVPTX.cpp
===================================================================
--- lib/Basic/Targets/NVPTX.cpp
+++ lib/Basic/Targets/NVPTX.cpp
@@ -190,6 +190,23 @@
return "700";
case CudaArch::SM_72:
return "720";
+ case CudaArch::GFX600:
+ case CudaArch::GFX601:
+ case CudaArch::GFX700:
+ case CudaArch::GFX701:
+ case CudaArch::GFX702:
+ case CudaArch::GFX703:
+ case CudaArch::GFX704:
+ case CudaArch::GFX800:
+ case CudaArch::GFX801:
+ case CudaArch::GFX802:
+ case CudaArch::GFX803:
+ case CudaArch::GFX810:
+ case CudaArch::GFX900:
+ case CudaArch::GFX901:
+ case CudaArch::GFX902:
+ return "320";
+
}
llvm_unreachable("unhandled CudaArch");
}();
Index: lib/Basic/Targets/AMDGPU.h
===================================================================
--- lib/Basic/Targets/AMDGPU.h
+++ lib/Basic/Targets/AMDGPU.h
@@ -14,6 +14,7 @@
#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
#define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
+#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "llvm/ADT/StringSet.h"
@@ -143,6 +144,8 @@
return TT.getArch() == llvm::Triple::amdgcn;
}
+ CudaArch GCN_Subarch;
+
static bool isGenericZero(const llvm::Triple &TT) { return true; }
public:
@@ -292,7 +295,7 @@
GPU = parseAMDGCNName(Name);
else
GPU = parseR600Name(Name);
-
+ GCN_Subarch = StringToCudaArch(Name);
return GPU != GK_NONE;
}
@@ -377,6 +380,7 @@
// address space has value 0 but in private and local address space has
// value ~0.
uint64_t getNullPointerValue(LangAS AS) const override {
+ if(getTriple().getOS()==llvm::Triple::CUDA) return 0;
return AS == LangAS::opencl_local ? ~0 : 0;
}
};
Index: lib/Basic/Targets/AMDGPU.cpp
===================================================================
--- lib/Basic/Targets/AMDGPU.cpp
+++ lib/Basic/Targets/AMDGPU.cpp
@@ -293,6 +293,7 @@
: DataLayoutStringSIPrivateIsZero)
: DataLayoutStringR600);
assert(DataLayout->getAllocaAddrSpace() == AS.Private);
+ GCN_Subarch = CudaArch::GFX803; // Default to fiji
setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
Triple.getEnvironment() == llvm::Triple::OpenCL ||
@@ -324,15 +325,96 @@
void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
- if (getTriple().getArch() == llvm::Triple::amdgcn)
- Builder.defineMacro("__AMDGCN__");
- else
+ if (getTriple().getArch() == llvm::Triple::amdgcn) {
+ if(getTriple().getOS() == llvm::Triple::CUDA) {
+ std::string GFXProcCode = [this] {
+ switch (GCN_Subarch) {
+ case CudaArch::SM_20: return "000";
+ case CudaArch::SM_21: return "000";
+ case CudaArch::SM_30: return "000";
+ case CudaArch::SM_32: return "000";
+ case CudaArch::SM_35: return "000";
+ case CudaArch::SM_37: return "000";
+ case CudaArch::SM_50: return "000";
+ case CudaArch::SM_52: return "000";
+ case CudaArch::SM_53: return "000";
+ case CudaArch::SM_60: return "000";
+ case CudaArch::SM_61: return "000";
+ case CudaArch::SM_62: return "000";
+ case CudaArch::SM_70: return "000";
+ case CudaArch::SM_72: return "000";
+ case CudaArch::GFX600: return "600";
+ case CudaArch::GFX601: return "601";
+ case CudaArch::GFX700: return "700";
+ case CudaArch::GFX701: return "701";
+ case CudaArch::GFX702: return "702";
+ case CudaArch::GFX703: return "703";
+ case CudaArch::GFX704: return "704";
+ case CudaArch::GFX800: return "800";
+ case CudaArch::GFX801: return "801";
+ case CudaArch::GFX802: return "802";
+ case CudaArch::GFX803: return "803";
+ case CudaArch::GFX810: return "810";
+ case CudaArch::GFX900: return "900";
+ case CudaArch::GFX901: return "901";
+ case CudaArch::GFX902: return "902";
+ case CudaArch::UNKNOWN:;
+ case CudaArch::LAST:;
+ }
+ llvm_unreachable("unhandled GFX processor");
+ }();
+ Builder.defineMacro("__AMDGCN__", GFXProcCode);
+ } else {
+ Builder.defineMacro("__AMDGCN__");
+ }
+ } else
Builder.defineMacro("__R600__");
if (hasFMAF)
Builder.defineMacro("__HAS_FMAF__");
if (hasLDEXPF)
Builder.defineMacro("__HAS_LDEXPF__");
if (hasFP64)
Builder.defineMacro("__HAS_FP64__");
+ if(getTriple().getOS() == llvm::Triple::CUDA) {
+ // Set __CUDA_ARCH__ for the GPU specified.
+ std::string CUDAArchCode = [this] {
+ switch (GCN_Subarch) {
+ case CudaArch::SM_20: return "200";
+ case CudaArch::SM_21: return "210";
+ case CudaArch::SM_30: return "300";
+ case CudaArch::SM_32: return "320";
+ case CudaArch::SM_35: return "350";
+ case CudaArch::SM_37: return "370";
+ case CudaArch::SM_50: return "500";
+ case CudaArch::SM_52: return "520";
+ case CudaArch::SM_53: return "530";
+ case CudaArch::SM_60: return "600";
+ case CudaArch::SM_61: return "610";
+ case CudaArch::SM_62: return "620";
+ case CudaArch::SM_70: return "700";
+ case CudaArch::SM_72: return "720";
+ case CudaArch::GFX600: return "320";
+ case CudaArch::GFX601: return "320";
+ case CudaArch::GFX700: return "320";
+ case CudaArch::GFX701: return "320";
+ case CudaArch::GFX702: return "320";
+ case CudaArch::GFX703: return "320";
+ case CudaArch::GFX704: return "320";
+ case CudaArch::GFX800: return "320";
+ case CudaArch::GFX801: return "320";
+ case CudaArch::GFX802: return "320";
+ case CudaArch::GFX803: return "320";
+ case CudaArch::GFX810: return "320";
+ case CudaArch::GFX900: return "320";
+ case CudaArch::GFX901: return "320";
+ case CudaArch::GFX902: return "320";
+ case CudaArch::UNKNOWN:;
+ case CudaArch::LAST:;
+ }
+ llvm_unreachable("unhandled GCN Subarch");
+ }();
+ // Set __CUDA_ARCH__ for the GPU specified.
+ Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
+ }
}
Index: lib/Basic/Cuda.cpp
===================================================================
--- lib/Basic/Cuda.cpp
+++ lib/Basic/Cuda.cpp
@@ -58,6 +58,36 @@
return "sm_70";
case CudaArch::SM_72:
return "sm_72";
+ case CudaArch::GFX600://tahiti
+ return "gfx600";
+ case CudaArch::GFX601://pitcairn, verde, oland,hainan
+ return "gfx601";
+ case CudaArch::GFX700://kaveri
+ return "gfx700";
+ case CudaArch::GFX701://hawaii
+ return "gfx701";
+ case CudaArch::GFX702://290,290x,R390,R390x
+ return "gfx702";
+ case CudaArch::GFX703://kabini mullins
+ return "gfx703";
+ case CudaArch::GFX704://bonaire
+ return "gfx704";
+ case CudaArch::GFX800://iceland
+ return "gfx800";
+ case CudaArch::GFX801://carrizo
+ return "gfx801";
+ case CudaArch::GFX802://tonga,iceland
+ return "gfx802";
+ case CudaArch::GFX803://fiji,polaris10
+ return "gfx803";
+ case CudaArch::GFX810://stoney
+ return "gfx810";
+ case CudaArch::GFX900: //vega, instinct
+ return "gfx900";
+ case CudaArch::GFX901: //
+ return "gfx901";
+ case CudaArch::GFX902: // TBA
+ return "gfx902";
}
llvm_unreachable("invalid enum");
}
@@ -78,6 +108,21 @@
.Case("sm_62", CudaArch::SM_62)
.Case("sm_70", CudaArch::SM_70)
.Case("sm_72", CudaArch::SM_72)
+ .Case("gfx600", CudaArch::GFX600)
+ .Case("gfx601", CudaArch::GFX601)
+ .Case("gfx700", CudaArch::GFX700)
+ .Case("gfx701", CudaArch::GFX701)
+ .Case("gfx702", CudaArch::GFX702)
+ .Case("gfx703", CudaArch::GFX703)
+ .Case("gfx704", CudaArch::GFX704)
+ .Case("gfx800", CudaArch::GFX800)
+ .Case("gfx801", CudaArch::GFX801)
+ .Case("gfx802", CudaArch::GFX802)
+ .Case("gfx803", CudaArch::GFX803)
+ .Case("gfx810", CudaArch::GFX810)
+ .Case("gfx900", CudaArch::GFX900)
+ .Case("gfx901", CudaArch::GFX901)
+ .Case("gfx902", CudaArch::GFX902)
.Default(CudaArch::UNKNOWN);
}
@@ -111,6 +156,8 @@
return "compute_70";
case CudaVirtualArch::COMPUTE_72:
return "compute_72";
+ case CudaVirtualArch::COMPUTE_AMDGCN:
+ return "compute_amdgcn";
}
llvm_unreachable("invalid enum");
}
@@ -130,6 +177,7 @@
.Case("compute_62", CudaVirtualArch::COMPUTE_62)
.Case("compute_70", CudaVirtualArch::COMPUTE_70)
.Case("compute_72", CudaVirtualArch::COMPUTE_72)
+ .Case("compute_amdgcn", CudaVirtualArch::COMPUTE_AMDGCN)
.Default(CudaVirtualArch::UNKNOWN);
}
@@ -166,6 +214,22 @@
return CudaVirtualArch::COMPUTE_70;
case CudaArch::SM_72:
return CudaVirtualArch::COMPUTE_72;
+ case CudaArch::GFX600:
+ case CudaArch::GFX601:
+ case CudaArch::GFX700:
+ case CudaArch::GFX701:
+ case CudaArch::GFX702:
+ case CudaArch::GFX703:
+ case CudaArch::GFX704:
+ case CudaArch::GFX800:
+ case CudaArch::GFX801:
+ case CudaArch::GFX802:
+ case CudaArch::GFX803:
+ case CudaArch::GFX810:
+ case CudaArch::GFX900:
+ case CudaArch::GFX901:
+ case CudaArch::GFX902:
+ return CudaVirtualArch::COMPUTE_AMDGCN;
}
llvm_unreachable("invalid enum");
}
@@ -194,6 +258,22 @@
return CudaVersion::CUDA_90;
case CudaArch::SM_72:
return CudaVersion::CUDA_91;
+ case CudaArch::GFX600:
+ case CudaArch::GFX601:
+ case CudaArch::GFX700:
+ case CudaArch::GFX701:
+ case CudaArch::GFX702:
+ case CudaArch::GFX703:
+ case CudaArch::GFX704:
+ case CudaArch::GFX800:
+ case CudaArch::GFX801:
+ case CudaArch::GFX802:
+ case CudaArch::GFX803:
+ case CudaArch::GFX810:
+ case CudaArch::GFX900:
+ case CudaArch::GFX901:
+ case CudaArch::GFX902:
+ return CudaVersion::CUDA_70;
}
llvm_unreachable("invalid enum");
}
@@ -204,6 +284,21 @@
return CudaVersion::UNKNOWN;
case CudaArch::SM_20:
case CudaArch::SM_21:
+ case CudaArch::GFX600:
+ case CudaArch::GFX601:
+ case CudaArch::GFX700:
+ case CudaArch::GFX701:
+ case CudaArch::GFX702:
+ case CudaArch::GFX703:
+ case CudaArch::GFX704:
+ case CudaArch::GFX800:
+ case CudaArch::GFX801:
+ case CudaArch::GFX802:
+ case CudaArch::GFX803:
+ case CudaArch::GFX810:
+ case CudaArch::GFX900:
+ case CudaArch::GFX901:
+ case CudaArch::GFX902:
return CudaVersion::CUDA_80;
default:
return CudaVersion::LATEST;
Index: include/clang/Driver/ToolChain.h
===================================================================
--- include/clang/Driver/ToolChain.h
+++ include/clang/Driver/ToolChain.h
@@ -108,10 +108,12 @@
path_list ProgramPaths;
mutable std::unique_ptr<Tool> Clang;
+ mutable std::unique_ptr<Tool> Backend;
mutable std::unique_ptr<Tool> Assemble;
mutable std::unique_ptr<Tool> Link;
mutable std::unique_ptr<Tool> OffloadBundler;
Tool *getClang() const;
+ Tool *getBackend() const;
Tool *getAssemble() const;
Tool *getLink() const;
Tool *getClangAs() const;
@@ -139,6 +141,7 @@
void setTripleEnvironment(llvm::Triple::EnvironmentType Env);
virtual Tool *buildAssembler() const;
+ virtual Tool *buildBackend() const;
virtual Tool *buildLinker() const;
virtual Tool *getTool(Action::ActionClass AC) const;
Index: include/clang/Basic/Cuda.h
===================================================================
--- include/clang/Basic/Cuda.h
+++ include/clang/Basic/Cuda.h
@@ -46,6 +46,21 @@
SM_62,
SM_70,
SM_72,
+ GFX600,
+ GFX601,
+ GFX700,
+ GFX701,
+ GFX702,
+ GFX703,
+ GFX704,
+ GFX800,
+ GFX801,
+ GFX802,
+ GFX803,
+ GFX810,
+ GFX900,
+ GFX901,
+ GFX902,
LAST,
};
const char *CudaArchToString(CudaArch A);
@@ -68,6 +83,7 @@
COMPUTE_62,
COMPUTE_70,
COMPUTE_72,
+ COMPUTE_AMDGCN,
};
const char *CudaVirtualArchToString(CudaVirtualArch A);
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits