arsenm updated this revision to Diff 254229.
arsenm marked an inline comment as done.
arsenm added a comment.
Rebase again
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D59321/new/
https://reviews.llvm.org/D59321
Files:
clang/include/clang/Basic/DiagnosticDriverKinds.td
clang/include/clang/Driver/Options.td
clang/lib/Driver/Driver.cpp
clang/lib/Driver/ToolChains/AMDGPU.cpp
clang/lib/Driver/ToolChains/AMDGPU.h
clang/lib/Driver/ToolChains/HIP.h
clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
clang/test/Driver/Inputs/rocm-device-libs/lib/hip.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/ockl.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_on.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_off.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_on.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_off.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_on.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1010.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1011.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1012.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_803.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_900.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_off.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_on.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_off.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_on.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/ocml.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/opencl.amdgcn.bc
clang/test/Driver/amdgpu-visibility.cl
clang/test/Driver/rocm-detect.cl
clang/test/Driver/rocm-device-libs.cl
clang/test/Driver/rocm-not-found.cl
llvm/include/llvm/Support/TargetParser.h
llvm/lib/Support/TargetParser.cpp
Index: llvm/lib/Support/TargetParser.cpp
===================================================================
--- llvm/lib/Support/TargetParser.cpp
+++ llvm/lib/Support/TargetParser.cpp
@@ -99,9 +99,9 @@
{{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx908"}, {"gfx908"}, GK_GFX908, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
- {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
- {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
- {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
+ {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
+ {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
};
const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
Index: llvm/include/llvm/Support/TargetParser.h
===================================================================
--- llvm/include/llvm/Support/TargetParser.h
+++ llvm/include/llvm/Support/TargetParser.h
@@ -151,7 +151,10 @@
// Common features.
FEATURE_FAST_FMA_F32 = 1 << 4,
- FEATURE_FAST_DENORMAL_F32 = 1 << 5
+ FEATURE_FAST_DENORMAL_F32 = 1 << 5,
+
+ // Wavefront 32 is available.
+ FEATURE_WAVE32 = 1 << 6
};
StringRef getArchNameAMDGCN(GPUKind AK);
Index: clang/test/Driver/rocm-not-found.cl
===================================================================
--- /dev/null
+++ clang/test/Driver/rocm-not-found.cl
@@ -0,0 +1,11 @@
+// REQUIRES: clang-driver
+
+// Check that we raise an error if we're trying to compile OpenCL for amdhsa code but can't
+// find a ROCm install, unless -nogpulib was passed.
+
+// RUN: %clang -### --sysroot=%s/no-rocm-there -target amdgcn--amdhsa %s 2>&1 | FileCheck %s --check-prefix ERR
+// RUN: %clang -### --rocm-path=%s/no-rocm-there -target amdgcn--amdhsa %s 2>&1 | FileCheck %s --check-prefix ERR
+// ERR: cannot find ROCm installation. Provide its path via --rocm-path, or pass -nogpulib.
+
+// The -nogpulib run has to target amdhsa as well; for any other target the
+// driver does not select the ROCm toolchain and the check below cannot fail.
+// RUN: %clang -### -nogpulib --rocm-path=%s/no-rocm-there -target amdgcn--amdhsa %s 2>&1 | FileCheck %s --check-prefix OK
+// OK-NOT: cannot find ROCm installation.
Index: clang/test/Driver/rocm-device-libs.cl
===================================================================
--- /dev/null
+++ clang/test/Driver/rocm-device-libs.cl
@@ -0,0 +1,163 @@
+// REQUIRES: clang-driver
+// REQUIRES: amdgpu-registered-target
+
+// Test flush-denormals-to-zero enabled uses oclc_daz_opt_on
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN: -x cl -mcpu=gfx900 \
+// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN: %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s
+
+
+
+// Make sure the different denormal default is respected for gfx8
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN: -x cl -mcpu=gfx803 \
+// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN: %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s
+
+
+
+// Make sure the non-canonical name works
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN: -x cl -mcpu=fiji \
+// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN: %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s
+
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN: -x cl -mcpu=gfx900 \
+// RUN: -cl-denorms-are-zero \
+// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN: %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX900,WAVE64 %s
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN: -x cl -mcpu=gfx803 \
+// RUN: -cl-denorms-are-zero \
+// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN: %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX803,WAVE64 %s
+
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN: -x cl -mcpu=gfx803 \
+// RUN: -cl-finite-math-only \
+// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN: %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FINITE-ONLY,GFX803,WAVE64 %s
+
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN: -x cl -mcpu=gfx803 \
+// RUN: -cl-fp32-correctly-rounded-divide-sqrt \
+// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN: %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-CORRECT-SQRT,GFX803,WAVE64 %s
+
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN: -x cl -mcpu=gfx803 \
+// RUN: -cl-fast-relaxed-math \
+// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN: %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FAST-RELAXED,GFX803,WAVE64 %s
+
+
+
+// The unsafe-math prefix is spelled exactly as in the check lines further down.
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx803 \
+// RUN:   -cl-unsafe-math-optimizations \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-UNSAFE-MATH,GFX803,WAVE64 %s
+
+// Targets with wave32 support link the wavefrontsize64 "off" library by default.
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx1010 \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,GFX1010,WAVE32 %s
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx1011 \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,GFX1011,WAVE32 %s
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx1012 \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,GFX1012,WAVE32 %s
+
+
+// An explicit -mwavefrontsize64 switches a wave32-capable target back to wave64.
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx1010 -mwavefrontsize64 \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,GFX1010,WAVE64 %s
+
+// The last of -mwavefrontsize64 / -mno-wavefrontsize64 wins.
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx1010 -mwavefrontsize64 -mno-wavefrontsize64 \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,GFX1010,WAVE32 %s
+
+// Ignore -mno-wavefrontsize64 without wave32 support
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx803 -mno-wavefrontsize64 \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,GFX803,WAVE64 %s
+
+
+
+// COMMON: "-triple" "amdgcn-amd-amdhsa"
+// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/opencl.amdgcn.bc"
+// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/ocml.amdgcn.bc"
+// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/ockl.amdgcn.bc"
+
+// GFX900-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_off.amdgcn.bc"
+// GFX803-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_on.amdgcn.bc"
+// GFX700-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_on.amdgcn.bc"
+// COMMON-DAZ-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_on.amdgcn.bc"
+
+
+// COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_off.amdgcn.bc"
+// COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_off.amdgcn.bc"
+// COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc"
+
+
+// COMMON-FINITE-ONLY-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_off.amdgcn.bc"
+// COMMON-FINITE-ONLY-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_on.amdgcn.bc"
+// COMMON-FINITE-ONLY-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc"
+
+
+// COMMON-CORRECT-SQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_off.amdgcn.bc"
+// COMMON-CORRECT-SQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_off.amdgcn.bc"
+// COMMON-CORRECT-SQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_on.amdgcn.bc"
+
+
+// COMMON-FAST-RELAXED-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_on.amdgcn.bc"
+// COMMON-FAST-RELAXED-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_on.amdgcn.bc"
+// COMMON-FAST-RELAXED-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc"
+
+
+// COMMON-UNSAFE-MATH-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_on.amdgcn.bc"
+// COMMON-UNSAFE-MATH-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_off.amdgcn.bc"
+// COMMON-UNSAFE-MATH-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc"
+
+// WAVE64: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_wavefrontsize64_on.amdgcn.bc"
+// WAVE32: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_wavefrontsize64_off.amdgcn.bc"
+
+
+// The per-GPU library is the last one added, so these follow the wave checks.
+// GFX900: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_isa_version_900.amdgcn.bc"
+// GFX803: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_isa_version_803.amdgcn.bc"
+// GFX1010: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_isa_version_1010.amdgcn.bc"
+// GFX1011: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_isa_version_1011.amdgcn.bc"
+// GFX1012: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_isa_version_1012.amdgcn.bc"
Index: clang/test/Driver/rocm-detect.cl
===================================================================
--- /dev/null
+++ clang/test/Driver/rocm-detect.cl
@@ -0,0 +1,21 @@
+// REQUIRES: clang-driver
+// REQUIRES: amdgpu-registered-target
+
+// Make sure the appropriate device specific library is available.
+
+// We don't include every target in the test directory, so just pick a valid
+// target not included in the test.
+
+// RUN: %clang -### -v -target amdgcn-amd-amdhsa -mcpu=gfx902 \
+// RUN: --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=COMMON,GFX902-DEFAULTLIBS %s
+
+
+// RUN: %clang -### -v -target amdgcn-amd-amdhsa -mcpu=gfx902 -nogpulib \
+// RUN: --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=COMMON,GFX902,NODEFAULTLIBS %s
+
+
+// GFX902-DEFAULTLIBS: error: cannot find device library for gfx902. Provide path to different ROCm installation via --rocm-path, or pass -nogpulib to build without linking default libraries.
+
+// NODEFAULTLIBS-NOT: error: cannot find
Index: clang/test/Driver/amdgpu-visibility.cl
===================================================================
--- clang/test/Driver/amdgpu-visibility.cl
+++ clang/test/Driver/amdgpu-visibility.cl
@@ -2,6 +2,10 @@
// RUN: %clang -### -target amdgcn-amd-amdhsa -x cl -c -emit-llvm -fvisibility=protected %s 2>&1 | FileCheck -check-prefix=OVERRIDE-PROTECTED %s
// RUN: %clang -### -target amdgcn-amd-amdhsa -x cl -c -emit-llvm -fvisibility-ms-compat %s 2>&1 | FileCheck -check-prefix=OVERRIDE-MS %s
+// RUN: %clang -### -target amdgcn-mesa-mesa3d -x cl -c -emit-llvm %s 2>&1 | FileCheck -check-prefix=DEFAULT %s
+// RUN: %clang -### -target amdgcn-mesa-mesa3d -x cl -c -emit-llvm -fvisibility=protected %s 2>&1 | FileCheck -check-prefix=OVERRIDE-PROTECTED %s
+// RUN: %clang -### -target amdgcn-mesa-mesa3d -x cl -c -emit-llvm -fvisibility-ms-compat %s 2>&1 | FileCheck -check-prefix=OVERRIDE-MS %s
+
// DEFAULT-DAG: "-fvisibility" "hidden"
// DEFAULT-DAG: "-fapply-global-visibility-to-externs"
Index: clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
+++ clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
@@ -1,5 +1,5 @@
-// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s
-// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s
// CHECK-DAG: ![[FILEVAR0:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
// CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR0]], expr: !DIExpression())
Index: clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
+++ clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
@@ -1,5 +1,5 @@
-// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s
-// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s
// CHECK-DAG: ![[DWARF_ADDRESS_SPACE_NONE:[0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !{{[0-9]+}}, size: {{[0-9]+}})
// CHECK-DAG: ![[DWARF_ADDRESS_SPACE_LOCAL:[0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !{{[0-9]+}}, size: {{[0-9]+}}, dwarfAddressSpace: 2)
Index: clang/lib/Driver/ToolChains/HIP.h
===================================================================
--- clang/lib/Driver/ToolChains/HIP.h
+++ clang/lib/Driver/ToolChains/HIP.h
@@ -73,7 +73,7 @@
namespace toolchains {
-class LLVM_LIBRARY_VISIBILITY HIPToolChain final : public AMDGPUToolChain {
+class LLVM_LIBRARY_VISIBILITY HIPToolChain final : public ROCMToolChain {
public:
HIPToolChain(const Driver &D, const llvm::Triple &Triple,
const ToolChain &HostTC, const llvm::opt::ArgList &Args);
Index: clang/lib/Driver/ToolChains/AMDGPU.h
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPU.h
+++ clang/lib/Driver/ToolChains/AMDGPU.h
@@ -13,12 +13,154 @@
#include "clang/Driver/Options.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Support/TargetParser.h"
#include <map>
namespace clang {
namespace driver {
+
+/// Finds a usable ROCm installation and records the paths of the device
+/// bitcode libraries found in it. TODO: Generalize to handle libclc.
+class RocmInstallationDetector {
+private:
+ struct ConditionalLibrary { // A library that comes as an "_on"/"_off" pair of files.
+ SmallString<0> On;
+ SmallString<0> Off;
+ // True once both variants have been recorded.
+ bool isValid() const {
+ return !On.empty() && !Off.empty();
+ }
+ // Path of the variant selected by Enabled; both variants must be present.
+ StringRef get(bool Enabled) const {
+ assert(isValid());
+ return Enabled ? On : Off;
+ }
+ };
+
+ const Driver &D;
+ bool IsValid = false; // Set by the constructor once a candidate passes its checks.
+ //RocmVersion Version = RocmVersion::UNKNOWN;
+ SmallString<0> InstallPath; // Root of the candidate most recently examined.
+ //SmallString<0> BinPath;
+ SmallString<0> LibPath; // NOTE(review): not assigned anywhere in the code visible in this patch.
+ SmallString<0> LibDevicePath; // Built from InstallPath and "lib".
+ SmallString<0> IncludePath; // Built from InstallPath and "include".
+ llvm::StringMap<std::string> LibDeviceMap; // "gfx<isa version>" -> path of oclc_isa_version_<isa version>.amdgcn.bc
+
+ // Libraries that are always linked.
+ SmallString<0> OCML;
+ SmallString<0> OCKL;
+
+ // Libraries that are always linked depending on the language
+ SmallString<0> OpenCL;
+ SmallString<0> HIP;
+
+ // Libraries swapped based on compile flags.
+ ConditionalLibrary WavefrontSize64;
+ ConditionalLibrary FiniteOnly;
+ ConditionalLibrary UnsafeMath;
+ ConditionalLibrary DenormalsAreZero;
+ ConditionalLibrary CorrectlyRoundedSqrt;
+ // True when every non-target-specific library was found; both the OpenCL and the HIP library are required.
+ bool allGenericLibsValid() const {
+ return !OCML.empty() && !OCKL.empty() && !OpenCL.empty() && !HIP.empty() &&
+ WavefrontSize64.isValid() && FiniteOnly.isValid() &&
+ UnsafeMath.isValid() && DenormalsAreZero.isValid() &&
+ CorrectlyRoundedSqrt.isValid();
+ }
+
+ // Architectures for which CheckRocmVersionSupportsArch has raised an error.
+ // NOTE(review): still keyed on CudaArch; presumably carried over from the CUDA detector -- confirm.
+ mutable llvm::SmallSet<CudaArch, 4> ArchsWithBadVersion;
+
+public:
+ RocmInstallationDetector(const Driver &D, const llvm::Triple &HostTriple,
+ const llvm::opt::ArgList &Args);
+
+ /// Add arguments needed to link default bitcode libraries.
+ void addCommonBitcodeLibCC1Args(const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args,
+ StringRef LibDeviceFile, bool Wave64,
+ bool DAZ, bool FiniteOnly, bool UnsafeMathOpt,
+ bool FastRelaxedMath, bool CorrectSqrt) const;
+
+ /// Emit an error if Version does not support the given Arch.
+ ///
+ /// If either Version or Arch is unknown, does not emit an error. Emits at most one error per Arch.
+ /// NOTE(review): Version is commented out above and no definition of this method is visible in this patch -- confirm it is still needed.
+ void CheckRocmVersionSupportsArch(CudaArch Arch) const;
+
+ /// Check whether we detected a valid Rocm install.
+ bool isValid() const { return IsValid; }
+ /// Print information about the detected ROCm installation.
+ void print(raw_ostream &OS) const;
+
+ // Get the detected Rocm install's version (disabled together with Version).
+ // RocmVersion version() const { return Version; }
+
+ /// Get the detected Rocm installation path.
+ StringRef getInstallPath() const { return InstallPath; }
+
+ // Get the detected path to Rocm's bin directory (disabled together with BinPath).
+ // StringRef getBinPath() const { return BinPath; }
+
+ /// Get the detected Rocm Include path.
+ StringRef getIncludePath() const { return IncludePath; }
+
+ /// Get the detected Rocm library path.
+ StringRef getLibPath() const { return LibPath; }
+
+ /// Get the detected Rocm device library path.
+ StringRef getLibDevicePath() const { return LibDevicePath; }
+ // Accessors for the individual libraries; each asserts that its library was found.
+ StringRef getOCMLPath() const {
+ assert(!OCML.empty());
+ return OCML;
+ }
+
+ StringRef getOCKLPath() const {
+ assert(!OCKL.empty());
+ return OCKL;
+ }
+
+ StringRef getOpenCLPath() const {
+ assert(!OpenCL.empty());
+ return OpenCL;
+ }
+
+ StringRef getHIPPath() const {
+ assert(!HIP.empty());
+ return HIP;
+ }
+ // Accessors for the flag-dependent libraries; Enabled selects the "on" variant.
+ StringRef getWavefrontSize64Path(bool Enabled) const {
+ return WavefrontSize64.get(Enabled);
+ }
+
+ StringRef getFiniteOnlyPath(bool Enabled) const {
+ return FiniteOnly.get(Enabled);
+ }
+
+ StringRef getUnsafeMathPath(bool Enabled) const {
+ return UnsafeMath.get(Enabled);
+ }
+
+ StringRef getDenormalsAreZeroPath(bool Enabled) const {
+ return DenormalsAreZero.get(Enabled);
+ }
+
+ StringRef getCorrectlyRoundedSqrtPath(bool Enabled) const {
+ return CorrectlyRoundedSqrt.get(Enabled);
+ }
+
+ /// Get libdevice file for given architecture (e.g. "gfx900"); empty when no matching library was recorded.
+ std::string getLibDeviceFile(StringRef Gpu) const {
+ return LibDeviceMap.lookup(Gpu);
+ }
+};
+
namespace tools {
namespace amdgpu {
@@ -42,11 +184,9 @@
namespace toolchains {
class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF {
-
-private:
+protected:
const std::map<options::ID, const StringRef> OptionsDefault;
-protected:
Tool *buildLinker() const override;
const StringRef getOptionDefault(options::ID OptID) const {
auto opt = OptionsDefault.find(OptID);
@@ -79,6 +219,19 @@
const llvm::fltSemantics *FPType = nullptr) const override;
};
+/// AMDGPU toolchain that additionally locates a ROCm installation and uses it
+/// when adding cc1 target options.
+class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {
+  // Detection result; constructed together with the toolchain.
+  RocmInstallationDetector RocmInstallation;
+
+public:
+  ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
+                const llvm::opt::ArgList &Args);
+
+  void addClangTargetOptions(
+      const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
+      Action::OffloadKind DeviceOffloadKind) const override;
+};
+
} // end namespace toolchains
} // end namespace driver
} // end namespace clang
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -12,7 +12,8 @@
#include "clang/Driver/Compilation.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "llvm/Option/ArgList.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
using namespace clang::driver;
using namespace clang::driver::tools;
@@ -20,6 +21,162 @@
using namespace clang;
using namespace llvm::opt;
+/// Locate a ROCm installation and index its device bitcode libraries.
+///
+/// Candidates are tried in order: the --rocm-path value when one is given;
+/// otherwise a location derived from the driver's installed directory followed
+/// by <sysroot>/opt/rocm. The first candidate that passes the checks in the
+/// loop below sets IsValid; if none does, IsValid stays false.
+RocmInstallationDetector::RocmInstallationDetector(
+    const Driver &D, const llvm::Triple &HostTriple,
+    const llvm::opt::ArgList &Args)
+    : D(D) {
+  struct Candidate {
+    std::string Path;
+    // When set, the library directory must exist even under -nogpulib.
+    bool StrictChecking;
+
+    Candidate(std::string Path, bool StrictChecking = false)
+        : Path(Path), StrictChecking(StrictChecking) {}
+  };
+
+  SmallVector<Candidate, 4> Candidates;
+
+  if (Args.hasArg(clang::driver::options::OPT_rocm_path_EQ)) {
+    Candidates.emplace_back(
+        Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ).str());
+  } else {
+    // Try to find relative to the compiler binary.
+    const char *InstallDir = D.getInstalledDir();
+
+    // Check both a normal Unix prefix position of the clang binary, as well as
+    // the Windows-esque layout the ROCm packages use with the host architecture
+    // subdirectory of bin.
+    //
+    // NOTE(review): parent_path() yields a whole path, so the two comparisons
+    // below only succeed when that path is exactly the arch name or "bin".
+    // Presumably the last path component was meant to be compared -- confirm.
+
+    StringRef ParentDir = llvm::sys::path::parent_path(InstallDir);
+    if (ParentDir == HostTriple.getArchName())
+      ParentDir = llvm::sys::path::parent_path(ParentDir);
+
+    if (ParentDir == "bin") {
+      Candidates.emplace_back(llvm::sys::path::parent_path(ParentDir).str(),
+                              /*StrictChecking=*/true);
+    }
+
+    Candidates.emplace_back(D.SysRoot + "/opt/rocm");
+  }
+
+  bool NoBuiltinLibs = Args.hasArg(options::OPT_nogpulib);
+
+  for (const auto &Candidate : Candidates) {
+    InstallPath = Candidate.Path;
+    if (InstallPath.empty() || !D.getVFS().exists(InstallPath))
+      continue;
+
+    // FIXME: The install path situation is a real mess.
+
+    // For a cmake install, these are placed directly in
+    // ${INSTALL_PREFIX}/lib
+
+    // In the separate OpenCL builds, the bitcode libraries are placed in
+    // ${OPENCL_ROOT}/lib/x86_64/bitcode/*
+
+    // For the rocm installed packages, these are placed at
+    // /opt/rocm/opencl/lib/x86_64/bitcode
+
+    // An additional copy is installed, in scattered locations between
+    // /opt/rocm/hcc/rocdl/oclc
+    // /opt/rocm/hcc/rocdl/ockl
+    // /opt/rocm/hcc/rocdl/lib
+    //
+    // Yet another complete set is installed to
+    // /opt/rocm/hcc/rocdl/lib
+
+    // For now just recognize the opencl package layout.
+
+    // BinPath = InstallPath + "/bin";
+
+    // Rebuild the derived paths from scratch for every candidate.
+    // path::append() extends its first argument in place, so without the reset
+    // the path built for a rejected candidate would prefix the next one.
+    IncludePath = InstallPath;
+    llvm::sys::path::append(IncludePath, "include");
+    LibDevicePath = InstallPath;
+    llvm::sys::path::append(LibDevicePath, "lib");
+
+    auto &FS = D.getVFS();
+
+    // We don't need the include path for OpenCL, since clang already ships with
+    // the default header.
+
+    bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking);
+    if (CheckLibDevice && !FS.exists(LibDevicePath))
+      continue;
+
+    const StringRef Suffix(".amdgcn.bc");
+
+    // Record every recognised *.amdgcn.bc file in the library directory.
+    // NOTE(review): entries recorded for a rejected candidate are not cleared
+    // before the next candidate is scanned -- confirm that is acceptable.
+    std::error_code EC;
+    for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
+         !EC && LI != LE; LI.increment(EC)) {
+      StringRef FilePath = LI->path();
+      StringRef FileName = llvm::sys::path::filename(FilePath);
+      if (!FileName.endswith(Suffix))
+        continue;
+
+      StringRef BaseName = FileName.drop_back(Suffix.size());
+
+      if (BaseName == "ocml") {
+        OCML = FilePath;
+      } else if (BaseName == "ockl") {
+        OCKL = FilePath;
+      } else if (BaseName == "opencl") {
+        OpenCL = FilePath;
+      } else if (BaseName == "hip") {
+        HIP = FilePath;
+      } else if (BaseName == "oclc_finite_only_off") {
+        FiniteOnly.Off = FilePath;
+      } else if (BaseName == "oclc_finite_only_on") {
+        FiniteOnly.On = FilePath;
+      } else if (BaseName == "oclc_daz_opt_on") {
+        DenormalsAreZero.On = FilePath;
+      } else if (BaseName == "oclc_daz_opt_off") {
+        DenormalsAreZero.Off = FilePath;
+      } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
+        CorrectlyRoundedSqrt.On = FilePath;
+      } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
+        CorrectlyRoundedSqrt.Off = FilePath;
+      } else if (BaseName == "oclc_unsafe_math_on") {
+        UnsafeMath.On = FilePath;
+      } else if (BaseName == "oclc_unsafe_math_off") {
+        UnsafeMath.Off = FilePath;
+      } else if (BaseName == "oclc_wavefrontsize64_on") {
+        WavefrontSize64.On = FilePath;
+      } else if (BaseName == "oclc_wavefrontsize64_off") {
+        WavefrontSize64.Off = FilePath;
+      } else {
+        // Process all bitcode filenames that look like
+        // oclc_isa_version_XXX.amdgcn.bc
+        const StringRef DeviceLibPrefix = "oclc_isa_version_";
+        if (!BaseName.startswith(DeviceLibPrefix))
+          continue;
+
+        StringRef IsaVersionNumber =
+            BaseName.drop_front(DeviceLibPrefix.size());
+
+        // Build the "gfxXXX" key in owned storage rather than keeping a Twine
+        // in a local variable.
+        SmallString<8> GfxName("gfx");
+        GfxName += IsaVersionNumber;
+        LibDeviceMap.insert(std::make_pair(GfxName.str(), FilePath.str()));
+      }
+    }
+
+    if (!NoBuiltinLibs) {
+      // Check that the required non-target libraries are all available.
+      if (!allGenericLibsValid())
+        continue;
+
+      // Check that we have found at least one libdevice that we can link in if
+      // -nogpulib hasn't been specified.
+      if (LibDeviceMap.empty())
+        continue;
+    }
+
+    IsValid = true;
+    break;
+  }
+}
+
+/// Write the path of the detected installation to \p OS; prints nothing when
+/// no valid installation was found.
+void RocmInstallationDetector::print(raw_ostream &OS) const {
+  if (!isValid())
+    return;
+
+  OS << "Found ROCm installation: " << InstallPath << '\n';
+}
+
void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
@@ -142,6 +299,12 @@
llvm::DenormalMode::getIEEE();
}
+/// ROCM Toolchain
+///
+/// Builds the AMDGPU base toolchain, then constructs the ROCm detector from
+/// the same driver, triple and arguments.
+/// NOTE(review): the detector calls this parameter HostTriple but receives the
+/// toolchain's own triple here -- confirm that is intended.
+ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
+                             const ArgList &Args)
+    : AMDGPUToolChain(D, Triple, Args), RocmInstallation(D, Triple, Args) {}
+
void AMDGPUToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
@@ -155,3 +318,89 @@
CC1Args.push_back("-fapply-global-visibility-to-externs");
}
}
+
+/// Add the cc1 options that link the ROCm device libraries.
+///
+/// The common AMDGPU options are added first. Unless -nogpulib was given, one
+/// "-mlink-builtin-bitcode" pair is then added per library: the OpenCL
+/// library, followed by the generic set chosen from the math/denormal/wave
+/// size flags, ending with the library for the -mcpu GPU. A missing ROCm
+/// installation or a missing per-GPU library is diagnosed and no library
+/// options are added.
+void ROCMToolChain::addClangTargetOptions(
+    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
+    Action::OffloadKind DeviceOffloadingKind) const {
+  AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
+                                         DeviceOffloadingKind);
+
+  if (DriverArgs.hasArg(options::OPT_nogpulib))
+    return;
+
+  if (!RocmInstallation.isValid()) {
+    getDriver().Diag(diag::err_drv_no_rocm_installation);
+    return;
+  }
+
+  // Get the device name and canonicalize it
+  const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
+  auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
+  const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
+  std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
+  if (LibDeviceFile.empty()) {
+    getDriver().Diag(diag::err_drv_no_rocm_device_lib) << GpuArch;
+    return;
+  }
+
+  // This has to be computed on every call. A function-local static would be
+  // initialized by the first call only and then reused for every later GPU.
+  const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
+  const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32) != 0;
+
+  // Targets without wave32 always use wave64; otherwise wave64 has to be
+  // requested explicitly.
+  bool Wave64 = !HasWave32 || DriverArgs.hasFlag(
+      options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
+
+  // TODO: There are way too many flags that change this. Do we need to check
+  // them all?
+  bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
+             getDefaultDenormsAreZeroForTarget(Kind);
+  bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
+
+  bool UnsafeMathOpt =
+      DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
+  bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
+  bool CorrectSqrt =
+      DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
+
+  // Add the OpenCL specific bitcode library.
+  CC1Args.push_back("-mlink-builtin-bitcode");
+  CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getOpenCLPath()));
+
+  // Add the generic set of libraries.
+  RocmInstallation.addCommonBitcodeLibCC1Args(
+      DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly,
+      UnsafeMathOpt, FastRelaxedMath, CorrectSqrt);
+}
+
+/// Append one "-mlink-builtin-bitcode <file>" pair per language-independent
+/// device library to \p CC1Args, ending with the per-GPU \p LibDeviceFile.
+///
+/// The boolean parameters pick the "on" or "off" variant of each
+/// flag-dependent library; \p FastRelaxedMath turns on both the unsafe-math
+/// and the finite-only variants.
+void RocmInstallationDetector::addCommonBitcodeLibCC1Args(
+    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
+    StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly,
+    bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt) const {
+  static const char LinkBitcodeFlag[] = "-mlink-builtin-bitcode";
+
+  // Emit the flag followed by the library path.
+  auto LinkLibrary = [&](StringRef BitcodePath) {
+    CC1Args.push_back(LinkBitcodeFlag);
+    CC1Args.push_back(DriverArgs.MakeArgString(BitcodePath));
+  };
+
+  const bool UseUnsafeMath = UnsafeMathOpt || FastRelaxedMath;
+  const bool UseFiniteOnly = FiniteOnly || FastRelaxedMath;
+
+  // The order below is the order in which the libraries reach cc1.
+  LinkLibrary(getOCMLPath());
+  LinkLibrary(getOCKLPath());
+  LinkLibrary(getDenormalsAreZeroPath(DAZ));
+  LinkLibrary(getUnsafeMathPath(UseUnsafeMath));
+  LinkLibrary(getFiniteOnlyPath(UseFiniteOnly));
+  LinkLibrary(getCorrectlyRoundedSqrtPath(CorrectSqrt));
+  LinkLibrary(getWavefrontSize64Path(Wave64));
+  LinkLibrary(LibDeviceFile);
+}
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4857,6 +4857,8 @@
TC = std::make_unique<toolchains::Solaris>(*this, Target, Args);
break;
case llvm::Triple::AMDHSA:
+ TC = std::make_unique<toolchains::ROCMToolChain>(*this, Target, Args);
+ break;
case llvm::Triple::AMDPAL:
case llvm::Triple::Mesa3D:
TC = std::make_unique<toolchains::AMDGPUToolChain>(*this, Target, Args);
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -608,6 +608,8 @@
def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>,
HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">;
def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">;
+def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group<Link_Group>,
+ HelpText<"ROCm installation path">; // Spelled --rocm-path=<dir> on the command line.
def hip_device_lib_path_EQ : Joined<["--"], "hip-device-lib-path=">, Group<Link_Group>,
HelpText<"HIP device library path">;
def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group<Link_Group>,
Index: clang/include/clang/Basic/DiagnosticDriverKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -55,6 +55,14 @@
def err_drv_no_cuda_libdevice : Error<
"cannot find libdevice for %0. Provide path to different CUDA installation "
"via --cuda-path, or pass -nocudalib to build without linking with libdevice.">;
+// ROCm driver errors: no installation found, and no device library for the GPU named by %0.
+def err_drv_no_rocm_installation : Error<
+ "cannot find ROCm installation. Provide its path via --rocm-path, or pass "
+ "-nogpulib.">;
+def err_drv_no_rocm_device_lib : Error<
+ "cannot find device library for %0. Provide path to different ROCm installation "
+ "via --rocm-path, or pass -nogpulib to build without linking default libraries.">;
+
def err_drv_cuda_version_unsupported : Error<
"GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), "
"but installation at %3 is %4. Use --cuda-path to specify a different CUDA "
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits