https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/79873
>From 35e12c3d83f3be93618805ffaf05e3424689f32f Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Mon, 29 Jan 2024 11:08:04 -0600 Subject: [PATCH 1/2] [NVPTX] Allow compiling LLVM-IR without `-march` set Summary: The NVPTX tools require an architecture to be used, however if we are creating generic LLVM-IR we should be able to leave it unspecified. This will result in the `target-cpu` attributes not being set on the functions so it can be changed when linked into code. This allows the standalone `--target=nvptx64-nvidia-cuda` toolchain to create LLVM-IR similar to how CUDA's deviceRTL looks from C/C++ --- .../clang/Basic/DiagnosticDriverKinds.td | 2 ++ clang/lib/Basic/Targets/NVPTX.cpp | 7 +++++- clang/lib/Basic/Targets/NVPTX.h | 3 ++- clang/lib/Driver/ToolChains/Cuda.cpp | 19 ++++++++++----- clang/test/Driver/cuda-cross-compiling.c | 24 +++++++++++-------- .../Preprocessor/predefined-arch-macros.c | 12 ++++++++++ 6 files changed, 49 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 094fe19509412..476528375fb88 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -57,6 +57,8 @@ def warn_drv_avr_stdlib_not_linked: Warning< InGroup<AVRRtlibLinkingQuirks>; def err_drv_cuda_bad_gpu_arch : Error<"unsupported CUDA gpu architecture: %0">; def err_drv_offload_bad_gpu_arch : Error<"unsupported %0 gpu architecture: %1">; +def err_drv_offload_missing_gpu_arch : Error< + "Must pass in an explicit %0 gpu architecture to '%1'">; def err_drv_no_cuda_installation : Error< "cannot find CUDA installation; provide its path via '--cuda-path', or pass " "'-nocudainc' to build without CUDA includes">; diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 0b9d97f69d146..7687e3faad770 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ 
b/clang/lib/Basic/Targets/NVPTX.cpp @@ -59,7 +59,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, // Define available target features // These must be defined in sorted order! NoAsmVariants = true; - GPU = CudaArch::SM_20; + GPU = CudaArch::UNUSED; if (TargetPointerWidth == 32) resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); @@ -169,6 +169,11 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { Builder.defineMacro("__PTX__"); Builder.defineMacro("__NVPTX__"); + + // Skip setting architecture dependent macros if undefined. + if (GPU == CudaArch::UNUSED && !HostTarget) + return; + if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) { // Set __CUDA_ARCH__ for the GPU specified. std::string CUDAArchCode = [this] { diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index 20d76b702a942..f476d49047c01 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -79,7 +79,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo { initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector<std::string> &FeaturesVec) const override { - Features[CudaArchToString(GPU)] = true; + if (GPU != CudaArch::UNUSED) + Features[CudaArchToString(GPU)] = true; Features["ptx" + std::to_string(PTXVersion)] = true; return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 8a9d0caaccf30..ca54d2d55426b 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -389,7 +389,11 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, GPUArchName = JA.getOffloadingArch(); } else { GPUArchName = Args.getLastArgValue(options::OPT_march_EQ); - assert(!GPUArchName.empty() && "Must have an architecture passed in."); + if 
(GPUArchName.empty()) { + C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch) + << getToolChain().getArchName() << getShortName(); + return; + } } // Obtain architecture from the action. @@ -593,7 +597,11 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-v"); StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ); - assert(!GPUArch.empty() && "At least one GPU Arch required for nvlink."); + if (GPUArch.empty()) { + C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch) + << getToolChain().getArchName() << getShortName(); + return; + } CmdArgs.push_back("-arch"); CmdArgs.push_back(Args.MakeArgString(GPUArch)); @@ -726,9 +734,8 @@ NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple, llvm::opt::DerivedArgList * NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, - Action::OffloadKind DeviceOffloadKind) const { - DerivedArgList *DAL = - ToolChain::TranslateArgs(Args, BoundArch, DeviceOffloadKind); + Action::OffloadKind OffloadKind) const { + DerivedArgList *DAL = ToolChain::TranslateArgs(Args, BoundArch, OffloadKind); if (!DAL) DAL = new DerivedArgList(Args.getBaseArgs()); @@ -738,7 +745,7 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, if (!llvm::is_contained(*DAL, A)) DAL->append(A); - if (!DAL->hasArg(options::OPT_march_EQ)) { + if (!DAL->hasArg(options::OPT_march_EQ) && OffloadKind != Action::OFK_None) { DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), CudaArchToString(CudaArch::CudaDefault)); } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "native") { diff --git a/clang/test/Driver/cuda-cross-compiling.c b/clang/test/Driver/cuda-cross-compiling.c index 12d0af3b45f32..6c9e2cac736b7 100644 --- a/clang/test/Driver/cuda-cross-compiling.c +++ b/clang/test/Driver/cuda-cross-compiling.c @@ -59,16 +59,6 @@ // LINK: nvlink{{.*}}"-o" "a.out" "-arch" "sm_61" {{.*}} "{{.*}}.cubin" -// -// Test the 
generated arguments default to a value with no architecture. -// -// RUN: %clang --target=nvptx64-nvidia-cuda -### --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ -// RUN: | FileCheck -check-prefix=DEFAULT %s - -// DEFAULT: -cc1" "-triple" "nvptx64-nvidia-cuda" "-S" {{.*}} "-target-cpu" "sm_52" "-target-feature" "+ptx{{[0-9]+}}" {{.*}} "-o" "[[PTX:.+]].s" -// DEFAULT-NEXT: ptxas{{.*}}"-m64" "-O0" "--gpu-name" "sm_52" "--output-file" "[[CUBIN:.+]].cubin" "[[PTX]].s" "-c" -// DEFAULT-NEXT: nvlink{{.*}}"-o" "a.out" "-arch" "sm_52" {{.*}} "[[CUBIN]].cubin" - // // Test to ensure that we enable handling global constructors in a freestanding // Nvidia compilation. @@ -77,3 +67,17 @@ // RUN: | FileCheck -check-prefix=LOWERING %s // LOWERING: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor" + +// +// Tests for handling a missing architecture. +// +// RUN: not %clang -target nvptx64-nvidia-cuda %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=MISSING %s + +// MISSING: error: Must pass in an explicit nvptx64 gpu architecture to 'ptxas' +// MISSING: error: Must pass in an explicit nvptx64 gpu architecture to 'nvlink' + +// RUN: %clang -target nvptx64-nvidia-cuda -flto -c %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=GENERIC %s + +// GENERIC-NOT: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-target-cpu" diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index 27c7b4a271fee..be9323adb4f64 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -4292,6 +4292,18 @@ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ZVECTOR // CHECK_SYSTEMZ_ZVECTOR: #define __VEC__ 10304 +// Begin nvptx tests ---------------- + +// RUN: %clang -march=sm_75 -E -dM %s -o - 2>&1 \ +// RUN: -target nvptx64-unknown-unknown \ +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_NVPTX,CHECK_ARCH_SM_75 +// 
RUN: %clang -E -dM %s -o - 2>&1 \ +// RUN: -target nvptx64-unknown-unknown \ +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_NVPTX,CHECK_ARCH_UNSET +// CHECK_ARCH_SM_75: #define __CUDA_ARCH__ 750 +// CHECK_ARCH_UNSET-NOT: #define __CUDA_ARCH__ +// CHECK_NVPTX: #define __NVPTX__ 1 + // Begin amdgcn tests ---------------- // RUN: %clang -march=amdgcn -E -dM %s -o - 2>&1 \ >From a76203edb9d46ac0e0e75a21dcfe4993b3fc6cfd Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Tue, 30 Jan 2024 17:16:59 -0600 Subject: [PATCH 2/2] Add TODO --- clang/lib/Driver/ToolChains/Cuda.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index ca54d2d55426b..ed5924c3b73b5 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -745,6 +745,7 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, if (!llvm::is_contained(*DAL, A)) DAL->append(A); + // TODO: We should accept 'generic' as a valid architecture. if (!DAL->hasArg(options::OPT_march_EQ) && OffloadKind != Action::OFK_None) { DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), CudaArchToString(CudaArch::CudaDefault)); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits