Author: Joseph Huber
Date: 2025-01-10T10:58:26-06:00
New Revision: 953beb9fe969bf8ab1857924ea0d3dd6ea506ab1
URL: https://github.com/llvm/llvm-project/commit/953beb9fe969bf8ab1857924ea0d3dd6ea506ab1
DIFF: https://github.com/llvm/llvm-project/commit/953beb9fe969bf8ab1857924ea0d3dd6ea506ab1.diff

LOG: [CUDA] Move CUDA to new driver by default (#122312)

Summary:
This patch updates the --offload-new-driver flag to be default for CUDA.
This mostly just required updating a lot of tests to use the old format.
I tried to update them where possible, but some were directly checking
the old format.

https://discourse.llvm.org/t/rfc-use-the-new-offloding-driver-for-cuda-and-hip-compilation-by-default/77468/18

Added:

Modified:
    clang/docs/ReleaseNotes.rst
    clang/lib/Driver/Driver.cpp
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/lib/Driver/ToolChains/Cuda.cpp
    clang/test/Driver/cuda-arch-translation.cu
    clang/test/Driver/cuda-bindings.cu
    clang/test/Driver/cuda-options.cu
    clang/test/Driver/cuda-output-asm.cu

Removed:

################################################################################
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 440b045399d991..5a48d6fbc01fa3 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1083,6 +1083,12 @@ CUDA Support
 - Clang now supports CUDA SDK up to 12.6
 - Added support for sm_100
 - Added support for `__grid_constant__` attribute.
+- CUDA now uses the new offloading driver by default. The new driver supports
+  device-side LTO, interoperability with OpenMP and other languages, and native ``-fgpu-rdc``
+  support with static libraries. The old behavior can be returned using the
+  ``--no-offload-new-driver`` flag. The binary format is no longer compatible
+  with the NVIDIA compiler's RDC-mode support.
More information can be found at: + https://clang.llvm.org/docs/OffloadingDesign.html AIX Support ^^^^^^^^^^^ diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 04f2664ffeaddb..4d9492ea08f647 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4339,7 +4339,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, Args.hasFlag(options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false) || Args.hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false); + options::OPT_no_offload_new_driver, + C.isOffloadingHostKind(Action::OFK_Cuda)); // Builder to be used to build offloading actions. std::unique_ptr<OffloadingActionBuilder> OffloadBuilder = @@ -5089,7 +5090,8 @@ Action *Driver::ConstructPhaseAction( offloadDeviceOnly() || (TargetDeviceOffloadKind == Action::OFK_HIP && !Args.hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false))) + options::OPT_no_offload_new_driver, + C.isOffloadingHostKind(Action::OFK_Cuda)))) ? 
types::TY_LLVM_IR : types::TY_LLVM_BC; return C.MakeAction<BackendJobAction>(Input, Output); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c4b5374d3fff9b..f81691f8aeaf95 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5064,7 +5064,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, JA.isHostOffloading(Action::OFK_SYCL) || (JA.isHostOffloading(C.getActiveOffloadKinds()) && Args.hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false)); + options::OPT_no_offload_new_driver, + C.isOffloadingHostKind(Action::OFK_Cuda))); bool IsRDCMode = Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false); @@ -5419,7 +5420,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (IsUsingLTO) { if (IsDeviceOffloadAction && !JA.isDeviceOffloading(Action::OFK_OpenMP) && !Args.hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false) && + options::OPT_no_offload_new_driver, + C.isOffloadingHostKind(Action::OFK_Cuda)) && !Triple.isAMDGPU()) { D.Diag(diag::err_drv_unsupported_opt_for_target) << Args.getLastArg(options::OPT_foffload_lto, @@ -6896,7 +6898,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_offload_via_llvm, false)) { CmdArgs.append({"--offload-new-driver", "-foffload-via-llvm"}); } else if (Args.hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false)) { + options::OPT_no_offload_new_driver, + C.isOffloadingHostKind(Action::OFK_Cuda))) { CmdArgs.push_back("--offload-new-driver"); } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 214f1e5d83478f..8967115bcc73d9 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -506,7 +506,7 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, static bool 
shouldIncludePTX(const ArgList &Args, StringRef InputArch) { // The new driver does not include PTX by default to avoid overhead. bool includePTX = !Args.hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false); + options::OPT_no_offload_new_driver, true); for (Arg *A : Args.filtered(options::OPT_cuda_include_ptx_EQ, options::OPT_no_cuda_include_ptx_EQ)) { A->claim(); diff --git a/clang/test/Driver/cuda-arch-translation.cu b/clang/test/Driver/cuda-arch-translation.cu index e96191cc9d4183..a0ae16452692bf 100644 --- a/clang/test/Driver/cuda-arch-translation.cu +++ b/clang/test/Driver/cuda-arch-translation.cu @@ -68,19 +68,19 @@ // HIP: clang-offload-bundler -// SM20:--image=profile=sm_20{{.*}}--image=profile=compute_20 -// SM21:--image=profile=sm_21{{.*}}--image=profile=compute_20 -// SM30:--image=profile=sm_30{{.*}}--image=profile=compute_30 -// SM32:--image=profile=sm_32{{.*}}--image=profile=compute_32 -// SM35:--image=profile=sm_35{{.*}}--image=profile=compute_35 -// SM37:--image=profile=sm_37{{.*}}--image=profile=compute_37 -// SM50:--image=profile=sm_50{{.*}}--image=profile=compute_50 -// SM52:--image=profile=sm_52{{.*}}--image=profile=compute_52 -// SM53:--image=profile=sm_53{{.*}}--image=profile=compute_53 -// SM60:--image=profile=sm_60{{.*}}--image=profile=compute_60 -// SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61 -// SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62 -// SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70 +// SM20:--image=profile=sm_20{{.*}} +// SM21:--image=profile=sm_21{{.*}} +// SM30:--image=profile=sm_30{{.*}} +// SM32:--image=profile=sm_32{{.*}} +// SM35:--image=profile=sm_35{{.*}} +// SM37:--image=profile=sm_37{{.*}} +// SM50:--image=profile=sm_50{{.*}} +// SM52:--image=profile=sm_52{{.*}} +// SM53:--image=profile=sm_53{{.*}} +// SM60:--image=profile=sm_60{{.*}} +// SM61:--image=profile=sm_61{{.*}} +// SM62:--image=profile=sm_62{{.*}} +// SM70:--image=profile=sm_70{{.*}} // 
GFX600:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx600 // GFX601:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx601 // GFX602:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx602 diff --git a/clang/test/Driver/cuda-bindings.cu b/clang/test/Driver/cuda-bindings.cu index 8ee1884936c069..5b6f944621439e 100644 --- a/clang/test/Driver/cuda-bindings.cu +++ b/clang/test/Driver/cuda-bindings.cu @@ -23,14 +23,14 @@ // BIN-NOT: cuda-bindings-device-cuda-nvptx64 // BIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: // BIN-NOT: cuda-bindings-device-cuda-nvptx64 -// BIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out" +// BIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out" // // Test single gpu architecture up to the assemble phase. // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \ // RUN: | FileCheck -check-prefix=ASM %s -// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s" +// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[PTX:.+]].s" // ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s" // @@ -61,40 +61,21 @@ // BIN2-NOT: cuda-bindings-device-cuda-nvptx64 // BIN2: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: // BIN2-NOT: cuda-bindings-device-cuda-nvptx64 -// AOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out" -// TOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "{{.*}}/out" +// AOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out" +// TOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "{{.*}}/out" // .. 
same, but with -fsyntax-only // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \ // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \ -// RUN: | FileCheck -check-prefix=SYN %s -// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \ -// RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \ -// RUN: | FileCheck -check-prefix=SYN %s -// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \ -// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \ -// RUN: | FileCheck -check-prefix=SYN %s -// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \ -// RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \ -// RUN: | FileCheck -check-prefix=SYN %s -// SYN-NOT: inputs: -// SYN: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing) -// SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing) -// SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing) -// SYN-NOT: inputs - -// .. 
and with --offload-new-driver -// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \ -// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 --offload-new-driver %s 2>&1 \ // RUN: | FileCheck -check-prefix=NDSYN %s // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \ -// RUN: --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \ +// RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \ // RUN: | FileCheck -check-prefix=NDSYN %s // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \ -// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --offload-new-driver 2>&1 \ +// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \ // RUN: | FileCheck -check-prefix=NDSYN %s // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \ -// RUN: --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \ +// RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \ // RUN: | FileCheck -check-prefix=NDSYN %s // NDSYN-NOT: inputs: // NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing) @@ -109,8 +90,8 @@ // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \ // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \ // RUN: | FileCheck -check-prefix=ASM2 %s -// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s" -// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s" +// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM30:.+]].s" +// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM35:.+]].s" // ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s" // @@ -125,7 +106,7 @@ // RUN: | FileCheck -check-prefix=HBIN %s // HBIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: // HBIN-NOT: cuda-bindings-device-cuda-nvptx64 -// HBIN: # 
"powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out" +// HBIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out" // // Test one or more gpu architecture up to the assemble phase in host-only @@ -163,7 +144,7 @@ // Test two gpu architectures with complete compilation in device-only // compilation mode. // -// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \ +// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \ // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \ // RUN: | FileCheck -check-prefix=DBIN2 %s // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: @@ -177,7 +158,7 @@ // Test two gpu architectures up to the assemble phase in device-only // compilation mode. // -// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \ +// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \ // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \ // RUN: | FileCheck -check-prefix=DASM2 %s // DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s" diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu index 67facf77f6c68a..db6536ca9e03b4 100644 --- a/clang/test/Driver/cuda-options.cu +++ b/clang/test/Driver/cuda-options.cu @@ -2,13 +2,13 @@ // Simple compilation case. Compile device-side to PTX assembly and make sure // we use it on the host side. -// RUN: %clang -### -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \ +// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \ // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ // RUN: -check-prefix NOLINK %s // Typical compilation + link case. 
-// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \ +// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \ // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ // RUN: -check-prefix LINK %s @@ -33,7 +33,7 @@ // RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s // RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \ -// RUN: --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \ +// RUN: --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \ // RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s @@ -47,13 +47,13 @@ // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ // RUN: -check-prefix NOHOST -check-prefix NOLINK %s -// RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only \ +// RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu --cuda-host-only \ // RUN: -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \ // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ // RUN: -check-prefix LINK %s -// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \ +// RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu --cuda-device-only \ // RUN: -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \ // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ @@ -61,14 +61,14 @@ // Verify that --cuda-gpu-arch option passes the correct GPU architecture to // device compilation. 
-// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \ +// RUN: %clang -### -nogpulib -nogpuinc --cuda-include-ptx=all --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \ // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ // RUN: -check-prefix DEVICE-SM52 -check-prefix HOST \ // RUN: -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s // Verify that there is one device-side compilation per --cuda-gpu-arch args // and that all results are included on the host side. -// RUN: %clang -### --target=x86_64-linux-gnu \ +// RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu \ // RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 -c %s 2>&1 \ // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \ // RUN: -check-prefixes DEVICE-SM52,DEVICE2-SM60 \ @@ -128,9 +128,9 @@ // f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ // RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \ -// RUN: --no-cuda-gpu-arch=all \ +// RUN: --no-cuda-version-check --no-cuda-gpu-arch=all \ // RUN: --cuda-gpu-arch=sm_70 \ -// RUN: -c -nogpulib -nogpuinc %s 2>&1 \ +// RUN: -c --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck -check-prefixes NOARCH-SM52,NOARCH-SM60,ARCH-SM70 %s // g) There's no --cuda-gpu-arch=all @@ -141,9 +141,9 @@ // Verify that --[no-]cuda-include-ptx arguments are handled correctly. -// a) by default we're including PTX for all GPUs. +// a) by default we're not including PTX for all GPUs. 
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \ -// RUN: --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \ +// RUN: --cuda-include-ptx=all --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \ // RUN: -c %s 2>&1 \ // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,PTX-SM52 %s @@ -157,12 +157,12 @@ // c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only. // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \ // RUN: --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \ -// RUN: --no-cuda-include-ptx=sm_60 \ +// RUN: --no-cuda-include-ptx=sm_60 --cuda-include-ptx=sm_52 \ // RUN: -c %s 2>&1 \ // RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM60,PTX-SM52 %s // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \ // RUN: --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \ -// RUN: --no-cuda-include-ptx=sm_52 \ +// RUN: --no-cuda-include-ptx=sm_52 --cuda-include-ptx=sm_60 \ // RUN: -c %s 2>&1 \ // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,NOPTX-SM52 %s @@ -183,8 +183,8 @@ // Verify -flto=thin -fwhole-program-vtables handling. This should result in // both options being passed to the host compilation, with neither passed to // the device compilation. 
-// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \ -// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s +// RUN: %clang -### --cuda-include-ptx=sm_60 --target=x86_64-linux-gnu -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \ +// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,NOLINK,THINLTOWPD %s // THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto' // ARCH-SM52: "-cc1"{{.*}}"-target-cpu" "sm_52" diff --git a/clang/test/Driver/cuda-output-asm.cu b/clang/test/Driver/cuda-output-asm.cu index 6b944d18917247..9d5b86bcbc1b46 100644 --- a/clang/test/Driver/cuda-output-asm.cu +++ b/clang/test/Driver/cuda-output-asm.cu @@ -17,13 +17,9 @@ // SM30-DAG: "-cc1" "-triple" "nvptx64-nvidia-cuda" // SM30-same: "-target-cpu" "sm_30" -// RUN: not %clang -### -S --target=x86_64-linux-gnu -o foo.s %s 2>&1 \ -// RUN: | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s // RUN: not %clang -### -S --target=x86_64-linux-gnu --cuda-device-only \ // RUN: --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 -o foo.s %s 2>&1 \ // RUN: | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s -// RUN: not %clang -### -emit-llvm -c --target=x86_64-linux-gnu -o foo.s %s 2>&1 \ -// RUN: | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s // MULTIPLE-OUTPUT-FILES: error: cannot specify -o when generating multiple output files // Make sure we do not get duplicate diagnostics. // MULTIPLE-OUTPUT-FILES-NOT: error: cannot specify -o when generating multiple output files _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits