https://github.com/bratpiorka updated https://github.com/llvm/llvm-project/pull/170679
From c0b4ba3ae32f436be7f5dd8c0ad1c0943965f008 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki <[email protected]> Date: Thu, 4 Dec 2025 09:53:57 +0100 Subject: [PATCH 1/4] bump default SM for CUDA to 75 --- clang/include/clang/Basic/OffloadArch.h | 2 +- mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h | 4 ++-- mlir/include/mlir/Dialect/GPU/Transforms/Passes.td | 4 ++-- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 8 ++++---- mlir/test/python/dialects/gpu/dialect.py | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h index befb69ff22d49..08e50698d72ef 100644 --- a/clang/include/clang/Basic/OffloadArch.h +++ b/clang/include/clang/Basic/OffloadArch.h @@ -113,7 +113,7 @@ enum class OffloadArch { BMG_G21, LAST, - CudaDefault = OffloadArch::SM_52, + CudaDefault = OffloadArch::SM_75, HIPDefault = OffloadArch::GFX906, }; diff --git a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h index 34c85de3418ec..f27c9048c63e9 100644 --- a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h @@ -28,11 +28,11 @@ struct GPUToNVVMPipelineOptions llvm::cl::init("nvptx64-nvidia-cuda")}; PassOptions::Option<std::string> cubinChip{ *this, "cubin-chip", llvm::cl::desc("Chip to use to serialize to cubin."), - llvm::cl::init("sm_50")}; + llvm::cl::init("sm_75")}; PassOptions::Option<std::string> cubinFeatures{ *this, "cubin-features", llvm::cl::desc("Features to use to serialize to cubin."), - llvm::cl::init("+ptx60")}; + llvm::cl::init("+ptx63")}; PassOptions::Option<std::string> cubinFormat{ *this, "cubin-format", llvm::cl::desc("Compilation format to use to serialize to cubin."), diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td index 0c8a0c7a677ab..5a3e50cb1b6b4 100644 --- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td @@ -143,10 +143,10 @@ def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> { /*default=*/ "\"nvptx64-nvidia-cuda\"", "Target triple.">, Option<"chip", "chip", "std::string", - /*default=*/"\"sm_50\"", + /*default=*/"\"sm_75\"", "Target chip.">, Option<"features", "features", "std::string", - /*default=*/"\"+ptx60\"", + /*default=*/"\"+ptx63\"", "Target features.">, Option<"optLevel", "O", "unsigned", /*default=*/"2", diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index c4eb4872af2c6..d323aa08274cc 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -5849,8 +5849,8 @@ def NVVM_TargetAttr : NVVM_Attr<"NVVMTarget", "target", let parameters = (ins DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O, StringRefParameter<"Target triple.", "\"nvptx64-nvidia-cuda\"">:$triple, - StringRefParameter<"Target chip.", "\"sm_50\"">:$chip, - StringRefParameter<"Target chip features.", "\"+ptx60\"">:$features, + StringRefParameter<"Target chip.", "\"sm_75\"">:$chip, + StringRefParameter<"Target chip features.", "\"+ptx63\"">:$features, OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags, OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link, DefaultValuedParameter<"bool", "true", "Perform SM version check on Ops.">:$verifyTarget @@ -5861,8 +5861,8 @@ def NVVM_TargetAttr : NVVM_Attr<"NVVMTarget", "target", let builders = [ AttrBuilder<(ins CArg<"int", "2">:$optLevel, CArg<"StringRef", "\"nvptx64-nvidia-cuda\"">:$triple, - CArg<"StringRef", "\"sm_50\"">:$chip, - CArg<"StringRef", "\"+ptx60\"">:$features, + CArg<"StringRef", "\"sm_75\"">:$chip, + CArg<"StringRef", "\"+ptx63\"">:$features, CArg<"DictionaryAttr", "nullptr">:$targetFlags, CArg<"ArrayAttr", "nullptr">:$linkFiles, CArg<"bool", "true">:$verifyTarget), [{ diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py index 1a009b7dfa30d..ba1746c402743 100644 --- a/mlir/test/python/dialects/gpu/dialect.py +++ b/mlir/test/python/dialects/gpu/dialect.py @@ -51,10 +51,10 @@ def testObjectAttr(): print(o) object = ( - b"//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 6.0\n.target sm_50" + b"//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 6.3\n.target sm_75" ) o = gpu.ObjectAttr.get(target, format, object) - # CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.0\0A.target sm_50"> + # CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.3\0A.target sm_75"> print(o) assert o.object == object From cf28420e72d39d74b404a92675213fd0abf02a1e Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki <[email protected]> Date: Thu, 4 Dec 2025 09:55:03 +0100 Subject: [PATCH 2/4] make CUDA 10.0 the default toolkit for tests --- .../CUDA/v10.0/bin/.keep | 0 .../CUDA/v10.0/bin/version.txt | 2 ++ .../CUDA/v10.0/include/.keep | 0 .../CUDA/v10.0/lib/.keep | 0 .../CUDA/v10.0/nvvm/libdevice/libdevice.10.bc | 0 .../nvvm/libdevice/libdevice.compute_30.10.bc | 0 .../nvvm/libdevice/libdevice.compute_35.10.bc | 0 .../nvvm/libdevice/libdevice.compute_50.10.bc | 0 .../CUDA/v10.0/version.txt | 2 ++ .../Inputs/CUDA/usr/local/cuda/bin/fatbinary | 0 .../Inputs/CUDA/usr/local/cuda/include/cuda.h | 7 +++++++ .../local/cuda/nvvm/libdevice/libdevice.10.bc | 0 .../nvvm/libdevice/libdevice.compute_20.10.bc | 0 .../nvvm/libdevice/libdevice.compute_50.10.bc | 0 .../Inputs/CUDA_100/usr/local/cuda/bin/.keep | 0 .../Inputs/CUDA_100/usr/local/cuda/include/.keep | 0 .../CUDA_100/usr/local/cuda/include/cuda.h | 7 +++++++ .../Inputs/CUDA_100/usr/local/cuda/lib/.keep | 0 .../Inputs/CUDA_100/usr/local/cuda/lib64/.keep | 0 .../local/cuda/nvvm/libdevice/libdevice.10.bc | 0 .../nvvm/libdevice/libdevice.compute_20.10.bc | 0 .../nvvm/libdevice/libdevice.compute_30.10.bc | 0 .../nvvm/libdevice/libdevice.compute_35.10.bc | 0 .../nvvm/libdevice/libdevice.compute_50.10.bc | 0 .../Inputs/CUDA_70/usr/local/cuda/bin/.keep | 0 .../Inputs/CUDA_70/usr/local/cuda/include/.keep | 0 .../Inputs/CUDA_70/usr/local/cuda/include/cuda.h | 7 +++++++ .../Inputs/CUDA_70/usr/local/cuda/lib/.keep | 0 .../Inputs/CUDA_70/usr/local/cuda/lib64/.keep | 0 .../nvvm/libdevice/libdevice.compute_20.10.bc | 0 .../nvvm/libdevice/libdevice.compute_30.10.bc | 0 .../nvvm/libdevice/libdevice.compute_35.10.bc | 0 .../nvvm/libdevice/libdevice.compute_50.10.bc | 0 clang/test/Driver/Inputs/SYCL/objnvptx64-sm_75.o | Bin 0 -> 3736 bytes 34 files changed, 25 insertions(+) create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/.keep create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/version.txt create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/include/.keep create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/lib/.keep create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.10.bc create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_30.10.bc create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_35.10.bc create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_50.10.bc create mode 100644 clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/version.txt create mode 100755 clang/test/Driver/Inputs/CUDA/usr/local/cuda/bin/fatbinary create mode 100644 clang/test/Driver/Inputs/CUDA/usr/local/cuda/include/cuda.h create mode 100644 clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.10.bc create mode 100644 clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc create mode 100644 clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/bin/.keep create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/.keep create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/cuda.h create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib/.keep create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib64/.keep create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.10.bc create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc create mode 100644 clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/bin/.keep create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/.keep create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/cuda.h create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib/.keep create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib64/.keep create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc create mode 100644 clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc create mode 100644 clang/test/Driver/Inputs/SYCL/objnvptx64-sm_75.o diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/.keep b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/version.txt b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/version.txt new file mode 100644 index 0000000000000..16a5a2337f83f --- /dev/null +++ b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/version.txt @@ -0,0 +1,2 @@ +CUDA Version 10.0.130 + diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/include/.keep b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/include/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/lib/.keep b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/lib/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.10.bc b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_30.10.bc b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_30.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_35.10.bc b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_35.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_50.10.bc b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/nvvm/libdevice/libdevice.compute_50.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/version.txt b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/version.txt new file mode 100644 index 0000000000000..16a5a2337f83f --- /dev/null +++ b/clang/test/Driver/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/version.txt @@ -0,0 +1,2 @@ +CUDA Version 10.0.130 + diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/bin/fatbinary b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/bin/fatbinary new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/include/cuda.h b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/include/cuda.h new file mode 100644 index 0000000000000..c576bebd470dc --- /dev/null +++ b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/include/cuda.h @@ -0,0 +1,7 @@ +// +// Placeholder file for testing CUDA version detection +// + +#define CUDA_VERSION 10000 + +// diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.10.bc b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc b/clang/test/Driver/Inputs/CUDA/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/bin/.keep b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/bin/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/.keep b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/cuda.h b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/cuda.h new file mode 100644 index 0000000000000..c576bebd470dc --- /dev/null +++ b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/include/cuda.h @@ -0,0 +1,7 @@ +// +// Placeholder file for testing CUDA version detection +// + +#define CUDA_VERSION 10000 + +// diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib/.keep b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib64/.keep b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/lib64/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc b/clang/test/Driver/Inputs/CUDA_100/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/bin/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/bin/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/cuda.h b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/cuda.h new file mode 100644 index 0000000000000..558f2e2d02093 --- /dev/null +++ b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/include/cuda.h @@ -0,0 +1,7 @@ +// +// Placeholder file for testing CUDA version detection +// + +#define CUDA_VERSION 7000 + +// diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib64/.keep b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/lib64/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_20.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_30.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_35.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc b/clang/test/Driver/Inputs/CUDA_70/usr/local/cuda/nvvm/libdevice/libdevice.compute_50.10.bc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/objnvptx64-sm_75.o b/clang/test/Driver/Inputs/SYCL/objnvptx64-sm_75.o new file mode 100644 index 0000000000000000000000000000000000000000..bfac6d6de63cc8fc8da5cc948350dcae84d15ec6 GIT binary patch literal 3736 zcmcf^ZA@F&^%;YoN!({Ap)GD~_c^eLwlSDTLTqd%e<TiTY2$oN*+gah8N6WguwRUE ztJcOek7}0YMMy(crWq~jvZ<QL6iu0wO~9knrB>>qsX{G6c4G*sXhPGhs<cSmxyD`z zOQrpocBFgHJ?EbD?z!j7*RLP$v{dkTSQ&WO53%p4G8e<fwtu%a6qg1}1d^vZ-duj~ z@B_qYo_v`L4mx{PE_cZ5boY84s-C_egJI5IyZ;OsatDImK0irOjp|1A881UN^e_w^ zY-?%)qUW5W(b?x~as=!Q?d=PkwKw(lhJ1}kj#9T$R4e85*r}ES%?H&7w9RgZUE8c_ z)jHJF0XOAv9&|Rl?azP_HAUMnj2AhM@xl_`ZV@g_!i8kOM+1QvO-4d;zQX*N;CT>0 z^!LC~;8$W_e*36xK34G`?3aPFa}&RK7VyZDB+__D%Vslo8eckMJ8H3X9yK1anNGcQ zsPnMRhFqr#Yg%os`&H-sXZ?KxepRp6e=e*#<3EQrGVU;gA&>doeptBNH26D+G43E^ z3;HzbMkh^UKD*bCHTwG)cjJ+lPN{;7-FX&kM1Hl|T`ma68LTnj?zK0%du*P7-RH&{ zgG_+2I{***5dJ>vigEUbXeJEi`9ofp*RFD&bJ<lvpY7lQ%w(QiIsl=lg4;So>iQRd z^0&8r|Gav1ce?%%kE+6yB8U*UVS9zL=#4!fhsWxEX-rrCNFbrK7kI;>A5;mYiprlm zqN2kUrVHCmE(ukm5!Sx2jiN?6CR|GM`H{*aGRjonpBC<@f5vb=A$fsExrT)oWMrLr zWEqo6O_kDT8=g%b>muv^VmL37a`*?2OQ}Sn9+y&%ecE(#M_qd72qABC9XgVh>@iy^ zq)%aQl3*9uv&T~@`IBkreaUf{lKs=xj&0R@&9hY|YOhfARvi|9zhZpbM}|omuL0Y& zcVGe!r|?%N_-#G>feDr&Sw_gV*0K?jr57gf<*Z=68;_{*Knf3%xGzbB40xm)XR?CZ z5<C#aBgGl`0V^I#;b_hZzLubMe>d*y7K0X2&kF7onTQ1S)Q;(bHS*D?>rehw|GU?3 z`DB>`=Q;M|<iU4LaD@$!U{8}*jH$~p3^P0-#89?ViZO|FM`tooz4yp$rTO{l8k07G z)fjnaDeDo~zfy}~UjS|qMrAX3nWu)=K4R7ka|u%v9}MHcv3}7CCjLS~+>a71B9mfz zN`7Np=22G_#p1g&{PwiDJ;86!gcA&nzaqp3J6M{=BQ!r0W@(a@W_A#3Ng}Tn`_<x` z>%_X1Xpzjwmz1&vrF`11@TlL^U61RsX6;qJ{(4+L%-3bsYeL=lD?RwZ9yS1Tl%<m_ z-HnGD@X$9^@Y#VVYZJ2lwQOLF9UP18yR%N*$r1mUB-Ye?M|xaV7?)*=^P*igr<Bda z<a2hx$4Rp`ZPtzz=WCp1)T|ld>qfaIIP}4g?(clv1SfjW%hIRW03By~@NhTF)Ur&9 zmEJfZzJrPHS&2fDxM>jQNn#CdK{m)2rxeph#k^9s7*j0fWQDx^bDqLedrUK7Zo3%Q zWvun3i*&OCVLUu`tn+?Sj69(DnPSc^TUE-JrsS{j6v$wueBCZM`9@rm;dB$`c5ujp zX??-08v&9_yt@r<$_{`%LOjxd2gYJuHzmZ{B(ZEIzEams%^MYk{F4iL`4Sk=A=OR9 z+porV4|AG?S)1YX*QT^-4(_PWbX@Hh;$T%jIPf$p{dAUC>lS~J6ReZsWwm&j6n||c z{1W1(ngH^voZxnjC|HSa)ODY&*=5r)`9l85HKlw-DW6fwA%d&Qr#<Aab=S@9*ZA7e zxHfb8Iz%5X8v#2z*a4cAesGJps}|>T#BGE4b_!16c>LgH9PVNpgH%BtX0i~<*sxKX z<G^+85UGq|CMBO6m*<R%r74-G-l-<?D86F&$g<k3jGwE;gB`3b#l9K^-7BL!eS*`E za+<8U4bmwC9usvaA2-<*VCs!2`SDndr}K=OxJ!!f<^*@G;```R#BxrsLW=WQxa@BY z@lX#wC}gF_pSs5BAP+B@H5u?BUz?2UGkiTbbCeTV9-D>Kg^U`Py%tm4<3Su}uLvLm z;X4qEmYvWFXsGhb_+SHsZ0f2}Gr`wg=QIy0+OKnMqhMQdv0IT-$_rD9#b^zr!IDui z9aGHb<%>`by?tu&dXl);E%syE^BcCol}Yg>D*@$m4=OQ9+|3bRVb~yvVZBhh7Ze?& zc|;_wcDd5;ld(i4WwBUx#Z0N>mosUh)^tAM7+Muz*fy8y#WvB0`~hc=JzxuZBW`~v z<WmLQo?f?;QF)liLo^+5J5|B{0JG_*zWCdrqWipl#vS;-{o2E6eLlaJhMzY-Ln7-> zcztxQn>0W79C^}YG-6nt(5N1g2<wDXfFwXQ4@qk4gzq+Usmc#T{JQ-uqD(fUZWyMx z$(kFYq0chpz7IvQW^*#5CZ+p1x+g2*{%oXnj-?U{3u~WreX^APd||FDvoL*Q#qvz5 z<E_gVmvVE9vkP4-x%ooZ+FW5}ereHC{RbDK*Sg+b`n)hZXQ_VYWKn%SJ-4`Oss7`u zMQW~4SX}B_otd7tRJUTkho0L6aofHv&;Y#*kDR8*png#5YqQ+YifnfLu~o>4DO6?~ zJ>rS|H@k^jR%kuq0%}0wTUO%!L^Oty40XmZ3ssH!!fE;<r@6}0U5dAlausKx$;uY< zvPDu>u%mf3rkJ+Nvg5M!Mlsu`c;fqzWKa)LV*AKOQB{4Yff8DPM#(`*>!5=`e^2vu z`x>DB=izJc%KAuxVUSW7p3z^5>n=hg<+rEedT6Ri4z{s=a^v+w5t2QXKqGL&Ti|KH zJNcvQh~~pUGzFs*2GTnX0|CN_rV#;f48|@PE*LvuAUy=-eY8gMWhn0>{4Id6eq-ll zut5n+u;FXOvHi4JW3vTmZy;prbN1Tk03@5odaA{Ef_lLj`R)n(-#$$r@l<MJe00;p z@3jqM{wMP<;48?JTOnF<$H!YXHhT$e9fW~+D4LtWP$J6j+VY;HyeMG{;L6T#`_x<b zM@w}1onGE9pI^r9uz}WF^_?Y!vt`8+F6))C1Ni0jX3-m)iB0RRvTtnY)9^@r2(l$g z^AHy+zN$8}qClVwH^A5oJ*9)7*o#mNGx9Ndl_4ne1F+_U{{Tf{{2P9}#UP}QV5|MV z0^SzUqMWcK*j=PH^iZB`J^werD~U!+qD53bUtd1Bw7K<sL_bRZLg|0xqvSWA^Dicp B-+TZ7 literal 0 HcmV?d00001 From 78e725d44c982b704aaf6eaf0ee6dee7f15dab28 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki <[email protected]> Date: Thu, 4 Dec 2025 16:27:05 +0100 Subject: [PATCH 3/4] x --- clang/tools/clang-repl/ClangRepl.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 4 ++-- llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/tools/clang-repl/ClangRepl.cpp b/clang/tools/clang-repl/ClangRepl.cpp index 066f526cba9ae..a70f6a3ded174 100644 --- a/clang/tools/clang-repl/ClangRepl.cpp +++ b/clang/tools/clang-repl/ClangRepl.cpp @@ -297,7 +297,7 @@ int main(int argc, const char **argv) { CB.SetCudaSDK(CudaPath); if (OffloadArch.empty()) { - OffloadArch = "sm_35"; + OffloadArch = "sm_75"; } CB.SetOffloadArch(OffloadArch); diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index f11d331862081..0ce1df8ce5a42 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -228,9 +228,9 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { return getFullSmVersion() % 10 == 2 ? PTXVersion >= 88 : hasArchAccelFeatures(); } - // If the user did not provide a target we default to the `sm_30` target. + // If the user did not provide a target we default to the `sm_75` target. std::string getTargetName() const { - return TargetName.empty() ? "sm_30" : TargetName; + return TargetName.empty() ? "sm_75" : TargetName; } bool hasTargetName() const { return !TargetName.empty(); } diff --git a/llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml b/llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml index 703c93b24dcc0..0b535627eee2b 100644 --- a/llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml +++ b/llvm/test/tools/llvm-objdump/Offloading/Inputs/binary.yaml @@ -20,11 +20,11 @@ Members: - Key: "triple" Value: "nvptx64-nvidia-cuda" - Key: "arch" - Value: "sm_52" + Value: "sm_75" - ImageKind: IMG_None OffloadKind: OFK_None String: - Key: "triple" Value: "nvptx64-nvidia-cuda" - Key: "arch" - Value: "sm_70" + Value: "sm_75" From afedfa5584c0fca36d6b1ce3d8ca85d0bec765dd Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki <[email protected]> Date: Thu, 4 Dec 2025 09:56:05 +0100 Subject: [PATCH 4/4] update tests to use CUDA sm_75 / ptx63 as default --- clang/test/Driver/cuda-detect.cu | 35 +++++++++++-------- .../Driver/cuda-flush-denormals-to-zero.cu | 8 ++--- clang/test/Driver/cuda-march.cu | 6 ++-- clang/test/Driver/cuda-options.cu | 6 ++-- clang/test/Driver/cuda-ptxas-path.cu | 4 +-- clang/test/Driver/cuda-short-ptr.cu | 2 +- clang/test/Driver/cuda-version-check.cu | 20 +++++------ clang/test/Driver/cuda-windows.cu | 2 +- clang/test/Driver/lto.cu | 2 +- 9 files changed, 47 insertions(+), 38 deletions(-) diff --git a/clang/test/Driver/cuda-detect.cu b/clang/test/Driver/cuda-detect.cu index 23b6ba2fcc09d..66e1a25e70eda 100644 --- a/clang/test/Driver/cuda-detect.cu +++ b/clang/test/Driver/cuda-detect.cu @@ -60,7 +60,7 @@ // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_50 \ // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON \ -// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE30 +// RUN: -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10 // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_60 \ // RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON \ @@ -96,14 +96,14 @@ // Verify that -nocudainc prevents adding include path to CUDA headers. -// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ +// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_75 \ // RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \ -// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35 -// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \ +// RUN: -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10 +// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_75 \ // RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC \ -// RUN: -check-prefixes PTX42,LIBDEVICE,LIBDEVICE35 +// RUN: -check-prefixes PTX63,LIBDEVICE,LIBDEVICE10 // We should not add any CUDA include paths if there's no valid CUDA installation // RUN: not %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ @@ -123,10 +123,10 @@ // RUN: | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE // Verify that -nocudalib prevents linking libdevice bitcode in. -// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ +// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_75 \ // RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON -// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_35 \ +// RUN: %clang -### -v --target=i386-apple-macosx --cuda-gpu-arch=sm_75 \ // RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix COMMON @@ -152,10 +152,10 @@ // RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \ // RUN: | FileCheck %s -check-prefix CUDA80 -// Verify that if no version file is found, we report the default of 7.0. +// Verify that if no version file is found, we report the default of 10.0. // RUN: %clang -### -v --target=x86_64-linux-gnu --cuda-gpu-arch=sm_50 \ // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ -// RUN: | FileCheck %s -check-prefix CUDA70 +// RUN: | FileCheck %s -check-prefix CUDA100 // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda // NO-LIBDEVICE: Found CUDA installation: {{.*}}/Inputs/CUDA-nolibdevice/usr/local/cuda @@ -174,6 +174,7 @@ // LIBDEVICE50-SAME: libdevice.compute_50.10.bc // PTX42-SAME: "-target-feature" "+ptx42" // PTX60-SAME: "-target-feature" "+ptx60" +// PTX63-SAME: "-target-feature" "+ptx63" // CUDAINC-SAME: "-include" "__clang_cuda_runtime_wrapper.h" // NOCUDAINC-NOT: "-include" "__clang_cuda_runtime_wrapper.h" // CUDAINC-SAME: "-internal-isystem" "{{.*}}/Inputs/CUDA{{[_0-9]+}}/usr/local/cuda/include" @@ -188,14 +189,20 @@ // CHECK-CXXINCLUDE-SAME: {{.*}}"-internal-isystem" "{{.+}}/include/c++/4.8" // CHECK-CXXINCLUDE: ld{{.*}}" +// CUDA70: "-cc1" "-triple" "nvptx64-nvidia-cuda" +// CUDA70-SAME: -target-sdk-version=7.0 +// CUDA70: "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CUDA70-SAME: -target-sdk-version=7.0 +// CUDA70: ld{{.*}}" + // CUDA80: "-cc1" "-triple" "nvptx64-nvidia-cuda" // CUDA80-SAME: -target-sdk-version=8.0 // CUDA80: "-cc1" "-triple" "x86_64-unknown-linux-gnu" // CUDA80-SAME: -target-sdk-version=8.0 // CUDA80: ld{{.*}}" -// CUDA70: "-cc1" "-triple" "nvptx64-nvidia-cuda" -// CUDA70-SAME: -target-sdk-version=7.0 -// CUDA70: "-cc1" "-triple" "x86_64-unknown-linux-gnu" -// CUDA70-SAME: -target-sdk-version=7.0 -// CUDA70: ld{{.*}}" +// CUDA100: "-cc1" "-triple" "nvptx64-nvidia-cuda" +// CUDA100-SAME: -target-sdk-version=10.0 +// CUDA100: "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CUDA100-SAME: -target-sdk-version=10.0 +// CUDA100: ld{{.*}}" diff --git a/clang/test/Driver/cuda-flush-denormals-to-zero.cu b/clang/test/Driver/cuda-flush-denormals-to-zero.cu index ea808f2302fbb..adad6dfe632d3 100644 --- a/clang/test/Driver/cuda-flush-denormals-to-zero.cu +++ b/clang/test/Driver/cuda-flush-denormals-to-zero.cu @@ -2,14 +2,14 @@ // -fgpu-flush-denormals-to-zero. This should be translated to // -fdenormal-fp-math-f32=preserve-sign -// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s -// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s +// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s +// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s // RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fgpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s // RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-gpu-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s // Test alias options -f[no-]cuda-flush-denormals-to-zero -// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s -// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s +// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=FTZ %s +// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck -check-prefix=NOFTZ %s // Test explicit argument, with CUDA offload kind // RUN: %clang -x hip -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fgpu-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s diff --git a/clang/test/Driver/cuda-march.cu b/clang/test/Driver/cuda-march.cu index 2dbb9cdf6f589..7684b1df0d685 100644 --- a/clang/test/Driver/cuda-march.cu +++ b/clang/test/Driver/cuda-march.cu @@ -5,12 +5,12 @@ // RUN: %clang -### --target=x86_64-linux-gnu -c \ // RUN: -nogpulib -nogpuinc -march=haswell %s 2>&1 | FileCheck %s // RUN: %clang -### --target=x86_64-linux-gnu -c \ -// RUN: -nogpulib -nogpuinc -march=haswell --cuda-gpu-arch=sm_52 %s 2>&1 | FileCheck %s +// RUN: -nogpulib -nogpuinc -march=haswell --cuda-gpu-arch=sm_75 %s 2>&1 | FileCheck %s // CHECK: "-cc1"{{.*}} "-triple" "nvptx -// CHECK-SAME: "-target-cpu" "sm_52" +// CHECK-SAME: "-target-cpu" "sm_75" // CHECK: ptxas -// CHECK-SAME: "--gpu-name" "sm_52" +// CHECK-SAME: "--gpu-name" "sm_75" // CHECK: "-cc1"{{.*}} "-target-cpu" "haswell" diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu index fc8e83a2bb279..312556707ef19 100644 --- a/clang/test/Driver/cuda-options.cu +++ b/clang/test/Driver/cuda-options.cu @@ -104,12 +104,12 @@ // RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s // c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X -// we default to sm_52 -- same as if no --cuda-gpu-arch were passed. +// we default to sm_75 -- same as if no --cuda-gpu-arch were passed. // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \ // RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_60 \ // RUN: --no-cuda-gpu-arch=sm_70 --no-cuda-gpu-arch=sm_60 \ // RUN: -c %s 2>&1 \ -// RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s +// RUN: | FileCheck -check-prefixes ARCH-SM75,NOARCH-SM60,NOARCH-SM70 %s // d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \ @@ -193,6 +193,8 @@ // NOARCH-SM60-NOT: "-cc1"{{.*}}"-target-cpu" "sm_60" // ARCH-SM70: "-cc1"{{.*}}"-target-cpu" "sm_70" // NOARCH-SM70-NOT: "-cc1"{{.*}}"-target-cpu" "sm_70" +// ARCH-SM75: "-cc1"{{.*}}"-target-cpu" "sm_75" +// NOARCH-SM75-NOT: "-cc1"{{.*}}"-target-cpu" "sm_75" // ARCHALLERROR: error: unsupported CUDA gpu architecture: all // Match device-side preprocessor and compiler phases with -save-temps. diff --git a/clang/test/Driver/cuda-ptxas-path.cu b/clang/test/Driver/cuda-ptxas-path.cu index f36dcc94558f1..7027984d07b2e 100644 --- a/clang/test/Driver/cuda-ptxas-path.cu +++ b/clang/test/Driver/cuda-ptxas-path.cu @@ -1,8 +1,8 @@ // RUN: %clang -### --target=i386-unknown-linux \ -// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda \ +// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: --ptxas-path=/some/path/to/ptxas %s 2>&1 \ // RUN: | FileCheck %s // CHECK-NOT: "ptxas" // CHECK: "/some/path/to/ptxas" -// CHECK-SAME: "--gpu-name" "sm_52" +// CHECK-SAME: "--gpu-name" "sm_75" diff --git a/clang/test/Driver/cuda-short-ptr.cu b/clang/test/Driver/cuda-short-ptr.cu index e0ae4505e0b56..bf3c1c168b922 100644 --- a/clang/test/Driver/cuda-short-ptr.cu +++ b/clang/test/Driver/cuda-short-ptr.cu @@ -1,6 +1,6 @@ // Checks that cuda compilation does the right thing when passed -fcuda-short-ptr -// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-short-ptr -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 | FileCheck %s +// RUN: %clang -### --target=x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_20 -fcuda-short-ptr -nocudainc -nocudalib --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda %s 2>&1 | FileCheck %s // CHECK: "-mllvm" "--nvptx-short-ptr" // CHECK-SAME: "-fcuda-short-ptr" diff --git a/clang/test/Driver/cuda-version-check.cu b/clang/test/Driver/cuda-version-check.cu index 9eceb928ffabd..4b43012b39483 100644 --- a/clang/test/Driver/cuda-version-check.cu +++ b/clang/test/Driver/cuda-version-check.cu @@ -1,4 +1,4 @@ -// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=OK // RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=OK @@ -15,33 +15,33 @@ // RUN: --cuda-path=%S/Inputs/CUDA-unknown/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=UNKNOWN_VERSION_CXX -// The installation at Inputs/CUDA is CUDA 7.0, which doesn't support sm_60. -// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \ +// The installation at Inputs/CUDA_70 is CUDA 7.0, which doesn't support sm_60. +// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=ERR_SM60 // This should only complain about sm_60, not sm_35. // RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_35 \ -// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \ +// RUN: --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=ERR_SM60 --check-prefix=OK_SM35 // We should get two errors here, one for sm_60 and one for sm_61. // RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_61 \ -// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \ +// RUN: --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=ERR_SM60 --check-prefix=ERR_SM61 // We should still get an error if we pass -nocudainc, because this compilation // would invoke ptxas, and we do a version check on that, too. -// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \ +// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 -nocudainc --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=ERR_SM60 // If with -nocudainc and -E, we don't touch the CUDA install, so we // shouldn't get an error. // RUN: %clang --target=x86_64-linux -v -### -E --cuda-device-only --cuda-gpu-arch=sm_60 -nocudainc \ -// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \ +// RUN: --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=OK // --no-cuda-version-check should suppress all of these errors. -// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda 2>&1 \ // RUN: --no-cuda-version-check %s | \ // RUN: FileCheck %s --check-prefix=OK @@ -49,9 +49,9 @@ // therefore we should not get an error in host-only mode. We use the -S here // to avoid the error being produced in case by the assembler tool, which does // the same check. -// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda -S 2>&1 %s | \ +// RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-host-only --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda -S 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=OK -// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda -S 2>&1 %s | \ +// RUN: not %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-device-only --cuda-path=%S/Inputs/CUDA_70/usr/local/cuda -S 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=ERR_SM60 // OK-NOT: error: GPU arch diff --git a/clang/test/Driver/cuda-windows.cu b/clang/test/Driver/cuda-windows.cu index 4459e809072d9..64d75ddc223b4 100644 --- a/clang/test/Driver/cuda-windows.cu +++ b/clang/test/Driver/cuda-windows.cu @@ -3,7 +3,7 @@ // RUN: %clang -v --target=i386-pc-windows-mingw32 \ // RUN: --sysroot=%S/Inputs/CUDA-windows 2>&1 %s -### | FileCheck %s -// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0 +// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA-windows/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0 // CHECK: "-cc1" "-triple" "nvptx-nvidia-cuda" // CHECK-SAME: "-fms-extensions" // CHECK-SAME: "-fms-compatibility" diff --git a/clang/test/Driver/lto.cu b/clang/test/Driver/lto.cu index 596e6cfe07379..e4a773b487c6c 100644 --- a/clang/test/Driver/lto.cu +++ b/clang/test/Driver/lto.cu @@ -26,7 +26,7 @@ // llvm-bc and llvm-ll outputs need to match regular suffixes // (unfortunately). -// RUN: %clangxx %s --target=x86_64-unknown-linux-gnu --no-offload-new-driver -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda -### 2> %t +// RUN: %clangxx %s --target=x86_64-unknown-linux-gnu --no-offload-new-driver -nocudainc -nocudalib -flto -save-temps --cuda-path=%S/Inputs/CUDA/usr/local/cuda -### 2> %t // RUN: FileCheck -check-prefix=CHECK-COMPILELINK-SUFFIXES < %t %s // // CHECK-COMPILELINK-SUFFIXES: "-o" "[[CPP:.*lto-host.*\.cui]]" "-x" "cuda" "{{.*}}lto.cu" _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
