tra updated this revision to Diff 114206.
tra added a comment.

Added tests for sm_70 support.


https://reviews.llvm.org/D37576

Files:
  clang/include/clang/Basic/Cuda.h
  clang/lib/Basic/Cuda.cpp
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/lib/Headers/__clang_cuda_runtime_wrapper.h
  clang/test/Driver/cuda-arch-translation.cu
  llvm/lib/Target/NVPTX/NVPTX.td
  llvm/test/CodeGen/NVPTX/sm-version-70.ll

Index: llvm/test/CodeGen/NVPTX/sm-version-70.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/NVPTX/sm-version-70.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_70 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 | FileCheck %s
+
+; CHECK: .version 6.0
+; CHECK: .target sm_70
Index: llvm/lib/Target/NVPTX/NVPTX.td
===================================================================
--- llvm/lib/Target/NVPTX/NVPTX.td
+++ llvm/lib/Target/NVPTX/NVPTX.td
@@ -50,6 +50,8 @@
                              "Target SM 6.1">;
 def SM62 : SubtargetFeature<"sm_62", "SmVersion", "62",
                              "Target SM 6.2">;
+def SM70 : SubtargetFeature<"sm_70", "SmVersion", "70",
+                             "Target SM 7.0">;
 
 def SATOM : SubtargetFeature<"satom", "HasAtomScope", "true",
                              "Atomic operations with scope">;
@@ -67,6 +69,8 @@
                              "Use PTX version 4.3">;
 def PTX50 : SubtargetFeature<"ptx50", "PTXVersion", "50",
                              "Use PTX version 5.0">;
+def PTX60 : SubtargetFeature<"ptx60", "PTXVersion", "60",
+                             "Use PTX version 6.0">;
 
 //===----------------------------------------------------------------------===//
 // NVPTX supported processors.
@@ -87,6 +91,7 @@
 def : Proc<"sm_60", [SM60, PTX50, SATOM]>;
 def : Proc<"sm_61", [SM61, PTX50, SATOM]>;
 def : Proc<"sm_62", [SM62, PTX50, SATOM]>;
+def : Proc<"sm_70", [SM70, PTX60, SATOM]>;
 
 def NVPTXInstrInfo : InstrInfo {
 }
Index: clang/test/Driver/cuda-arch-translation.cu
===================================================================
--- clang/test/Driver/cuda-arch-translation.cu
+++ clang/test/Driver/cuda-arch-translation.cu
@@ -5,26 +5,36 @@
 // REQUIRES: x86-registered-target
 // REQUIRES: nvptx-registered-target
 
-// CHECK:fatbinary
-
 // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM20 %s
 // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_21 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM21 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM21 %s
 // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_30 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM30 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM30 %s
 // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_32 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM32 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM32 %s
 // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_35 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM35 %s
 // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_37 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM37 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM37 %s
 // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_50 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM50 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM50 %s
 // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_52 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM52 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM52 %s
 // RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_53 %s 2>&1 \
-// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM53 %s
+// RUN: | FileCheck -check-prefixes=COMMON,SM53 %s
+// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_60 %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=COMMON,SM60 %s
+// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_61 %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=COMMON,SM61 %s
+// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_62 %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=COMMON,SM62 %s
+// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefixes=COMMON,SM70 %s
+
+// COMMON: ptxas
+// COMMON-SAME: -m64
+// COMMON: fatbinary
 
 // SM20:--image=profile=sm_20{{.*}}--image=profile=compute_20
 // SM21:--image=profile=sm_21{{.*}}--image=profile=compute_20
@@ -35,3 +45,7 @@
 // SM50:--image=profile=sm_50{{.*}}--image=profile=compute_50
 // SM52:--image=profile=sm_52{{.*}}--image=profile=compute_52
 // SM53:--image=profile=sm_53{{.*}}--image=profile=compute_53
+// SM60:--image=profile=sm_60{{.*}}--image=profile=compute_60
+// SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61
+// SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62
+// SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70
Index: clang/lib/Headers/__clang_cuda_runtime_wrapper.h
===================================================================
--- clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -62,7 +62,7 @@
 #include "cuda.h"
 #if !defined(CUDA_VERSION)
 #error "cuda.h did not define CUDA_VERSION"
-#elif CUDA_VERSION < 7000 || CUDA_VERSION > 8000
+#elif CUDA_VERSION < 7000 || CUDA_VERSION > 9000
 #error "Unsupported CUDA version!"
 #endif
 
@@ -86,14 +86,19 @@
 #define __COMMON_FUNCTIONS_H__
 
 #undef __CUDACC__
+#if CUDA_VERSION < 9000
 #define __CUDABE__
+#else
+#define __CUDA_LIBDEVICE__
+#endif
 // Disables definitions of device-side runtime support stubs in
 // cuda_device_runtime_api.h
 #include "driver_types.h"
 #include "host_config.h"
 #include "host_defines.h"
 
 #undef __CUDABE__
+#undef __CUDA_LIBDEVICE__
 #define __CUDACC__
 #include "cuda_runtime.h"
 
@@ -105,7 +110,9 @@
 #define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n)
 #define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n)
 
+#if CUDA_VERSION < 9000
 #include "crt/device_runtime.h"
+#endif
 #include "crt/host_runtime.h"
 // device_runtime.h defines __cxa_* macros that will conflict with
 // cxxabi.h.
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -49,6 +49,8 @@
     return CudaVersion::CUDA_75;
   if (Major == 8 && Minor == 0)
     return CudaVersion::CUDA_80;
+  if (Major == 9 && Minor == 0)
+    return CudaVersion::CUDA_90;
   return CudaVersion::UNKNOWN;
 }
 
@@ -112,43 +114,55 @@
       Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
     }
 
-    std::error_code EC;
-    for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
-         !EC && LI != LE; LI = LI.increment(EC)) {
-      StringRef FilePath = LI->path();
-      StringRef FileName = llvm::sys::path::filename(FilePath);
-      // Process all bitcode filenames that look like libdevice.compute_XX.YY.bc
-      const StringRef LibDeviceName = "libdevice.";
-      if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
-        continue;
-      StringRef GpuArch = FileName.slice(
-          LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
-      LibDeviceMap[GpuArch] = FilePath.str();
-      // Insert map entries for specifc devices with this compute
-      // capability. NVCC's choice of the libdevice library version is
-      // rather peculiar and depends on the CUDA version.
-      if (GpuArch == "compute_20") {
-        LibDeviceMap["sm_20"] = FilePath;
-        LibDeviceMap["sm_21"] = FilePath;
-        LibDeviceMap["sm_32"] = FilePath;
-      } else if (GpuArch == "compute_30") {
-        LibDeviceMap["sm_30"] = FilePath;
-        if (Version < CudaVersion::CUDA_80) {
-          LibDeviceMap["sm_50"] = FilePath;
-          LibDeviceMap["sm_52"] = FilePath;
-          LibDeviceMap["sm_53"] = FilePath;
-        }
-        LibDeviceMap["sm_60"] = FilePath;
-        LibDeviceMap["sm_61"] = FilePath;
-        LibDeviceMap["sm_62"] = FilePath;
-      } else if (GpuArch == "compute_35") {
-        LibDeviceMap["sm_35"] = FilePath;
-        LibDeviceMap["sm_37"] = FilePath;
-      } else if (GpuArch == "compute_50") {
-        if (Version >= CudaVersion::CUDA_80) {
-          LibDeviceMap["sm_50"] = FilePath;
-          LibDeviceMap["sm_52"] = FilePath;
-          LibDeviceMap["sm_53"] = FilePath;
+    if (Version == CudaVersion::CUDA_90) {
+      // CUDA-9 uses a single libdevice file for all GPU variants.
+      std::string FilePath = LibDevicePath + "/libdevice.10.bc";
+      if (FS.exists(FilePath)) {
+        for (const char *GpuArch :
+             {"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53",
+              "sm_60", "sm_61", "sm_62", "sm_70"})
+          LibDeviceMap[GpuArch] = FilePath;
+      }
+    } else {
+      std::error_code EC;
+      for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
+           !EC && LI != LE; LI = LI.increment(EC)) {
+        StringRef FilePath = LI->path();
+        StringRef FileName = llvm::sys::path::filename(FilePath);
+        // Process all bitcode filenames that look like
+        // libdevice.compute_XX.YY.bc
+        const StringRef LibDeviceName = "libdevice.";
+        if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
+          continue;
+        StringRef GpuArch = FileName.slice(
+            LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
+        LibDeviceMap[GpuArch] = FilePath.str();
+        // Insert map entries for specific devices with this compute
+        // capability. NVCC's choice of the libdevice library version is
+        // rather peculiar and depends on the CUDA version.
+        if (GpuArch == "compute_20") {
+          LibDeviceMap["sm_20"] = FilePath;
+          LibDeviceMap["sm_21"] = FilePath;
+          LibDeviceMap["sm_32"] = FilePath;
+        } else if (GpuArch == "compute_30") {
+          LibDeviceMap["sm_30"] = FilePath;
+          if (Version < CudaVersion::CUDA_80) {
+            LibDeviceMap["sm_50"] = FilePath;
+            LibDeviceMap["sm_52"] = FilePath;
+            LibDeviceMap["sm_53"] = FilePath;
+          }
+          LibDeviceMap["sm_60"] = FilePath;
+          LibDeviceMap["sm_61"] = FilePath;
+          LibDeviceMap["sm_62"] = FilePath;
+        } else if (GpuArch == "compute_35") {
+          LibDeviceMap["sm_35"] = FilePath;
+          LibDeviceMap["sm_37"] = FilePath;
+        } else if (GpuArch == "compute_50") {
+          if (Version >= CudaVersion::CUDA_80) {
+            LibDeviceMap["sm_50"] = FilePath;
+            LibDeviceMap["sm_52"] = FilePath;
+            LibDeviceMap["sm_53"] = FilePath;
+          }
         }
       }
     }
Index: clang/lib/Basic/Targets/NVPTX.cpp
===================================================================
--- clang/lib/Basic/Targets/NVPTX.cpp
+++ clang/lib/Basic/Targets/NVPTX.cpp
@@ -183,6 +183,8 @@
         return "610";
       case CudaArch::SM_62:
         return "620";
+      case CudaArch::SM_70:
+        return "700";
       }
       llvm_unreachable("unhandled CudaArch");
     }();
Index: clang/lib/Basic/Cuda.cpp
===================================================================
--- clang/lib/Basic/Cuda.cpp
+++ clang/lib/Basic/Cuda.cpp
@@ -16,6 +16,8 @@
     return "7.5";
   case CudaVersion::CUDA_80:
     return "8.0";
+  case CudaVersion::CUDA_90:
+    return "9.0";
   }
   llvm_unreachable("invalid enum");
 }
@@ -48,6 +50,8 @@
     return "sm_61";
   case CudaArch::SM_62:
     return "sm_62";
+  case CudaArch::SM_70:
+    return "sm_70";
   }
   llvm_unreachable("invalid enum");
 }
@@ -66,6 +70,7 @@
       .Case("sm_60", CudaArch::SM_60)
       .Case("sm_61", CudaArch::SM_61)
       .Case("sm_62", CudaArch::SM_62)
+      .Case("sm_70", CudaArch::SM_70)
       .Default(CudaArch::UNKNOWN);
 }
 
@@ -95,6 +100,8 @@
     return "compute_61";
   case CudaVirtualArch::COMPUTE_62:
     return "compute_62";
+  case CudaVirtualArch::COMPUTE_70:
+    return "compute_70";
   }
   llvm_unreachable("invalid enum");
 }
@@ -112,6 +119,7 @@
       .Case("compute_60", CudaVirtualArch::COMPUTE_60)
       .Case("compute_61", CudaVirtualArch::COMPUTE_61)
       .Case("compute_62", CudaVirtualArch::COMPUTE_62)
+      .Case("compute_70", CudaVirtualArch::COMPUTE_70)
       .Default(CudaVirtualArch::UNKNOWN);
 }
 
@@ -142,6 +150,8 @@
     return CudaVirtualArch::COMPUTE_61;
   case CudaArch::SM_62:
     return CudaVirtualArch::COMPUTE_62;
+  case CudaArch::SM_70:
+    return CudaVirtualArch::COMPUTE_70;
   }
   llvm_unreachable("invalid enum");
 }
@@ -164,6 +174,8 @@
   case CudaArch::SM_61:
   case CudaArch::SM_62:
     return CudaVersion::CUDA_80;
+  case CudaArch::SM_70:
+    return CudaVersion::CUDA_90;
   }
   llvm_unreachable("invalid enum");
 }
Index: clang/include/clang/Basic/Cuda.h
===================================================================
--- clang/include/clang/Basic/Cuda.h
+++ clang/include/clang/Basic/Cuda.h
@@ -21,6 +21,7 @@
   CUDA_70,
   CUDA_75,
   CUDA_80,
+  CUDA_90,
 };
 const char *CudaVersionToString(CudaVersion V);
 
@@ -41,6 +42,7 @@
   SM_60,
   SM_61,
   SM_62,
+  SM_70,
 };
 const char *CudaArchToString(CudaArch A);
 
@@ -60,6 +62,7 @@
   COMPUTE_60,
   COMPUTE_61,
   COMPUTE_62,
+  COMPUTE_70,
 };
 const char *CudaVirtualArchToString(CudaVirtualArch A);
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to