[PATCH] D44747: Set calling convention for CUDA kernel

Yaxun Liu via Phabricator via cfe-commits Fri, 23 Mar 2018 11:48:06 -0700

yaxunl updated this revision to Diff 139625.
yaxunl retitled this revision from "[AMDGPU] Set calling convention for CUDA 
kernel" to "Set calling convention for CUDA kernel".
yaxunl edited the summary of this revision.
yaxunl added a comment.


Revised by John's comments. Introduce CC_CUDAKernel calling convention in AST, 
which is translated to target calling convention in IR.


https://reviews.llvm.org/D44747

Files:
  include/clang/Basic/Specifiers.h
  lib/AST/ItaniumMangle.cpp
  lib/AST/Type.cpp
  lib/AST/TypePrinter.cpp
  lib/CodeGen/CGCall.cpp
  lib/CodeGen/CGDebugInfo.cpp
  lib/CodeGen/TargetInfo.cpp
  lib/CodeGen/TargetInfo.h
  lib/Sema/SemaOverload.cpp
  lib/Sema/SemaType.cpp
  tools/libclang/CXType.cpp

Index: tools/libclang/CXType.cpp
===================================================================
--- tools/libclang/CXType.cpp
+++ tools/libclang/CXType.cpp
@@ -626,6 +626,8 @@
       TCALLINGCONV(PreserveAll);
     case CC_SpirFunction: return CXCallingConv_Unexposed;
     case CC_OpenCLKernel: return CXCallingConv_Unexposed;
+    case CC_CUDAKernel:
+      return CXCallingConv_Unexposed;
       break;
     }
 #undef TCALLINGCONV
Index: lib/Sema/SemaType.cpp
===================================================================
--- lib/Sema/SemaType.cpp
+++ lib/Sema/SemaType.cpp
@@ -3316,6 +3316,18 @@
   CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic,
                                                          IsCXXInstanceMethod);
 
+  // Attribute AT_CUDAGlobal affects the calling convention for AMDGPU targets.
+  // This is the simplest place to infer calling convention for CUDA kernels.
+  if (S.getLangOpts().CUDA && S.getLangOpts().CUDAIsDevice) {
+    for (const AttributeList *Attr = D.getDeclSpec().getAttributes().getList();
+         Attr; Attr = Attr->getNext()) {
+      if (Attr->getKind() == AttributeList::AT_CUDAGlobal) {
+        CC = CC_CUDAKernel;
+        break;
+      }
+    }
+  }
+
   // Attribute AT_OpenCLKernel affects the calling convention for SPIR
   // and AMDGPU targets, hence it cannot be treated as a calling
   // convention attribute. This is the simplest place to infer
Index: lib/Sema/SemaOverload.cpp
===================================================================
--- lib/Sema/SemaOverload.cpp
+++ lib/Sema/SemaOverload.cpp
@@ -1482,6 +1482,15 @@
       Changed = true;
     }
 
+    // Drop cuda_kernel calling convention since function pointer can only
+    // be used in host code.
+    if (getLangOpts().CUDA && FromFPT->getCallConv() == CC_CUDAKernel &&
+        FromFPT->getCallConv() != ToFPT->getCallConv()) {
+      FromFn = Context.adjustFunctionType(
+          FromFn, FromEInfo.withCallingConv(ToFPT->getCallConv()));
+      Changed = true;
+    }
+
     // Convert FromFPT's ExtParameterInfo if necessary. The conversion is valid
     // only if the ExtParameterInfo lists of the two function prototypes can be
     // merged and the merged list is identical to ToFPT's ExtParameterInfo list.
Index: lib/CodeGen/TargetInfo.h
===================================================================
--- lib/CodeGen/TargetInfo.h
+++ lib/CodeGen/TargetInfo.h
@@ -223,6 +223,9 @@
   /// Get LLVM calling convention for OpenCL kernel.
   virtual unsigned getOpenCLKernelCallingConv() const;
 
+  /// Get LLVM calling convention for CUDA kernel.
+  virtual unsigned getCUDAKernelCallingConv() const;
+
   /// Get target specific null pointer.
   /// \param T is the LLVM type of the null pointer.
   /// \param QT is the clang QualType of the null pointer.
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -431,6 +431,10 @@
   return llvm::CallingConv::SPIR_KERNEL;
 }
 
+unsigned TargetCodeGenInfo::getCUDAKernelCallingConv() const {
+  return llvm::CallingConv::C;
+}
+
 llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
     llvm::PointerType *T, QualType QT) const {
   return llvm::ConstantPointerNull::get(T);
@@ -7630,6 +7634,7 @@
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &M) const override;
   unsigned getOpenCLKernelCallingConv() const override;
+  unsigned getCUDAKernelCallingConv() const override;
 
   llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
       llvm::PointerType *T, QualType QT) const override;
@@ -7711,6 +7716,10 @@
   return llvm::CallingConv::AMDGPU_KERNEL;
 }
 
+unsigned AMDGPUTargetCodeGenInfo::getCUDAKernelCallingConv() const {
+  return llvm::CallingConv::AMDGPU_KERNEL;
+}
+
 // Currently LLVM assumes null pointers always have value 0,
 // which results in incorrectly transformed IR. Therefore, instead of
 // emitting null pointers in private and local address spaces, a null
Index: lib/CodeGen/CGDebugInfo.cpp
===================================================================
--- lib/CodeGen/CGDebugInfo.cpp
+++ lib/CodeGen/CGDebugInfo.cpp
@@ -1022,6 +1022,9 @@
     return llvm::dwarf::DW_CC_LLVM_PreserveAll;
   case CC_X86RegCall:
     return llvm::dwarf::DW_CC_LLVM_X86RegCall;
+  case CC_CUDAKernel:
+    // ToDo: Add llvm::dwarf::DW_CC_LLVM_CUDAKernel;
+    return 0;
   }
   return 0;
 }
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -64,6 +64,8 @@
   case CC_PreserveMost: return llvm::CallingConv::PreserveMost;
   case CC_PreserveAll: return llvm::CallingConv::PreserveAll;
   case CC_Swift: return llvm::CallingConv::Swift;
+  case CC_CUDAKernel:
+    return CGM.getTargetCodeGenInfo().getCUDAKernelCallingConv();
   }
 }
 
Index: lib/AST/TypePrinter.cpp
===================================================================
--- lib/AST/TypePrinter.cpp
+++ lib/AST/TypePrinter.cpp
@@ -778,6 +778,7 @@
       break;
     case CC_SpirFunction:
     case CC_OpenCLKernel:
+    case CC_CUDAKernel:
       // Do nothing. These CCs are not available as attributes.
       break;
     case CC_Swift:
Index: lib/AST/Type.cpp
===================================================================
--- lib/AST/Type.cpp
+++ lib/AST/Type.cpp
@@ -2758,6 +2758,8 @@
   case CC_Swift: return "swiftcall";
   case CC_PreserveMost: return "preserve_most";
   case CC_PreserveAll: return "preserve_all";
+  case CC_CUDAKernel:
+    return "cuda_kernel";
   }
 
   llvm_unreachable("Invalid calling convention.");
Index: lib/AST/ItaniumMangle.cpp
===================================================================
--- lib/AST/ItaniumMangle.cpp
+++ lib/AST/ItaniumMangle.cpp
@@ -2628,6 +2628,7 @@
   case CC_OpenCLKernel:
   case CC_PreserveMost:
   case CC_PreserveAll:
+  case CC_CUDAKernel:
     // FIXME: we should be mangling all of the above.
     return "";
 
Index: include/clang/Basic/Specifiers.h
===================================================================
--- include/clang/Basic/Specifiers.h
+++ include/clang/Basic/Specifiers.h
@@ -231,23 +231,24 @@
 
   /// \brief CallingConv - Specifies the calling convention that a function uses.
   enum CallingConv {
-    CC_C,           // __attribute__((cdecl))
-    CC_X86StdCall,  // __attribute__((stdcall))
-    CC_X86FastCall, // __attribute__((fastcall))
-    CC_X86ThisCall, // __attribute__((thiscall))
+    CC_C,             // __attribute__((cdecl))
+    CC_X86StdCall,    // __attribute__((stdcall))
+    CC_X86FastCall,   // __attribute__((fastcall))
+    CC_X86ThisCall,   // __attribute__((thiscall))
     CC_X86VectorCall, // __attribute__((vectorcall))
-    CC_X86Pascal,   // __attribute__((pascal))
-    CC_Win64,       // __attribute__((ms_abi))
-    CC_X86_64SysV,  // __attribute__((sysv_abi))
-    CC_X86RegCall, // __attribute__((regcall))
-    CC_AAPCS,       // __attribute__((pcs("aapcs")))
-    CC_AAPCS_VFP,   // __attribute__((pcs("aapcs-vfp")))
-    CC_IntelOclBicc, // __attribute__((intel_ocl_bicc))
-    CC_SpirFunction, // default for OpenCL functions on SPIR target
-    CC_OpenCLKernel, // inferred for OpenCL kernels
-    CC_Swift,        // __attribute__((swiftcall))
-    CC_PreserveMost, // __attribute__((preserve_most))
-    CC_PreserveAll,  // __attribute__((preserve_all))
+    CC_X86Pascal,     // __attribute__((pascal))
+    CC_Win64,         // __attribute__((ms_abi))
+    CC_X86_64SysV,    // __attribute__((sysv_abi))
+    CC_X86RegCall,    // __attribute__((regcall))
+    CC_AAPCS,         // __attribute__((pcs("aapcs")))
+    CC_AAPCS_VFP,     // __attribute__((pcs("aapcs-vfp")))
+    CC_IntelOclBicc,  // __attribute__((intel_ocl_bicc))
+    CC_SpirFunction,  // default for OpenCL functions on SPIR target
+    CC_OpenCLKernel,  // inferred for OpenCL kernels
+    CC_Swift,         // __attribute__((swiftcall))
+    CC_PreserveMost,  // __attribute__((preserve_most))
+    CC_PreserveAll,   // __attribute__((preserve_all))
+    CC_CUDAKernel,    // inferred for CUDA kernels
   };
 
   /// \brief Checks whether the given calling convention supports variadic

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D44747: Set calling convention for CUDA kernel

Reply via email to