[PATCH] D64015: [WIP][CUDA] Use shared MangleContext for CUDA and CXX CG

2019-07-01 Thread Philip Salzmann via Phabricator via cfe-commits
psalz created this revision.
psalz added reviewers: hliao, tra, aheejin.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

NOTE: This is a work in progress and mainly intended to highlight the issue - 
i.e., I'm not certain the provided solution is appropriate.

Given this CUDA program

  template
  __global__ void run_this(Lambda lambda) {
  lambda();
  }
  
  template
  struct remove_reference {
  using type = T;
  };
  
  template
  struct remove_reference {
  using type = T;
  };
  
  template
  constexpr typename remove_reference::type&& move(T&& t) {
  return static_cast::type&&>(t);
  }
  
  int main() {
  auto foo = move([](){});
  run_this<<<1, 1, 1>>>([]() __device__ { printf("Hello World\n"); }); 
  return 0;
  }

the assertion at the top of `CGNVCUDARuntime::emitDeviceStub` will fail. For 
release builds the effect is simply a `cudaErrorInvalidDeviceFunction` error at 
run time. The reason for this is that the mangled names of the device stub and 
the actual device side function differ: The stub is called 
`_Z8run_thisIZ4mainE3$_1EvT_`, while the device function is 
`_Z8run_thisIZ4mainE3$_0EvT_`. The difference comes down to the anonymous 
struct ID that is maintained and assigned by the `ManglerContext`. It appears 
that for the latter `getAnonymousStructId` is never called for the moved no-op 
lambda, resulting in an ID of 0 for the kernel.

My proposed solution would be to simply share the `ManglerContext` used by the 
`CGNVCUDARuntime` and `CGCXXABI` code generators. For this I've added a new 
`ASTContext::getSharedMangleContext` function that memoizes created manglers 
for the given target ABI. From looking at `ManglerContext` to me at least it 
doesn't look like that could cause any issues, but then again, I really don't 
know much about Clang's internals.

Of course an alternative solution could be to make sure that 
`getAnonymousStructId` is always called for both lambdas (and in the correct 
order), but again I don't really know why that is not happening in the first 
place.


Repository:
  rC Clang

https://reviews.llvm.org/D64015

Files:
  include/clang/AST/ASTContext.h
  lib/AST/ASTContext.cpp
  lib/CodeGen/CGCUDANV.cpp
  lib/CodeGen/CGCXXABI.h

Index: lib/CodeGen/CGCXXABI.h
===
--- lib/CodeGen/CGCXXABI.h
+++ lib/CodeGen/CGCXXABI.h
@@ -43,10 +43,10 @@
 class CGCXXABI {
 protected:
   CodeGenModule &CGM;
-  std::unique_ptr MangleCtx;
+  std::shared_ptr MangleCtx;
 
   CGCXXABI(CodeGenModule &CGM)
-: CGM(CGM), MangleCtx(CGM.getContext().createMangleContext()) {}
+: CGM(CGM), MangleCtx(CGM.getContext().getSharedMangleContext()) {}
 
 protected:
   ImplicitParamDecl *getThisDecl(CodeGenFunction &CGF) {
Index: lib/CodeGen/CGCUDANV.cpp
===
--- lib/CodeGen/CGCUDANV.cpp
+++ lib/CodeGen/CGCUDANV.cpp
@@ -60,7 +60,7 @@
   /// Whether we generate relocatable device code.
   bool RelocatableDeviceCode;
   /// Mangle context for device.
-  std::unique_ptr DeviceMC;
+  std::shared_ptr DeviceMC;
 
   llvm::FunctionCallee getSetupArgumentFn() const;
   llvm::FunctionCallee getLaunchFn() const;
@@ -154,7 +154,7 @@
 : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
   TheModule(CGM.getModule()),
   RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode),
-  DeviceMC(CGM.getContext().createMangleContext(
+  DeviceMC(CGM.getContext().getSharedMangleContext(
   CGM.getContext().getAuxTargetInfo())) {
   CodeGen::CodeGenTypes &Types = CGM.getTypes();
   ASTContext &Ctx = CGM.getContext();
Index: lib/AST/ASTContext.cpp
===
--- lib/AST/ASTContext.cpp
+++ lib/AST/ASTContext.cpp
@@ -10048,10 +10048,8 @@
   return VTContext.get();
 }
 
-MangleContext *ASTContext::createMangleContext(const TargetInfo *T) {
-  if (!T)
-T = Target;
-  switch (T->getCXXABI().getKind()) {
+MangleContext::ManglerKind getManglerKindForABI(TargetCXXABI::Kind K) {
+  switch (K) {
   case TargetCXXABI::GenericAArch64:
   case TargetCXXABI::GenericItanium:
   case TargetCXXABI::GenericARM:
@@ -10060,13 +10058,37 @@
   case TargetCXXABI::iOS64:
   case TargetCXXABI::WebAssembly:
   case TargetCXXABI::WatchOS:
-return ItaniumMangleContext::create(*this, getDiagnostics());
+return MangleContext::MK_Itanium;
   case TargetCXXABI::Microsoft:
-return MicrosoftMangleContext::create(*this, getDiagnostics());
+return MangleContext::MK_Microsoft;
   }
   llvm_unreachable("Unsupported ABI");
 }
 
+MangleContext *ASTContext::createMangleContext(const TargetInfo *T) {
+  if (!T)
+T = Target;
+  switch (getManglerKindForABI(T->getCXXABI().getKind())) {
+  case MangleContext::MK_Itanium:
+return ItaniumMangleContext::create(*this, getDiagnostics());
+  case MangleContext::MK_Microsoft:
+return MicrosoftMangleContext::create(*this, getD

[PATCH] D64015: [WIP][CUDA] Use shared MangleContext for CUDA and CXX CG

2019-07-02 Thread Philip Salzmann via Phabricator via cfe-commits
psalz updated this revision to Diff 207488.
psalz added a comment.

Move ownership of shared `MangleContext`s to `ASTContext`, return references 
from `getSharedMangleContext`.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64015/new/

https://reviews.llvm.org/D64015

Files:
  include/clang/AST/ASTContext.h
  lib/AST/ASTContext.cpp
  lib/CodeGen/CGCUDANV.cpp
  lib/CodeGen/CGCXXABI.h

Index: lib/CodeGen/CGCXXABI.h
===
--- lib/CodeGen/CGCXXABI.h
+++ lib/CodeGen/CGCXXABI.h
@@ -43,10 +43,10 @@
 class CGCXXABI {
 protected:
   CodeGenModule &CGM;
-  std::unique_ptr MangleCtx;
+  MangleContext &MangleCtx;
 
   CGCXXABI(CodeGenModule &CGM)
-: CGM(CGM), MangleCtx(CGM.getContext().createMangleContext()) {}
+: CGM(CGM), MangleCtx(CGM.getContext().getSharedMangleContext()) {}
 
 protected:
   ImplicitParamDecl *getThisDecl(CodeGenFunction &CGF) {
@@ -94,7 +94,7 @@
 
   /// Gets the mangle context.
   MangleContext &getMangleContext() {
-return *MangleCtx;
+return MangleCtx;
   }
 
   /// Returns true if the given constructor or destructor is one of the
Index: lib/CodeGen/CGCUDANV.cpp
===
--- lib/CodeGen/CGCUDANV.cpp
+++ lib/CodeGen/CGCUDANV.cpp
@@ -60,7 +60,7 @@
   /// Whether we generate relocatable device code.
   bool RelocatableDeviceCode;
   /// Mangle context for device.
-  std::unique_ptr DeviceMC;
+  MangleContext &DeviceMC;
 
   llvm::FunctionCallee getSetupArgumentFn() const;
   llvm::FunctionCallee getLaunchFn() const;
@@ -154,7 +154,7 @@
 : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
   TheModule(CGM.getModule()),
   RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode),
-  DeviceMC(CGM.getContext().createMangleContext(
+  DeviceMC(CGM.getContext().getSharedMangleContext(
   CGM.getContext().getAuxTargetInfo())) {
   CodeGen::CodeGenTypes &Types = CGM.getTypes();
   ASTContext &Ctx = CGM.getContext();
@@ -207,10 +207,10 @@
 std::string CGNVCUDARuntime::getDeviceSideName(const Decl *D) {
   auto *ND = cast(D);
   std::string DeviceSideName;
-  if (DeviceMC->shouldMangleDeclName(ND)) {
+  if (DeviceMC.shouldMangleDeclName(ND)) {
 SmallString<256> Buffer;
 llvm::raw_svector_ostream Out(Buffer);
-DeviceMC->mangleName(ND, Out);
+DeviceMC.mangleName(ND, Out);
 DeviceSideName = Out.str();
   } else
 DeviceSideName = ND->getIdentifier()->getName();
Index: lib/AST/ASTContext.cpp
===
--- lib/AST/ASTContext.cpp
+++ lib/AST/ASTContext.cpp
@@ -10048,10 +10048,8 @@
   return VTContext.get();
 }
 
-MangleContext *ASTContext::createMangleContext(const TargetInfo *T) {
-  if (!T)
-T = Target;
-  switch (T->getCXXABI().getKind()) {
+MangleContext::ManglerKind getManglerKindForABI(TargetCXXABI::Kind K) {
+  switch (K) {
   case TargetCXXABI::GenericAArch64:
   case TargetCXXABI::GenericItanium:
   case TargetCXXABI::GenericARM:
@@ -10060,13 +10058,36 @@
   case TargetCXXABI::iOS64:
   case TargetCXXABI::WebAssembly:
   case TargetCXXABI::WatchOS:
-return ItaniumMangleContext::create(*this, getDiagnostics());
+return MangleContext::MK_Itanium;
   case TargetCXXABI::Microsoft:
-return MicrosoftMangleContext::create(*this, getDiagnostics());
+return MangleContext::MK_Microsoft;
   }
   llvm_unreachable("Unsupported ABI");
 }
 
+MangleContext *ASTContext::createMangleContext(const TargetInfo *T) {
+  if (!T)
+T = Target;
+  switch (getManglerKindForABI(T->getCXXABI().getKind())) {
+  case MangleContext::MK_Itanium:
+return ItaniumMangleContext::create(*this, getDiagnostics());
+  case MangleContext::MK_Microsoft:
+return MicrosoftMangleContext::create(*this, getDiagnostics());
+  }
+  llvm_unreachable("Unsupported MangleContext");
+}
+
+MangleContext &ASTContext::getSharedMangleContext(const TargetInfo *T) {
+  if (!T)
+T = Target;
+  auto Kind = getManglerKindForABI(T->getCXXABI().getKind());
+  auto I = SharedMangleContexts.find(Kind);
+  if (I == SharedMangleContexts.end())
+I = SharedMangleContexts.insert({Kind,
+std::unique_ptr(createMangleContext(T))}).first;
+  return *I->second;
+}
+
 CXXABI::~CXXABI() = default;
 
 size_t ASTContext::getSideTableAllocatedMemory() const {
Index: include/clang/AST/ASTContext.h
===
--- include/clang/AST/ASTContext.h
+++ include/clang/AST/ASTContext.h
@@ -24,6 +24,7 @@
 #include "clang/AST/DeclarationName.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExternalASTSource.h"
+#include "clang/AST/Mangle.h"
 #include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/PrettyPrinter.h"
 #include "clang/AST/RawCommentList.h"
@@ -96,7 +97,6 @@
 class DiagnosticsEngine;
 class Expr;
 class FixedPointSemantics;
-class MangleContext;
 class MangleNumberingContext;
 class Mate