[PATCH] D64015: [WIP][CUDA] Use shared MangleContext for CUDA and CXX CG
psalz created this revision. psalz added reviewers: hliao, tra, aheejin. Herald added a project: clang. Herald added a subscriber: cfe-commits. NOTE: This is a work in progress and mainly intended to highlight the issue - i.e., I'm not certain the provided solution is appropriate. Given this CUDA program template __global__ void run_this(Lambda lambda) { lambda(); } template struct remove_reference { using type = T; }; template struct remove_reference { using type = T; }; template constexpr typename remove_reference::type&& move(T&& t) { return static_cast::type&&>(t); } int main() { auto foo = move([](){}); run_this<<<1, 1, 1>>>([]() __device__ { printf("Hello World\n"); }); return 0; } the assertion at the top of `CGNVCUDARuntime::emitDeviceStub` will fail. For release builds the effect is simply a `cudaErrorInvalidDeviceFunction` error at run time. The reason for this is that the mangled names of the device stub and the actual device side function differ: The stub is called `_Z8run_thisIZ4mainE3$_1EvT_`, while the device function is `_Z8run_thisIZ4mainE3$_0EvT_`. The difference comes down to the anonymous struct ID that is maintained and assigned by the `ManglerContext`. It appears that for the latter `getAnonymousStructId` is never called for the moved no-op lambda, resulting in an ID of 0 for the kernel. My proposed solution would be to simply share the `ManglerContext` used by the `CGNVCUDARuntime` and `CGCXXABI` code generators. For this I've added a new `ASTContext::getSharedMangleContext` function that memoizes created manglers for the given target ABI. From looking at `ManglerContext` to me at least it doesn't look like that could cause any issues, but then again, I really don't know much about Clang's internals. Of course an alternative solution could be to make sure that `getAnonymousStructId` is always called for both lambdas (and in the correct order), but again I don't really know why that is not happening in the first place. Repository: rC Clang https://reviews.llvm.org/D64015 Files: include/clang/AST/ASTContext.h lib/AST/ASTContext.cpp lib/CodeGen/CGCUDANV.cpp lib/CodeGen/CGCXXABI.h Index: lib/CodeGen/CGCXXABI.h === --- lib/CodeGen/CGCXXABI.h +++ lib/CodeGen/CGCXXABI.h @@ -43,10 +43,10 @@ class CGCXXABI { protected: CodeGenModule &CGM; - std::unique_ptr MangleCtx; + std::shared_ptr MangleCtx; CGCXXABI(CodeGenModule &CGM) -: CGM(CGM), MangleCtx(CGM.getContext().createMangleContext()) {} +: CGM(CGM), MangleCtx(CGM.getContext().getSharedMangleContext()) {} protected: ImplicitParamDecl *getThisDecl(CodeGenFunction &CGF) { Index: lib/CodeGen/CGCUDANV.cpp === --- lib/CodeGen/CGCUDANV.cpp +++ lib/CodeGen/CGCUDANV.cpp @@ -60,7 +60,7 @@ /// Whether we generate relocatable device code. bool RelocatableDeviceCode; /// Mangle context for device. - std::unique_ptr DeviceMC; + std::shared_ptr DeviceMC; llvm::FunctionCallee getSetupArgumentFn() const; llvm::FunctionCallee getLaunchFn() const; @@ -154,7 +154,7 @@ : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()), TheModule(CGM.getModule()), RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode), - DeviceMC(CGM.getContext().createMangleContext( + DeviceMC(CGM.getContext().getSharedMangleContext( CGM.getContext().getAuxTargetInfo())) { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); Index: lib/AST/ASTContext.cpp === --- lib/AST/ASTContext.cpp +++ lib/AST/ASTContext.cpp @@ -10048,10 +10048,8 @@ return VTContext.get(); } -MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { - if (!T) -T = Target; - switch (T->getCXXABI().getKind()) { +MangleContext::ManglerKind getManglerKindForABI(TargetCXXABI::Kind K) { + switch (K) { case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericItanium: case TargetCXXABI::GenericARM: @@ -10060,13 +10058,37 @@ case TargetCXXABI::iOS64: case TargetCXXABI::WebAssembly: case TargetCXXABI::WatchOS: -return ItaniumMangleContext::create(*this, getDiagnostics()); +return MangleContext::MK_Itanium; case TargetCXXABI::Microsoft: -return MicrosoftMangleContext::create(*this, getDiagnostics()); +return MangleContext::MK_Microsoft; } llvm_unreachable("Unsupported ABI"); } +MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { + if (!T) +T = Target; + switch (getManglerKindForABI(T->getCXXABI().getKind())) { + case MangleContext::MK_Itanium: +return ItaniumMangleContext::create(*this, getDiagnostics()); + case MangleContext::MK_Microsoft: +return MicrosoftMangleContext::create(*this, getD
[PATCH] D64015: [WIP][CUDA] Use shared MangleContext for CUDA and CXX CG
psalz updated this revision to Diff 207488. psalz added a comment. Move ownership of shared `MangleContext`s to `ASTContext`, return references from `getSharedMangleContext`. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D64015/new/ https://reviews.llvm.org/D64015 Files: include/clang/AST/ASTContext.h lib/AST/ASTContext.cpp lib/CodeGen/CGCUDANV.cpp lib/CodeGen/CGCXXABI.h Index: lib/CodeGen/CGCXXABI.h === --- lib/CodeGen/CGCXXABI.h +++ lib/CodeGen/CGCXXABI.h @@ -43,10 +43,10 @@ class CGCXXABI { protected: CodeGenModule &CGM; - std::unique_ptr MangleCtx; + MangleContext &MangleCtx; CGCXXABI(CodeGenModule &CGM) -: CGM(CGM), MangleCtx(CGM.getContext().createMangleContext()) {} +: CGM(CGM), MangleCtx(CGM.getContext().getSharedMangleContext()) {} protected: ImplicitParamDecl *getThisDecl(CodeGenFunction &CGF) { @@ -94,7 +94,7 @@ /// Gets the mangle context. MangleContext &getMangleContext() { -return *MangleCtx; +return MangleCtx; } /// Returns true if the given constructor or destructor is one of the Index: lib/CodeGen/CGCUDANV.cpp === --- lib/CodeGen/CGCUDANV.cpp +++ lib/CodeGen/CGCUDANV.cpp @@ -60,7 +60,7 @@ /// Whether we generate relocatable device code. bool RelocatableDeviceCode; /// Mangle context for device. - std::unique_ptr DeviceMC; + MangleContext &DeviceMC; llvm::FunctionCallee getSetupArgumentFn() const; llvm::FunctionCallee getLaunchFn() const; @@ -154,7 +154,7 @@ : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()), TheModule(CGM.getModule()), RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode), - DeviceMC(CGM.getContext().createMangleContext( + DeviceMC(CGM.getContext().getSharedMangleContext( CGM.getContext().getAuxTargetInfo())) { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); @@ -207,10 +207,10 @@ std::string CGNVCUDARuntime::getDeviceSideName(const Decl *D) { auto *ND = cast(D); std::string DeviceSideName; - if (DeviceMC->shouldMangleDeclName(ND)) { + if (DeviceMC.shouldMangleDeclName(ND)) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); -DeviceMC->mangleName(ND, Out); +DeviceMC.mangleName(ND, Out); DeviceSideName = Out.str(); } else DeviceSideName = ND->getIdentifier()->getName(); Index: lib/AST/ASTContext.cpp === --- lib/AST/ASTContext.cpp +++ lib/AST/ASTContext.cpp @@ -10048,10 +10048,8 @@ return VTContext.get(); } -MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { - if (!T) -T = Target; - switch (T->getCXXABI().getKind()) { +MangleContext::ManglerKind getManglerKindForABI(TargetCXXABI::Kind K) { + switch (K) { case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericItanium: case TargetCXXABI::GenericARM: @@ -10060,13 +10058,36 @@ case TargetCXXABI::iOS64: case TargetCXXABI::WebAssembly: case TargetCXXABI::WatchOS: -return ItaniumMangleContext::create(*this, getDiagnostics()); +return MangleContext::MK_Itanium; case TargetCXXABI::Microsoft: -return MicrosoftMangleContext::create(*this, getDiagnostics()); +return MangleContext::MK_Microsoft; } llvm_unreachable("Unsupported ABI"); } +MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { + if (!T) +T = Target; + switch (getManglerKindForABI(T->getCXXABI().getKind())) { + case MangleContext::MK_Itanium: +return ItaniumMangleContext::create(*this, getDiagnostics()); + case MangleContext::MK_Microsoft: +return MicrosoftMangleContext::create(*this, getDiagnostics()); + } + llvm_unreachable("Unsupported MangleContext"); +} + +MangleContext &ASTContext::getSharedMangleContext(const TargetInfo *T) { + if (!T) +T = Target; + auto Kind = getManglerKindForABI(T->getCXXABI().getKind()); + auto I = SharedMangleContexts.find(Kind); + if (I == SharedMangleContexts.end()) +I = SharedMangleContexts.insert({Kind, +std::unique_ptr(createMangleContext(T))}).first; + return *I->second; +} + CXXABI::~CXXABI() = default; size_t ASTContext::getSideTableAllocatedMemory() const { Index: include/clang/AST/ASTContext.h === --- include/clang/AST/ASTContext.h +++ include/clang/AST/ASTContext.h @@ -24,6 +24,7 @@ #include "clang/AST/DeclarationName.h" #include "clang/AST/Expr.h" #include "clang/AST/ExternalASTSource.h" +#include "clang/AST/Mangle.h" #include "clang/AST/NestedNameSpecifier.h" #include "clang/AST/PrettyPrinter.h" #include "clang/AST/RawCommentList.h" @@ -96,7 +97,6 @@ class DiagnosticsEngine; class Expr; class FixedPointSemantics; -class MangleContext; class MangleNumberingContext; class Mate