SimeonEhrig updated this revision to Diff 143706.
SimeonEhrig added a comment.
Add a comment which explains the need for a unique ctor/dtor name.
https://reviews.llvm.org/D44435
Files:
lib/CodeGen/CGCUDANV.cpp
unittests/CodeGen/IncrementalProcessingTest.cpp
Index: unittests/CodeGen/IncrementalProcessingTest.cpp
===================================================================
--- unittests/CodeGen/IncrementalProcessingTest.cpp
+++ unittests/CodeGen/IncrementalProcessingTest.cpp
@@ -21,9 +21,11 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Target/TargetOptions.h"
#include "gtest/gtest.h"
#include <memory>
+#include <string>
using namespace llvm;
using namespace clang;
@@ -171,4 +173,122 @@
}
+
+// In CUDA incremental processing, a CUDA ctor or dtor will be generated for
+// every statement if a fatbinary file exists.
+const char CUDATestProgram1[] =
+ "void cudaFunc1(){}\n";
+
+const char CUDATestProgram2[] =
+ "void cudaFunc2(){}\n";
+
+const Function* getCUDActor(llvm::Module& M) {
+ for (const auto& Func: M)
+ if (Func.hasName() && Func.getName().startswith("__cuda_module_ctor_"))
+ return &Func;
+
+ return nullptr;
+}
+
+const Function* getCUDAdtor(llvm::Module& M) {
+ for (const auto& Func: M)
+ if (Func.hasName() && Func.getName().startswith("__cuda_module_dtor_"))
+ return &Func;
+
+ return nullptr;
+}
+
+TEST(IncrementalProcessing, EmitCUDAGlobalInitFunc) {
+ LLVMContext Context;
+ CompilerInstance compiler;
+
+ compiler.createDiagnostics();
+ compiler.getLangOpts().CPlusPlus = 1;
+ compiler.getLangOpts().CPlusPlus11 = 1;
+ compiler.getLangOpts().CUDA = 1;
+
+ compiler.getTargetOpts().Triple = llvm::Triple::normalize(
+ llvm::sys::getProcessTriple());
+ compiler.setTarget(clang::TargetInfo::CreateTargetInfo(
+ compiler.getDiagnostics(),
+ std::make_shared<clang::TargetOptions>(
+ compiler.getTargetOpts())));
+
+ // To enable the generation of CUDA host code, the AuxTriple needs to be
+ // set up.
+ llvm::Triple hostTriple(llvm::sys::getProcessTriple());
+ compiler.getFrontendOpts().AuxTriple =
+ hostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda";
+ auto targetOptions = std::make_shared<clang::TargetOptions>();
+ targetOptions->Triple = compiler.getFrontendOpts().AuxTriple;
+ targetOptions->HostTriple = compiler.getTarget().getTriple().str();
+ compiler.setAuxTarget(clang::TargetInfo::CreateTargetInfo(
+ compiler.getDiagnostics(), targetOptions));
+
+ // A fatbinary file is necessary so that the code generator generates the
+ // ctor and dtor.
+ auto tmpFatbinFileOrError = llvm::sys::fs::TempFile::create("dummy.fatbin");
+ ASSERT_TRUE((bool)tmpFatbinFileOrError);
+ auto tmpFatbinFile = std::move(*tmpFatbinFileOrError);
+ compiler.getCodeGenOpts().CudaGpuBinaryFileName = tmpFatbinFile.TmpName;
+
+ compiler.createFileManager();
+ compiler.createSourceManager(compiler.getFileManager());
+ compiler.createPreprocessor(clang::TU_Prefix);
+ compiler.getPreprocessor().enableIncrementalProcessing();
+
+ compiler.createASTContext();
+
+ CodeGenerator* CG =
+ CreateLLVMCodeGen(
+ compiler.getDiagnostics(),
+ "main-module",
+ compiler.getHeaderSearchOpts(),
+ compiler.getPreprocessorOpts(),
+ compiler.getCodeGenOpts(),
+ Context);
+
+ compiler.setASTConsumer(std::unique_ptr<ASTConsumer>(CG));
+ compiler.createSema(clang::TU_Prefix, nullptr);
+ Sema& S = compiler.getSema();
+
+ std::unique_ptr<Parser> ParseOP(new Parser(S.getPreprocessor(), S,
+ /*SkipFunctionBodies*/ false));
+ Parser &P = *ParseOP.get();
+
+ std::array<std::unique_ptr<llvm::Module>, 3> M;
+ M[0] = IncrementalParseAST(compiler, P, *CG, nullptr);
+ ASSERT_TRUE(M[0]);
+
+ M[1] = IncrementalParseAST(compiler, P, *CG, CUDATestProgram1);
+ ASSERT_TRUE(M[1]);
+ ASSERT_TRUE(M[1]->getFunction("_Z9cudaFunc1v"));
+
+ M[2] = IncrementalParseAST(compiler, P, *CG, CUDATestProgram2);
+ ASSERT_TRUE(M[2]);
+ ASSERT_TRUE(M[2]->getFunction("_Z9cudaFunc2v"));
+ // First code should not end up in second module:
+ ASSERT_FALSE(M[2]->getFunction("_Z9cudaFunc1v"));
+
+ // Make sure that the CUDA ctors and dtors exist:
+ const Function* CUDActor1 = getCUDActor(*M[1]);
+ ASSERT_TRUE(CUDActor1);
+
+ const Function* CUDActor2 = getCUDActor(*M[2]);
+ ASSERT_TRUE(CUDActor2);
+
+ const Function* CUDAdtor1 = getCUDAdtor(*M[1]);
+ ASSERT_TRUE(CUDAdtor1);
+
+ const Function* CUDAdtor2 = getCUDAdtor(*M[2]);
+ ASSERT_TRUE(CUDAdtor2);
+
+ // Compare the names of both ctors and dtors to check that they are
+ // unique.
+ ASSERT_FALSE(CUDActor1->getName() == CUDActor2->getName());
+ ASSERT_FALSE(CUDAdtor1->getName() == CUDAdtor2->getName());
+
+ ASSERT_FALSE((bool)tmpFatbinFile.discard());
+}
+
} // end anonymous namespace
Index: lib/CodeGen/CGCUDANV.cpp
===================================================================
--- lib/CodeGen/CGCUDANV.cpp
+++ lib/CodeGen/CGCUDANV.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/Path.h"
using namespace clang;
using namespace CodeGen;
@@ -244,7 +245,7 @@
/// Creates a global constructor function for the module:
/// \code
-/// void __cuda_module_ctor(void*) {
+/// void __cuda_module_ctor_<ModuleName>(void*) {
/// Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
/// __cuda_register_globals(Handle);
/// }
@@ -277,9 +278,28 @@
return nullptr;
}
+ // A unique ctor/dtor name is necessary for incremental and lazy JIT
+ // compilation of CUDA code, in which each TU can include more than one LLVM
+ // module. Each LLVM module has a CUDA ctor/dtor (if a fatbinary file exists).
+ const SmallString<128> ModuleName
+ = llvm::sys::path::filename(CGM.getModule().getName());
+ SmallString<128> CtorSuffix("");
+ if (!ModuleName.empty()){
+ CtorSuffix.append("_");
+ CtorSuffix.append(ModuleName);
+ }
+
+ for (size_t i = 0; i < CtorSuffix.size(); ++i) {
+ // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens
+ // to be the set of C preprocessing numbers.
+ if (!isPreprocessingNumberBody(CtorSuffix[i]))
+ CtorSuffix[i] = '_';
+ }
+
llvm::Function *ModuleCtorFunc = llvm::Function::Create(
llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
- llvm::GlobalValue::InternalLinkage, "__cuda_module_ctor", &TheModule);
+ llvm::GlobalValue::InternalLinkage, "__cuda_module_ctor" + CtorSuffix,
+ &TheModule);
llvm::BasicBlock *CtorEntryBB =
llvm::BasicBlock::Create(Context, "entry", ModuleCtorFunc);
CGBuilderTy CtorBuilder(CGM, Context);
@@ -329,7 +349,7 @@
/// Creates a global destructor function that unregisters the GPU code blob
/// registered by constructor.
/// \code
-/// void __cuda_module_dtor(void*) {
+/// void __cuda_module_dtor_<ModuleName>(void*) {
/// __cudaUnregisterFatBinary(Handle);
/// }
/// \endcode
@@ -343,9 +363,28 @@
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
"__cudaUnregisterFatBinary");
+ // A unique ctor/dtor name is necessary for incremental and lazy JIT
+ // compilation of CUDA code, in which each TU can include more than one LLVM
+ // module. Each LLVM module has a CUDA ctor/dtor (if a fatbinary file exists).
+ const SmallString<128> ModuleName
+ = llvm::sys::path::filename(CGM.getModule().getName());
+ SmallString<128> DtorSuffix("");
+ if (!ModuleName.empty()){
+ DtorSuffix.append("_");
+ DtorSuffix.append(ModuleName);
+ }
+
+ for (size_t i = 0; i < DtorSuffix.size(); ++i) {
+ // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens
+ // to be the set of C preprocessing numbers.
+ if (!isPreprocessingNumberBody(DtorSuffix[i]))
+ DtorSuffix[i] = '_';
+ }
+
llvm::Function *ModuleDtorFunc = llvm::Function::Create(
llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
- llvm::GlobalValue::InternalLinkage, "__cuda_module_dtor", &TheModule);
+ llvm::GlobalValue::InternalLinkage, "__cuda_module_dtor" + DtorSuffix,
+ &TheModule);
llvm::BasicBlock *DtorEntryBB =
llvm::BasicBlock::Create(Context, "entry", ModuleDtorFunc);
CGBuilderTy DtorBuilder(CGM, Context);
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits