https://github.com/anutosh491 updated https://github.com/llvm/llvm-project/pull/136404
>From 1b18e96882590825075b8f8e5094fdcb5225d349 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat...@gmail.com>
Date: Fri, 18 Apr 2025 18:45:00 +0530
Subject: [PATCH 1/5] Fix cuda flag with clang-repl

---
 clang/include/clang/Interpreter/Interpreter.h |  3 +-
 clang/lib/Interpreter/DeviceOffload.cpp       |  3 +-
 clang/lib/Interpreter/Interpreter.cpp         | 48 +++++++++++++------
 3 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h
index b1b63aedf86ab..7425797c55297 100644
--- a/clang/include/clang/Interpreter/Interpreter.h
+++ b/clang/include/clang/Interpreter/Interpreter.h
@@ -129,7 +129,8 @@ class Interpreter {
 public:
   virtual ~Interpreter();
   static llvm::Expected<std::unique_ptr<Interpreter>>
-  create(std::unique_ptr<CompilerInstance> CI);
+  create(std::unique_ptr<CompilerInstance> CI,
+         std::unique_ptr<CompilerInstance> DeviceCI = nullptr);
   static llvm::Expected<std::unique_ptr<Interpreter>>
   createWithCUDA(std::unique_ptr<CompilerInstance> CI,
                  std::unique_ptr<CompilerInstance> DCI);
diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
index 1999d63d1aa04..9a7be006250a0 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -34,14 +34,15 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
       TargetOpts(HostInstance.getTargetOpts()) {
   if (Err)
     return;
-  DeviceCI = std::move(DeviceInstance);
   StringRef Arch = TargetOpts.CPU;
   if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
+    DeviceInstance.release();
     Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
                                                "Invalid CUDA architecture",
                                                llvm::inconvertibleErrorCode()));
     return;
   }
+  DeviceCI = std::move(DeviceInstance);
 }
 
 llvm::Expected<TranslationUnitDecl *>
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index f8c8d0a425659..049cc00cd198f 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -451,13 +451,44 @@ const char *const Runtimes = R"(
 )";
 
 llvm::Expected<std::unique_ptr<Interpreter>>
-Interpreter::create(std::unique_ptr<CompilerInstance> CI) {
+Interpreter::create(std::unique_ptr<CompilerInstance> CI,
+                    std::unique_ptr<CompilerInstance> DeviceCI) {
   llvm::Error Err = llvm::Error::success();
   auto Interp =
       std::unique_ptr<Interpreter>(new Interpreter(std::move(CI), Err));
   if (Err)
     return std::move(Err);
 
+  if (DeviceCI) {
+    // auto DeviceLLVMCtx = std::make_unique<llvm::LLVMContext>();
+    // auto DeviceTSCtx =
+    //     std::make_unique<llvm::orc::ThreadSafeContext>(std::move(DeviceLLVMCtx));
+
+    // llvm::Error DeviceErr = llvm::Error::success();
+    // llvm::ErrorAsOutParameter EAO(&DeviceErr);
+
+    // auto DeviceAct = std::make_unique<IncrementalAction>(
+    //     *DeviceCI, *DeviceTSCtx->getContext(), DeviceErr, *Interp);
+
+    // if (DeviceErr)
+    //   return std::move(DeviceErr);
+
+    // DeviceCI->ExecuteAction(*DeviceAct);
+    DeviceCI->ExecuteAction(*Interp->Act);
+
+    llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
+        std::make_unique<llvm::vfs::InMemoryFileSystem>();
+
+    auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
+        std::move(DeviceCI), *Interp->getCompilerInstance(), IMVFS, Err,
+        Interp->PTUs);
+
+    if (Err)
+      return std::move(Err);
+
+    Interp->DeviceParser = std::move(DeviceParser);
+  }
+
   // Add runtime code and set a marker to hide it from user code. Undo will not
   // go through that.
   auto PTU = Interp->Parse(Runtimes);
@@ -481,20 +512,7 @@ Interpreter::createWithCUDA(std::unique_ptr<CompilerInstance> CI,
   OverlayVFS->pushOverlay(IMVFS);
   CI->createFileManager(OverlayVFS);
 
-  auto Interp = Interpreter::create(std::move(CI));
-  if (auto E = Interp.takeError())
-    return std::move(E);
-
-  llvm::Error Err = llvm::Error::success();
-  auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
-      std::move(DCI), *(*Interp)->getCompilerInstance(), IMVFS, Err,
-      (*Interp)->PTUs);
-  if (Err)
-    return std::move(Err);
-
-  (*Interp)->DeviceParser = std::move(DeviceParser);
-
-  return Interp;
+  return Interpreter::create(std::move(CI), std::move(DCI));
 }
 
 const CompilerInstance *Interpreter::getCompilerInstance() const {
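For readers following along, a minimal sketch of how the reworked entry point is meant to be driven. This is not part of the patch; the IncrementalCompilerBuilder calls mirror what clang-repl's existing --cuda path already does, and the SDK path and CUDA snippet are made up for illustration:

  // Sketch only: drive the new create()/createWithCUDA() overloads roughly the
  // way clang-repl's --cuda mode does. Paths and the CUDA string are made up.
  #include "clang/Interpreter/Interpreter.h"
  #include "llvm/Support/Error.h"

  llvm::Error runCudaRepl() {
    clang::IncrementalCompilerBuilder CB;
    CB.SetCudaSDK("/usr/local/cuda");        // hypothetical SDK location

    auto DeviceCI = CB.CreateCudaDevice();   // device-side CompilerInstance
    if (!DeviceCI)
      return DeviceCI.takeError();
    auto HostCI = CB.CreateCudaHost();       // host-side CompilerInstance
    if (!HostCI)
      return HostCI.takeError();

    // With this patch, createWithCUDA() simply forwards the device
    // CompilerInstance into create(CI, DeviceCI).
    auto Interp = clang::Interpreter::createWithCUDA(std::move(*HostCI),
                                                     std::move(*DeviceCI));
    if (!Interp)
      return Interp.takeError();

    return (*Interp)->ParseAndExecute("__global__ void k() {}");
  }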
>From 35fb15bafb670ee704b1dcb208875bef239a6199 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat...@gmail.com>
Date: Sat, 19 Apr 2025 14:12:19 +0530
Subject: [PATCH 2/5] Use Act instead of DeviceAct

---
 clang/lib/Interpreter/Interpreter.cpp | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 049cc00cd198f..2d1d6847e358b 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -460,20 +460,6 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI,
     return std::move(Err);
 
   if (DeviceCI) {
-    // auto DeviceLLVMCtx = std::make_unique<llvm::LLVMContext>();
-    // auto DeviceTSCtx =
-    //     std::make_unique<llvm::orc::ThreadSafeContext>(std::move(DeviceLLVMCtx));
-
-    // llvm::Error DeviceErr = llvm::Error::success();
-    // llvm::ErrorAsOutParameter EAO(&DeviceErr);
-
-    // auto DeviceAct = std::make_unique<IncrementalAction>(
-    //     *DeviceCI, *DeviceTSCtx->getContext(), DeviceErr, *Interp);
-
-    // if (DeviceErr)
-    //   return std::move(DeviceErr);
-
-    // DeviceCI->ExecuteAction(*DeviceAct);
     DeviceCI->ExecuteAction(*Interp->Act);
 
     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
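Context for the IMVFS plumbing that keeps appearing in these hunks: the generated fatbinary never touches disk, it is placed in an in-memory VFS layered over the real filesystem so that host CodeGen can later open it by name. A standalone illustration of that overlay mechanism (not part of the patch, but the same llvm::vfs calls the diffs use; the file name and contents are placeholders):

  // Standalone illustration of the overlay-VFS trick used for the in-memory
  // fatbinary.
  #include "llvm/ADT/IntrusiveRefCntPtr.h"
  #include "llvm/Support/MemoryBuffer.h"
  #include "llvm/Support/VirtualFileSystem.h"

  void overlayDemo() {
    auto IMVFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
    auto Overlay = llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
        llvm::vfs::getRealFileSystem());
    Overlay->pushOverlay(IMVFS);

    // The device parser registers the fatbinary under a virtual path...
    IMVFS->addFile("/incr_module_0.fatbin", /*ModificationTime=*/0,
                   llvm::MemoryBuffer::getMemBuffer("<fatbin bytes>"));

    // ...and a FileManager built on top of Overlay (as the host
    // CompilerInstance's is) can read it back like a regular file.
    auto Buf = Overlay->getBufferForFile("/incr_module_0.fatbin");
    (void)Buf;
  }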
>From ee374eed24eceafe8cb8c3f69c603bc4965ed534 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat...@gmail.com>
Date: Tue, 22 Apr 2025 11:22:40 +0530
Subject: [PATCH 3/5] Fix Parsing for DeviceParser

---
 clang/lib/Interpreter/DeviceOffload.cpp | 84 +++++++++++++++++++------
 clang/lib/Interpreter/DeviceOffload.h   |  7 ++-
 clang/lib/Interpreter/Interpreter.cpp   | 43 ++++++++++++-
 3 files changed, 113 insertions(+), 21 deletions(-)

diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
index 9a7be006250a0..6fe78905a5bec 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -28,10 +28,10 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
     std::unique_ptr<CompilerInstance> DeviceInstance,
     CompilerInstance &HostInstance,
     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
-    llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs)
+    llvm::Error &Err, std::list<PartialTranslationUnit> &PTUs)
     : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS),
-      CodeGenOpts(HostInstance.getCodeGenOpts()),
-      TargetOpts(HostInstance.getTargetOpts()) {
+      CodeGenOpts(DeviceInstance->getCodeGenOpts()),
+      TargetOpts(DeviceInstance->getTargetOpts()) {
   if (Err)
     return;
   StringRef Arch = TargetOpts.CPU;
@@ -51,37 +51,61 @@ IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
   if (!PTU)
     return PTU.takeError();
 
-  auto PTX = GeneratePTX();
-  if (!PTX)
-    return PTX.takeError();
+  // auto PTX = GeneratePTX();
+  // if (!PTX)
+  //   return PTX.takeError();
 
-  auto Err = GenerateFatbinary();
-  if (Err)
-    return std::move(Err);
+  // auto Err = GenerateFatbinary();
+  // if (Err)
+  //   return std::move(Err);
 
-  std::string FatbinFileName =
-      "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
-  VFS->addFile(FatbinFileName, 0,
-               llvm::MemoryBuffer::getMemBuffer(
-                   llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
-                   "", false));
+  // std::string FatbinFileName =
+  //     "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
+  // VFS->addFile(FatbinFileName, 0,
+  //              llvm::MemoryBuffer::getMemBuffer(
+  //                  llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
+  //                  "", false));
 
-  CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
+  // CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
 
-  FatbinContent.clear();
+  // FatbinContent.clear();
 
   return PTU;
 }
 
+PartialTranslationUnit &
+IncrementalCUDADeviceParser::RegisterPTU(TranslationUnitDecl *TU) {
+  llvm::errs() << "[CUDA] RegisterPTU called. TU = " << TU << "\n";
+  PTUs.push_back(PartialTranslationUnit());
+  llvm::errs() << "[CUDA] PTUs size after push: " << PTUs.size() << "\n";
+  PartialTranslationUnit &LastPTU = PTUs.back();
+  LastPTU.TUPart = TU;
+  return LastPTU;
+}
+
 llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
+  llvm::errs() << "[CUDA] Generating PTX. PTUs size: " << PTUs.size() << "\n";
+  assert(!PTUs.empty() && "PTUs list is empty during PTX generation!");
   auto &PTU = PTUs.back();
   std::string Error;
 
+  if (!PTU.TheModule) {
+    llvm::errs() << "[CUDA] Error: PTU has no associated Module!\n";
+  } else {
+    llvm::errs() << "[CUDA] Module Triple: " << PTU.TheModule->getTargetTriple().str() << "\n";
+  }
+
+  llvm::errs() << ">>> PTU Module Target Triple: " << PTU.TheModule->getTargetTriple().str() << "\n";
+  llvm::errs() << ">>> Using CPU: " << TargetOpts.CPU << "\n";
+
   const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
       PTU.TheModule->getTargetTriple(), Error);
-  if (!Target)
+  if (!Target) {
+    llvm::errs() << ">>> Failed to lookup target: " << Error << "\n";
     return llvm::make_error<llvm::StringError>(std::move(Error),
                                                std::error_code());
+  }
+
   llvm::TargetOptions TO = llvm::TargetOptions();
   llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
       PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO,
@@ -173,9 +197,33 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
 
   FatbinContent.append(PTXCode.begin(), PTXCode.end());
 
+  std::string FatbinFileName =
+      "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
+
+  VFS->addFile(FatbinFileName, 0,
+               llvm::MemoryBuffer::getMemBuffer(
+                   llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
+                   "", false));
+
+  CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
+
+  FatbinContent.clear();
+
   return llvm::Error::success();
 }
 
+// void IncrementalCUDADeviceParser::EmitFatbinaryToVFS(std::string &FatbinFileName) {
+//   std::string FatbinFileName = "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
+
+//   VFS->addFile(FatbinFileName, 0,
+//                llvm::MemoryBuffer::getMemBuffer(
+//                    llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
+//                    "", false));
+
+//   CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
+//   FatbinContent.clear();
+// }
+
 IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
 
 } // namespace clang
diff --git a/clang/lib/Interpreter/DeviceOffload.h b/clang/lib/Interpreter/DeviceOffload.h
index b9a1acab004c3..6da3b65ae72d1 100644
--- a/clang/lib/Interpreter/DeviceOffload.h
+++ b/clang/lib/Interpreter/DeviceOffload.h
@@ -24,14 +24,14 @@
 class CodeGenOptions;
 class TargetOptions;
 
 class IncrementalCUDADeviceParser : public IncrementalParser {
-  const std::list<PartialTranslationUnit> &PTUs;
+  std::list<PartialTranslationUnit> &PTUs;
 
 public:
   IncrementalCUDADeviceParser(
       std::unique_ptr<CompilerInstance> DeviceInstance,
       CompilerInstance &HostInstance,
      llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS,
-      llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs);
+      llvm::Error &Err, std::list<PartialTranslationUnit> &PTUs);
 
   llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input) override;
 
@@ -41,6 +41,9 @@ class IncrementalCUDADeviceParser : public IncrementalParser {
   // Generate fatbinary contents in memory
   llvm::Error GenerateFatbinary();
 
+  PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU);
+  // llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input) override;
+
   ~IncrementalCUDADeviceParser();
 
 protected:
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 2d1d6847e358b..6b4bafd8afc32 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -561,9 +561,50 @@ Interpreter::Parse(llvm::StringRef Code) {
   // If we have a device parser, parse it first. The generated code will be
   // included in the host compilation
   if (DeviceParser) {
+    llvm::errs() << "[CUDA] Parsing device code...\n";
     llvm::Expected<TranslationUnitDecl *> DeviceTU = DeviceParser->Parse(Code);
-    if (auto E = DeviceTU.takeError())
+    if (auto E = DeviceTU.takeError()) {
+      llvm::errs() << "[CUDA] Device Parse failed!\n";
       return std::move(E);
+    }
+    llvm::errs() << "[CUDA] Device parse successful.\n";
+
+    auto *CudaParser = llvm::cast<IncrementalCUDADeviceParser>(DeviceParser.get());
+    llvm::errs() << "[CUDA] Registering device PTU...\n";
+
+    PartialTranslationUnit &DevicePTU = CudaParser->RegisterPTU(*DeviceTU);
+    FrontendAction *WrappedAct = Act->getWrapped();
+    if (!WrappedAct->hasIRSupport()) {
+      llvm::errs() << "[CUDA] Error: WrappedAct has no IR support!\n";
+      return llvm::make_error<llvm::StringError>(
+          "Device action has no IR support", llvm::inconvertibleErrorCode());
+    }
+
+    CodeGenerator *CG = static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator();
+    if (!CG) {
+      llvm::errs() << "[CUDA] Error: CodeGen is null!\n";
+      return llvm::make_error<llvm::StringError>(
+          "Device CodeGen is null", llvm::inconvertibleErrorCode());
+    }
+    std::unique_ptr<llvm::Module> M(CG->ReleaseModule());
+    if (!M) {
+      llvm::errs() << "[CUDA] Error: Released module is null!\n";
+      return llvm::make_error<llvm::StringError>(
+          "Device LLVM module is null", llvm::inconvertibleErrorCode());
+    }
+    static unsigned ID = 0;
+    CG->StartModule("incr_module_" + std::to_string(ID++), M->getContext());
+    DevicePTU.TheModule = std::move(M);
+    llvm::errs() << "[CUDA] Assigned LLVM module to DevicePTU\n";
+    llvm::errs() << "[CUDA] Registered device PTU. TUPart=" << DevicePTU.TUPart << "\n";
+    llvm::errs() << "[CUDA] Generating PTX...\n";
+    llvm::Expected<llvm::StringRef> PTX = CudaParser->GeneratePTX();
+    if (!PTX)
+      return PTX.takeError();
+
+    llvm::Error Err = CudaParser->GenerateFatbinary();
+    if (Err)
+      return std::move(Err);
   }
 
   // Tell the interpreter sliently ignore unused expressions since value
>From 1f4948f12274234e422b50a3e8c52fdb7f3d85e7 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat...@gmail.com>
Date: Wed, 23 Apr 2025 12:07:02 +0530
Subject: [PATCH 4/5] fix codegen opts

---
 clang/lib/Interpreter/DeviceOffload.cpp | 12 +-----------
 clang/lib/Interpreter/DeviceOffload.h   |  3 ---
 clang/lib/Interpreter/Interpreter.cpp   | 26 +-------------------------
 3 files changed, 2 insertions(+), 39 deletions(-)

diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
index 6fe78905a5bec..9db598230d014 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -30,7 +30,7 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
     llvm::Error &Err, std::list<PartialTranslationUnit> &PTUs)
     : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS),
-      CodeGenOpts(DeviceInstance->getCodeGenOpts()),
+      CodeGenOpts(HostInstance.getCodeGenOpts()),
       TargetOpts(DeviceInstance->getTargetOpts()) {
   if (Err)
     return;
@@ -73,16 +73,6 @@ IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
   return PTU;
 }
 
-PartialTranslationUnit &
-IncrementalCUDADeviceParser::RegisterPTU(TranslationUnitDecl *TU) {
-  llvm::errs() << "[CUDA] RegisterPTU called. TU = " << TU << "\n";
-  PTUs.push_back(PartialTranslationUnit());
-  llvm::errs() << "[CUDA] PTUs size after push: " << PTUs.size() << "\n";
-  PartialTranslationUnit &LastPTU = PTUs.back();
-  LastPTU.TUPart = TU;
-  return LastPTU;
-}
-
 llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
   llvm::errs() << "[CUDA] Generating PTX. PTUs size: " << PTUs.size() << "\n";
   assert(!PTUs.empty() && "PTUs list is empty during PTX generation!");
diff --git a/clang/lib/Interpreter/DeviceOffload.h b/clang/lib/Interpreter/DeviceOffload.h
index 6da3b65ae72d1..23d89046c09e1 100644
--- a/clang/lib/Interpreter/DeviceOffload.h
+++ b/clang/lib/Interpreter/DeviceOffload.h
@@ -41,9 +41,6 @@ class IncrementalCUDADeviceParser : public IncrementalParser {
   // Generate fatbinary contents in memory
   llvm::Error GenerateFatbinary();
 
-  PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU);
-  // llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input) override;
-
   ~IncrementalCUDADeviceParser();
 
 protected:
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 6b4bafd8afc32..fbc6b7707f294 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -572,32 +572,8 @@ Interpreter::Parse(llvm::StringRef Code) {
     auto *CudaParser = llvm::cast<IncrementalCUDADeviceParser>(DeviceParser.get());
     llvm::errs() << "[CUDA] Registering device PTU...\n";
 
-    PartialTranslationUnit &DevicePTU = CudaParser->RegisterPTU(*DeviceTU);
-    FrontendAction *WrappedAct = Act->getWrapped();
-    if (!WrappedAct->hasIRSupport()) {
-      llvm::errs() << "[CUDA] Error: WrappedAct has no IR support!\n";
-      return llvm::make_error<llvm::StringError>(
-          "Device action has no IR support", llvm::inconvertibleErrorCode());
-    }
+    PartialTranslationUnit &DevicePTU = RegisterPTU(*DeviceTU);
 
-    CodeGenerator *CG = static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator();
-    if (!CG) {
-      llvm::errs() << "[CUDA] Error: CodeGen is null!\n";
-      return llvm::make_error<llvm::StringError>(
-          "Device CodeGen is null", llvm::inconvertibleErrorCode());
-    }
-    std::unique_ptr<llvm::Module> M(CG->ReleaseModule());
-    if (!M) {
-      llvm::errs() << "[CUDA] Error: Released module is null!\n";
-      return llvm::make_error<llvm::StringError>(
-          "Device LLVM module is null", llvm::inconvertibleErrorCode());
-    }
-    static unsigned ID = 0;
-    CG->StartModule("incr_module_" + std::to_string(ID++), M->getContext());
-    DevicePTU.TheModule = std::move(M);
-    llvm::errs() << "[CUDA] Assigned LLVM module to DevicePTU\n";
-    llvm::errs() << "[CUDA] Registered device PTU. TUPart=" << DevicePTU.TUPart << "\n";
-    llvm::errs() << "[CUDA] Generating PTX...\n";
     llvm::Expected<llvm::StringRef> PTX = CudaParser->GeneratePTX();
     if (!PTX)
       return PTX.takeError();
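The one-line DeviceInstance-to-HostInstance switch above is easy to miss, so here is a toy illustration (not part of the patch) of why the parser has to hold the host's CodeGenOptions: CudaGpuBinaryFileName is consumed by host CodeGen when it emits the kernel-registration glue, so writing it into the device CI's options would never be seen. The struct below is a stand-in; the real field lives on clang::CodeGenOptions.

  // Toy model of the handshake between the device parser and host CodeGen.
  #include <cassert>
  #include <string>

  struct CodeGenOptsLike {                    // stand-in for clang::CodeGenOptions
    std::string CudaGpuBinaryFileName;
  };

  int main() {
    CodeGenOptsLike HostOpts;                 // owned by the host CompilerInstance
    CodeGenOptsLike &ParserView = HostOpts;   // what the device parser must capture

    // Device side: GenerateFatbinary() publishes the in-memory fatbin path.
    ParserView.CudaGpuBinaryFileName = "/incr_module_1.fatbin";

    // Host side: CodeGen reads the same options object and embeds that file.
    // Had ParserView aliased the device CI's options, this name would never
    // reach host CodeGen.
    assert(HostOpts.CudaGpuBinaryFileName == "/incr_module_1.fatbin");
    return 0;
  }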
>From 9ff990f23148d35b6c3c071b285650a8deed4b86 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat...@gmail.com>
Date: Thu, 24 Apr 2025 12:47:53 +0530
Subject: [PATCH 5/5] Fixed cuda flag

---
 clang/include/clang/Interpreter/Interpreter.h | 10 ++--
 clang/lib/Interpreter/DeviceOffload.cpp       | 48 ++----------------
 clang/lib/Interpreter/Interpreter.cpp         | 50 ++++++++++---------
 3 files changed, 37 insertions(+), 71 deletions(-)

diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h
index 7425797c55297..1b228e0917d02 100644
--- a/clang/include/clang/Interpreter/Interpreter.h
+++ b/clang/include/clang/Interpreter/Interpreter.h
@@ -95,6 +95,9 @@ class Interpreter {
   // An optional parser for CUDA offloading
   std::unique_ptr<IncrementalParser> DeviceParser;
 
+  // An optional action for CUDA offloading
+  std::unique_ptr<IncrementalAction> DeviceAct;
+
   /// List containing information about each incrementally parsed piece of code.
   std::list<PartialTranslationUnit> PTUs;
 
@@ -176,10 +179,11 @@ class Interpreter {
   llvm::Expected<Expr *> ExtractValueFromExpr(Expr *E);
   llvm::Expected<llvm::orc::ExecutorAddr> CompileDtorCall(CXXRecordDecl *CXXRD);
 
-  CodeGenerator *getCodeGen() const;
-  std::unique_ptr<llvm::Module> GenModule();
+  CodeGenerator *getCodeGen(IncrementalAction *Action = nullptr) const;
+  std::unique_ptr<llvm::Module> GenModule(IncrementalAction *Action = nullptr);
   PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU,
-                                      std::unique_ptr<llvm::Module> M = {});
+                                      std::unique_ptr<llvm::Module> M = {},
+                                      IncrementalAction *Action = nullptr);
 
   // A cache for the compiled destructors used to for de-allocation of managed
   // clang::Values.
diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
index 9db598230d014..6977d7fa674ab 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -51,47 +51,16 @@ IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
   if (!PTU)
     return PTU.takeError();
 
-  // auto PTX = GeneratePTX();
-  // if (!PTX)
-  //   return PTX.takeError();
-
-  // auto Err = GenerateFatbinary();
-  // if (Err)
-  //   return std::move(Err);
-
-  // std::string FatbinFileName =
-  //     "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
-  // VFS->addFile(FatbinFileName, 0,
-  //              llvm::MemoryBuffer::getMemBuffer(
-  //                  llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
-  //                  "", false));
-
-  // CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
-
-  // FatbinContent.clear();
-
   return PTU;
 }
 
 llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
-  llvm::errs() << "[CUDA] Generating PTX. PTUs size: " << PTUs.size() << "\n";
-  assert(!PTUs.empty() && "PTUs list is empty during PTX generation!");
   auto &PTU = PTUs.back();
   std::string Error;
 
-  if (!PTU.TheModule) {
-    llvm::errs() << "[CUDA] Error: PTU has no associated Module!\n";
-  } else {
-    llvm::errs() << "[CUDA] Module Triple: " << PTU.TheModule->getTargetTriple().str() << "\n";
-  }
-
-  llvm::errs() << ">>> PTU Module Target Triple: " << PTU.TheModule->getTargetTriple().str() << "\n";
-  llvm::errs() << ">>> Using CPU: " << TargetOpts.CPU << "\n";
-
   const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
       PTU.TheModule->getTargetTriple(), Error);
   if (!Target) {
-    llvm::errs() << ">>> Failed to lookup target: " << Error << "\n";
     return llvm::make_error<llvm::StringError>(std::move(Error),
                                                std::error_code());
   }
@@ -187,8 +156,9 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
 
   FatbinContent.append(PTXCode.begin(), PTXCode.end());
 
-  std::string FatbinFileName =
-      "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
+  auto &PTU = PTUs.back();
+
+  std::string FatbinFileName = "/" + PTU.TheModule->getName().str() + ".fatbin";
 
   VFS->addFile(FatbinFileName, 0,
                llvm::MemoryBuffer::getMemBuffer(
                    llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
                    "", false));
@@ -202,18 +172,6 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
   return llvm::Error::success();
 }
 
-// void IncrementalCUDADeviceParser::EmitFatbinaryToVFS(std::string &FatbinFileName) {
-//   std::string FatbinFileName = "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
-
-//   VFS->addFile(FatbinFileName, 0,
-//                llvm::MemoryBuffer::getMemBuffer(
-//                    llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
-//                    "", false));
-
-//   CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
-//   FatbinContent.clear();
-// }
-
 IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
 
 } // namespace clang
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index fbc6b7707f294..e0865c6e9dfdf 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -459,14 +459,28 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI,
   if (Err)
     return std::move(Err);
 
+  CompilerInstance &HostCI = *(Interp->getCompilerInstance());
+
   if (DeviceCI) {
-    DeviceCI->ExecuteAction(*Interp->Act);
+    Interp->DeviceAct = std::make_unique<IncrementalAction>(
+        *DeviceCI, *Interp->TSCtx->getContext(), Err, *Interp);
+
+    if (Err)
+      return std::move(Err);
+
+    DeviceCI->ExecuteAction(*Interp->DeviceAct);
 
+    // avoid writing fat binary to disk using an in-memory virtual file system
     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
         std::make_unique<llvm::vfs::InMemoryFileSystem>();
+    llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS =
+        std::make_unique<llvm::vfs::OverlayFileSystem>(
+            llvm::vfs::getRealFileSystem());
+    OverlayVFS->pushOverlay(IMVFS);
+    HostCI.createFileManager(OverlayVFS);
 
     auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
-        std::move(DeviceCI), *Interp->getCompilerInstance(), IMVFS, Err,
+        std::move(DeviceCI), HostCI, IMVFS, Err,
         Interp->PTUs);
 
     if (Err)
@@ -489,15 +503,6 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI,
 llvm::Expected<std::unique_ptr<Interpreter>>
 Interpreter::createWithCUDA(std::unique_ptr<CompilerInstance> CI,
                             std::unique_ptr<CompilerInstance> DCI) {
-  // avoid writing fat binary to disk using an in-memory virtual file system
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
-      std::make_unique<llvm::vfs::InMemoryFileSystem>();
-  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS =
-      std::make_unique<llvm::vfs::OverlayFileSystem>(
-          llvm::vfs::getRealFileSystem());
-  OverlayVFS->pushOverlay(IMVFS);
-  CI->createFileManager(OverlayVFS);
-
   return Interpreter::create(std::move(CI), std::move(DCI));
 }
 
@@ -536,15 +541,16 @@ size_t Interpreter::getEffectivePTUSize() const {
 
 PartialTranslationUnit &
 Interpreter::RegisterPTU(TranslationUnitDecl *TU,
-                         std::unique_ptr<llvm::Module> M /*={}*/) {
+                         std::unique_ptr<llvm::Module> M /*={}*/,
+                         IncrementalAction *Action) {
   PTUs.emplace_back(PartialTranslationUnit());
   PartialTranslationUnit &LastPTU = PTUs.back();
   LastPTU.TUPart = TU;
 
   if (!M)
-    M = GenModule();
+    M = GenModule(Action);
 
-  assert((!getCodeGen() || M) && "Must have a llvm::Module at this point");
+  assert((!getCodeGen(Action) || M) && "Must have a llvm::Module at this point");
 
   LastPTU.TheModule = std::move(M);
   LLVM_DEBUG(llvm::dbgs() << "compile-ptu " << PTUs.size() - 1
@@ -561,18 +567,14 @@ Interpreter::Parse(llvm::StringRef Code) {
   // If we have a device parser, parse it first. The generated code will be
   // included in the host compilation
   if (DeviceParser) {
-    llvm::errs() << "[CUDA] Parsing device code...\n";
     llvm::Expected<TranslationUnitDecl *> DeviceTU = DeviceParser->Parse(Code);
     if (auto E = DeviceTU.takeError()) {
-      llvm::errs() << "[CUDA] Device Parse failed!\n";
       return std::move(E);
     }
-    llvm::errs() << "[CUDA] Device parse successful.\n";
 
     auto *CudaParser = llvm::cast<IncrementalCUDADeviceParser>(DeviceParser.get());
-    llvm::errs() << "[CUDA] Registering device PTU...\n";
-    PartialTranslationUnit &DevicePTU = RegisterPTU(*DeviceTU);
+    PartialTranslationUnit &DevicePTU = RegisterPTU(*DeviceTU, nullptr, DeviceAct.get());
 
     llvm::Expected<llvm::StringRef> PTX = CudaParser->GeneratePTX();
     if (!PTX)
@@ -757,9 +759,9 @@ llvm::Error Interpreter::LoadDynamicLibrary(const char *name) {
   return llvm::Error::success();
 }
 
-std::unique_ptr<llvm::Module> Interpreter::GenModule() {
+std::unique_ptr<llvm::Module> Interpreter::GenModule(IncrementalAction *Action) {
   static unsigned ID = 0;
-  if (CodeGenerator *CG = getCodeGen()) {
+  if (CodeGenerator *CG = getCodeGen(Action)) {
     // Clang's CodeGen is designed to work with a single llvm::Module. In many
     // cases for convenience various CodeGen parts have a reference to the
     // llvm::Module (TheModule or Module) which does not change when a new
@@ -781,8 +783,10 @@ std::unique_ptr<llvm::Module> Interpreter::GenModule() {
   return nullptr;
 }
 
-CodeGenerator *Interpreter::getCodeGen() const {
-  FrontendAction *WrappedAct = Act->getWrapped();
+CodeGenerator *Interpreter::getCodeGen(IncrementalAction *Action) const {
+  if (!Action)
+    Action = Act.get();
+  FrontendAction *WrappedAct = Action->getWrapped();
   if (!WrappedAct->hasIRSupport())
     return nullptr;
   return static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator();

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits