https://github.com/anutosh491 updated https://github.com/llvm/llvm-project/pull/136404
>From 1f7205615f8d11c1b58e2a2760f85663f97767c5 Mon Sep 17 00:00:00 2001 From: anutosh491 <andersonbhat...@gmail.com> Date: Fri, 18 Apr 2025 18:45:00 +0530 Subject: [PATCH 1/3] Fix cuda flag with clang-repl --- clang/include/clang/Interpreter/Interpreter.h | 13 ++- clang/lib/Interpreter/DeviceOffload.cpp | 43 +++++---- clang/lib/Interpreter/DeviceOffload.h | 4 +- clang/lib/Interpreter/Interpreter.cpp | 89 ++++++++++++------- 4 files changed, 88 insertions(+), 61 deletions(-) diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h index b1b63aedf86ab..1b228e0917d02 100644 --- a/clang/include/clang/Interpreter/Interpreter.h +++ b/clang/include/clang/Interpreter/Interpreter.h @@ -95,6 +95,9 @@ class Interpreter { // An optional parser for CUDA offloading std::unique_ptr<IncrementalParser> DeviceParser; + // An optional action for CUDA offloading + std::unique_ptr<IncrementalAction> DeviceAct; + /// List containing information about each incrementally parsed piece of code. std::list<PartialTranslationUnit> PTUs; @@ -129,7 +132,8 @@ class Interpreter { public: virtual ~Interpreter(); static llvm::Expected<std::unique_ptr<Interpreter>> - create(std::unique_ptr<CompilerInstance> CI); + create(std::unique_ptr<CompilerInstance> CI, + std::unique_ptr<CompilerInstance> DeviceCI = nullptr); static llvm::Expected<std::unique_ptr<Interpreter>> createWithCUDA(std::unique_ptr<CompilerInstance> CI, std::unique_ptr<CompilerInstance> DCI); @@ -175,10 +179,11 @@ class Interpreter { llvm::Expected<Expr *> ExtractValueFromExpr(Expr *E); llvm::Expected<llvm::orc::ExecutorAddr> CompileDtorCall(CXXRecordDecl *CXXRD); - CodeGenerator *getCodeGen() const; - std::unique_ptr<llvm::Module> GenModule(); + CodeGenerator *getCodeGen(IncrementalAction *Action = nullptr) const; + std::unique_ptr<llvm::Module> GenModule(IncrementalAction *Action = nullptr); PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU, - std::unique_ptr<llvm::Module> M = {}); + std::unique_ptr<llvm::Module> M = {}, + IncrementalAction *Action = nullptr); // A cache for the compiled destructors used to for de-allocation of managed // clang::Values. diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp index 1999d63d1aa04..6977d7fa674ab 100644 --- a/clang/lib/Interpreter/DeviceOffload.cpp +++ b/clang/lib/Interpreter/DeviceOffload.cpp @@ -28,20 +28,21 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser( std::unique_ptr<CompilerInstance> DeviceInstance, CompilerInstance &HostInstance, llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS, - llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs) + llvm::Error &Err, std::list<PartialTranslationUnit> &PTUs) : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS), CodeGenOpts(HostInstance.getCodeGenOpts()), - TargetOpts(HostInstance.getTargetOpts()) { + TargetOpts(DeviceInstance->getTargetOpts()) { if (Err) return; - DeviceCI = std::move(DeviceInstance); StringRef Arch = TargetOpts.CPU; if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) { + DeviceInstance.release(); Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>( "Invalid CUDA architecture", llvm::inconvertibleErrorCode())); return; } + DeviceCI = std::move(DeviceInstance); } llvm::Expected<TranslationUnitDecl *> @@ -50,25 +51,6 @@ IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) { if (!PTU) return PTU.takeError(); - auto PTX = GeneratePTX(); - if (!PTX) - return PTX.takeError(); - - auto Err = GenerateFatbinary(); - if (Err) - return std::move(Err); - - std::string FatbinFileName = - "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin"; - VFS->addFile(FatbinFileName, 0, - llvm::MemoryBuffer::getMemBuffer( - llvm::StringRef(FatbinContent.data(), FatbinContent.size()), - "", false)); - - CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName; - - FatbinContent.clear(); - return PTU; } @@ -78,9 +60,11 @@ llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() { const llvm::Target *Target = llvm::TargetRegistry::lookupTarget( PTU.TheModule->getTargetTriple(), Error); - if (!Target) + if (!Target) { return llvm::make_error<llvm::StringError>(std::move(Error), std::error_code()); + } + llvm::TargetOptions TO = llvm::TargetOptions(); llvm::TargetMachine *TargetMachine = Target->createTargetMachine( PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO, @@ -172,6 +156,19 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() { FatbinContent.append(PTXCode.begin(), PTXCode.end()); + auto &PTU = PTUs.back(); + + std::string FatbinFileName = "/" + PTU.TheModule->getName().str() + ".fatbin"; + + VFS->addFile(FatbinFileName, 0, + llvm::MemoryBuffer::getMemBuffer( + llvm::StringRef(FatbinContent.data(), FatbinContent.size()), + "", false)); + + CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName; + + FatbinContent.clear(); + return llvm::Error::success(); } diff --git a/clang/lib/Interpreter/DeviceOffload.h b/clang/lib/Interpreter/DeviceOffload.h index b9a1acab004c3..23d89046c09e1 100644 --- a/clang/lib/Interpreter/DeviceOffload.h +++ b/clang/lib/Interpreter/DeviceOffload.h @@ -24,14 +24,14 @@ class CodeGenOptions; class TargetOptions; class IncrementalCUDADeviceParser : public IncrementalParser { - const std::list<PartialTranslationUnit> &PTUs; + std::list<PartialTranslationUnit> &PTUs; public: IncrementalCUDADeviceParser( std::unique_ptr<CompilerInstance> DeviceInstance, CompilerInstance &HostInstance, llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS, - llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs); + llvm::Error &Err, std::list<PartialTranslationUnit> &PTUs); llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input) override; diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index f8c8d0a425659..e0865c6e9dfdf 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -451,13 +451,44 @@ const char *const Runtimes = R"( )"; llvm::Expected<std::unique_ptr<Interpreter>> -Interpreter::create(std::unique_ptr<CompilerInstance> CI) { +Interpreter::create(std::unique_ptr<CompilerInstance> CI, + std::unique_ptr<CompilerInstance> DeviceCI) { llvm::Error Err = llvm::Error::success(); auto Interp = std::unique_ptr<Interpreter>(new Interpreter(std::move(CI), Err)); if (Err) return std::move(Err); + CompilerInstance &HostCI = *(Interp->getCompilerInstance()); + + if (DeviceCI) { + Interp->DeviceAct = std::make_unique<IncrementalAction>( + *DeviceCI, *Interp->TSCtx->getContext(), Err, *Interp); + + if (Err) + return std::move(Err); + + DeviceCI->ExecuteAction(*Interp->DeviceAct); + + // avoid writing fat binary to disk using an in-memory virtual file system + llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS = + std::make_unique<llvm::vfs::InMemoryFileSystem>(); + llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS = + std::make_unique<llvm::vfs::OverlayFileSystem>( + llvm::vfs::getRealFileSystem()); + OverlayVFS->pushOverlay(IMVFS); + HostCI.createFileManager(OverlayVFS); + + auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>( + std::move(DeviceCI), HostCI, IMVFS, Err, + Interp->PTUs); + + if (Err) + return std::move(Err); + + Interp->DeviceParser = std::move(DeviceParser); + } + // Add runtime code and set a marker to hide it from user code. Undo will not // go through that. auto PTU = Interp->Parse(Runtimes); @@ -472,29 +503,7 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI) { llvm::Expected<std::unique_ptr<Interpreter>> Interpreter::createWithCUDA(std::unique_ptr<CompilerInstance> CI, std::unique_ptr<CompilerInstance> DCI) { - // avoid writing fat binary to disk using an in-memory virtual file system - llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS = - std::make_unique<llvm::vfs::InMemoryFileSystem>(); - llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS = - std::make_unique<llvm::vfs::OverlayFileSystem>( - llvm::vfs::getRealFileSystem()); - OverlayVFS->pushOverlay(IMVFS); - CI->createFileManager(OverlayVFS); - - auto Interp = Interpreter::create(std::move(CI)); - if (auto E = Interp.takeError()) - return std::move(E); - - llvm::Error Err = llvm::Error::success(); - auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>( - std::move(DCI), *(*Interp)->getCompilerInstance(), IMVFS, Err, - (*Interp)->PTUs); - if (Err) - return std::move(Err); - - (*Interp)->DeviceParser = std::move(DeviceParser); - - return Interp; + return Interpreter::create(std::move(CI), std::move(DCI)); } const CompilerInstance *Interpreter::getCompilerInstance() const { @@ -532,15 +541,16 @@ size_t Interpreter::getEffectivePTUSize() const { PartialTranslationUnit & Interpreter::RegisterPTU(TranslationUnitDecl *TU, - std::unique_ptr<llvm::Module> M /*={}*/) { + std::unique_ptr<llvm::Module> M /*={}*/, + IncrementalAction *Action) { PTUs.emplace_back(PartialTranslationUnit()); PartialTranslationUnit &LastPTU = PTUs.back(); LastPTU.TUPart = TU; if (!M) - M = GenModule(); + M = GenModule(Action); - assert((!getCodeGen() || M) && "Must have a llvm::Module at this point"); + assert((!getCodeGen(Action) || M) && "Must have a llvm::Module at this point"); LastPTU.TheModule = std::move(M); LLVM_DEBUG(llvm::dbgs() << "compile-ptu " << PTUs.size() - 1 @@ -558,8 +568,21 @@ Interpreter::Parse(llvm::StringRef Code) { // included in the host compilation if (DeviceParser) { llvm::Expected<TranslationUnitDecl *> DeviceTU = DeviceParser->Parse(Code); - if (auto E = DeviceTU.takeError()) + if (auto E = DeviceTU.takeError()) { return std::move(E); + } + + auto *CudaParser = llvm::cast<IncrementalCUDADeviceParser>(DeviceParser.get()); + + PartialTranslationUnit &DevicePTU = RegisterPTU(*DeviceTU, nullptr, DeviceAct.get()); + + llvm::Expected<llvm::StringRef> PTX = CudaParser->GeneratePTX(); + if (!PTX) + return PTX.takeError(); + + llvm::Error Err = CudaParser->GenerateFatbinary(); + if (Err) + return std::move(Err); } // Tell the interpreter sliently ignore unused expressions since value @@ -736,9 +759,9 @@ llvm::Error Interpreter::LoadDynamicLibrary(const char *name) { return llvm::Error::success(); } -std::unique_ptr<llvm::Module> Interpreter::GenModule() { +std::unique_ptr<llvm::Module> Interpreter::GenModule(IncrementalAction *Action) { static unsigned ID = 0; - if (CodeGenerator *CG = getCodeGen()) { + if (CodeGenerator *CG = getCodeGen(Action)) { // Clang's CodeGen is designed to work with a single llvm::Module. In many // cases for convenience various CodeGen parts have a reference to the // llvm::Module (TheModule or Module) which does not change when a new @@ -760,8 +783,10 @@ std::unique_ptr<llvm::Module> Interpreter::GenModule() { return nullptr; } -CodeGenerator *Interpreter::getCodeGen() const { - FrontendAction *WrappedAct = Act->getWrapped(); +CodeGenerator *Interpreter::getCodeGen(IncrementalAction *Action) const { + if (!Action) + Action = Act.get(); + FrontendAction *WrappedAct = Action->getWrapped(); if (!WrappedAct->hasIRSupport()) return nullptr; return static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator(); >From 9955a9971c83d7e9c5d138f593c1cb3fcbd68257 Mon Sep 17 00:00:00 2001 From: anutosh491 <andersonbhat...@gmail.com> Date: Thu, 24 Apr 2025 17:32:23 +0530 Subject: [PATCH 2/3] Enable virtual file system to be used --- clang/include/clang/Interpreter/Interpreter.h | 3 +- clang/lib/Interpreter/DeviceOffload.cpp | 4 +- clang/lib/Interpreter/Interpreter.cpp | 71 ++++++++++--------- 3 files changed, 40 insertions(+), 38 deletions(-) diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h index 1b228e0917d02..59089cf639f96 100644 --- a/clang/include/clang/Interpreter/Interpreter.h +++ b/clang/include/clang/Interpreter/Interpreter.h @@ -132,8 +132,7 @@ class Interpreter { public: virtual ~Interpreter(); static llvm::Expected<std::unique_ptr<Interpreter>> - create(std::unique_ptr<CompilerInstance> CI, - std::unique_ptr<CompilerInstance> DeviceCI = nullptr); + create(std::unique_ptr<CompilerInstance> CI); static llvm::Expected<std::unique_ptr<Interpreter>> createWithCUDA(std::unique_ptr<CompilerInstance> CI, std::unique_ptr<CompilerInstance> DCI); diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp index 6977d7fa674ab..d9b00787f038d 100644 --- a/clang/lib/Interpreter/DeviceOffload.cpp +++ b/clang/lib/Interpreter/DeviceOffload.cpp @@ -60,11 +60,9 @@ llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() { const llvm::Target *Target = llvm::TargetRegistry::lookupTarget( PTU.TheModule->getTargetTriple(), Error); - if (!Target) { + if (!Target) return llvm::make_error<llvm::StringError>(std::move(Error), std::error_code()); - } - llvm::TargetOptions TO = llvm::TargetOptions(); llvm::TargetMachine *TargetMachine = Target->createTargetMachine( PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO, diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index e0865c6e9dfdf..a01dc93526635 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -451,44 +451,13 @@ const char *const Runtimes = R"( )"; llvm::Expected<std::unique_ptr<Interpreter>> -Interpreter::create(std::unique_ptr<CompilerInstance> CI, - std::unique_ptr<CompilerInstance> DeviceCI) { +Interpreter::create(std::unique_ptr<CompilerInstance> CI) { llvm::Error Err = llvm::Error::success(); auto Interp = std::unique_ptr<Interpreter>(new Interpreter(std::move(CI), Err)); if (Err) return std::move(Err); - CompilerInstance &HostCI = *(Interp->getCompilerInstance()); - - if (DeviceCI) { - Interp->DeviceAct = std::make_unique<IncrementalAction>( - *DeviceCI, *Interp->TSCtx->getContext(), Err, *Interp); - - if (Err) - return std::move(Err); - - DeviceCI->ExecuteAction(*Interp->DeviceAct); - - // avoid writing fat binary to disk using an in-memory virtual file system - llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS = - std::make_unique<llvm::vfs::InMemoryFileSystem>(); - llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS = - std::make_unique<llvm::vfs::OverlayFileSystem>( - llvm::vfs::getRealFileSystem()); - OverlayVFS->pushOverlay(IMVFS); - HostCI.createFileManager(OverlayVFS); - - auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>( - std::move(DeviceCI), HostCI, IMVFS, Err, - Interp->PTUs); - - if (Err) - return std::move(Err); - - Interp->DeviceParser = std::move(DeviceParser); - } - // Add runtime code and set a marker to hide it from user code. Undo will not // go through that. auto PTU = Interp->Parse(Runtimes); @@ -503,7 +472,43 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI, llvm::Expected<std::unique_ptr<Interpreter>> Interpreter::createWithCUDA(std::unique_ptr<CompilerInstance> CI, std::unique_ptr<CompilerInstance> DCI) { - return Interpreter::create(std::move(CI), std::move(DCI)); + // avoid writing fat binary to disk using an in-memory virtual file system + llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS = + std::make_unique<llvm::vfs::InMemoryFileSystem>(); + llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS = + std::make_unique<llvm::vfs::OverlayFileSystem>( + llvm::vfs::getRealFileSystem()); + OverlayVFS->pushOverlay(IMVFS); + CI->createFileManager(OverlayVFS); + + llvm::Expected<std::unique_ptr<Interpreter>> InterpOrErr = Interpreter::create(std::move(CI)); + if (!InterpOrErr) + return InterpOrErr; + + std::unique_ptr<Interpreter> Interp = std::move(*InterpOrErr); + + llvm::Error Err = llvm::Error::success(); + llvm::LLVMContext &LLVMCtx = *Interp->TSCtx->getContext(); + + auto DeviceAct = std::make_unique<IncrementalAction>( + *DCI, LLVMCtx, Err, *Interp); + + if (Err) + return std::move(Err); + + Interp->DeviceAct = std::move(DeviceAct); + + DCI->ExecuteAction(*Interp->DeviceAct); + + auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>( + std::move(DCI), *Interp->getCompilerInstance(), IMVFS, Err, + Interp->PTUs); + + if (Err) + return std::move(Err); + + Interp->DeviceParser = std::move(DeviceParser); + return std::move(Interp); } const CompilerInstance *Interpreter::getCompilerInstance() const { >From 77c773e8caa2dd723406b1feda18259bf47e7105 Mon Sep 17 00:00:00 2001 From: anutosh491 <andersonbhat...@gmail.com> Date: Thu, 24 Apr 2025 17:40:41 +0530 Subject: [PATCH 3/3] remove unused variable --- clang/lib/Interpreter/Interpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index a01dc93526635..4a541188c0557 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -579,7 +579,7 @@ Interpreter::Parse(llvm::StringRef Code) { auto *CudaParser = llvm::cast<IncrementalCUDADeviceParser>(DeviceParser.get()); - PartialTranslationUnit &DevicePTU = RegisterPTU(*DeviceTU, nullptr, DeviceAct.get()); + RegisterPTU(*DeviceTU, nullptr, DeviceAct.get()); llvm::Expected<llvm::StringRef> PTX = CudaParser->GeneratePTX(); if (!PTX) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits