[clang] 21fb19f - [clang-repl] : Fix clang-repl crash with --cuda flag (#136404)

via cfe-commits Fri, 25 Apr 2025 23:47:11 -0700

Author: Anutosh Bhat
Date: 2025-04-26T09:46:26+03:00
New Revision: 21fb19f3b5d572f608e959af895d781b9b24fbbd


URL: 
https://github.com/llvm/llvm-project/commit/21fb19f3b5d572f608e959af895d781b9b24fbbd
DIFF: 
https://github.com/llvm/llvm-project/commit/21fb19f3b5d572f608e959af895d781b9b24fbbd.diff

LOG: [clang-repl] : Fix clang-repl crash with --cuda flag (#136404)

`clang-repl --cuda` was previously crashing with a segmentation fault,
instead of reporting a clean error
```
(base) anutosh491@Anutoshs-MacBook-Air bin % ./clang-repl --cuda
#0 0x0000000111da4fbc llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) 
(/opt/local/libexec/llvm-20/lib/libLLVM.dylib+0x150fbc)
#1 0x0000000111da31dc llvm::sys::RunSignalHandlers() 
(/opt/local/libexec/llvm-20/lib/libLLVM.dylib+0x14f1dc)
#2 0x0000000111da5628 SignalHandler(int) 
(/opt/local/libexec/llvm-20/lib/libLLVM.dylib+0x151628)
#3 0x000000019b242de4 (/usr/lib/system/libsystem_platform.dylib+0x180482de4)
#4 0x0000000107f638d0 
clang::IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(std::__1::unique_ptr<clang::CompilerInstance,
 std::__1::default_delete<clang::CompilerInstance>>, clang::CompilerInstance&, 
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem>, llvm::Error&, 
std::__1::list<clang::PartialTranslationUnit, 
std::__1::allocator<clang::PartialTranslationUnit>> const&) 
(/opt/local/libexec/llvm-20/lib/libclang-cpp.dylib+0x216b8d0)
#5 0x0000000107f638d0 
clang::IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(std::__1::unique_ptr<clang::CompilerInstance,
 std::__1::default_delete<clang::CompilerInstance>>, clang::CompilerInstance&, 
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem>, llvm::Error&, 
std::__1::list<clang::PartialTranslationUnit, 
std::__1::allocator<clang::PartialTranslationUnit>> const&) 
(/opt/local/libexec/llvm-20/lib/libclang-cpp.dylib+0x216b8d0)
#6 0x0000000107f6bac8 
clang::Interpreter::createWithCUDA(std::__1::unique_ptr<clang::CompilerInstance,
 std::__1::default_delete<clang::CompilerInstance>>, 
std::__1::unique_ptr<clang::CompilerInstance, 
std::__1::default_delete<clang::CompilerInstance>>) 
(/opt/local/libexec/llvm-20/lib/libclang-cpp.dylib+0x2173ac8)
#7 0x000000010206f8a8 main 
(/opt/local/libexec/llvm-20/bin/clang-repl+0x1000038a8)
#8 0x000000019ae8c274 
Segmentation fault: 11
```


The underlying issue was that the `DeviceCompilerInstance` (used for
device-side CUDA compilation) was never initialized with a `Sema`, which
is required before constructing the `IncrementalCUDADeviceParser`.


https://github.com/llvm/llvm-project/blob/89687e6f383b742a3c6542dc673a84d9f82d02de/clang/lib/Interpreter/DeviceOffload.cpp#L32


https://github.com/llvm/llvm-project/blob/89687e6f383b742a3c6542dc673a84d9f82d02de/clang/lib/Interpreter/IncrementalParser.cpp#L31

Unlike the host-side `CompilerInstance` which runs `ExecuteAction`
inside the Interpreter constructor (thereby setting up Sema), the
device-side CI was passed into the parser uninitialized, leading to an
assertion or crash when accessing its internals.

To fix this, I refactored the `Interpreter::create` method to include an
optional `DeviceCI` parameter. If provided, we know we need to take care
of this instance too. Only then do we construct the
`IncrementalCUDADeviceParser`.

Added: 
    

Modified: 
    clang/include/clang/Interpreter/Interpreter.h
    clang/lib/Interpreter/DeviceOffload.cpp
    clang/lib/Interpreter/DeviceOffload.h
    clang/lib/Interpreter/Interpreter.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Interpreter/Interpreter.h 
b/clang/include/clang/Interpreter/Interpreter.h
index b1b63aedf86ab..56213f88b9e30 100644
--- a/clang/include/clang/Interpreter/Interpreter.h
+++ b/clang/include/clang/Interpreter/Interpreter.h
@@ -41,6 +41,7 @@ class CXXRecordDecl;
 class Decl;
 class IncrementalExecutor;
 class IncrementalParser;
+class IncrementalCUDADeviceParser;
 
 /// Create a pre-configured \c CompilerInstance for incremental processing.
 class IncrementalCompilerBuilder {
@@ -93,7 +94,10 @@ class Interpreter {
   std::unique_ptr<IncrementalExecutor> IncrExecutor;
 
   // An optional parser for CUDA offloading
-  std::unique_ptr<IncrementalParser> DeviceParser;
+  std::unique_ptr<IncrementalCUDADeviceParser> DeviceParser;
+
+  // An optional action for CUDA offloading
+  std::unique_ptr<IncrementalAction> DeviceAct;
 
   /// List containing information about each incrementally parsed piece of 
code.
   std::list<PartialTranslationUnit> PTUs;
@@ -175,10 +179,11 @@ class Interpreter {
   llvm::Expected<Expr *> ExtractValueFromExpr(Expr *E);
   llvm::Expected<llvm::orc::ExecutorAddr> CompileDtorCall(CXXRecordDecl 
*CXXRD);
 
-  CodeGenerator *getCodeGen() const;
-  std::unique_ptr<llvm::Module> GenModule();
+  CodeGenerator *getCodeGen(IncrementalAction *Action = nullptr) const;
+  std::unique_ptr<llvm::Module> GenModule(IncrementalAction *Action = nullptr);
   PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU,
-                                      std::unique_ptr<llvm::Module> M = {});
+                                      std::unique_ptr<llvm::Module> M = {},
+                                      IncrementalAction *Action = nullptr);
 
   // A cache for the compiled destructors used to for de-allocation of managed
   // clang::Values.

diff  --git a/clang/lib/Interpreter/DeviceOffload.cpp 
b/clang/lib/Interpreter/DeviceOffload.cpp
index 1999d63d1aa04..7d0125403ea52 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -31,10 +31,9 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
     llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs)
     : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS),
       CodeGenOpts(HostInstance.getCodeGenOpts()),
-      TargetOpts(HostInstance.getTargetOpts()) {
+      TargetOpts(DeviceInstance->getTargetOpts()) {
   if (Err)
     return;
-  DeviceCI = std::move(DeviceInstance);
   StringRef Arch = TargetOpts.CPU;
   if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
     Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
@@ -42,34 +41,7 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
                                                
llvm::inconvertibleErrorCode()));
     return;
   }
-}
-
-llvm::Expected<TranslationUnitDecl *>
-IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
-  auto PTU = IncrementalParser::Parse(Input);
-  if (!PTU)
-    return PTU.takeError();
-
-  auto PTX = GeneratePTX();
-  if (!PTX)
-    return PTX.takeError();
-
-  auto Err = GenerateFatbinary();
-  if (Err)
-    return std::move(Err);
-
-  std::string FatbinFileName =
-      "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
-  VFS->addFile(FatbinFileName, 0,
-               llvm::MemoryBuffer::getMemBuffer(
-                   llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
-                   "", false));
-
-  CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
-
-  FatbinContent.clear();
-
-  return PTU;
+  DeviceCI = std::move(DeviceInstance);
 }
 
 llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
@@ -172,6 +144,19 @@ llvm::Error 
IncrementalCUDADeviceParser::GenerateFatbinary() {
 
   FatbinContent.append(PTXCode.begin(), PTXCode.end());
 
+  const PartialTranslationUnit &PTU = PTUs.back();
+
+  std::string FatbinFileName = "/" + PTU.TheModule->getName().str() + 
".fatbin";
+
+  VFS->addFile(FatbinFileName, 0,
+               llvm::MemoryBuffer::getMemBuffer(
+                   llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
+                   "", false));
+
+  CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
+
+  FatbinContent.clear();
+
   return llvm::Error::success();
 }
 

diff  --git a/clang/lib/Interpreter/DeviceOffload.h 
b/clang/lib/Interpreter/DeviceOffload.h
index b9a1acab004c3..43645033c4840 100644
--- a/clang/lib/Interpreter/DeviceOffload.h
+++ b/clang/lib/Interpreter/DeviceOffload.h
@@ -33,8 +33,6 @@ class IncrementalCUDADeviceParser : public IncrementalParser {
       llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS,
       llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs);
 
-  llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input) override;
-
   // Generate PTX for the last PTU.
   llvm::Expected<llvm::StringRef> GeneratePTX();
 

diff  --git a/clang/lib/Interpreter/Interpreter.cpp 
b/clang/lib/Interpreter/Interpreter.cpp
index 45fa583650f03..e2950e171a77e 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -481,20 +481,34 @@ 
Interpreter::createWithCUDA(std::unique_ptr<CompilerInstance> CI,
   OverlayVFS->pushOverlay(IMVFS);
   CI->createFileManager(OverlayVFS);
 
-  auto Interp = Interpreter::create(std::move(CI));
-  if (auto E = Interp.takeError())
-    return std::move(E);
+  llvm::Expected<std::unique_ptr<Interpreter>> InterpOrErr =
+      Interpreter::create(std::move(CI));
+  if (!InterpOrErr)
+    return InterpOrErr;
+
+  std::unique_ptr<Interpreter> Interp = std::move(*InterpOrErr);
 
   llvm::Error Err = llvm::Error::success();
-  auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
-      std::move(DCI), *(*Interp)->getCompilerInstance(), IMVFS, Err,
-      (*Interp)->PTUs);
+  llvm::LLVMContext &LLVMCtx = *Interp->TSCtx->getContext();
+
+  auto DeviceAct =
+      std::make_unique<IncrementalAction>(*DCI, LLVMCtx, Err, *Interp);
+
   if (Err)
     return std::move(Err);
 
-  (*Interp)->DeviceParser = std::move(DeviceParser);
+  Interp->DeviceAct = std::move(DeviceAct);
+
+  DCI->ExecuteAction(*Interp->DeviceAct);
+
+  auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
+      std::move(DCI), *Interp->getCompilerInstance(), IMVFS, Err, 
Interp->PTUs);
+
+  if (Err)
+    return std::move(Err);
 
-  return Interp;
+  Interp->DeviceParser = std::move(DeviceParser);
+  return std::move(Interp);
 }
 
 const CompilerInstance *Interpreter::getCompilerInstance() const {
@@ -532,15 +546,17 @@ size_t Interpreter::getEffectivePTUSize() const {
 
 PartialTranslationUnit &
 Interpreter::RegisterPTU(TranslationUnitDecl *TU,
-                         std::unique_ptr<llvm::Module> M /*={}*/) {
+                         std::unique_ptr<llvm::Module> M /*={}*/,
+                         IncrementalAction *Action) {
   PTUs.emplace_back(PartialTranslationUnit());
   PartialTranslationUnit &LastPTU = PTUs.back();
   LastPTU.TUPart = TU;
 
   if (!M)
-    M = GenModule();
+    M = GenModule(Action);
 
-  assert((!getCodeGen() || M) && "Must have a llvm::Module at this point");
+  assert((!getCodeGen(Action) || M) &&
+         "Must have a llvm::Module at this point");
 
   LastPTU.TheModule = std::move(M);
   LLVM_DEBUG(llvm::dbgs() << "compile-ptu " << PTUs.size() - 1
@@ -560,6 +576,16 @@ Interpreter::Parse(llvm::StringRef Code) {
     llvm::Expected<TranslationUnitDecl *> DeviceTU = DeviceParser->Parse(Code);
     if (auto E = DeviceTU.takeError())
       return std::move(E);
+
+    RegisterPTU(*DeviceTU, nullptr, DeviceAct.get());
+
+    llvm::Expected<llvm::StringRef> PTX = DeviceParser->GeneratePTX();
+    if (!PTX)
+      return PTX.takeError();
+
+    llvm::Error Err = DeviceParser->GenerateFatbinary();
+    if (Err)
+      return std::move(Err);
   }
 
   // Tell the interpreter sliently ignore unused expressions since value
@@ -736,9 +762,10 @@ llvm::Error Interpreter::LoadDynamicLibrary(const char 
*name) {
   return llvm::Error::success();
 }
 
-std::unique_ptr<llvm::Module> Interpreter::GenModule() {
+std::unique_ptr<llvm::Module>
+Interpreter::GenModule(IncrementalAction *Action) {
   static unsigned ID = 0;
-  if (CodeGenerator *CG = getCodeGen()) {
+  if (CodeGenerator *CG = getCodeGen(Action)) {
     // Clang's CodeGen is designed to work with a single llvm::Module. In many
     // cases for convenience various CodeGen parts have a reference to the
     // llvm::Module (TheModule or Module) which does not change when a new
@@ -760,8 +787,10 @@ std::unique_ptr<llvm::Module> Interpreter::GenModule() {
   return nullptr;
 }
 
-CodeGenerator *Interpreter::getCodeGen() const {
-  FrontendAction *WrappedAct = Act->getWrapped();
+CodeGenerator *Interpreter::getCodeGen(IncrementalAction *Action) const {
+  if (!Action)
+    Action = Act.get();
+  FrontendAction *WrappedAct = Action->getWrapped();
   if (!WrappedAct->hasIRSupport())
     return nullptr;
   return static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator();


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] 21fb19f - [clang-repl] : Fix clang-repl crash with --cuda flag (#136404)

Reply via email to