[clang] [clang-repl] : Fix clang-repl crash with --cuda flag (PR #136404)

Fri Apr 18 21:33:56 PDT 2025

https://github.com/anutosh491 created https://github.com/llvm/llvm-project/pull/136404



`clang-repl --cuda` was previously crashing with a segmentation fault instead of reporting a clean error:
```
(base) anutosh491 at Anutoshs-MacBook-Air bin % ./clang-repl --cuda
#0 0x0000000111da4fbc llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/opt/local/libexec/llvm-20/lib/libLLVM.dylib+0x150fbc)
#1 0x0000000111da31dc llvm::sys::RunSignalHandlers() (/opt/local/libexec/llvm-20/lib/libLLVM.dylib+0x14f1dc)
#2 0x0000000111da5628 SignalHandler(int) (/opt/local/libexec/llvm-20/lib/libLLVM.dylib+0x151628)
#3 0x000000019b242de4 (/usr/lib/system/libsystem_platform.dylib+0x180482de4)
#4 0x0000000107f638d0 clang::IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(std::__1::unique_ptr<clang::CompilerInstance, std::__1::default_delete<clang::CompilerInstance>>, clang::CompilerInstance&, llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem>, llvm::Error&, std::__1::list<clang::PartialTranslationUnit, std::__1::allocator<clang::PartialTranslationUnit>> const&) (/opt/local/libexec/llvm-20/lib/libclang-cpp.dylib+0x216b8d0)
#5 0x0000000107f638d0 clang::IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(std::__1::unique_ptr<clang::CompilerInstance, std::__1::default_delete<clang::CompilerInstance>>, clang::CompilerInstance&, llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem>, llvm::Error&, std::__1::list<clang::PartialTranslationUnit, std::__1::allocator<clang::PartialTranslationUnit>> const&) (/opt/local/libexec/llvm-20/lib/libclang-cpp.dylib+0x216b8d0)
#6 0x0000000107f6bac8 clang::Interpreter::createWithCUDA(std::__1::unique_ptr<clang::CompilerInstance, std::__1::default_delete<clang::CompilerInstance>>, std::__1::unique_ptr<clang::CompilerInstance, std::__1::default_delete<clang::CompilerInstance>>) (/opt/local/libexec/llvm-20/lib/libclang-cpp.dylib+0x2173ac8)
#7 0x000000010206f8a8 main (/opt/local/libexec/llvm-20/bin/clang-repl+0x1000038a8)
#8 0x000000019ae8c274 
Segmentation fault: 11
```


The underlying issue was that the `DeviceCompilerInstance` (used for device-side CUDA compilation) was never initialized with a `Sema`, which is required before constructing the `IncrementalCUDADeviceParser`. 

https://github.com/llvm/llvm-project/blob/89687e6f383b742a3c6542dc673a84d9f82d02de/clang/lib/Interpreter/DeviceOffload.cpp#L32

https://github.com/llvm/llvm-project/blob/89687e6f383b742a3c6542dc673a84d9f82d02de/clang/lib/Interpreter/IncrementalParser.cpp#L31

Unlike the host-side `CompilerInstance`, which runs `ExecuteAction` inside the `Interpreter` constructor (thereby setting up `Sema`), the device-side CI was passed into the parser uninitialized, leading to an assertion failure or a crash when the parser accessed its internals.

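To fix this, I refactored the `Interpreter::create` method to take an optional `DeviceCI` parameter. If it is provided, `create` first calls `ExecuteAction` on the device instance as well, so that its `Sema` is set up, and only then constructs the `IncrementalCUDADeviceParser`. `Interpreter::createWithCUDA` now simply forwards both compiler instances to `create`.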



>From 82a2be2dd0780bd3d57f4a22d61a4b0752696934 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat491 at gmail.com>
Date: Fri, 18 Apr 2025 18:45:00 +0530
Subject: [PATCH 1/4] Fix cuda flag with clang-repl

---
 clang/include/clang/Interpreter/Interpreter.h |  3 +-
 clang/lib/Interpreter/Interpreter.cpp         | 43 +++++++++++--------
 2 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h
index b1b63aedf86ab..20d22c4437a97 100644
--- a/clang/include/clang/Interpreter/Interpreter.h
+++ b/clang/include/clang/Interpreter/Interpreter.h
@@ -129,7 +129,8 @@ class Interpreter {
 public:
   virtual ~Interpreter();
   static llvm::Expected<std::unique_ptr<Interpreter>>
-  create(std::unique_ptr<CompilerInstance> CI);
+  create(std::unique_ptr<CompilerInstance> CI,
+    std::unique_ptr<CompilerInstance> DeviceCI = nullptr);
   static llvm::Expected<std::unique_ptr<Interpreter>>
   createWithCUDA(std::unique_ptr<CompilerInstance> CI,
                  std::unique_ptr<CompilerInstance> DCI);
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index f8c8d0a425659..2002854daa2a0 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -451,20 +451,40 @@ const char *const Runtimes = R"(
 )";
 
 llvm::Expected<std::unique_ptr<Interpreter>>
-Interpreter::create(std::unique_ptr<CompilerInstance> CI) {
+Interpreter::create(std::unique_ptr<CompilerInstance> CI,
+                    std::unique_ptr<CompilerInstance> DeviceCI) {
   llvm::Error Err = llvm::Error::success();
   auto Interp =
       std::unique_ptr<Interpreter>(new Interpreter(std::move(CI), Err));
   if (Err)
     return std::move(Err);
 
-  // Add runtime code and set a marker to hide it from user code. Undo will not
-  // go through that.
+  // CUDA-specific logic (only if CUDA mode)
+  if (DeviceCI) {
+    // Ensure we initialize Sema on the device CI
+    // DeviceCI->createSema(Interp->Act->getTranslationUnitKind(), nullptr);
+    DeviceCI->ExecuteAction(*Interp->Act);
+
+    // Use a custom in-memory VFS for fatbin writing
+    llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
+        std::make_unique<llvm::vfs::InMemoryFileSystem>();
+
+    // Create the CUDA device parser
+    auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
+        std::move(DeviceCI), *Interp->getCompilerInstance(), IMVFS, Err,
+        Interp->PTUs);
+
+    if (Err)
+      return std::move(Err);
+
+    Interp->DeviceParser = std::move(DeviceParser);
+  }
+
+  // Add runtime code and set a marker to hide it from user code.
   auto PTU = Interp->Parse(Runtimes);
   if (!PTU)
     return PTU.takeError();
   Interp->markUserCodeStart();
-
   Interp->ValuePrintingInfo.resize(4);
   return std::move(Interp);
 }
@@ -481,20 +501,7 @@ Interpreter::createWithCUDA(std::unique_ptr<CompilerInstance> CI,
   OverlayVFS->pushOverlay(IMVFS);
   CI->createFileManager(OverlayVFS);
 
-  auto Interp = Interpreter::create(std::move(CI));
-  if (auto E = Interp.takeError())
-    return std::move(E);
-
-  llvm::Error Err = llvm::Error::success();
-  auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
-      std::move(DCI), *(*Interp)->getCompilerInstance(), IMVFS, Err,
-      (*Interp)->PTUs);
-  if (Err)
-    return std::move(Err);
-
-  (*Interp)->DeviceParser = std::move(DeviceParser);
-
-  return Interp;
+  return Interpreter::create(std::move(CI), std::move(DCI));
 }
 
 const CompilerInstance *Interpreter::getCompilerInstance() const {

>From ae0605f4c2b4c991074d0f91f046b9460ff4eac1 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat491 at gmail.com>
Date: Fri, 18 Apr 2025 19:33:00 +0530
Subject: [PATCH 2/4] execute action on deviceCI

---
 clang/lib/Interpreter/Interpreter.cpp | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 2002854daa2a0..b2c33480107d8 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -459,17 +459,30 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI,
   if (Err)
     return std::move(Err);
 
-  // CUDA-specific logic (only if CUDA mode)
   if (DeviceCI) {
-    // Ensure we initialize Sema on the device CI
-    // DeviceCI->createSema(Interp->Act->getTranslationUnitKind(), nullptr);
+    // Create a fresh LLVM context for the CUDA device interpreter
+    // auto DeviceLLVMCtx = std::make_unique<llvm::LLVMContext>();
+    // auto DeviceTSCtx =
+    //     std::make_unique<llvm::orc::ThreadSafeContext>(std::move(DeviceLLVMCtx));
+
+    // llvm::Error DeviceErr = llvm::Error::success();
+    // llvm::ErrorAsOutParameter EAO(&DeviceErr);
+
+    // // Create an IncrementalAction for the device CompilerInstance
+    // auto DeviceAct = std::make_unique<IncrementalAction>(
+    //     *DeviceCI, *DeviceTSCtx->getContext(), DeviceErr, *Interp);
+
+    // if (DeviceErr)
+    //   return std::move(DeviceErr);
+
+    // Execute the device-side action (this will create Sema etc.)
     DeviceCI->ExecuteAction(*Interp->Act);
 
-    // Use a custom in-memory VFS for fatbin writing
+    // Set up a virtual file system to emit fatbin to memory
     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
         std::make_unique<llvm::vfs::InMemoryFileSystem>();
 
-    // Create the CUDA device parser
+    // Create the CUDA device parser using the initialized device CI
     auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
         std::move(DeviceCI), *Interp->getCompilerInstance(), IMVFS, Err,
         Interp->PTUs);

>From fef5b992418214f51a977b32aaa5a7502565a168 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat491 at gmail.com>
Date: Sat, 19 Apr 2025 09:50:55 +0530
Subject: [PATCH 3/4] Fix releasing DeviceInstance

---
 clang/lib/Interpreter/DeviceOffload.cpp | 3 ++-
 clang/lib/Interpreter/Interpreter.cpp   | 6 +-----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
index 1999d63d1aa04..9a7be006250a0 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -34,14 +34,15 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
       TargetOpts(HostInstance.getTargetOpts()) {
   if (Err)
     return;
-  DeviceCI = std::move(DeviceInstance);
   StringRef Arch = TargetOpts.CPU;
   if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
+    DeviceInstance.release();
     Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
                                                "Invalid CUDA architecture",
                                                llvm::inconvertibleErrorCode()));
     return;
   }
+  DeviceCI = std::move(DeviceInstance);
 }
 
 llvm::Expected<TranslationUnitDecl *>
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index b2c33480107d8..1daad6c224adf 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -460,7 +460,6 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI,
     return std::move(Err);
 
   if (DeviceCI) {
-    // Create a fresh LLVM context for the CUDA device interpreter
     // auto DeviceLLVMCtx = std::make_unique<llvm::LLVMContext>();
     // auto DeviceTSCtx =
     //     std::make_unique<llvm::orc::ThreadSafeContext>(std::move(DeviceLLVMCtx));
@@ -468,21 +467,18 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI,
     // llvm::Error DeviceErr = llvm::Error::success();
     // llvm::ErrorAsOutParameter EAO(&DeviceErr);
 
-    // // Create an IncrementalAction for the device CompilerInstance
     // auto DeviceAct = std::make_unique<IncrementalAction>(
     //     *DeviceCI, *DeviceTSCtx->getContext(), DeviceErr, *Interp);
 
     // if (DeviceErr)
     //   return std::move(DeviceErr);
 
-    // Execute the device-side action (this will create Sema etc.)
+    //DeviceCI->ExecuteAction(*DeviceAct);
     DeviceCI->ExecuteAction(*Interp->Act);
 
-    // Set up a virtual file system to emit fatbin to memory
     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
         std::make_unique<llvm::vfs::InMemoryFileSystem>();
 
-    // Create the CUDA device parser using the initialized device CI
     auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
         std::move(DeviceCI), *Interp->getCompilerInstance(), IMVFS, Err,
         Interp->PTUs);

>From 5d6c20809821c2ca0635bfe1175ecf2a9e6a6e39 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat491 at gmail.com>
Date: Sat, 19 Apr 2025 09:53:00 +0530
Subject: [PATCH 4/4] minor changes

---
 clang/lib/Interpreter/Interpreter.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 1daad6c224adf..1af3f24774028 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -489,11 +489,13 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI,
     Interp->DeviceParser = std::move(DeviceParser);
   }
 
-  // Add runtime code and set a marker to hide it from user code.
+  // Add runtime code and set a marker to hide it from user code. Undo will not
+  // go through that.
   auto PTU = Interp->Parse(Runtimes);
   if (!PTU)
     return PTU.takeError();
   Interp->markUserCodeStart();
+
   Interp->ValuePrintingInfo.resize(4);
   return std::move(Interp);
 }