[clang] [clang-repl] : Fix clang-repl crash with --cuda flag (PR #136404)

Sat Apr 19 10:57:44 PDT 2025

================
@@ -451,13 +451,44 @@ const char *const Runtimes = R"(
 )";
 
 llvm::Expected<std::unique_ptr<Interpreter>>
-Interpreter::create(std::unique_ptr<CompilerInstance> CI) {
+Interpreter::create(std::unique_ptr<CompilerInstance> CI,
+                    std::unique_ptr<CompilerInstance> DeviceCI) {
   llvm::Error Err = llvm::Error::success();
   auto Interp =
       std::unique_ptr<Interpreter>(new Interpreter(std::move(CI), Err));
   if (Err)
     return std::move(Err);
 
+  if (DeviceCI) {
+    // auto DeviceLLVMCtx = std::make_unique<llvm::LLVMContext>();
+    // auto DeviceTSCtx =
+    //     std::make_unique<llvm::orc::ThreadSafeContext>(std::move(DeviceLLVMCtx));
+
+    // llvm::Error DeviceErr = llvm::Error::success();
+    // llvm::ErrorAsOutParameter EAO(&DeviceErr);
+
+    // auto DeviceAct = std::make_unique<IncrementalAction>(
+    //     *DeviceCI, *DeviceTSCtx->getContext(), DeviceErr, *Interp);
+
+    // if (DeviceErr)
+    //   return std::move(DeviceErr);
+
+    // DeviceCI->ExecuteAction(*DeviceAct);
+    DeviceCI->ExecuteAction(*Interp->Act);
----------------
vgvassilev wrote:

While doing testing I came across on a patch of mine which does not crash:
```patch
commit bcc670b90af2cefedbb654d3057cda4f891fa4b7
Author: Vassil Vassilev <v.g.vassilev at gmail.com>
Date:   Sat Apr 19 15:39:41 2025 +0000

    fix cuda

diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h
index b1b63aedf86a..ba61c063d96a 100644
--- a/clang/include/clang/Interpreter/Interpreter.h
+++ b/clang/include/clang/Interpreter/Interpreter.h
@@ -94,6 +94,8 @@ class Interpreter {
 
   // An optional parser for CUDA offloading
   std::unique_ptr<IncrementalParser> DeviceParser;
+  // An optional action for CUDA offloading
+  std::unique_ptr<IncrementalAction> DeviceAct;
 
   /// List containing information about each incrementally parsed piece of code.
   std::list<PartialTranslationUnit> PTUs;
@@ -129,7 +131,8 @@ protected:
 public:
   virtual ~Interpreter();
   static llvm::Expected<std::unique_ptr<Interpreter>>
-  create(std::unique_ptr<CompilerInstance> CI);
+  create(std::unique_ptr<CompilerInstance> CI,
+         std::unique_ptr<CompilerInstance> DeviceCI = nullptr);
   static llvm::Expected<std::unique_ptr<Interpreter>>
   createWithCUDA(std::unique_ptr<CompilerInstance> CI,
                  std::unique_ptr<CompilerInstance> DCI);
diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
index 1999d63d1aa0..3db96a6fd517 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -27,18 +27,27 @@ namespace clang {
 IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
     std::unique_ptr<CompilerInstance> DeviceInstance,
     CompilerInstance &HostInstance,
-    llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
     llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs)
-    : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS),
-      CodeGenOpts(HostInstance.getCodeGenOpts()),
-      TargetOpts(HostInstance.getTargetOpts()) {
+  : IncrementalParser([this](CompilerInstance *DCI) -> CompilerInstance& {
+    // Avoid writing fat binary to disk using an in-memory virtual file system.
+    VFS = new llvm::vfs::InMemoryFileSystem();
+    llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS =
+      std::make_unique<llvm::vfs::OverlayFileSystem>(
+          llvm::vfs::getRealFileSystem());
+    OverlayVFS->pushOverlay(VFS);
+    DCI->createFileManager(OverlayVFS);
+    return *DCI;
+  }(DeviceInstance.get()), Err), PTUs(PTUs),
+      HostCodeGenOpts(HostInstance.getCodeGenOpts()),
+      HostTargetOpts(HostInstance.getTargetOpts()) {
   if (Err)
     return;
   DeviceCI = std::move(DeviceInstance);
-  StringRef Arch = TargetOpts.CPU;
+  StringRef Arch = HostTargetOpts.CPU;
   if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
     Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
-                                               "Invalid CUDA architecture",
+                                               "Invalid CUDA architecture '" +
+                                               Arch + "'",
                                                llvm::inconvertibleErrorCode()));
     return;
   }
@@ -65,7 +74,7 @@ IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
                    llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
                    "", false));
 
-  CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
+  HostCodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
 
   FatbinContent.clear();
 
@@ -83,7 +92,7 @@ llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
                                                std::error_code());
   llvm::TargetOptions TO = llvm::TargetOptions();
   llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
-      PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO,
+      PTU.TheModule->getTargetTriple(), HostTargetOpts.CPU, "", TO,
       llvm::Reloc::Model::PIC_);
   PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
 
diff --git a/clang/lib/Interpreter/DeviceOffload.h b/clang/lib/Interpreter/DeviceOffload.h
index b9a1acab004c..07994ccb40eb 100644
--- a/clang/lib/Interpreter/DeviceOffload.h
+++ b/clang/lib/Interpreter/DeviceOffload.h
@@ -30,7 +30,6 @@ public:
   IncrementalCUDADeviceParser(
       std::unique_ptr<CompilerInstance> DeviceInstance,
       CompilerInstance &HostInstance,
-      llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS,
       llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs);
 
   llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input) override;
@@ -49,8 +48,8 @@ protected:
   llvm::SmallString<1024> PTXCode;
   llvm::SmallVector<char, 1024> FatbinContent;
   llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS;
-  CodeGenOptions &CodeGenOpts; // Intentionally a reference.
-  const TargetOptions &TargetOpts;
+  CodeGenOptions &HostCodeGenOpts; // Intentionally a reference.
+  const TargetOptions &HostTargetOpts;
 };
 
 } // namespace clang
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 1859c6802c6f..3ca2e21d6f8a 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -450,13 +450,33 @@ const char *const Runtimes = R"(
 )";
 
 llvm::Expected<std::unique_ptr<Interpreter>>
-Interpreter::create(std::unique_ptr<CompilerInstance> CI) {
+Interpreter::create(std::unique_ptr<CompilerInstance> CI,
+                    std::unique_ptr<CompilerInstance> DeviceCI/*=nullptr*/) {
   llvm::Error Err = llvm::Error::success();
+
   auto Interp =
       std::unique_ptr<Interpreter>(new Interpreter(std::move(CI), Err));
+  CompilerInstance &HostCI = *(Interp->getCompilerInstance());
   if (Err)
     return std::move(Err);
 
+  if (DeviceCI) {
+    Interp->DeviceAct = std::make_unique<IncrementalAction>(*DeviceCI, *Interp->TSCtx->getContext(), Err,
+                                                            *Interp);
+
+    if (Err)
+      return std::move(Err);
+
+    DeviceCI->ExecuteAction(*Interp->DeviceAct);
+
+    auto DeviceParser = new IncrementalCUDADeviceParser(std::move(DeviceCI),
+                                                        HostCI, Err, Interp->PTUs);
+    if (Err)
+      return std::move(Err);
+
+    Interp->DeviceParser.reset(DeviceParser);
+  }
+
   // Add runtime code and set a marker to hide it from user code. Undo will not
   // go through that.
   auto PTU = Interp->Parse(Runtimes);
@@ -471,29 +491,7 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI) {
 llvm::Expected<std::unique_ptr<Interpreter>>
 Interpreter::createWithCUDA(std::unique_ptr<CompilerInstance> CI,
                             std::unique_ptr<CompilerInstance> DCI) {
-  // avoid writing fat binary to disk using an in-memory virtual file system
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
-      std::make_unique<llvm::vfs::InMemoryFileSystem>();
-  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS =
-      std::make_unique<llvm::vfs::OverlayFileSystem>(
-          llvm::vfs::getRealFileSystem());
-  OverlayVFS->pushOverlay(IMVFS);
-  CI->createFileManager(OverlayVFS);
-
-  auto Interp = Interpreter::create(std::move(CI));
-  if (auto E = Interp.takeError())
-    return std::move(E);
-
-  llvm::Error Err = llvm::Error::success();
-  auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
-      std::move(DCI), *(*Interp)->getCompilerInstance(), IMVFS, Err,
-      (*Interp)->PTUs);
-  if (Err)
-    return std::move(Err);
-
-  (*Interp)->DeviceParser = std::move(DeviceParser);
-
-  return Interp;
+  return Interpreter::create(std::move(CI), std::move(DCI));
 }
 
 const CompilerInstance *Interpreter::getCompilerInstance() const {
 ```
 
 but shows:
 
 ```
 ./bin/clang-repl  --cuda
module flag identifiers must be unique (or of 'require' type)
!"wchar_size"
module flag identifiers must be unique (or of 'require' type)
!"nvvm-reflect-ftz"
module flag identifiers must be unique (or of 'require' type)
!"frame-pointer"
fatal error: error in backend: Broken module found, compilation aborted!
```

https://github.com/llvm/llvm-project/pull/136404