[clang] [clang-repl] : Fix clang-repl crash with --cuda flag (PR #136404)

Thu Apr 24 00:18:10 PDT 2025

https://github.com/anutosh491 updated https://github.com/llvm/llvm-project/pull/136404

>From 1b18e96882590825075b8f8e5094fdcb5225d349 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat491 at gmail.com>
Date: Fri, 18 Apr 2025 18:45:00 +0530
Subject: [PATCH 1/5] Fix cuda flag with clang-repl

---
 clang/include/clang/Interpreter/Interpreter.h |  3 +-
 clang/lib/Interpreter/DeviceOffload.cpp       |  3 +-
 clang/lib/Interpreter/Interpreter.cpp         | 48 +++++++++++++------
 3 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h
index b1b63aedf86ab..7425797c55297 100644
--- a/clang/include/clang/Interpreter/Interpreter.h
+++ b/clang/include/clang/Interpreter/Interpreter.h
@@ -129,7 +129,8 @@ class Interpreter {
 public:
   virtual ~Interpreter();
   static llvm::Expected<std::unique_ptr<Interpreter>>
-  create(std::unique_ptr<CompilerInstance> CI);
+  create(std::unique_ptr<CompilerInstance> CI,
+         std::unique_ptr<CompilerInstance> DeviceCI = nullptr);
   static llvm::Expected<std::unique_ptr<Interpreter>>
   createWithCUDA(std::unique_ptr<CompilerInstance> CI,
                  std::unique_ptr<CompilerInstance> DCI);
diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
index 1999d63d1aa04..9a7be006250a0 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -34,14 +34,15 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
       TargetOpts(HostInstance.getTargetOpts()) {
   if (Err)
     return;
-  DeviceCI = std::move(DeviceInstance);
   StringRef Arch = TargetOpts.CPU;
   if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
+    DeviceInstance.release();
     Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
                                                "Invalid CUDA architecture",
                                                llvm::inconvertibleErrorCode()));
     return;
   }
+  DeviceCI = std::move(DeviceInstance);
 }
 
 llvm::Expected<TranslationUnitDecl *>
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index f8c8d0a425659..049cc00cd198f 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -451,13 +451,44 @@ const char *const Runtimes = R"(
 )";
 
 llvm::Expected<std::unique_ptr<Interpreter>>
-Interpreter::create(std::unique_ptr<CompilerInstance> CI) {
+Interpreter::create(std::unique_ptr<CompilerInstance> CI,
+                    std::unique_ptr<CompilerInstance> DeviceCI) {
   llvm::Error Err = llvm::Error::success();
   auto Interp =
       std::unique_ptr<Interpreter>(new Interpreter(std::move(CI), Err));
   if (Err)
     return std::move(Err);
 
+  if (DeviceCI) {
+    // auto DeviceLLVMCtx = std::make_unique<llvm::LLVMContext>();
+    // auto DeviceTSCtx =
+    //     std::make_unique<llvm::orc::ThreadSafeContext>(std::move(DeviceLLVMCtx));
+
+    // llvm::Error DeviceErr = llvm::Error::success();
+    // llvm::ErrorAsOutParameter EAO(&DeviceErr);
+
+    // auto DeviceAct = std::make_unique<IncrementalAction>(
+    //     *DeviceCI, *DeviceTSCtx->getContext(), DeviceErr, *Interp);
+
+    // if (DeviceErr)
+    //   return std::move(DeviceErr);
+
+    // DeviceCI->ExecuteAction(*DeviceAct);
+    DeviceCI->ExecuteAction(*Interp->Act);
+
+    llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
+        std::make_unique<llvm::vfs::InMemoryFileSystem>();
+
+    auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
+        std::move(DeviceCI), *Interp->getCompilerInstance(), IMVFS, Err,
+        Interp->PTUs);
+
+    if (Err)
+      return std::move(Err);
+
+    Interp->DeviceParser = std::move(DeviceParser);
+  }
+
   // Add runtime code and set a marker to hide it from user code. Undo will not
   // go through that.
   auto PTU = Interp->Parse(Runtimes);
@@ -481,20 +512,7 @@ Interpreter::createWithCUDA(std::unique_ptr<CompilerInstance> CI,
   OverlayVFS->pushOverlay(IMVFS);
   CI->createFileManager(OverlayVFS);
 
-  auto Interp = Interpreter::create(std::move(CI));
-  if (auto E = Interp.takeError())
-    return std::move(E);
-
-  llvm::Error Err = llvm::Error::success();
-  auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
-      std::move(DCI), *(*Interp)->getCompilerInstance(), IMVFS, Err,
-      (*Interp)->PTUs);
-  if (Err)
-    return std::move(Err);
-
-  (*Interp)->DeviceParser = std::move(DeviceParser);
-
-  return Interp;
+  return Interpreter::create(std::move(CI), std::move(DCI));
 }
 
 const CompilerInstance *Interpreter::getCompilerInstance() const {

>From 35fb15bafb670ee704b1dcb208875bef239a6199 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat491 at gmail.com>
Date: Sat, 19 Apr 2025 14:12:19 +0530
Subject: [PATCH 2/5] Use Act instead of DeviceAct

---
 clang/lib/Interpreter/Interpreter.cpp | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 049cc00cd198f..2d1d6847e358b 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -460,20 +460,6 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI,
     return std::move(Err);
 
   if (DeviceCI) {
-    // auto DeviceLLVMCtx = std::make_unique<llvm::LLVMContext>();
-    // auto DeviceTSCtx =
-    //     std::make_unique<llvm::orc::ThreadSafeContext>(std::move(DeviceLLVMCtx));
-
-    // llvm::Error DeviceErr = llvm::Error::success();
-    // llvm::ErrorAsOutParameter EAO(&DeviceErr);
-
-    // auto DeviceAct = std::make_unique<IncrementalAction>(
-    //     *DeviceCI, *DeviceTSCtx->getContext(), DeviceErr, *Interp);
-
-    // if (DeviceErr)
-    //   return std::move(DeviceErr);
-
-    // DeviceCI->ExecuteAction(*DeviceAct);
     DeviceCI->ExecuteAction(*Interp->Act);
 
     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =

>From ee374eed24eceafe8cb8c3f69c603bc4965ed534 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat491 at gmail.com>
Date: Tue, 22 Apr 2025 11:22:40 +0530
Subject: [PATCH 3/5] Fix Parsing for DeviceParser

---
 clang/lib/Interpreter/DeviceOffload.cpp | 84 +++++++++++++++++++------
 clang/lib/Interpreter/DeviceOffload.h   |  7 ++-
 clang/lib/Interpreter/Interpreter.cpp   | 43 ++++++++++++-
 3 files changed, 113 insertions(+), 21 deletions(-)

diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
index 9a7be006250a0..6fe78905a5bec 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -28,10 +28,10 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
     std::unique_ptr<CompilerInstance> DeviceInstance,
     CompilerInstance &HostInstance,
     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
-    llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs)
+    llvm::Error &Err, std::list<PartialTranslationUnit> &PTUs)
     : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS),
-      CodeGenOpts(HostInstance.getCodeGenOpts()),
-      TargetOpts(HostInstance.getTargetOpts()) {
+      CodeGenOpts(DeviceInstance->getCodeGenOpts()),
+      TargetOpts(DeviceInstance->getTargetOpts()) {
   if (Err)
     return;
   StringRef Arch = TargetOpts.CPU;
@@ -51,37 +51,61 @@ IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
   if (!PTU)
     return PTU.takeError();
 
-  auto PTX = GeneratePTX();
-  if (!PTX)
-    return PTX.takeError();
+  // auto PTX = GeneratePTX();
+  // if (!PTX)
+  //   return PTX.takeError();
 
-  auto Err = GenerateFatbinary();
-  if (Err)
-    return std::move(Err);
+  // auto Err = GenerateFatbinary();
+  // if (Err)
+  //   return std::move(Err);
 
-  std::string FatbinFileName =
-      "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
-  VFS->addFile(FatbinFileName, 0,
-               llvm::MemoryBuffer::getMemBuffer(
-                   llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
-                   "", false));
+  // std::string FatbinFileName =
+  //     "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
+  // VFS->addFile(FatbinFileName, 0,
+  //              llvm::MemoryBuffer::getMemBuffer(
+  //                  llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
+  //                  "", false));
 
-  CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
+  // CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
 
-  FatbinContent.clear();
+  // FatbinContent.clear();
 
   return PTU;
 }
 
+PartialTranslationUnit &
+IncrementalCUDADeviceParser::RegisterPTU(TranslationUnitDecl *TU) {
+  llvm::errs() << "[CUDA] RegisterPTU called. TU = " << TU << "\n";
+  PTUs.push_back(PartialTranslationUnit());
+  llvm::errs() << "[CUDA] PTUs size after push: " << PTUs.size() << "\n";
+  PartialTranslationUnit &LastPTU = PTUs.back();
+  LastPTU.TUPart = TU;
+  return LastPTU;
+}
+
 llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
+  llvm::errs() << "[CUDA] Generating PTX. PTUs size: " << PTUs.size() << "\n";
+  assert(!PTUs.empty() && "PTUs list is empty during PTX generation!");
   auto &PTU = PTUs.back();
   std::string Error;
 
+  if (!PTU.TheModule) {
+    llvm::errs() << "[CUDA] Error: PTU has no associated Module!\n";
+  } else {
+    llvm::errs() << "[CUDA] Module Triple: " << PTU.TheModule->getTargetTriple().str() << "\n";
+  }
+
+  llvm::errs() << ">>> PTU Module Target Triple: " << PTU.TheModule->getTargetTriple().str() << "\n";
+  llvm::errs() << ">>> Using CPU: " << TargetOpts.CPU << "\n";
+
   const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
       PTU.TheModule->getTargetTriple(), Error);
-  if (!Target)
+  if (!Target) {
+    llvm::errs() << ">>> Failed to lookup target: " << Error << "\n";
     return llvm::make_error<llvm::StringError>(std::move(Error),
                                                std::error_code());
+  }
+
   llvm::TargetOptions TO = llvm::TargetOptions();
   llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
       PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO,
@@ -173,9 +197,33 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
 
   FatbinContent.append(PTXCode.begin(), PTXCode.end());
 
+  std::string FatbinFileName =
+      "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
+
+  VFS->addFile(FatbinFileName, 0,
+               llvm::MemoryBuffer::getMemBuffer(
+                   llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
+                   "", false));
+
+  CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
+
+  FatbinContent.clear();
+
   return llvm::Error::success();
 }
 
+// void IncrementalCUDADeviceParser::EmitFatbinaryToVFS(std::string &FatbinFileName) {
+//   std::string FatbinFileName = "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
+
+//   VFS->addFile(FatbinFileName, 0,
+//                llvm::MemoryBuffer::getMemBuffer(
+//                    llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
+//                    "", false));
+
+//   CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
+//   FatbinContent.clear();
+// }
+
 IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
 
 } // namespace clang
diff --git a/clang/lib/Interpreter/DeviceOffload.h b/clang/lib/Interpreter/DeviceOffload.h
index b9a1acab004c3..6da3b65ae72d1 100644
--- a/clang/lib/Interpreter/DeviceOffload.h
+++ b/clang/lib/Interpreter/DeviceOffload.h
@@ -24,14 +24,14 @@ class CodeGenOptions;
 class TargetOptions;
 
 class IncrementalCUDADeviceParser : public IncrementalParser {
-  const std::list<PartialTranslationUnit> &PTUs;
+  std::list<PartialTranslationUnit> &PTUs;
 
 public:
   IncrementalCUDADeviceParser(
       std::unique_ptr<CompilerInstance> DeviceInstance,
       CompilerInstance &HostInstance,
       llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS,
-      llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs);
+      llvm::Error &Err, std::list<PartialTranslationUnit> &PTUs);
 
   llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input) override;
 
@@ -41,6 +41,9 @@ class IncrementalCUDADeviceParser : public IncrementalParser {
   // Generate fatbinary contents in memory
   llvm::Error GenerateFatbinary();
 
+  PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU);
+  // llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input) override;
+
   ~IncrementalCUDADeviceParser();
 
 protected:
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 2d1d6847e358b..6b4bafd8afc32 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -561,9 +561,50 @@ Interpreter::Parse(llvm::StringRef Code) {
   // If we have a device parser, parse it first. The generated code will be
   // included in the host compilation
   if (DeviceParser) {
+    llvm::errs() << "[CUDA] Parsing device code...\n";
     llvm::Expected<TranslationUnitDecl *> DeviceTU = DeviceParser->Parse(Code);
-    if (auto E = DeviceTU.takeError())
+    if (auto E = DeviceTU.takeError()) {
+      llvm::errs() << "[CUDA] Device Parse failed!\n";
       return std::move(E);
+    }
+    llvm::errs() << "[CUDA] Device parse successful.\n";
+
+    auto *CudaParser = llvm::cast<IncrementalCUDADeviceParser>(DeviceParser.get());
+    llvm::errs() << "[CUDA] Registering device PTU...\n";
+
+    PartialTranslationUnit &DevicePTU = CudaParser->RegisterPTU(*DeviceTU);
+    FrontendAction *WrappedAct = Act->getWrapped();
+    if (!WrappedAct->hasIRSupport()) {
+      llvm::errs() << "[CUDA] Error: WrappedAct has no IR support!\n";
+      return llvm::make_error<llvm::StringError>(
+          "Device action has no IR support", llvm::inconvertibleErrorCode());
+    }
+
+    CodeGenerator *CG = static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator();
+    if (!CG) {
+      llvm::errs() << "[CUDA] Error: CodeGen is null!\n";
+      return llvm::make_error<llvm::StringError>(
+          "Device CodeGen is null", llvm::inconvertibleErrorCode());
+    }
+    std::unique_ptr<llvm::Module> M(CG->ReleaseModule());
+    if (!M) {
+      llvm::errs() << "[CUDA] Error: Released module is null!\n";
+      return llvm::make_error<llvm::StringError>(
+          "Device LLVM module is null", llvm::inconvertibleErrorCode());
+    }
+    static unsigned ID = 0;
+    CG->StartModule("incr_module_" + std::to_string(ID++), M->getContext());
+    DevicePTU.TheModule = std::move(M);
+    llvm::errs() << "[CUDA] Assigned LLVM module to DevicePTU\n";
+    llvm::errs() << "[CUDA] Registered device PTU. TUPart=" << DevicePTU.TUPart << "\n";
+    llvm::errs() << "[CUDA] Generating PTX...\n";
+    llvm::Expected<llvm::StringRef> PTX = CudaParser->GeneratePTX();
+    if (!PTX)
+      return PTX.takeError();
+
+    llvm::Error Err = CudaParser->GenerateFatbinary();
+    if (Err)
+      return std::move(Err);
   }
 
   // Tell the interpreter sliently ignore unused expressions since value

>From 1f4948f12274234e422b50a3e8c52fdb7f3d85e7 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat491 at gmail.com>
Date: Wed, 23 Apr 2025 12:07:02 +0530
Subject: [PATCH 4/5] fix codegen opts

---
 clang/lib/Interpreter/DeviceOffload.cpp | 12 +-----------
 clang/lib/Interpreter/DeviceOffload.h   |  3 ---
 clang/lib/Interpreter/Interpreter.cpp   | 26 +------------------------
 3 files changed, 2 insertions(+), 39 deletions(-)

diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
index 6fe78905a5bec..9db598230d014 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -30,7 +30,7 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
     llvm::Error &Err, std::list<PartialTranslationUnit> &PTUs)
     : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS),
-      CodeGenOpts(DeviceInstance->getCodeGenOpts()),
+      CodeGenOpts(HostInstance.getCodeGenOpts()),
       TargetOpts(DeviceInstance->getTargetOpts()) {
   if (Err)
     return;
@@ -73,16 +73,6 @@ IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
   return PTU;
 }
 
-PartialTranslationUnit &
-IncrementalCUDADeviceParser::RegisterPTU(TranslationUnitDecl *TU) {
-  llvm::errs() << "[CUDA] RegisterPTU called. TU = " << TU << "\n";
-  PTUs.push_back(PartialTranslationUnit());
-  llvm::errs() << "[CUDA] PTUs size after push: " << PTUs.size() << "\n";
-  PartialTranslationUnit &LastPTU = PTUs.back();
-  LastPTU.TUPart = TU;
-  return LastPTU;
-}
-
 llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
   llvm::errs() << "[CUDA] Generating PTX. PTUs size: " << PTUs.size() << "\n";
   assert(!PTUs.empty() && "PTUs list is empty during PTX generation!");
diff --git a/clang/lib/Interpreter/DeviceOffload.h b/clang/lib/Interpreter/DeviceOffload.h
index 6da3b65ae72d1..23d89046c09e1 100644
--- a/clang/lib/Interpreter/DeviceOffload.h
+++ b/clang/lib/Interpreter/DeviceOffload.h
@@ -41,9 +41,6 @@ class IncrementalCUDADeviceParser : public IncrementalParser {
   // Generate fatbinary contents in memory
   llvm::Error GenerateFatbinary();
 
-  PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU);
-  // llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input) override;
-
   ~IncrementalCUDADeviceParser();
 
 protected:
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 6b4bafd8afc32..fbc6b7707f294 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -572,32 +572,8 @@ Interpreter::Parse(llvm::StringRef Code) {
     auto *CudaParser = llvm::cast<IncrementalCUDADeviceParser>(DeviceParser.get());
     llvm::errs() << "[CUDA] Registering device PTU...\n";
 
-    PartialTranslationUnit &DevicePTU = CudaParser->RegisterPTU(*DeviceTU);
-    FrontendAction *WrappedAct = Act->getWrapped();
-    if (!WrappedAct->hasIRSupport()) {
-      llvm::errs() << "[CUDA] Error: WrappedAct has no IR support!\n";
-      return llvm::make_error<llvm::StringError>(
-          "Device action has no IR support", llvm::inconvertibleErrorCode());
-    }
+    PartialTranslationUnit &DevicePTU = RegisterPTU(*DeviceTU);
 
-    CodeGenerator *CG = static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator();
-    if (!CG) {
-      llvm::errs() << "[CUDA] Error: CodeGen is null!\n";
-      return llvm::make_error<llvm::StringError>(
-          "Device CodeGen is null", llvm::inconvertibleErrorCode());
-    }
-    std::unique_ptr<llvm::Module> M(CG->ReleaseModule());
-    if (!M) {
-      llvm::errs() << "[CUDA] Error: Released module is null!\n";
-      return llvm::make_error<llvm::StringError>(
-          "Device LLVM module is null", llvm::inconvertibleErrorCode());
-    }
-    static unsigned ID = 0;
-    CG->StartModule("incr_module_" + std::to_string(ID++), M->getContext());
-    DevicePTU.TheModule = std::move(M);
-    llvm::errs() << "[CUDA] Assigned LLVM module to DevicePTU\n";
-    llvm::errs() << "[CUDA] Registered device PTU. TUPart=" << DevicePTU.TUPart << "\n";
-    llvm::errs() << "[CUDA] Generating PTX...\n";
     llvm::Expected<llvm::StringRef> PTX = CudaParser->GeneratePTX();
     if (!PTX)
       return PTX.takeError();

>From 9ff990f23148d35b6c3c071b285650a8deed4b86 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat491 at gmail.com>
Date: Thu, 24 Apr 2025 12:47:53 +0530
Subject: [PATCH 5/5] Fixed cuda flag

---
 clang/include/clang/Interpreter/Interpreter.h | 10 ++--
 clang/lib/Interpreter/DeviceOffload.cpp       | 48 ++----------------
 clang/lib/Interpreter/Interpreter.cpp         | 50 ++++++++++---------
 3 files changed, 37 insertions(+), 71 deletions(-)

diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h
index 7425797c55297..1b228e0917d02 100644
--- a/clang/include/clang/Interpreter/Interpreter.h
+++ b/clang/include/clang/Interpreter/Interpreter.h
@@ -95,6 +95,9 @@ class Interpreter {
   // An optional parser for CUDA offloading
   std::unique_ptr<IncrementalParser> DeviceParser;
 
+  // An optional action for CUDA offloading
+  std::unique_ptr<IncrementalAction> DeviceAct;
+
   /// List containing information about each incrementally parsed piece of code.
   std::list<PartialTranslationUnit> PTUs;
 
@@ -176,10 +179,11 @@ class Interpreter {
   llvm::Expected<Expr *> ExtractValueFromExpr(Expr *E);
   llvm::Expected<llvm::orc::ExecutorAddr> CompileDtorCall(CXXRecordDecl *CXXRD);
 
-  CodeGenerator *getCodeGen() const;
-  std::unique_ptr<llvm::Module> GenModule();
+  CodeGenerator *getCodeGen(IncrementalAction *Action = nullptr) const;
+  std::unique_ptr<llvm::Module> GenModule(IncrementalAction *Action = nullptr);
   PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU,
-                                      std::unique_ptr<llvm::Module> M = {});
+                                      std::unique_ptr<llvm::Module> M = {},
+                                      IncrementalAction *Action = nullptr);
 
   // A cache for the compiled destructors used to for de-allocation of managed
   // clang::Values.
diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
index 9db598230d014..6977d7fa674ab 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -51,47 +51,16 @@ IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
   if (!PTU)
     return PTU.takeError();
 
-  // auto PTX = GeneratePTX();
-  // if (!PTX)
-  //   return PTX.takeError();
-
-  // auto Err = GenerateFatbinary();
-  // if (Err)
-  //   return std::move(Err);
-
-  // std::string FatbinFileName =
-  //     "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
-  // VFS->addFile(FatbinFileName, 0,
-  //              llvm::MemoryBuffer::getMemBuffer(
-  //                  llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
-  //                  "", false));
-
-  // CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
-
-  // FatbinContent.clear();
-
   return PTU;
 }
 
 llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
-  llvm::errs() << "[CUDA] Generating PTX. PTUs size: " << PTUs.size() << "\n";
-  assert(!PTUs.empty() && "PTUs list is empty during PTX generation!");
   auto &PTU = PTUs.back();
   std::string Error;
 
-  if (!PTU.TheModule) {
-    llvm::errs() << "[CUDA] Error: PTU has no associated Module!\n";
-  } else {
-    llvm::errs() << "[CUDA] Module Triple: " << PTU.TheModule->getTargetTriple().str() << "\n";
-  }
-
-  llvm::errs() << ">>> PTU Module Target Triple: " << PTU.TheModule->getTargetTriple().str() << "\n";
-  llvm::errs() << ">>> Using CPU: " << TargetOpts.CPU << "\n";
-
   const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
       PTU.TheModule->getTargetTriple(), Error);
   if (!Target) {
-    llvm::errs() << ">>> Failed to lookup target: " << Error << "\n";
     return llvm::make_error<llvm::StringError>(std::move(Error),
                                                std::error_code());
   }
@@ -187,8 +156,9 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
 
   FatbinContent.append(PTXCode.begin(), PTXCode.end());
 
-  std::string FatbinFileName =
-      "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
+  auto &PTU = PTUs.back();
+
+  std::string FatbinFileName = "/" + PTU.TheModule->getName().str() + ".fatbin";
 
   VFS->addFile(FatbinFileName, 0,
                llvm::MemoryBuffer::getMemBuffer(
@@ -202,18 +172,6 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
   return llvm::Error::success();
 }
 
-// void IncrementalCUDADeviceParser::EmitFatbinaryToVFS(std::string &FatbinFileName) {
-//   std::string FatbinFileName = "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
-
-//   VFS->addFile(FatbinFileName, 0,
-//                llvm::MemoryBuffer::getMemBuffer(
-//                    llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
-//                    "", false));
-
-//   CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
-//   FatbinContent.clear();
-// }
-
 IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
 
 } // namespace clang
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index fbc6b7707f294..e0865c6e9dfdf 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -459,14 +459,28 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI,
   if (Err)
     return std::move(Err);
 
+  CompilerInstance &HostCI = *(Interp->getCompilerInstance());
+
   if (DeviceCI) {
-    DeviceCI->ExecuteAction(*Interp->Act);
+    Interp->DeviceAct = std::make_unique<IncrementalAction>(
+        *DeviceCI, *Interp->TSCtx->getContext(), Err, *Interp);
+
+    if (Err)
+      return std::move(Err);
+
+    DeviceCI->ExecuteAction(*Interp->DeviceAct);
 
+    // avoid writing fat binary to disk using an in-memory virtual file system
     llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
         std::make_unique<llvm::vfs::InMemoryFileSystem>();
+    llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS =
+        std::make_unique<llvm::vfs::OverlayFileSystem>(
+            llvm::vfs::getRealFileSystem());
+    OverlayVFS->pushOverlay(IMVFS);
+    HostCI.createFileManager(OverlayVFS);
 
     auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
-        std::move(DeviceCI), *Interp->getCompilerInstance(), IMVFS, Err,
+        std::move(DeviceCI), HostCI, IMVFS, Err,
         Interp->PTUs);
 
     if (Err)
@@ -489,15 +503,6 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI,
 llvm::Expected<std::unique_ptr<Interpreter>>
 Interpreter::createWithCUDA(std::unique_ptr<CompilerInstance> CI,
                             std::unique_ptr<CompilerInstance> DCI) {
-  // avoid writing fat binary to disk using an in-memory virtual file system
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
-      std::make_unique<llvm::vfs::InMemoryFileSystem>();
-  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS =
-      std::make_unique<llvm::vfs::OverlayFileSystem>(
-          llvm::vfs::getRealFileSystem());
-  OverlayVFS->pushOverlay(IMVFS);
-  CI->createFileManager(OverlayVFS);
-
   return Interpreter::create(std::move(CI), std::move(DCI));
 }
 
@@ -536,15 +541,16 @@ size_t Interpreter::getEffectivePTUSize() const {
 
 PartialTranslationUnit &
 Interpreter::RegisterPTU(TranslationUnitDecl *TU,
-                         std::unique_ptr<llvm::Module> M /*={}*/) {
+                         std::unique_ptr<llvm::Module> M /*={}*/,
+                         IncrementalAction *Action) {
   PTUs.emplace_back(PartialTranslationUnit());
   PartialTranslationUnit &LastPTU = PTUs.back();
   LastPTU.TUPart = TU;
 
   if (!M)
-    M = GenModule();
+    M = GenModule(Action);
 
-  assert((!getCodeGen() || M) && "Must have a llvm::Module at this point");
+  assert((!getCodeGen(Action) || M) && "Must have a llvm::Module at this point");
 
   LastPTU.TheModule = std::move(M);
   LLVM_DEBUG(llvm::dbgs() << "compile-ptu " << PTUs.size() - 1
@@ -561,18 +567,14 @@ Interpreter::Parse(llvm::StringRef Code) {
   // If we have a device parser, parse it first. The generated code will be
   // included in the host compilation
   if (DeviceParser) {
-    llvm::errs() << "[CUDA] Parsing device code...\n";
     llvm::Expected<TranslationUnitDecl *> DeviceTU = DeviceParser->Parse(Code);
     if (auto E = DeviceTU.takeError()) {
-      llvm::errs() << "[CUDA] Device Parse failed!\n";
       return std::move(E);
     }
-    llvm::errs() << "[CUDA] Device parse successful.\n";
 
     auto *CudaParser = llvm::cast<IncrementalCUDADeviceParser>(DeviceParser.get());
-    llvm::errs() << "[CUDA] Registering device PTU...\n";
 
-    PartialTranslationUnit &DevicePTU = RegisterPTU(*DeviceTU);
+    PartialTranslationUnit &DevicePTU = RegisterPTU(*DeviceTU, nullptr, DeviceAct.get());
 
     llvm::Expected<llvm::StringRef> PTX = CudaParser->GeneratePTX();
     if (!PTX)
@@ -757,9 +759,9 @@ llvm::Error Interpreter::LoadDynamicLibrary(const char *name) {
   return llvm::Error::success();
 }
 
-std::unique_ptr<llvm::Module> Interpreter::GenModule() {
+std::unique_ptr<llvm::Module> Interpreter::GenModule(IncrementalAction *Action) {
   static unsigned ID = 0;
-  if (CodeGenerator *CG = getCodeGen()) {
+  if (CodeGenerator *CG = getCodeGen(Action)) {
     // Clang's CodeGen is designed to work with a single llvm::Module. In many
     // cases for convenience various CodeGen parts have a reference to the
     // llvm::Module (TheModule or Module) which does not change when a new
@@ -781,8 +783,10 @@ std::unique_ptr<llvm::Module> Interpreter::GenModule() {
   return nullptr;
 }
 
-CodeGenerator *Interpreter::getCodeGen() const {
-  FrontendAction *WrappedAct = Act->getWrapped();
+CodeGenerator *Interpreter::getCodeGen(IncrementalAction *Action) const {
+  if (!Action)
+    Action = Act.get();
+  FrontendAction *WrappedAct = Action->getWrapped();
   if (!WrappedAct->hasIRSupport())
     return nullptr;
   return static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator();