[clang] [clang-repl] : Fix clang-repl crash with --cuda flag (PR #136404)
Anutosh Bhat via cfe-commits
cfe-commits at lists.llvm.org
Thu Apr 24 00:18:10 PDT 2025
https://github.com/anutosh491 updated https://github.com/llvm/llvm-project/pull/136404
>From 1b18e96882590825075b8f8e5094fdcb5225d349 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat491 at gmail.com>
Date: Fri, 18 Apr 2025 18:45:00 +0530
Subject: [PATCH 1/5] Fix cuda flag with clang-repl
---
clang/include/clang/Interpreter/Interpreter.h | 3 +-
clang/lib/Interpreter/DeviceOffload.cpp | 3 +-
clang/lib/Interpreter/Interpreter.cpp | 48 +++++++++++++------
3 files changed, 37 insertions(+), 17 deletions(-)
diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h
index b1b63aedf86ab..7425797c55297 100644
--- a/clang/include/clang/Interpreter/Interpreter.h
+++ b/clang/include/clang/Interpreter/Interpreter.h
@@ -129,7 +129,8 @@ class Interpreter {
public:
virtual ~Interpreter();
static llvm::Expected<std::unique_ptr<Interpreter>>
- create(std::unique_ptr<CompilerInstance> CI);
+ create(std::unique_ptr<CompilerInstance> CI,
+ std::unique_ptr<CompilerInstance> DeviceCI = nullptr);
static llvm::Expected<std::unique_ptr<Interpreter>>
createWithCUDA(std::unique_ptr<CompilerInstance> CI,
std::unique_ptr<CompilerInstance> DCI);
diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
index 1999d63d1aa04..9a7be006250a0 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -34,14 +34,15 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
TargetOpts(HostInstance.getTargetOpts()) {
if (Err)
return;
- DeviceCI = std::move(DeviceInstance);
StringRef Arch = TargetOpts.CPU;
if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
+ DeviceInstance.release();
Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
"Invalid CUDA architecture",
llvm::inconvertibleErrorCode()));
return;
}
+ DeviceCI = std::move(DeviceInstance);
}
llvm::Expected<TranslationUnitDecl *>
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index f8c8d0a425659..049cc00cd198f 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -451,13 +451,44 @@ const char *const Runtimes = R"(
)";
llvm::Expected<std::unique_ptr<Interpreter>>
-Interpreter::create(std::unique_ptr<CompilerInstance> CI) {
+Interpreter::create(std::unique_ptr<CompilerInstance> CI,
+ std::unique_ptr<CompilerInstance> DeviceCI) {
llvm::Error Err = llvm::Error::success();
auto Interp =
std::unique_ptr<Interpreter>(new Interpreter(std::move(CI), Err));
if (Err)
return std::move(Err);
+ if (DeviceCI) {
+ // auto DeviceLLVMCtx = std::make_unique<llvm::LLVMContext>();
+ // auto DeviceTSCtx =
+ // std::make_unique<llvm::orc::ThreadSafeContext>(std::move(DeviceLLVMCtx));
+
+ // llvm::Error DeviceErr = llvm::Error::success();
+ // llvm::ErrorAsOutParameter EAO(&DeviceErr);
+
+ // auto DeviceAct = std::make_unique<IncrementalAction>(
+ // *DeviceCI, *DeviceTSCtx->getContext(), DeviceErr, *Interp);
+
+ // if (DeviceErr)
+ // return std::move(DeviceErr);
+
+ // DeviceCI->ExecuteAction(*DeviceAct);
+ DeviceCI->ExecuteAction(*Interp->Act);
+
+ llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
+ std::make_unique<llvm::vfs::InMemoryFileSystem>();
+
+ auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
+ std::move(DeviceCI), *Interp->getCompilerInstance(), IMVFS, Err,
+ Interp->PTUs);
+
+ if (Err)
+ return std::move(Err);
+
+ Interp->DeviceParser = std::move(DeviceParser);
+ }
+
// Add runtime code and set a marker to hide it from user code. Undo will not
// go through that.
auto PTU = Interp->Parse(Runtimes);
@@ -481,20 +512,7 @@ Interpreter::createWithCUDA(std::unique_ptr<CompilerInstance> CI,
OverlayVFS->pushOverlay(IMVFS);
CI->createFileManager(OverlayVFS);
- auto Interp = Interpreter::create(std::move(CI));
- if (auto E = Interp.takeError())
- return std::move(E);
-
- llvm::Error Err = llvm::Error::success();
- auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
- std::move(DCI), *(*Interp)->getCompilerInstance(), IMVFS, Err,
- (*Interp)->PTUs);
- if (Err)
- return std::move(Err);
-
- (*Interp)->DeviceParser = std::move(DeviceParser);
-
- return Interp;
+ return Interpreter::create(std::move(CI), std::move(DCI));
}
const CompilerInstance *Interpreter::getCompilerInstance() const {
>From 35fb15bafb670ee704b1dcb208875bef239a6199 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat491 at gmail.com>
Date: Sat, 19 Apr 2025 14:12:19 +0530
Subject: [PATCH 2/5] Use Act instead of DeviceAct
---
clang/lib/Interpreter/Interpreter.cpp | 14 --------------
1 file changed, 14 deletions(-)
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 049cc00cd198f..2d1d6847e358b 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -460,20 +460,6 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI,
return std::move(Err);
if (DeviceCI) {
- // auto DeviceLLVMCtx = std::make_unique<llvm::LLVMContext>();
- // auto DeviceTSCtx =
- // std::make_unique<llvm::orc::ThreadSafeContext>(std::move(DeviceLLVMCtx));
-
- // llvm::Error DeviceErr = llvm::Error::success();
- // llvm::ErrorAsOutParameter EAO(&DeviceErr);
-
- // auto DeviceAct = std::make_unique<IncrementalAction>(
- // *DeviceCI, *DeviceTSCtx->getContext(), DeviceErr, *Interp);
-
- // if (DeviceErr)
- // return std::move(DeviceErr);
-
- // DeviceCI->ExecuteAction(*DeviceAct);
DeviceCI->ExecuteAction(*Interp->Act);
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
>From ee374eed24eceafe8cb8c3f69c603bc4965ed534 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat491 at gmail.com>
Date: Tue, 22 Apr 2025 11:22:40 +0530
Subject: [PATCH 3/5] Fix Parsing for DeviceParser
---
clang/lib/Interpreter/DeviceOffload.cpp | 84 +++++++++++++++++++------
clang/lib/Interpreter/DeviceOffload.h | 7 ++-
clang/lib/Interpreter/Interpreter.cpp | 43 ++++++++++++-
3 files changed, 113 insertions(+), 21 deletions(-)
diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
index 9a7be006250a0..6fe78905a5bec 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -28,10 +28,10 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
std::unique_ptr<CompilerInstance> DeviceInstance,
CompilerInstance &HostInstance,
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
- llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs)
+ llvm::Error &Err, std::list<PartialTranslationUnit> &PTUs)
: IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS),
- CodeGenOpts(HostInstance.getCodeGenOpts()),
- TargetOpts(HostInstance.getTargetOpts()) {
+ CodeGenOpts(DeviceInstance->getCodeGenOpts()),
+ TargetOpts(DeviceInstance->getTargetOpts()) {
if (Err)
return;
StringRef Arch = TargetOpts.CPU;
@@ -51,37 +51,61 @@ IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
if (!PTU)
return PTU.takeError();
- auto PTX = GeneratePTX();
- if (!PTX)
- return PTX.takeError();
+ // auto PTX = GeneratePTX();
+ // if (!PTX)
+ // return PTX.takeError();
- auto Err = GenerateFatbinary();
- if (Err)
- return std::move(Err);
+ // auto Err = GenerateFatbinary();
+ // if (Err)
+ // return std::move(Err);
- std::string FatbinFileName =
- "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
- VFS->addFile(FatbinFileName, 0,
- llvm::MemoryBuffer::getMemBuffer(
- llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
- "", false));
+ // std::string FatbinFileName =
+ // "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
+ // VFS->addFile(FatbinFileName, 0,
+ // llvm::MemoryBuffer::getMemBuffer(
+ // llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
+ // "", false));
- CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
+ // CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
- FatbinContent.clear();
+ // FatbinContent.clear();
return PTU;
}
+PartialTranslationUnit &
+IncrementalCUDADeviceParser::RegisterPTU(TranslationUnitDecl *TU) {
+ llvm::errs() << "[CUDA] RegisterPTU called. TU = " << TU << "\n";
+ PTUs.push_back(PartialTranslationUnit());
+ llvm::errs() << "[CUDA] PTUs size after push: " << PTUs.size() << "\n";
+ PartialTranslationUnit &LastPTU = PTUs.back();
+ LastPTU.TUPart = TU;
+ return LastPTU;
+}
+
llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
+ llvm::errs() << "[CUDA] Generating PTX. PTUs size: " << PTUs.size() << "\n";
+ assert(!PTUs.empty() && "PTUs list is empty during PTX generation!");
auto &PTU = PTUs.back();
std::string Error;
+ if (!PTU.TheModule) {
+ llvm::errs() << "[CUDA] Error: PTU has no associated Module!\n";
+ } else {
+ llvm::errs() << "[CUDA] Module Triple: " << PTU.TheModule->getTargetTriple().str() << "\n";
+ }
+
+ llvm::errs() << ">>> PTU Module Target Triple: " << PTU.TheModule->getTargetTriple().str() << "\n";
+ llvm::errs() << ">>> Using CPU: " << TargetOpts.CPU << "\n";
+
const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
PTU.TheModule->getTargetTriple(), Error);
- if (!Target)
+ if (!Target) {
+ llvm::errs() << ">>> Failed to lookup target: " << Error << "\n";
return llvm::make_error<llvm::StringError>(std::move(Error),
std::error_code());
+ }
+
llvm::TargetOptions TO = llvm::TargetOptions();
llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
PTU.TheModule->getTargetTriple(), TargetOpts.CPU, "", TO,
@@ -173,9 +197,33 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
FatbinContent.append(PTXCode.begin(), PTXCode.end());
+ std::string FatbinFileName =
+ "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
+
+ VFS->addFile(FatbinFileName, 0,
+ llvm::MemoryBuffer::getMemBuffer(
+ llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
+ "", false));
+
+ CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
+
+ FatbinContent.clear();
+
return llvm::Error::success();
}
+// void IncrementalCUDADeviceParser::EmitFatbinaryToVFS(std::string &FatbinFileName) {
+// std::string FatbinFileName = "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
+
+// VFS->addFile(FatbinFileName, 0,
+// llvm::MemoryBuffer::getMemBuffer(
+// llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
+// "", false));
+
+// CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
+// FatbinContent.clear();
+// }
+
IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
} // namespace clang
diff --git a/clang/lib/Interpreter/DeviceOffload.h b/clang/lib/Interpreter/DeviceOffload.h
index b9a1acab004c3..6da3b65ae72d1 100644
--- a/clang/lib/Interpreter/DeviceOffload.h
+++ b/clang/lib/Interpreter/DeviceOffload.h
@@ -24,14 +24,14 @@ class CodeGenOptions;
class TargetOptions;
class IncrementalCUDADeviceParser : public IncrementalParser {
- const std::list<PartialTranslationUnit> &PTUs;
+ std::list<PartialTranslationUnit> &PTUs;
public:
IncrementalCUDADeviceParser(
std::unique_ptr<CompilerInstance> DeviceInstance,
CompilerInstance &HostInstance,
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS,
- llvm::Error &Err, const std::list<PartialTranslationUnit> &PTUs);
+ llvm::Error &Err, std::list<PartialTranslationUnit> &PTUs);
llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input) override;
@@ -41,6 +41,9 @@ class IncrementalCUDADeviceParser : public IncrementalParser {
// Generate fatbinary contents in memory
llvm::Error GenerateFatbinary();
+ PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU);
+ // llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input) override;
+
~IncrementalCUDADeviceParser();
protected:
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 2d1d6847e358b..6b4bafd8afc32 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -561,9 +561,50 @@ Interpreter::Parse(llvm::StringRef Code) {
// If we have a device parser, parse it first. The generated code will be
// included in the host compilation
if (DeviceParser) {
+ llvm::errs() << "[CUDA] Parsing device code...\n";
llvm::Expected<TranslationUnitDecl *> DeviceTU = DeviceParser->Parse(Code);
- if (auto E = DeviceTU.takeError())
+ if (auto E = DeviceTU.takeError()) {
+ llvm::errs() << "[CUDA] Device Parse failed!\n";
return std::move(E);
+ }
+ llvm::errs() << "[CUDA] Device parse successful.\n";
+
+ auto *CudaParser = llvm::cast<IncrementalCUDADeviceParser>(DeviceParser.get());
+ llvm::errs() << "[CUDA] Registering device PTU...\n";
+
+ PartialTranslationUnit &DevicePTU = CudaParser->RegisterPTU(*DeviceTU);
+ FrontendAction *WrappedAct = Act->getWrapped();
+ if (!WrappedAct->hasIRSupport()) {
+ llvm::errs() << "[CUDA] Error: WrappedAct has no IR support!\n";
+ return llvm::make_error<llvm::StringError>(
+ "Device action has no IR support", llvm::inconvertibleErrorCode());
+ }
+
+ CodeGenerator *CG = static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator();
+ if (!CG) {
+ llvm::errs() << "[CUDA] Error: CodeGen is null!\n";
+ return llvm::make_error<llvm::StringError>(
+ "Device CodeGen is null", llvm::inconvertibleErrorCode());
+ }
+ std::unique_ptr<llvm::Module> M(CG->ReleaseModule());
+ if (!M) {
+ llvm::errs() << "[CUDA] Error: Released module is null!\n";
+ return llvm::make_error<llvm::StringError>(
+ "Device LLVM module is null", llvm::inconvertibleErrorCode());
+ }
+ static unsigned ID = 0;
+ CG->StartModule("incr_module_" + std::to_string(ID++), M->getContext());
+ DevicePTU.TheModule = std::move(M);
+ llvm::errs() << "[CUDA] Assigned LLVM module to DevicePTU\n";
+ llvm::errs() << "[CUDA] Registered device PTU. TUPart=" << DevicePTU.TUPart << "\n";
+ llvm::errs() << "[CUDA] Generating PTX...\n";
+ llvm::Expected<llvm::StringRef> PTX = CudaParser->GeneratePTX();
+ if (!PTX)
+ return PTX.takeError();
+
+ llvm::Error Err = CudaParser->GenerateFatbinary();
+ if (Err)
+ return std::move(Err);
}
// Tell the interpreter sliently ignore unused expressions since value
>From 1f4948f12274234e422b50a3e8c52fdb7f3d85e7 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat491 at gmail.com>
Date: Wed, 23 Apr 2025 12:07:02 +0530
Subject: [PATCH 4/5] fix codegen opts
---
clang/lib/Interpreter/DeviceOffload.cpp | 12 +-----------
clang/lib/Interpreter/DeviceOffload.h | 3 ---
clang/lib/Interpreter/Interpreter.cpp | 26 +------------------------
3 files changed, 2 insertions(+), 39 deletions(-)
diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
index 6fe78905a5bec..9db598230d014 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -30,7 +30,7 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,
llvm::Error &Err, std::list<PartialTranslationUnit> &PTUs)
: IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS),
- CodeGenOpts(DeviceInstance->getCodeGenOpts()),
+ CodeGenOpts(HostInstance.getCodeGenOpts()),
TargetOpts(DeviceInstance->getTargetOpts()) {
if (Err)
return;
@@ -73,16 +73,6 @@ IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
return PTU;
}
-PartialTranslationUnit &
-IncrementalCUDADeviceParser::RegisterPTU(TranslationUnitDecl *TU) {
- llvm::errs() << "[CUDA] RegisterPTU called. TU = " << TU << "\n";
- PTUs.push_back(PartialTranslationUnit());
- llvm::errs() << "[CUDA] PTUs size after push: " << PTUs.size() << "\n";
- PartialTranslationUnit &LastPTU = PTUs.back();
- LastPTU.TUPart = TU;
- return LastPTU;
-}
-
llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
llvm::errs() << "[CUDA] Generating PTX. PTUs size: " << PTUs.size() << "\n";
assert(!PTUs.empty() && "PTUs list is empty during PTX generation!");
diff --git a/clang/lib/Interpreter/DeviceOffload.h b/clang/lib/Interpreter/DeviceOffload.h
index 6da3b65ae72d1..23d89046c09e1 100644
--- a/clang/lib/Interpreter/DeviceOffload.h
+++ b/clang/lib/Interpreter/DeviceOffload.h
@@ -41,9 +41,6 @@ class IncrementalCUDADeviceParser : public IncrementalParser {
// Generate fatbinary contents in memory
llvm::Error GenerateFatbinary();
- PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU);
- // llvm::Expected<TranslationUnitDecl *> Parse(llvm::StringRef Input) override;
-
~IncrementalCUDADeviceParser();
protected:
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 6b4bafd8afc32..fbc6b7707f294 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -572,32 +572,8 @@ Interpreter::Parse(llvm::StringRef Code) {
auto *CudaParser = llvm::cast<IncrementalCUDADeviceParser>(DeviceParser.get());
llvm::errs() << "[CUDA] Registering device PTU...\n";
- PartialTranslationUnit &DevicePTU = CudaParser->RegisterPTU(*DeviceTU);
- FrontendAction *WrappedAct = Act->getWrapped();
- if (!WrappedAct->hasIRSupport()) {
- llvm::errs() << "[CUDA] Error: WrappedAct has no IR support!\n";
- return llvm::make_error<llvm::StringError>(
- "Device action has no IR support", llvm::inconvertibleErrorCode());
- }
+ PartialTranslationUnit &DevicePTU = RegisterPTU(*DeviceTU);
- CodeGenerator *CG = static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator();
- if (!CG) {
- llvm::errs() << "[CUDA] Error: CodeGen is null!\n";
- return llvm::make_error<llvm::StringError>(
- "Device CodeGen is null", llvm::inconvertibleErrorCode());
- }
- std::unique_ptr<llvm::Module> M(CG->ReleaseModule());
- if (!M) {
- llvm::errs() << "[CUDA] Error: Released module is null!\n";
- return llvm::make_error<llvm::StringError>(
- "Device LLVM module is null", llvm::inconvertibleErrorCode());
- }
- static unsigned ID = 0;
- CG->StartModule("incr_module_" + std::to_string(ID++), M->getContext());
- DevicePTU.TheModule = std::move(M);
- llvm::errs() << "[CUDA] Assigned LLVM module to DevicePTU\n";
- llvm::errs() << "[CUDA] Registered device PTU. TUPart=" << DevicePTU.TUPart << "\n";
- llvm::errs() << "[CUDA] Generating PTX...\n";
llvm::Expected<llvm::StringRef> PTX = CudaParser->GeneratePTX();
if (!PTX)
return PTX.takeError();
>From 9ff990f23148d35b6c3c071b285650a8deed4b86 Mon Sep 17 00:00:00 2001
From: anutosh491 <andersonbhat491 at gmail.com>
Date: Thu, 24 Apr 2025 12:47:53 +0530
Subject: [PATCH 5/5] Fixed cuda flag
---
clang/include/clang/Interpreter/Interpreter.h | 10 ++--
clang/lib/Interpreter/DeviceOffload.cpp | 48 ++----------------
clang/lib/Interpreter/Interpreter.cpp | 50 ++++++++++---------
3 files changed, 37 insertions(+), 71 deletions(-)
diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h
index 7425797c55297..1b228e0917d02 100644
--- a/clang/include/clang/Interpreter/Interpreter.h
+++ b/clang/include/clang/Interpreter/Interpreter.h
@@ -95,6 +95,9 @@ class Interpreter {
// An optional parser for CUDA offloading
std::unique_ptr<IncrementalParser> DeviceParser;
+ // An optional action for CUDA offloading
+ std::unique_ptr<IncrementalAction> DeviceAct;
+
/// List containing information about each incrementally parsed piece of code.
std::list<PartialTranslationUnit> PTUs;
@@ -176,10 +179,11 @@ class Interpreter {
llvm::Expected<Expr *> ExtractValueFromExpr(Expr *E);
llvm::Expected<llvm::orc::ExecutorAddr> CompileDtorCall(CXXRecordDecl *CXXRD);
- CodeGenerator *getCodeGen() const;
- std::unique_ptr<llvm::Module> GenModule();
+ CodeGenerator *getCodeGen(IncrementalAction *Action = nullptr) const;
+ std::unique_ptr<llvm::Module> GenModule(IncrementalAction *Action = nullptr);
PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU,
- std::unique_ptr<llvm::Module> M = {});
+ std::unique_ptr<llvm::Module> M = {},
+ IncrementalAction *Action = nullptr);
// A cache for the compiled destructors used to for de-allocation of managed
// clang::Values.
diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp
index 9db598230d014..6977d7fa674ab 100644
--- a/clang/lib/Interpreter/DeviceOffload.cpp
+++ b/clang/lib/Interpreter/DeviceOffload.cpp
@@ -51,47 +51,16 @@ IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
if (!PTU)
return PTU.takeError();
- // auto PTX = GeneratePTX();
- // if (!PTX)
- // return PTX.takeError();
-
- // auto Err = GenerateFatbinary();
- // if (Err)
- // return std::move(Err);
-
- // std::string FatbinFileName =
- // "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
- // VFS->addFile(FatbinFileName, 0,
- // llvm::MemoryBuffer::getMemBuffer(
- // llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
- // "", false));
-
- // CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
-
- // FatbinContent.clear();
-
return PTU;
}
llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {
- llvm::errs() << "[CUDA] Generating PTX. PTUs size: " << PTUs.size() << "\n";
- assert(!PTUs.empty() && "PTUs list is empty during PTX generation!");
auto &PTU = PTUs.back();
std::string Error;
- if (!PTU.TheModule) {
- llvm::errs() << "[CUDA] Error: PTU has no associated Module!\n";
- } else {
- llvm::errs() << "[CUDA] Module Triple: " << PTU.TheModule->getTargetTriple().str() << "\n";
- }
-
- llvm::errs() << ">>> PTU Module Target Triple: " << PTU.TheModule->getTargetTriple().str() << "\n";
- llvm::errs() << ">>> Using CPU: " << TargetOpts.CPU << "\n";
-
const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
PTU.TheModule->getTargetTriple(), Error);
if (!Target) {
- llvm::errs() << ">>> Failed to lookup target: " << Error << "\n";
return llvm::make_error<llvm::StringError>(std::move(Error),
std::error_code());
}
@@ -187,8 +156,9 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
FatbinContent.append(PTXCode.begin(), PTXCode.end());
- std::string FatbinFileName =
- "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
+ auto &PTU = PTUs.back();
+
+ std::string FatbinFileName = "/" + PTU.TheModule->getName().str() + ".fatbin";
VFS->addFile(FatbinFileName, 0,
llvm::MemoryBuffer::getMemBuffer(
@@ -202,18 +172,6 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {
return llvm::Error::success();
}
-// void IncrementalCUDADeviceParser::EmitFatbinaryToVFS(std::string &FatbinFileName) {
-// std::string FatbinFileName = "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
-
-// VFS->addFile(FatbinFileName, 0,
-// llvm::MemoryBuffer::getMemBuffer(
-// llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
-// "", false));
-
-// CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName;
-// FatbinContent.clear();
-// }
-
IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}
} // namespace clang
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index fbc6b7707f294..e0865c6e9dfdf 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -459,14 +459,28 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI,
if (Err)
return std::move(Err);
+ CompilerInstance &HostCI = *(Interp->getCompilerInstance());
+
if (DeviceCI) {
- DeviceCI->ExecuteAction(*Interp->Act);
+ Interp->DeviceAct = std::make_unique<IncrementalAction>(
+ *DeviceCI, *Interp->TSCtx->getContext(), Err, *Interp);
+
+ if (Err)
+ return std::move(Err);
+
+ DeviceCI->ExecuteAction(*Interp->DeviceAct);
+ // avoid writing fat binary to disk using an in-memory virtual file system
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
std::make_unique<llvm::vfs::InMemoryFileSystem>();
+ llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS =
+ std::make_unique<llvm::vfs::OverlayFileSystem>(
+ llvm::vfs::getRealFileSystem());
+ OverlayVFS->pushOverlay(IMVFS);
+ HostCI.createFileManager(OverlayVFS);
auto DeviceParser = std::make_unique<IncrementalCUDADeviceParser>(
- std::move(DeviceCI), *Interp->getCompilerInstance(), IMVFS, Err,
+ std::move(DeviceCI), HostCI, IMVFS, Err,
Interp->PTUs);
if (Err)
@@ -489,15 +503,6 @@ Interpreter::create(std::unique_ptr<CompilerInstance> CI,
llvm::Expected<std::unique_ptr<Interpreter>>
Interpreter::createWithCUDA(std::unique_ptr<CompilerInstance> CI,
std::unique_ptr<CompilerInstance> DCI) {
- // avoid writing fat binary to disk using an in-memory virtual file system
- llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> IMVFS =
- std::make_unique<llvm::vfs::InMemoryFileSystem>();
- llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayVFS =
- std::make_unique<llvm::vfs::OverlayFileSystem>(
- llvm::vfs::getRealFileSystem());
- OverlayVFS->pushOverlay(IMVFS);
- CI->createFileManager(OverlayVFS);
-
return Interpreter::create(std::move(CI), std::move(DCI));
}
@@ -536,15 +541,16 @@ size_t Interpreter::getEffectivePTUSize() const {
PartialTranslationUnit &
Interpreter::RegisterPTU(TranslationUnitDecl *TU,
- std::unique_ptr<llvm::Module> M /*={}*/) {
+ std::unique_ptr<llvm::Module> M /*={}*/,
+ IncrementalAction *Action) {
PTUs.emplace_back(PartialTranslationUnit());
PartialTranslationUnit &LastPTU = PTUs.back();
LastPTU.TUPart = TU;
if (!M)
- M = GenModule();
+ M = GenModule(Action);
- assert((!getCodeGen() || M) && "Must have a llvm::Module at this point");
+ assert((!getCodeGen(Action) || M) && "Must have a llvm::Module at this point");
LastPTU.TheModule = std::move(M);
LLVM_DEBUG(llvm::dbgs() << "compile-ptu " << PTUs.size() - 1
@@ -561,18 +567,14 @@ Interpreter::Parse(llvm::StringRef Code) {
// If we have a device parser, parse it first. The generated code will be
// included in the host compilation
if (DeviceParser) {
- llvm::errs() << "[CUDA] Parsing device code...\n";
llvm::Expected<TranslationUnitDecl *> DeviceTU = DeviceParser->Parse(Code);
if (auto E = DeviceTU.takeError()) {
- llvm::errs() << "[CUDA] Device Parse failed!\n";
return std::move(E);
}
- llvm::errs() << "[CUDA] Device parse successful.\n";
auto *CudaParser = llvm::cast<IncrementalCUDADeviceParser>(DeviceParser.get());
- llvm::errs() << "[CUDA] Registering device PTU...\n";
- PartialTranslationUnit &DevicePTU = RegisterPTU(*DeviceTU);
+ PartialTranslationUnit &DevicePTU = RegisterPTU(*DeviceTU, nullptr, DeviceAct.get());
llvm::Expected<llvm::StringRef> PTX = CudaParser->GeneratePTX();
if (!PTX)
@@ -757,9 +759,9 @@ llvm::Error Interpreter::LoadDynamicLibrary(const char *name) {
return llvm::Error::success();
}
-std::unique_ptr<llvm::Module> Interpreter::GenModule() {
+std::unique_ptr<llvm::Module> Interpreter::GenModule(IncrementalAction *Action) {
static unsigned ID = 0;
- if (CodeGenerator *CG = getCodeGen()) {
+ if (CodeGenerator *CG = getCodeGen(Action)) {
// Clang's CodeGen is designed to work with a single llvm::Module. In many
// cases for convenience various CodeGen parts have a reference to the
// llvm::Module (TheModule or Module) which does not change when a new
@@ -781,8 +783,10 @@ std::unique_ptr<llvm::Module> Interpreter::GenModule() {
return nullptr;
}
-CodeGenerator *Interpreter::getCodeGen() const {
- FrontendAction *WrappedAct = Act->getWrapped();
+CodeGenerator *Interpreter::getCodeGen(IncrementalAction *Action) const {
+ if (!Action)
+ Action = Act.get();
+ FrontendAction *WrappedAct = Action->getWrapped();
if (!WrappedAct->hasIRSupport())
return nullptr;
return static_cast<CodeGenAction *>(WrappedAct)->getCodeGenerator();
More information about the cfe-commits mailing list