[clang] [llvm] Thin3 (PR #108614)
Kyungwoo Lee via cfe-commits
cfe-commits at lists.llvm.org
Mon Oct 7 18:46:17 PDT 2024
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/108614
>From 1bbb7e5291bb59d95d8b308a90620a4d70e35152 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Fri, 13 Sep 2024 08:51:00 -0700
Subject: [PATCH 1/5] [CGData][ThinLTO] Global Outlining with Two-CodeGen
Rounds
---
llvm/include/llvm/CGData/CodeGenData.h | 16 +++
llvm/lib/CGData/CodeGenData.cpp | 81 +++++++++++++-
llvm/lib/LTO/CMakeLists.txt | 1 +
llvm/lib/LTO/LTO.cpp | 103 +++++++++++++++++-
llvm/lib/LTO/LTOBackend.cpp | 11 ++
.../test/ThinLTO/AArch64/cgdata-two-rounds.ll | 94 ++++++++++++++++
llvm/test/ThinLTO/AArch64/lit.local.cfg | 2 +
7 files changed, 302 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-two-rounds.ll
create mode 100644 llvm/test/ThinLTO/AArch64/lit.local.cfg
diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h
index 84133a433170fe..1e1afe99327650 100644
--- a/llvm/include/llvm/CGData/CodeGenData.h
+++ b/llvm/include/llvm/CGData/CodeGenData.h
@@ -164,6 +164,22 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
}
+/// Initialize the two-codegen rounds.
+void initializeTwoCodegenRounds();
+
+/// Save the current module before the first codegen round.
+void saveModuleForTwoRounds(const Module &TheModule, unsigned Task);
+
+/// Load the current module before the second codegen round.
+std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
+ unsigned Task,
+ LLVMContext &Context);
+
+/// Merge the codegen data from the input files held in the scratch vector
+/// during ThinLTO two-codegen rounds.
+Error mergeCodeGenData(
+ const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles);
+
void warn(Error E, StringRef Whence = "");
void warn(Twine Message, std::string Whence = "", std::string Hint = "");
diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp
index 55d2504231c744..ff8e5dd7c75790 100644
--- a/llvm/lib/CGData/CodeGenData.cpp
+++ b/llvm/lib/CGData/CodeGenData.cpp
@@ -17,6 +17,7 @@
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/WithColor.h"
#define DEBUG_TYPE "cg-data"
@@ -30,6 +31,14 @@ cl::opt<bool>
cl::opt<std::string>
CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden,
cl::desc("File path to where .cgdata file is read"));
+cl::opt<bool> CodeGenDataThinLTOTwoRounds(
+ "codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden,
+ cl::desc("Enable two-round ThinLTO code generation. The first round "
+ "emits codegen data, while the second round uses the emitted "
+ "codegen data for further optimizations."));
+
+// Path to where the optimized bitcodes are saved and restored for ThinLTO.
+static SmallString<128> CodeGenDataThinLTOTwoRoundsPath;
static std::string getCGDataErrString(cgdata_error Err,
const std::string &ErrMsg = "") {
@@ -139,7 +148,7 @@ CodeGenData &CodeGenData::getInstance() {
std::call_once(CodeGenData::OnceFlag, []() {
Instance = std::unique_ptr<CodeGenData>(new CodeGenData());
- if (CodeGenDataGenerate)
+ if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds)
Instance->EmitCGData = true;
else if (!CodeGenDataUsePath.empty()) {
// Initialize the global CGData if the input file name is given.
@@ -215,6 +224,76 @@ void warn(Error E, StringRef Whence) {
}
}
+static std::string getPath(StringRef Dir, unsigned Task) {
+ return (Dir + "/" + llvm::Twine(Task) + ".saved_copy.bc").str();
+}
+
+void initializeTwoCodegenRounds() {
+ assert(CodeGenDataThinLTOTwoRounds);
+ if (auto EC = llvm::sys::fs::createUniqueDirectory(
+ "cgdata", CodeGenDataThinLTOTwoRoundsPath))
+ report_fatal_error(Twine("Failed to create directory: ") + EC.message());
+}
+
+void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) {
+ assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
+ std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
+ std::error_code EC;
+ raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
+ if (EC)
+ report_fatal_error(Twine("Failed to open ") + Path +
+ " to save optimized bitcode: " + EC.message());
+ WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true);
+}
+
+std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
+ unsigned Task,
+ LLVMContext &Context) {
+ assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
+ std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
+ auto FileOrError = MemoryBuffer::getFile(Path);
+ if (auto EC = FileOrError.getError())
+ report_fatal_error(Twine("Failed to open ") + Path +
+ " to load optimized bitcode: " + EC.message());
+
+ std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
+ auto RestoredModule = llvm::parseBitcodeFile(*FileBuffer, Context);
+ if (!RestoredModule)
+ report_fatal_error(Twine("Failed to parse optimized bitcode loaded from ") +
+ Path + "\n");
+
+ // Restore the original module identifier.
+ (*RestoredModule)->setModuleIdentifier(OrigModule.getModuleIdentifier());
+ return std::move(*RestoredModule);
+}
+
+Error mergeCodeGenData(
+ const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles) {
+
+ OutlinedHashTreeRecord GlobalOutlineRecord;
+ for (auto &InputFile : *(InputFiles)) {
+ if (InputFile.empty())
+ continue;
+ StringRef File = StringRef(InputFile.data(), InputFile.size());
+ std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
+ File, "in-memory object file", /*RequiresNullTerminator=*/false);
+ Expected<std::unique_ptr<object::ObjectFile>> BinOrErr =
+ object::ObjectFile::createObjectFile(Buffer->getMemBufferRef());
+ if (!BinOrErr)
+ return BinOrErr.takeError();
+
+ std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get();
+ if (auto E = CodeGenDataReader::mergeFromObjectFile(Obj.get(),
+ GlobalOutlineRecord))
+ return E;
+ }
+
+ if (!GlobalOutlineRecord.empty())
+ cgdata::publishOutlinedHashTree(std::move(GlobalOutlineRecord.HashTree));
+
+ return Error::success();
+}
+
} // end namespace cgdata
} // end namespace llvm
diff --git a/llvm/lib/LTO/CMakeLists.txt b/llvm/lib/LTO/CMakeLists.txt
index 69ff08e1f374c4..057d73b6349cf1 100644
--- a/llvm/lib/LTO/CMakeLists.txt
+++ b/llvm/lib/LTO/CMakeLists.txt
@@ -21,6 +21,7 @@ add_llvm_component_library(LLVMLTO
BinaryFormat
BitReader
BitWriter
+ CGData
CodeGen
CodeGenTypes
Core
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index ccf1139c037353..53387372646b84 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/CGData/CodeGenData.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AutoUpgrade.h"
@@ -70,6 +71,8 @@ static cl::opt<bool>
DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(false), cl::Hidden,
cl::desc("Dump the SCCs in the ThinLTO index's callgraph"));
+extern cl::opt<bool> CodeGenDataThinLTOTwoRounds;
+
namespace llvm {
/// Enable global value internalization in LTO.
cl::opt<bool> EnableLTOInternalization(
@@ -1466,7 +1469,7 @@ class InProcessThinBackend : public ThinBackendProc {
GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
}
- Error runThinLTOBackendThread(
+ virtual Error runThinLTOBackendThread(
AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
@@ -1555,6 +1558,60 @@ class InProcessThinBackend : public ThinBackendProc {
return Error::success();
}
};
+
+/// This Backend will run ThinBackend process but throw away all the output from
+/// the codegen. This class facilitates the first codegen round.
+class NoOutputThinBackend : public InProcessThinBackend {
+public:
+ NoOutputThinBackend(
+ const Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ ThreadPoolStrategy ThinLTOParallelism,
+ const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ std::unique_ptr<std::vector<llvm::SmallString<0>>> Scratch)
+ : InProcessThinBackend(
+ Conf, CombinedIndex, ThinLTOParallelism, ModuleToDefinedGVSummaries,
+ // Allocate a scratch buffer for each task to write output to.
+ [Allocation = &*Scratch](unsigned Task, const Twine &ModuleName) {
+ return std::make_unique<CachedFileStream>(
+ std::make_unique<raw_svector_ostream>((*Allocation)[Task]));
+ },
+ FileCache(), nullptr, false, false),
+ Scratch(std::move(Scratch)) {}
+
+ /// Scratch space for writing output during the codegen.
+ std::unique_ptr<std::vector<llvm::SmallString<0>>> Scratch;
+};
+
+/// This Backend performs codegen on bitcode that was previously saved after
+/// going through optimization. This class facilitates the second codegen round.
+class OptimizedBitcodeThinBackend : public InProcessThinBackend {
+public:
+ OptimizedBitcodeThinBackend(
+ const Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ ThreadPoolStrategy ThinLTOParallelism,
+ const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ AddStreamFn AddStream)
+ : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism,
+ ModuleToDefinedGVSummaries, AddStream, FileCache(),
+ nullptr, false, false) {}
+
+ virtual Error runThinLTOBackendThread(
+ AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
+ ModuleSummaryIndex &CombinedIndex,
+ const FunctionImporter::ImportMapTy &ImportList,
+ const FunctionImporter::ExportSetTy &ExportList,
+ const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+ const GVSummaryMapTy &DefinedGlobals,
+ MapVector<StringRef, BitcodeModule> &ModuleMap) override {
+ LTOLLVMContext BackendContext(Conf);
+ std::unique_ptr<Module> LoadedModule =
+ cgdata::loadModuleForTwoRounds(BM, Task, BackendContext);
+
+ return thinBackend(Conf, Task, AddStream, *LoadedModule, CombinedIndex,
+ ImportList, DefinedGlobals, &ModuleMap,
+ /*CodeGenOnly=*/true);
+ }
+};
} // end anonymous namespace
ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
@@ -1895,10 +1952,46 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
return BackendProcess->wait();
};
- std::unique_ptr<ThinBackendProc> BackendProc =
- ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
- AddStream, Cache);
- return RunBackends(BackendProc.get());
+ if (!CodeGenDataThinLTOTwoRounds) {
+ std::unique_ptr<ThinBackendProc> BackendProc =
+ ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
+ AddStream, Cache);
+ return RunBackends(BackendProc.get());
+ }
+
+ // Perform two rounds of code generation for ThinLTO:
+ // 1. First round: Run optimization and code generation with a scratch output.
+ // 2. Merge codegen data extracted from the scratch output.
+ // 3. Second round: Run code generation again using the merged data.
+ LLVM_DEBUG(dbgs() << "Running ThinLTO two-codegen rounds\n");
+
+ // Initialize a temporary path to store and retrieve optimized IRs for
+ // two-round code generation.
+ cgdata::initializeTwoCodegenRounds();
+
+ // Create a scratch output to hold intermediate results.
+ auto Outputs =
+ std::make_unique<std::vector<llvm::SmallString<0>>>(getMaxTasks());
+ auto FirstRoundLTO = std::make_unique<NoOutputThinBackend>(
+ Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(),
+ ModuleToDefinedGVSummaries, std::move(Outputs));
+ // First round: Run optimization and code generation with a scratch output.
+ // Before code generation, serialize modules.
+ if (Error E = RunBackends(FirstRoundLTO.get()))
+ return E;
+
+ // Merge codegen data extracted from the scratch output.
+ if (Error E = cgdata::mergeCodeGenData(std::move(FirstRoundLTO->Scratch)))
+ return E;
+
+ // Second round: Run code generation by reading IRs.
+ std::unique_ptr<ThinBackendProc> SecondRoundLTO =
+ std::make_unique<OptimizedBitcodeThinBackend>(
+ Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(),
+ ModuleToDefinedGVSummaries, AddStream);
+ Error E = RunBackends(SecondRoundLTO.get());
+
+ return E;
}
Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks(
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 06eeed3e1bc41f..3e3b5b316d4125 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/CGData/CodeGenData.h"
#include "llvm/IR/LLVMRemarkStreamer.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
@@ -74,6 +75,8 @@ static cl::opt<bool> ThinLTOAssumeMerged(
cl::desc("Assume the input has already undergone ThinLTO function "
"importing and the other pre-optimization pipeline changes."));
+extern cl::opt<bool> CodeGenDataThinLTOTwoRounds;
+
namespace llvm {
extern cl::opt<bool> NoPGOWarnMismatch;
}
@@ -599,11 +602,19 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
auto OptimizeAndCodegen =
[&](Module &Mod, TargetMachine *TM,
std::unique_ptr<ToolOutputFile> DiagnosticOutputFile) {
+ // Perform optimization and code generation for ThinLTO.
if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
/*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
CmdArgs))
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+ // Save the current module before the first codegen round.
+ // Note that the second codegen round runs only `codegen()` without
+ // running `opt()`. We're not reaching here as it's bailed out earlier
+ // with CodeGenOnly which has been set in `OptimizedBitcodeThinBackend`.
+ if (CodeGenDataThinLTOTwoRounds)
+ cgdata::saveModuleForTwoRounds(Mod, Task);
+
codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
};
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-two-rounds.ll b/llvm/test/ThinLTO/AArch64/cgdata-two-rounds.ll
new file mode 100644
index 00000000000000..0e082cf4e55e54
--- /dev/null
+++ b/llvm/test/ThinLTO/AArch64/cgdata-two-rounds.ll
@@ -0,0 +1,94 @@
+; This test verifies whether we can outline a singleton instance (i.e., an instance that does not repeat)
+; by running two codegen rounds.
+
+; RUN: split-file %s %t
+
+; Verify that each outlining instance is a singleton with the global outlining for thinlto.
+; They will be identical, which can be folded by the linker with ICF.
+; RUN: opt -module-summary %t/thin-one.ll -o %t/thin-one.bc
+; RUN: opt -module-summary %t/thin-two.ll -o %t/thin-two.bc
+; RUN: llvm-lto2 run %t/thin-one.bc %t/thin-two.bc -o %t/thinlto \
+; RUN: -r %t/thin-one.bc,_f3,px -r %t/thin-one.bc,_g,x \
+; RUN: -r %t/thin-two.bc,_f1,px -r %t/thin-two.bc,_f2,px -r %t/thin-two.bc,_g,x \
+; RUN: -codegen-data-thinlto-two-rounds
+
+; thin-one.ll will have one outlining instance (matched in the global outlined hash tree)
+; RUN: llvm-objdump -d %t/thinlto.1 | FileCheck %s --check-prefix=THINLTO-1
+; THINLTO-1: _OUTLINED_FUNCTION{{.*}}>:
+; THINLTO-1-NEXT: mov
+; THINLTO-1-NEXT: mov
+; THINLTO-1-NEXT: b
+
+; thin-two.ll will have two outlining instances (matched in the global outlined hash tree)
+; RUN: llvm-objdump -d %t/thinlto.2 | FileCheck %s --check-prefix=THINLTO-2
+; THINLTO-2: _OUTLINED_FUNCTION{{.*}}>:
+; THINLTO-2-NEXT: mov
+; THINLTO-2-NEXT: mov
+; THINLTO-2-NEXT: b
+; THINLTO-2: _OUTLINED_FUNCTION{{.*}}>:
+; THINLTO-2-NEXT: mov
+; THINLTO-2-NEXT: mov
+; THINLTO-2-NEXT: b
+
+; Now add an lto module to the above thinlto modules.
+; Verify the lto module is optimized independently of the global outlining for thinlto.
+; RUN: opt %t/lto.ll -o %t/lto.bc
+; RUN: llvm-lto2 run %t/thin-one.bc %t/thin-two.bc %t/lto.bc -o %t/out \
+; RUN: -r %t/thin-one.bc,_f3,px -r %t/thin-one.bc,_g,x \
+; RUN: -r %t/thin-two.bc,_f1,px -r %t/thin-two.bc,_f2,px -r %t/thin-two.bc,_g,x \
+; RUN: -r %t/lto.bc,_f4,px -r %t/lto.bc,_f5,px -r %t/lto.bc,_f6,px -r %t/lto.bc,_g,x \
+; RUN: -codegen-data-thinlto-two-rounds
+
+; lto.ll will have one outlining instance within the lto module itself (no global outlining).
+; RUN: llvm-objdump -d %t/out.0 | FileCheck %s --check-prefix=LTO-0
+; LTO-0: _OUTLINED_FUNCTION{{.*}}>:
+; LTO-0-NEXT: mov
+; LTO-0-NEXT: b
+
+; thin-one.ll will have one outlining instance (matched in the global outlined hash tree)
+; RUN: llvm-objdump -d %t/out.1 | FileCheck %s --check-prefix=THINLTO-1
+
+; thin-two.ll will have two outlining instances (matched in the global outlined hash tree)
+; RUN: llvm-objdump -d %t/out.2 | FileCheck %s --check-prefix=THINLTO-2
+
+;--- thin-one.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-darwin"
+
+declare i32 @g(i32, i32, i32)
+define i32 @f3() minsize {
+ %1 = call i32 @g(i32 30, i32 1, i32 2);
+ ret i32 %1
+}
+
+;--- thin-two.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-darwin"
+
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
+
+;--- lto.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-darwin"
+
+declare i32 @g(i32, i32, i32)
+define i32 @f4() minsize {
+ %1 = call i32 @g(i32 10, i32 30, i32 2);
+ ret i32 %1
+}
+define i32 @f5() minsize {
+ %1 = call i32 @g(i32 20, i32 40, i32 2);
+ ret i32 %1
+}
+define i32 @f6() minsize {
+ %1 = call i32 @g(i32 50, i32 60, i32 2);
+ ret i32 %1
+}
diff --git a/llvm/test/ThinLTO/AArch64/lit.local.cfg b/llvm/test/ThinLTO/AArch64/lit.local.cfg
new file mode 100644
index 00000000000000..10d4a0e953ed47
--- /dev/null
+++ b/llvm/test/ThinLTO/AArch64/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "AArch64" in config.root.targets:
+ config.unsupported = True
>From f668e0e295afbe38f0dc2e240ec6980a8f4ef4e5 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Tue, 17 Sep 2024 18:07:49 -0700
Subject: [PATCH 2/5] Address comments from ellishg
---
llvm/include/llvm/CGData/CodeGenData.h | 7 ++++---
llvm/lib/CGData/CodeGenData.cpp | 4 +++-
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h
index 1e1afe99327650..72b52e6e9b8fd1 100644
--- a/llvm/include/llvm/CGData/CodeGenData.h
+++ b/llvm/include/llvm/CGData/CodeGenData.h
@@ -164,13 +164,14 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
}
-/// Initialize the two-codegen rounds.
void initializeTwoCodegenRounds();
-/// Save the current module before the first codegen round.
+/// Save \p TheModule before the first codegen round.
+/// \p Task represents the partition number in the parallel code generation
+/// process.
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task);
-/// Load the current module before the second codegen round.
+/// Load the optimized module before the second codegen round.
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
unsigned Task,
LLVMContext &Context);
diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp
index ff8e5dd7c75790..58b92b7262957a 100644
--- a/llvm/lib/CGData/CodeGenData.cpp
+++ b/llvm/lib/CGData/CodeGenData.cpp
@@ -225,7 +225,9 @@ void warn(Error E, StringRef Whence) {
}
static std::string getPath(StringRef Dir, unsigned Task) {
- return (Dir + "/" + llvm::Twine(Task) + ".saved_copy.bc").str();
+ llvm::SmallString<128> Path(Dir);
+ llvm::sys::path::append(Path, llvm::Twine(Task) + ".saved_copy.bc");
+ return std::string(Path);
}
void initializeTwoCodegenRounds() {
>From 87a5941256a69242d663836c158be997b0101576 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Tue, 17 Sep 2024 23:37:51 -0700
Subject: [PATCH 3/5] Address comments from NuriAmari
---
llvm/lib/CGData/CodeGenData.cpp | 4 ++--
llvm/lib/LTO/LTO.cpp | 33 +++++++++++++++++++++------------
llvm/lib/LTO/LTOBackend.cpp | 2 +-
3 files changed, 24 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp
index 58b92b7262957a..4e21045a67cba6 100644
--- a/llvm/lib/CGData/CodeGenData.cpp
+++ b/llvm/lib/CGData/CodeGenData.cpp
@@ -245,7 +245,7 @@ void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) {
if (EC)
report_fatal_error(Twine("Failed to open ") + Path +
" to save optimized bitcode: " + EC.message());
- WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true);
+ WriteBitcodeToFile(TheModule, OS, /*ShouldPreserveUseListOrder=*/true);
}
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
@@ -259,7 +259,7 @@ std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
" to load optimized bitcode: " + EC.message());
std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
- auto RestoredModule = llvm::parseBitcodeFile(*FileBuffer, Context);
+ auto RestoredModule = parseBitcodeFile(*FileBuffer, Context);
if (!RestoredModule)
report_fatal_error(Twine("Failed to parse optimized bitcode loaded from ") +
Path + "\n");
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 53387372646b84..a9fa291db25dfc 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1559,11 +1559,14 @@ class InProcessThinBackend : public ThinBackendProc {
}
};
-/// This Backend will run ThinBackend process but throw away all the output from
-/// the codegen. This class facilitates the first codegen round.
-class NoOutputThinBackend : public InProcessThinBackend {
+/// This backend is utilized in the first round of a two-codegen round process.
+/// It first saves optimized bitcode files to disk before the codegen process
+/// begins. After codegen, it stores the resulting object files in a scratch
+/// buffer. Note the codegen data stored in the scratch buffer will be extracted
+/// and merged in the subsequent step.
+class FirstRoundThinBackend : public InProcessThinBackend {
public:
- NoOutputThinBackend(
+ FirstRoundThinBackend(
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
ThreadPoolStrategy ThinLTOParallelism,
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
@@ -1575,25 +1578,31 @@ class NoOutputThinBackend : public InProcessThinBackend {
return std::make_unique<CachedFileStream>(
std::make_unique<raw_svector_ostream>((*Allocation)[Task]));
},
- FileCache(), nullptr, false, false),
+ FileCache(), /*OnWrite=*/nullptr, /*ShouldEmitIndexFiles=*/false,
+ /*ShouldEmitImportsFiles=*/false),
Scratch(std::move(Scratch)) {}
/// Scratch space for writing output during the codegen.
std::unique_ptr<std::vector<llvm::SmallString<0>>> Scratch;
};
-/// This Backend performs codegen on bitcode that was previously saved after
-/// going through optimization. This class facilitates the second codegen round.
-class OptimizedBitcodeThinBackend : public InProcessThinBackend {
+/// This backend operates in the second round of a two-codegen round process.
+/// It starts by reading the optimized bitcode files that were saved during the
+/// first round. The backend then executes the codegen only to further optimize
+/// the code, utilizing the codegen data merged from the first round. Finally,
+/// it writes the resulting object files as usual.
+class SecondRoundThinBackend : public InProcessThinBackend {
public:
- OptimizedBitcodeThinBackend(
+ SecondRoundThinBackend(
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
ThreadPoolStrategy ThinLTOParallelism,
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream)
: InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism,
ModuleToDefinedGVSummaries, AddStream, FileCache(),
- nullptr, false, false) {}
+ /*OnWrite=*/nullptr,
+ /*ShouldEmitIndexFiles=*/false,
+ /*ShouldEmitImportsFiles=*/false) {}
virtual Error runThinLTOBackendThread(
AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
@@ -1972,7 +1981,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
// Create a scratch output to hold intermediate results.
auto Outputs =
std::make_unique<std::vector<llvm::SmallString<0>>>(getMaxTasks());
- auto FirstRoundLTO = std::make_unique<NoOutputThinBackend>(
+ auto FirstRoundLTO = std::make_unique<FirstRoundThinBackend>(
Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(),
ModuleToDefinedGVSummaries, std::move(Outputs));
// First round: Run optimization and code generation with a scratch output.
@@ -1986,7 +1995,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
// Second round: Run code generation by reading IRs.
std::unique_ptr<ThinBackendProc> SecondRoundLTO =
- std::make_unique<OptimizedBitcodeThinBackend>(
+ std::make_unique<SecondRoundThinBackend>(
Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(),
ModuleToDefinedGVSummaries, AddStream);
Error E = RunBackends(SecondRoundLTO.get());
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 3e3b5b316d4125..b66989fe520b42 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -611,7 +611,7 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
// Save the current module before the first codegen round.
// Note that the second codegen round runs only `codegen()` without
// running `opt()`. We're not reaching here as it's bailed out earlier
- // with CodeGenOnly which has been set in `OptimizedBitcodeThinBackend`.
+ // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
if (CodeGenDataThinLTOTwoRounds)
cgdata::saveModuleForTwoRounds(Mod, Task);
>From 87da778eb4fcfaf481abcfbc6f52434d938103a5 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Sun, 29 Sep 2024 18:28:15 -0700
Subject: [PATCH 4/5] [NFC] Refactor ThinBackend
- Change it from a function to a type.
- Store the parallelism in the type for future use.
---
llvm/include/llvm/LTO/LTO.h | 76 ++++++++++++++++++++++++--
llvm/lib/LTO/LTO.cpp | 106 +++++++++++-------------------------
2 files changed, 104 insertions(+), 78 deletions(-)
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 5c47c4df7f6a38..a6b9ede2da54e1 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -26,6 +26,7 @@
#include "llvm/Support/Caching.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/thread.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
@@ -105,7 +106,6 @@ void updateMemProfAttributes(Module &Mod, const ModuleSummaryIndex &Index);
class LTO;
struct SymbolResolution;
-class ThinBackendProc;
/// An input file. This is a symbol table wrapper that only exposes the
/// information that an LTO client should need in order to do symbol resolution.
@@ -194,13 +194,80 @@ class InputFile {
}
};
+using IndexWriteCallback = std::function<void(const std::string &)>;
+
+/// This class defines the interface to the ThinLTO backend.
+class ThinBackendProc {
+protected:
+ const Config &Conf;
+ ModuleSummaryIndex &CombinedIndex;
+ const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries;
+ IndexWriteCallback OnWrite;
+ bool ShouldEmitImportsFiles;
+ DefaultThreadPool BackendThreadPool;
+ std::optional<Error> Err;
+ std::mutex ErrMu;
+
+public:
+ ThinBackendProc(
+ const Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles,
+ ThreadPoolStrategy ThinLTOParallelism)
+ : Conf(Conf), CombinedIndex(CombinedIndex),
+ ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries),
+ OnWrite(OnWrite), ShouldEmitImportsFiles(ShouldEmitImportsFiles),
+ BackendThreadPool(ThinLTOParallelism) {}
+
+ virtual ~ThinBackendProc() = default;
+ virtual Error start(
+ unsigned Task, BitcodeModule BM,
+ const FunctionImporter::ImportMapTy &ImportList,
+ const FunctionImporter::ExportSetTy &ExportList,
+ const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+ MapVector<StringRef, BitcodeModule> &ModuleMap) = 0;
+ Error wait() {
+ BackendThreadPool.wait();
+ if (Err)
+ return std::move(*Err);
+ return Error::success();
+ }
+ unsigned getThreadCount() { return BackendThreadPool.getMaxConcurrency(); }
+ virtual bool isSensitiveToInputOrder() { return false; }
+
+ // Write sharded indices and (optionally) imports to disk
+ Error emitFiles(const FunctionImporter::ImportMapTy &ImportList,
+ llvm::StringRef ModulePath,
+ const std::string &NewModulePath) const;
+};
+
/// A ThinBackend defines what happens after the thin-link phase during ThinLTO.
/// The details of this type definition aren't important; clients can only
/// create a ThinBackend using one of the create*ThinBackend() functions below.
-using ThinBackend = std::function<std::unique_ptr<ThinBackendProc>(
+using ThinBackendFunction = std::function<std::unique_ptr<ThinBackendProc>(
const Config &C, ModuleSummaryIndex &CombinedIndex,
- DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream, FileCache Cache)>;
+struct ThinBackend {
+ ThinBackend(ThinBackendFunction Func, ThreadPoolStrategy Parallelism)
+ : Func(std::move(Func)), Parallelism(std::move(Parallelism)) {}
+ ThinBackend() = default;
+
+ std::unique_ptr<ThinBackendProc> operator()(
+ const Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ AddStreamFn AddStream, FileCache Cache) {
+ assert(isValid() && "Invalid backend function");
+ return Func(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
+ std::move(AddStream), std::move(Cache));
+ }
+ ThreadPoolStrategy getParallelism() const { return Parallelism; }
+ bool isValid() const { return static_cast<bool>(Func); }
+
+private:
+ ThinBackendFunction Func = nullptr;
+ ThreadPoolStrategy Parallelism;
+};
/// This ThinBackend runs the individual backend jobs in-process.
/// The default value means to use one job per hardware core (not hyper-thread).
@@ -210,7 +277,6 @@ using ThinBackend = std::function<std::unique_ptr<ThinBackendProc>(
/// to the same path as the input module, with suffix ".thinlto.bc"
/// ShouldEmitImportsFiles is true it also writes a list of imported files to a
/// similar path with ".imports" appended instead.
-using IndexWriteCallback = std::function<void(const std::string &)>;
ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism,
IndexWriteCallback OnWrite = nullptr,
bool ShouldEmitIndexFiles = false,
@@ -276,7 +342,7 @@ class LTO {
/// this constructor.
/// FIXME: We do currently require the DiagHandler field to be set in Conf.
/// Until that is fixed, a Config argument is required.
- LTO(Config Conf, ThinBackend Backend = nullptr,
+ LTO(Config Conf, ThinBackend Backend = {},
unsigned ParallelCodeGenParallelismLevel = 1,
LTOKind LTOMode = LTOK_Default);
~LTO();
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index a9fa291db25dfc..978193815b4d5b 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -581,10 +581,10 @@ LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
CombinedModule->IsNewDbgInfoFormat = UseNewDbgInfoFormat;
}
-LTO::ThinLTOState::ThinLTOState(ThinBackend Backend)
- : Backend(Backend), CombinedIndex(/*HaveGVs*/ false) {
- if (!Backend)
- this->Backend =
+LTO::ThinLTOState::ThinLTOState(ThinBackend BackendParam)
+ : Backend(std::move(BackendParam)), CombinedIndex(/*HaveGVs*/ false) {
+ if (!Backend.isValid())
+ Backend =
createInProcessThinBackend(llvm::heavyweight_hardware_concurrency());
}
@@ -1371,75 +1371,33 @@ SmallVector<const char *> LTO::getRuntimeLibcallSymbols(const Triple &TT) {
return LibcallSymbols;
}
-/// This class defines the interface to the ThinLTO backend.
-class lto::ThinBackendProc {
-protected:
- const Config &Conf;
- ModuleSummaryIndex &CombinedIndex;
- const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries;
- lto::IndexWriteCallback OnWrite;
- bool ShouldEmitImportsFiles;
- DefaultThreadPool BackendThreadPool;
- std::optional<Error> Err;
- std::mutex ErrMu;
+Error ThinBackendProc::emitFiles(
+ const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath,
+ const std::string &NewModulePath) const {
+ ModuleToSummariesForIndexTy ModuleToSummariesForIndex;
+ GVSummaryPtrSet DeclarationSummaries;
-public:
- ThinBackendProc(
- const Config &Conf, ModuleSummaryIndex &CombinedIndex,
- const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles,
- ThreadPoolStrategy ThinLTOParallelism)
- : Conf(Conf), CombinedIndex(CombinedIndex),
- ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries),
- OnWrite(OnWrite), ShouldEmitImportsFiles(ShouldEmitImportsFiles),
- BackendThreadPool(ThinLTOParallelism) {}
-
- virtual ~ThinBackendProc() = default;
- virtual Error start(
- unsigned Task, BitcodeModule BM,
- const FunctionImporter::ImportMapTy &ImportList,
- const FunctionImporter::ExportSetTy &ExportList,
- const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
- MapVector<StringRef, BitcodeModule> &ModuleMap) = 0;
- Error wait() {
- BackendThreadPool.wait();
- if (Err)
- return std::move(*Err);
- return Error::success();
- }
- unsigned getThreadCount() { return BackendThreadPool.getMaxConcurrency(); }
- virtual bool isSensitiveToInputOrder() { return false; }
-
- // Write sharded indices and (optionally) imports to disk
- Error emitFiles(const FunctionImporter::ImportMapTy &ImportList,
- llvm::StringRef ModulePath,
- const std::string &NewModulePath) const {
- ModuleToSummariesForIndexTy ModuleToSummariesForIndex;
- GVSummaryPtrSet DeclarationSummaries;
-
- std::error_code EC;
- gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
- ImportList, ModuleToSummariesForIndex,
- DeclarationSummaries);
-
- raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC,
- sys::fs::OpenFlags::OF_None);
- if (EC)
- return createFileError("cannot open " + NewModulePath + ".thinlto.bc",
- EC);
-
- writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex,
- &DeclarationSummaries);
-
- if (ShouldEmitImportsFiles) {
- Error ImportFilesError = EmitImportsFiles(
- ModulePath, NewModulePath + ".imports", ModuleToSummariesForIndex);
- if (ImportFilesError)
- return ImportFilesError;
- }
- return Error::success();
+ std::error_code EC;
+ gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
+ ImportList, ModuleToSummariesForIndex,
+ DeclarationSummaries);
+
+ raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC,
+ sys::fs::OpenFlags::OF_None);
+ if (EC)
+ return createFileError("cannot open " + NewModulePath + ".thinlto.bc", EC);
+
+ writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex,
+ &DeclarationSummaries);
+
+ if (ShouldEmitImportsFiles) {
+ Error ImportFilesError = EmitImportsFiles(
+ ModulePath, NewModulePath + ".imports", ModuleToSummariesForIndex);
+ if (ImportFilesError)
+ return ImportFilesError;
}
-};
+ return Error::success();
+}
namespace {
class InProcessThinBackend : public ThinBackendProc {
@@ -1627,7 +1585,7 @@ ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
lto::IndexWriteCallback OnWrite,
bool ShouldEmitIndexFiles,
bool ShouldEmitImportsFiles) {
- return
+ auto Func =
[=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream, FileCache Cache) {
@@ -1636,6 +1594,7 @@ ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
AddStream, Cache, OnWrite, ShouldEmitIndexFiles,
ShouldEmitImportsFiles);
};
+ return ThinBackend(Func, Parallelism);
}
StringLiteral lto::getThinLTODefaultCPU(const Triple &TheTriple) {
@@ -1747,7 +1706,7 @@ ThinBackend lto::createWriteIndexesThinBackend(
std::string NewPrefix, std::string NativeObjectPrefix,
bool ShouldEmitImportsFiles, raw_fd_ostream *LinkedObjectsFile,
IndexWriteCallback OnWrite) {
- return
+ auto Func =
[=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream, FileCache Cache) {
@@ -1756,6 +1715,7 @@ ThinBackend lto::createWriteIndexesThinBackend(
OldPrefix, NewPrefix, NativeObjectPrefix, ShouldEmitImportsFiles,
LinkedObjectsFile, OnWrite);
};
+ return ThinBackend(Func, Parallelism);
}
Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
>From 0c029762566b3e6236fccadbe3e2800c0eaeca9a Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Sun, 29 Sep 2024 10:38:46 -0700
Subject: [PATCH 5/5] Address comments from teresajohnson
---
clang/lib/CodeGen/BackendUtil.cpp | 9 +-
llvm/include/llvm/CGData/CodeGenData.h | 52 ++++-
llvm/include/llvm/CGData/CodeGenDataReader.h | 5 +-
llvm/include/llvm/LTO/LTO.h | 6 +-
llvm/include/llvm/LTO/LTOBackend.h | 6 +-
llvm/lib/CGData/CMakeLists.txt | 2 +
llvm/lib/CGData/CodeGenData.cpp | 99 +++++----
llvm/lib/CGData/CodeGenDataReader.cpp | 7 +-
llvm/lib/LTO/LTO.cpp | 188 ++++++++++++++----
llvm/lib/LTO/LTOBackend.cpp | 7 +-
.../AArch64/cgdata-two-rounds-caching.ll | 173 ++++++++++++++++
11 files changed, 452 insertions(+), 102 deletions(-)
create mode 100644 llvm/test/ThinLTO/AArch64/cgdata-two-rounds-caching.ll
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index abc936f2c686dd..f018130807519d 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1321,10 +1321,11 @@ static void runThinLTOBackend(
Conf.CGFileType = getCodeGenFileType(Action);
break;
}
- if (Error E = thinBackend(
- Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
- ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
- /* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) {
+ if (Error E =
+ thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
+ ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
+ /*ModuleMap=*/nullptr, Conf.CodeGenOnly,
+ /*IRAddStream=*/nullptr, CGOpts.CmdArgs)) {
handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
errs() << "Error running ThinLTO backend: " << EIB.message() << '\n';
});
diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h
index 72b52e6e9b8fd1..e8e331f0189ac1 100644
--- a/llvm/include/llvm/CGData/CodeGenData.h
+++ b/llvm/include/llvm/CGData/CodeGenData.h
@@ -15,11 +15,13 @@
#define LLVM_CGDATA_CODEGENDATA_H
#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/StableHashing.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CGData/OutlinedHashTree.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Caching.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TargetParser/Triple.h"
#include <mutex>
@@ -164,22 +166,60 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
}
-void initializeTwoCodegenRounds();
+struct StreamCacheData {
+ /// Backing buffer for serialized data stream.
+ SmallVector<SmallString<0>> Outputs;
+ /// Callback function to add serialized data to the stream.
+ AddStreamFn AddStream;
+ /// Backing buffer for cached data.
+ SmallVector<std::unique_ptr<MemoryBuffer>> Files;
+ /// Cache mechanism for storing and retrieving data.
+ FileCache Cache;
+
+ StreamCacheData(unsigned Size) : Outputs(Size), Files(Size) {}
+ StreamCacheData() = delete;
+
+ /// Retrieve results from either the cache or the stream.
+ SmallVector<StringRef> getResult() {
+ unsigned NumOutputs = Outputs.size();
+ SmallVector<StringRef> Result(NumOutputs);
+ for (unsigned I = 0; I < NumOutputs; ++I)
+ if (Files[I])
+ Result[I] = Files[I]->getBuffer();
+ else
+ Result[I] = Outputs[I];
+ return Result;
+ }
+};
+
+/// Establish additional streams and caches for accessing object and IR files.
+/// \p OrigCache refers to the original cache used for accessing the final
+/// object files, which has already been configured and provided by the linker,
+/// if applicable. This cache will be utilized during the second round of the
+/// run. Additionally, we add two more caches at the same location for the first
+/// round of the run.
+void initializeTwoCodegenRounds(StreamCacheData &CG, StreamCacheData &IR,
+ const FileCache &OrigCache);
/// Save \p TheModule before the first codegen round.
/// \p Task represents the partition number in the parallel code generation
/// process.
-void saveModuleForTwoRounds(const Module &TheModule, unsigned Task);
+/// \p AddStream is the callback used to add the serialized module to the
+/// stream.
+void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
+ AddStreamFn AddStream);
/// Load the optimized module before the second codegen round.
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
unsigned Task,
- LLVMContext &Context);
+ LLVMContext &Context,
+ ArrayRef<StringRef> IRFiles);
/// Merge the codegen data from the input files in scratch vector in ThinLTO
-/// two-codegen rounds.
-Error mergeCodeGenData(
- const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles);
+/// two-codegen rounds. Optionally, \p CombinedHash can be used to compute
+/// the combined hash of the merged data.
+Error mergeCodeGenData(ArrayRef<StringRef> CGFiles,
+ stable_hash *CombinedHash = nullptr);
void warn(Error E, StringRef Whence = "");
void warn(Twine Message, std::string Whence = "", std::string Hint = "");
diff --git a/llvm/include/llvm/CGData/CodeGenDataReader.h b/llvm/include/llvm/CGData/CodeGenDataReader.h
index 1ee4bfbe480233..7e4882df2116e2 100644
--- a/llvm/include/llvm/CGData/CodeGenDataReader.h
+++ b/llvm/include/llvm/CGData/CodeGenDataReader.h
@@ -54,8 +54,11 @@ class CodeGenDataReader {
/// Extract the cgdata embedded in sections from the given object file and
/// merge them into the GlobalOutlineRecord. This is a static helper that
/// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds.
+ /// Optionally, \p CombinedHash can be used to compute the combined hash of
+ /// the merged data.
static Error mergeFromObjectFile(const object::ObjectFile *Obj,
- OutlinedHashTreeRecord &GlobalOutlineRecord);
+ OutlinedHashTreeRecord &GlobalOutlineRecord,
+ stable_hash *CombinedHash = nullptr);
protected:
/// The outlined hash tree that has been read. When it's released by
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index a6b9ede2da54e1..1a4f984f2f3a99 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -65,7 +65,8 @@ void thinLTOInternalizeAndPromoteInIndex(
isPrevailing);
/// Computes a unique hash for the Module considering the current list of
-/// export/import and other global analysis results.
+/// export/import and other global analysis results. Optionally, \p ExtraID
+/// can be used to add an extra identifier to the hash.
std::string computeLTOCacheKey(
const lto::Config &Conf, const ModuleSummaryIndex &Index,
StringRef ModuleID, const FunctionImporter::ImportMapTy &ImportList,
@@ -73,7 +74,8 @@ std::string computeLTOCacheKey(
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
const DenseSet<GlobalValue::GUID> &CfiFunctionDefs = {},
- const DenseSet<GlobalValue::GUID> &CfiFunctionDecls = {});
+ const DenseSet<GlobalValue::GUID> &CfiFunctionDecls = {},
+ StringRef ExtraID = {});
namespace lto {
diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h
index 098c0491dfe70a..2769e58f249053 100644
--- a/llvm/include/llvm/LTO/LTOBackend.h
+++ b/llvm/include/llvm/LTO/LTOBackend.h
@@ -51,13 +51,15 @@ Error backend(const Config &C, AddStreamFn AddStream,
/// are saved in the ModuleMap. If \p ModuleMap is nullptr, module files will
/// be mapped to memory on demand and at any given time during importing, only
/// one source module will be kept open at the most. If \p CodeGenOnly is true,
-/// the backend will skip optimization and only perform code generation.
+/// the backend will skip optimization and only perform code generation. If
+/// \p IRAddStream is not nullptr, it will be called just before code generation
+/// to serialize the optimized IR.
Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream,
Module &M, const ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const GVSummaryMapTy &DefinedGlobals,
MapVector<StringRef, BitcodeModule> *ModuleMap,
- bool CodeGenOnly,
+ bool CodeGenOnly, AddStreamFn IRAddStream = nullptr,
const std::vector<uint8_t> &CmdArgs = std::vector<uint8_t>());
Error finalizeOptimizationRemarks(
diff --git a/llvm/lib/CGData/CMakeLists.txt b/llvm/lib/CGData/CMakeLists.txt
index ff1aab920e7a8c..157b0dfb7f9fcf 100644
--- a/llvm/lib/CGData/CMakeLists.txt
+++ b/llvm/lib/CGData/CMakeLists.txt
@@ -12,6 +12,8 @@ add_llvm_component_library(LLVMCGData
intrinsics_gen
LINK_COMPONENTS
+ BitReader
+ BitWriter
Core
Support
Object
diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp
index 4e21045a67cba6..460f01aa3b1e98 100644
--- a/llvm/lib/CGData/CodeGenData.cpp
+++ b/llvm/lib/CGData/CodeGenData.cpp
@@ -15,6 +15,7 @@
#include "llvm/CGData/CodeGenDataReader.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Caching.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
@@ -37,9 +38,6 @@ cl::opt<bool> CodeGenDataThinLTOTwoRounds(
"emits codegen data, while the second round uses the emitted "
"codegen data for further optimizations."));
-// Path to where the optimized bitcodes are saved and restored for ThinLTO.
-static SmallString<128> CodeGenDataThinLTOTwoRoundsPath;
-
static std::string getCGDataErrString(cgdata_error Err,
const std::string &ErrMsg = "") {
std::string Msg;
@@ -224,59 +222,78 @@ void warn(Error E, StringRef Whence) {
}
}
-static std::string getPath(StringRef Dir, unsigned Task) {
- llvm::SmallString<128> Path(Dir);
- llvm::sys::path::append(Path, llvm::Twine(Task) + ".saved_copy.bc");
- return std::string(Path);
-}
-
-void initializeTwoCodegenRounds() {
+void initializeTwoCodegenRounds(StreamCacheData &CG, StreamCacheData &IR,
+ const FileCache &OrigCache) {
assert(CodeGenDataThinLTOTwoRounds);
- if (auto EC = llvm::sys::fs::createUniqueDirectory(
- "cgdata", CodeGenDataThinLTOTwoRoundsPath))
- report_fatal_error(Twine("Failed to create directory: ") + EC.message());
+ CG.AddStream = [&](size_t Task, const Twine &ModuleName) {
+ return std::make_unique<CachedFileStream>(
+ std::make_unique<raw_svector_ostream>(CG.Outputs[Task]));
+ };
+ IR.AddStream = [&](size_t Task, const Twine &ModuleName) {
+ return std::make_unique<CachedFileStream>(
+ std::make_unique<raw_svector_ostream>(IR.Outputs[Task]));
+ };
+
+ if (OrigCache.isValid()) {
+ auto CGCacheOrErr =
+ localCache("ThinLTO", "CG", OrigCache.getCacheDirectoryPath(),
+ [&](size_t Task, const Twine &ModuleName,
+ std::unique_ptr<MemoryBuffer> MB) {
+ CG.Files[Task] = std::move(MB);
+ });
+ if (Error Err = CGCacheOrErr.takeError())
+ report_fatal_error(std::move(Err));
+ CG.Cache = std::move(*CGCacheOrErr);
+ auto IRCacheOrErr =
+ localCache("ThinLTO", "IR", OrigCache.getCacheDirectoryPath(),
+ [&](size_t Task, const Twine &NoduleName,
+ std::unique_ptr<MemoryBuffer> MB) {
+ IR.Files[Task] = std::move(MB);
+ });
+ if (Error Err = IRCacheOrErr.takeError())
+ report_fatal_error(std::move(Err));
+ IR.Cache = std::move(*IRCacheOrErr);
+ }
}
-void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) {
- assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
- std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
- std::error_code EC;
- raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
- if (EC)
- report_fatal_error(Twine("Failed to open ") + Path +
- " to save optimized bitcode: " + EC.message());
- WriteBitcodeToFile(TheModule, OS, /*ShouldPreserveUseListOrder=*/true);
+void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
+ AddStreamFn AddStream) {
+ LLVM_DEBUG(dbgs() << "Saving module: " << TheModule.getModuleIdentifier()
+ << " in Task " << Task << "\n");
+ Expected<std::unique_ptr<CachedFileStream>> StreamOrErr =
+ AddStream(Task, TheModule.getModuleIdentifier());
+ if (Error Err = StreamOrErr.takeError())
+ report_fatal_error(std::move(Err));
+ std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr;
+
+ WriteBitcodeToFile(TheModule, *Stream->OS,
+ /*ShouldPreserveUseListOrder=*/true);
}
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
unsigned Task,
- LLVMContext &Context) {
- assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
- std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
- auto FileOrError = MemoryBuffer::getFile(Path);
- if (auto EC = FileOrError.getError())
- report_fatal_error(Twine("Failed to open ") + Path +
- " to load optimized bitcode: " + EC.message());
-
- std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
+ LLVMContext &Context,
+ ArrayRef<StringRef> IRFiles) {
+ LLVM_DEBUG(dbgs() << "Loading module: " << OrigModule.getModuleIdentifier()
+ << " in Task " << Task << "\n");
+ std::unique_ptr<MemoryBuffer> FileBuffer = MemoryBuffer::getMemBuffer(
+ IRFiles[Task], "in-memory IR file", /*RequiresNullTerminator=*/false);
auto RestoredModule = parseBitcodeFile(*FileBuffer, Context);
if (!RestoredModule)
- report_fatal_error(Twine("Failed to parse optimized bitcode loaded from ") +
- Path + "\n");
+ report_fatal_error(
+ Twine("Failed to parse optimized bitcode loaded for Task: ") +
+ Twine(Task) + "\n");
// Restore the original module identifier.
(*RestoredModule)->setModuleIdentifier(OrigModule.getModuleIdentifier());
return std::move(*RestoredModule);
}
-Error mergeCodeGenData(
- const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles) {
-
+Error mergeCodeGenData(ArrayRef<StringRef> CGFiles, stable_hash *CombinedHash) {
OutlinedHashTreeRecord GlobalOutlineRecord;
- for (auto &InputFile : *(InputFiles)) {
- if (InputFile.empty())
+ for (auto File : CGFiles) {
+ if (File.empty())
continue;
- StringRef File = StringRef(InputFile.data(), InputFile.size());
std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
File, "in-memory object file", /*RequiresNullTerminator=*/false);
Expected<std::unique_ptr<object::ObjectFile>> BinOrErr =
@@ -285,8 +302,8 @@ Error mergeCodeGenData(
return BinOrErr.takeError();
std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get();
- if (auto E = CodeGenDataReader::mergeFromObjectFile(Obj.get(),
- GlobalOutlineRecord))
+ if (auto E = CodeGenDataReader::mergeFromObjectFile(
+ Obj.get(), GlobalOutlineRecord, CombinedHash))
return E;
}
diff --git a/llvm/lib/CGData/CodeGenDataReader.cpp b/llvm/lib/CGData/CodeGenDataReader.cpp
index f7f3a8f42af7e1..2f2481ea60f822 100644
--- a/llvm/lib/CGData/CodeGenDataReader.cpp
+++ b/llvm/lib/CGData/CodeGenDataReader.cpp
@@ -31,8 +31,8 @@ setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
}
Error CodeGenDataReader::mergeFromObjectFile(
- const object::ObjectFile *Obj,
- OutlinedHashTreeRecord &GlobalOutlineRecord) {
+ const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
+ stable_hash *CombinedHash) {
Triple TT = Obj->makeTriple();
auto CGOutLineName =
getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
@@ -48,6 +48,9 @@ Error CodeGenDataReader::mergeFromObjectFile(
auto *EndData = Data + ContentsOrErr->size();
if (*NameOrErr == CGOutLineName) {
+ if (CombinedHash)
+ *CombinedHash =
+ stable_hash_combine(*CombinedHash, xxh3_64bits(*ContentsOrErr));
// In case dealing with an executable that has concatenated cgdata,
// we want to merge them into a single cgdata.
// Although it's not a typical workflow, we support this scenario.
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 978193815b4d5b..f90d0096f4ea0f 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -13,6 +13,7 @@
#include "llvm/LTO/LTO.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StableHashing.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -36,6 +37,7 @@
#include "llvm/Linker/IRMover.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Support/Caching.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
@@ -101,7 +103,7 @@ std::string llvm::computeLTOCacheKey(
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
const DenseSet<GlobalValue::GUID> &CfiFunctionDefs,
- const DenseSet<GlobalValue::GUID> &CfiFunctionDecls) {
+ const DenseSet<GlobalValue::GUID> &CfiFunctionDecls, StringRef ExtraID) {
// Compute the unique hash for this entry.
// This is based on the current compiler version, the module itself, the
// export list, the hash for every single module in the import list, the
@@ -341,6 +343,9 @@ std::string llvm::computeLTOCacheKey(
}
}
+ if (!ExtraID.empty())
+ AddString(ExtraID);
+
return toHex(Hasher.result());
}
@@ -1401,6 +1406,7 @@ Error ThinBackendProc::emitFiles(
namespace {
class InProcessThinBackend : public ThinBackendProc {
+protected:
AddStreamFn AddStream;
FileCache Cache;
DenseSet<GlobalValue::GUID> CfiFunctionDefs;
@@ -1523,25 +1529,89 @@ class InProcessThinBackend : public ThinBackendProc {
/// buffer. Note the codegen data stored in the scratch buffer will be extracted
/// and merged in the subsequent step.
class FirstRoundThinBackend : public InProcessThinBackend {
+ AddStreamFn IRAddStream;
+ FileCache IRCache;
+
public:
FirstRoundThinBackend(
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
ThreadPoolStrategy ThinLTOParallelism,
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- std::unique_ptr<std::vector<llvm::SmallString<0>>> Scratch)
- : InProcessThinBackend(
- Conf, CombinedIndex, ThinLTOParallelism, ModuleToDefinedGVSummaries,
- // Allocate a scratch buffer for each task to write output to.
- [Allocation = &*Scratch](unsigned Task, const Twine &ModuleName) {
- return std::make_unique<CachedFileStream>(
- std::make_unique<raw_svector_ostream>((*Allocation)[Task]));
- },
- FileCache(), /*OnWrite=*/nullptr, /*ShouldEmitIndexFiles=*/false,
- /*ShouldEmitImportsFiles=*/false),
- Scratch(std::move(Scratch)) {}
-
- /// Scratch space for writing output during the codegen.
- std::unique_ptr<std::vector<llvm::SmallString<0>>> Scratch;
+ AddStreamFn CGAddStream, FileCache CGCache, AddStreamFn IRAddStream,
+ FileCache IRCache)
+ : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism,
+ ModuleToDefinedGVSummaries, std::move(CGAddStream),
+ std::move(CGCache), /*OnWrite=*/nullptr,
+ /*ShouldEmitIndexFiles=*/false,
+ /*ShouldEmitImportsFiles=*/false),
+ IRAddStream(std::move(IRAddStream)), IRCache(std::move(IRCache)) {}
+
+ Error runThinLTOBackendThread(
+ AddStreamFn CGAddStream, FileCache CGCache, unsigned Task,
+ BitcodeModule BM, ModuleSummaryIndex &CombinedIndex,
+ const FunctionImporter::ImportMapTy &ImportList,
+ const FunctionImporter::ExportSetTy &ExportList,
+ const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+ const GVSummaryMapTy &DefinedGlobals,
+ MapVector<StringRef, BitcodeModule> &ModuleMap) override {
+ auto RunThinBackend = [&](AddStreamFn CGAddStream,
+ AddStreamFn IRAddStream) {
+ LTOLLVMContext BackendContext(Conf);
+ Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(BackendContext);
+ if (!MOrErr)
+ return MOrErr.takeError();
+
+ return thinBackend(Conf, Task, CGAddStream, **MOrErr, CombinedIndex,
+ ImportList, DefinedGlobals, &ModuleMap,
+ Conf.CodeGenOnly, IRAddStream);
+ };
+
+ auto ModuleID = BM.getModuleIdentifier();
+
+ if (ShouldEmitIndexFiles) {
+ if (auto E = emitFiles(ImportList, ModuleID, ModuleID.str()))
+ return E;
+ }
+
+ assert((CGCache.isValid() == IRCache.isValid()) &&
+ "Both caches for CG and IR should have matching availability");
+ if (!CGCache.isValid() || !CombinedIndex.modulePaths().count(ModuleID) ||
+ all_of(CombinedIndex.getModuleHash(ModuleID),
+ [](uint32_t V) { return V == 0; }))
+ // Cache disabled or no entry for this module in the combined index or
+ // no module hash.
+ return RunThinBackend(CGAddStream, IRAddStream);
+
+ // Get CGKey for caching object in CGCache.
+ std::string CGKey = computeLTOCacheKey(
+ Conf, CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR,
+ DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
+ Expected<AddStreamFn> CacheCGAddStreamOrErr =
+ CGCache(Task, CGKey, ModuleID);
+ if (Error Err = CacheCGAddStreamOrErr.takeError())
+ return Err;
+ AddStreamFn &CacheCGAddStream = *CacheCGAddStreamOrErr;
+
+ // Get IRKey for caching (optimized) IR in IRCache.
+ std::string IRKey = computeLTOCacheKey(
+ Conf, CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR,
+ DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls, /*ExtraID=*/"IR");
+ Expected<AddStreamFn> CacheIRAddStreamOrErr =
+ IRCache(Task, IRKey, ModuleID);
+ if (Error Err = CacheIRAddStreamOrErr.takeError())
+ return Err;
+ AddStreamFn &CacheIRAddStream = *CacheIRAddStreamOrErr;
+
+ assert((CacheCGAddStream == nullptr) == (CacheIRAddStream == nullptr) &&
+ "Both CG and IR caching should be matched");
+ if (CacheIRAddStream) {
+ LLVM_DEBUG(dbgs() << "[FirstRound] Cache Miss for "
+ << BM.getModuleIdentifier() << "\n");
+ return RunThinBackend(CacheCGAddStream, CacheIRAddStream);
+ }
+
+ return Error::success();
+ }
};
/// This backend operates in the second round of a two-codegen round process.
@@ -1550,17 +1620,23 @@ class FirstRoundThinBackend : public InProcessThinBackend {
/// the code, utilizing the codegen data merged from the first round. Finally,
/// it writes the resulting object files as usual.
class SecondRoundThinBackend : public InProcessThinBackend {
+ ArrayRef<StringRef> IRFiles;
+ stable_hash CombinedCGDataHash;
+
public:
SecondRoundThinBackend(
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
ThreadPoolStrategy ThinLTOParallelism,
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- AddStreamFn AddStream)
+ AddStreamFn AddStream, FileCache CGCache, ArrayRef<StringRef> IRFiles,
+ stable_hash CombinedCGDataHash)
: InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism,
- ModuleToDefinedGVSummaries, AddStream, FileCache(),
+ ModuleToDefinedGVSummaries, AddStream,
+ std::move(CGCache),
/*OnWrite=*/nullptr,
/*ShouldEmitIndexFiles=*/false,
- /*ShouldEmitImportsFiles=*/false) {}
+ /*ShouldEmitImportsFiles=*/false),
+ IRFiles(IRFiles), CombinedCGDataHash(CombinedCGDataHash) {}
virtual Error runThinLTOBackendThread(
AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
@@ -1570,13 +1646,42 @@ class SecondRoundThinBackend : public InProcessThinBackend {
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
MapVector<StringRef, BitcodeModule> &ModuleMap) override {
- LTOLLVMContext BackendContext(Conf);
- std::unique_ptr<Module> LoadedModule =
- cgdata::loadModuleForTwoRounds(BM, Task, BackendContext);
+ auto RunThinBackend = [&](AddStreamFn AddStream) {
+ LTOLLVMContext BackendContext(Conf);
+ std::unique_ptr<Module> LoadedModule =
+ cgdata::loadModuleForTwoRounds(BM, Task, BackendContext, IRFiles);
- return thinBackend(Conf, Task, AddStream, *LoadedModule, CombinedIndex,
- ImportList, DefinedGlobals, &ModuleMap,
- /*CodeGenOnly=*/true);
+ return thinBackend(Conf, Task, AddStream, *LoadedModule, CombinedIndex,
+ ImportList, DefinedGlobals, &ModuleMap,
+ /*CodeGenOnly=*/true);
+ };
+
+ auto ModuleID = BM.getModuleIdentifier();
+ if (!Cache.isValid() || !CombinedIndex.modulePaths().count(ModuleID) ||
+ all_of(CombinedIndex.getModuleHash(ModuleID),
+ [](uint32_t V) { return V == 0; }))
+ // Cache disabled or no entry for this module in the combined index or
+ // no module hash.
+ return RunThinBackend(AddStream);
+
+ // Get Key for caching the final object file in Cache with the combined
+ // CGData hash.
+ std::string Key = computeLTOCacheKey(
+ Conf, CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR,
+ DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls,
+ /*ExtraID=*/std::to_string(CombinedCGDataHash));
+ Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, Key, ModuleID);
+ if (Error Err = CacheAddStreamOrErr.takeError())
+ return Err;
+ AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
+
+ if (CacheAddStream) {
+ LLVM_DEBUG(dbgs() << "[SecondRound] Cache Miss for "
+ << BM.getModuleIdentifier() << "\n");
+ return RunThinBackend(CacheAddStream);
+ }
+
+ return Error::success();
}
};
} // end anonymous namespace
@@ -1932,32 +2037,33 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
// 1. First round: Run optimization and code generation with a scratch output.
// 2. Merge codegen data extracted from the scratch output.
// 3. Second round: Run code generation again using the merged data.
- LLVM_DEBUG(dbgs() << "Running ThinLTO two-codegen rounds\n");
+ LLVM_DEBUG(dbgs() << "[TwoRounds] Initializing ThinLTO two-codegen rounds\n");
- // Initialize a temporary path to store and retrieve optimized IRs for
- // two-round code generation.
- cgdata::initializeTwoCodegenRounds();
+ unsigned MaxTasks = getMaxTasks();
+ auto Parallelism = ThinLTO.Backend.getParallelism();
+ cgdata::StreamCacheData CG(MaxTasks), IR(MaxTasks);
+ cgdata::initializeTwoCodegenRounds(CG, IR, Cache);
- // Create a scratch output to hold intermediate results.
- auto Outputs =
- std::make_unique<std::vector<llvm::SmallString<0>>>(getMaxTasks());
- auto FirstRoundLTO = std::make_unique<FirstRoundThinBackend>(
- Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(),
- ModuleToDefinedGVSummaries, std::move(Outputs));
// First round: Run optimization and code generation with a scratch output.
- // Before code generation, serialize modules.
+ // Before code generation, serialize the optimized IR modules.
+ LLVM_DEBUG(dbgs() << "[TwoRounds] Running the first round of codegen\n");
+ auto FirstRoundLTO = std::make_unique<FirstRoundThinBackend>(
+ Conf, ThinLTO.CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
+ CG.AddStream, CG.Cache, IR.AddStream, IR.Cache);
if (Error E = RunBackends(FirstRoundLTO.get()))
return E;
- // Merge codegen data extracted from the scratch output.
- if (Error E = cgdata::mergeCodeGenData(std::move(FirstRoundLTO->Scratch)))
+ LLVM_DEBUG(dbgs() << "[TwoRounds] Merging codegen data\n");
+ stable_hash CombinedHash = 0;
+ if (Error E = cgdata::mergeCodeGenData(CG.getResult(), &CombinedHash))
return E;
+ LLVM_DEBUG(dbgs() << "[TwoRounds] CGData hash: " << CombinedHash << "\n");
// Second round: Run code generation by reading IRs.
- std::unique_ptr<ThinBackendProc> SecondRoundLTO =
- std::make_unique<SecondRoundThinBackend>(
- Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(),
- ModuleToDefinedGVSummaries, AddStream);
+ LLVM_DEBUG(dbgs() << "[TwoRounds] Running the second round of codegen\n");
+ auto SecondRoundLTO = std::make_unique<SecondRoundThinBackend>(
+ Conf, ThinLTO.CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
+ AddStream, Cache, IR.getResult(), CombinedHash);
Error E = RunBackends(SecondRoundLTO.get());
return E;
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index b66989fe520b42..fd2e9c9169514c 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -568,7 +568,8 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
const FunctionImporter::ImportMapTy &ImportList,
const GVSummaryMapTy &DefinedGlobals,
MapVector<StringRef, BitcodeModule> *ModuleMap,
- bool CodeGenOnly, const std::vector<uint8_t> &CmdArgs) {
+ bool CodeGenOnly, AddStreamFn IRAddStream,
+ const std::vector<uint8_t> &CmdArgs) {
Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod);
if (!TOrErr)
return TOrErr.takeError();
@@ -612,8 +613,8 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
// Note that the second codegen round runs only `codegen()` without
// running `opt()`. We're not reaching here as it's bailed out earlier
// with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
- if (CodeGenDataThinLTOTwoRounds)
- cgdata::saveModuleForTwoRounds(Mod, Task);
+ if (IRAddStream)
+ cgdata::saveModuleForTwoRounds(Mod, Task, IRAddStream);
codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
diff --git a/llvm/test/ThinLTO/AArch64/cgdata-two-rounds-caching.ll b/llvm/test/ThinLTO/AArch64/cgdata-two-rounds-caching.ll
new file mode 100644
index 00000000000000..61131ad6d3887f
--- /dev/null
+++ b/llvm/test/ThinLTO/AArch64/cgdata-two-rounds-caching.ll
@@ -0,0 +1,173 @@
+; This test verifies that we can outline a singleton instance (i.e., an instance that does not repeat)
+; by running two codegen rounds.
+; It also verifies that the caches for the two codegen rounds work correctly.
+
+; REQUIRES: asserts
+; RUN: rm -rf %t
+; RUN: split-file %s %t
+
+; 0. Base case without a cache.
+; Verify that each singleton instance is outlined by the global outlining for ThinLTO.
+; The outlined functions are identical, so the linker can fold them with ICF.
+; RUN: opt -module-hash -module-summary %t/thin-one.ll -o %t/thin-one.bc
+; RUN: opt -module-hash -module-summary %t/thin-two.ll -o %t/thin-two.bc
+; RUN: llvm-lto2 run %t/thin-one.bc %t/thin-two.bc -o %t/thinlto \
+; RUN: -r %t/thin-one.bc,_f3,px -r %t/thin-one.bc,_g,x \
+; RUN: -r %t/thin-two.bc,_f1,px -r %t/thin-two.bc,_f2,px -r %t/thin-two.bc,_g,x \
+; RUN: -codegen-data-thinlto-two-rounds
+
+; thin-one.ll will have one outlining instance (matched in the global outlined hash tree)
+; RUN: llvm-objdump -d %t/thinlto.1 | FileCheck %s --check-prefix=THINLTO-1
+; THINLTO-1: _OUTLINED_FUNCTION{{.*}}>:
+; THINLTO-1-NEXT: mov
+; THINLTO-1-NEXT: mov
+; THINLTO-1-NEXT: b
+
+; thin-two.ll will have two outlining instances (matched in the global outlined hash tree)
+; RUN: llvm-objdump -d %t/thinlto.2 | FileCheck %s --check-prefix=THINLTO-2
+; THINLTO-2: _OUTLINED_FUNCTION{{.*}}>:
+; THINLTO-2-NEXT: mov
+; THINLTO-2-NEXT: mov
+; THINLTO-2-NEXT: b
+; THINLTO-2: _OUTLINED_FUNCTION{{.*}}>:
+; THINLTO-2-NEXT: mov
+; THINLTO-2-NEXT: mov
+; THINLTO-2-NEXT: b
+
+; 1. Run this with a cache for the first time.
+; RUN: rm -rf %t.cache
+; RUN: llvm-lto2 run %t/thin-one.bc %t/thin-two.bc -o %t/thinlto-cold \
+; RUN: -r %t/thin-one.bc,_f3,px -r %t/thin-one.bc,_g,x \
+; RUN: -r %t/thin-two.bc,_f1,px -r %t/thin-two.bc,_f2,px -r %t/thin-two.bc,_g,x \
+; RUN: -codegen-data-thinlto-two-rounds -cache-dir %t.cache -debug-only=lto -thinlto-threads 1 > %t.log-cold.txt 2>&1
+; RUN: cat %t.log-cold.txt | FileCheck %s --check-prefix=COLD
+; diff %t/thinlto.1 %t/thinlto-cold.1
+; diff %t/thinlto.2 %t/thinlto-cold.2
+
+; COLD: [FirstRound] Cache Miss for {{.*}}thin-one.bc
+; COLD: [FirstRound] Cache Miss for {{.*}}thin-two.bc
+; COLD: [SecondRound] Cache Miss for {{.*}}thin-one.bc
+; COLD: [SecondRound] Cache Miss for {{.*}}thin-two.bc
+
+; 2. Without any changes, simply re-running it will hit the cache.
+; RUN: llvm-lto2 run %t/thin-one.bc %t/thin-two.bc -o %t/thinlto-warm \
+; RUN: -r %t/thin-one.bc,_f3,px -r %t/thin-one.bc,_g,x \
+; RUN: -r %t/thin-two.bc,_f1,px -r %t/thin-two.bc,_f2,px -r %t/thin-two.bc,_g,x \
+; RUN: -codegen-data-thinlto-two-rounds -cache-dir %t.cache -debug-only=lto -thinlto-threads 1 > %t.log-warm.txt 2>&1
+; RUN: cat %t.log-warm.txt | FileCheck %s --check-prefix=WARM
+; diff %t/thinlto.1 %t/thinlto-warm.1
+; diff %t/thinlto.2 %t/thinlto-warm.2
+
+; WARM-NOT: Cache Miss
+
+; 3. Assume thin-one.ll is modified to match thin-one-modified.ll.
+; The merged CG data remains unchanged because this modification does not affect the hash tree built from thin-two.bc.
+; Therefore, both the first and second rounds re-run codegen only for this module.
+; RUN: opt -module-hash -module-summary %t/thin-one-modified.ll -o %t/thin-one.bc
+; RUN: llvm-lto2 run %t/thin-one.bc %t/thin-two.bc -o %t/thinlto-warm-modified \
+; RUN: -r %t/thin-one.bc,_f3,px -r %t/thin-one.bc,_g,x \
+; RUN: -r %t/thin-two.bc,_f1,px -r %t/thin-two.bc,_f2,px -r %t/thin-two.bc,_g,x \
+; RUN: -codegen-data-thinlto-two-rounds -cache-dir %t.cache -debug-only=lto -thinlto-threads 1 > %t.log-warm-modified.txt 2>&1
+; RUN: cat %t.log-warm-modified.txt | FileCheck %s --check-prefix=WARM-MODIFIED
+; diff %t/thinlto.1 %t/thinlto-warm-modified.1
+; diff %t/thinlto.2 %t/thinlto-warm-modified.2
+
+; WARM-MODIFIED: [FirstRound] Cache Miss for {{.*}}thin-one.bc
+; WARM-MODIFIED-NOT: [FirstRound] Cache Miss for {{.*}}thin-two.bc
+; WARM-MODIFIED: [SecondRound] Cache Miss for {{.*}}thin-one.bc
+; WARM-MODIFIED-NOT: [SecondRound] Cache Miss for {{.*}}thin-two.bc
+
+; 4. Additionally, thin-two.ll is modified to match thin-two-modified.ll.
+; In this case, the merged CG data, which is global, changes.
+; Although the first round re-runs codegen only for the thin-two.ll module, the second round
+; re-runs it for all modules, resulting in different binaries.
+; RUN: opt -module-hash -module-summary %t/thin-one-modified.ll -o %t/thin-one.bc
+; RUN: opt -module-hash -module-summary %t/thin-two-modified.ll -o %t/thin-two.bc
+; RUN: llvm-lto2 run %t/thin-one.bc %t/thin-two.bc -o %t/thinlto-warm-modified-all \
+; RUN: -r %t/thin-one.bc,_f3,px -r %t/thin-one.bc,_g,x \
+; RUN: -r %t/thin-two.bc,_f1,px -r %t/thin-two.bc,_f2,px -r %t/thin-two.bc,_g,x \
+; RUN: -codegen-data-thinlto-two-rounds -cache-dir %t.cache -debug-only=lto -thinlto-threads 1 > %t.log-warm-modified-all.txt 2>&1
+; RUN: cat %t.log-warm-modified-all.txt | FileCheck %s --check-prefix=WARM-MODIFIED-ALL
+; RUN: not diff %t/thinlto.1 %t/thinlto-warm-modified-all.1
+; RUN: not diff %t/thinlto.2 %t/thinlto-warm-modified-all.2
+
+; WARM-MODIFIED-ALL-NOT: [FirstRound] Cache Miss for {{.*}}thin-one.bc
+; WARM-MODIFIED-ALL: [FirstRound] Cache Miss for {{.*}}thin-two.bc
+; WARM-MODIFIED-ALL: [SecondRound] Cache Miss for {{.*}}thin-one.bc
+; WARM-MODIFIED-ALL: [SecondRound] Cache Miss for {{.*}}thin-two.bc
+
+; thin-one-modified.ll won't be outlined.
+; RUN: llvm-objdump -d %t/thinlto-warm-modified-all.1 | FileCheck %s --check-prefix=THINLTO-1-MODIFIED-ALL
+; THINLTO-1-MODIFIED-ALL-NOT: _OUTLINED_FUNCTION{{.*}}>:
+
+; thin-two-modified.ll will have two (longer) outlining instances (matched in the global outlined hash tree)
+; RUN: llvm-objdump -d %t/thinlto-warm-modified-all.2| FileCheck %s --check-prefix=THINLTO-2-MODIFIED-ALL
+; THINLTO-2-MODIFIED-ALL: _OUTLINED_FUNCTION{{.*}}>:
+; THINLTO-2-MODIFIED-ALL: mov
+; THINLTO-2-MODIFIED-ALL: mov
+; THINLTO-2-MODIFIED-ALL: mov
+; THINLTO-2-MODIFIED-ALL: b
+; THINLTO-2-MODIFIED-ALL: _OUTLINED_FUNCTION{{.*}}>:
+; THINLTO-2-MODIFIED-ALL: mov
+; THINLTO-2-MODIFIED-ALL: mov
+; THINLTO-2-MODIFIED-ALL: mov
+; THINLTO-2-MODIFIED-ALL: b
+
+; 5. Re-running it will hit the cache.
+; RUN: llvm-lto2 run %t/thin-one.bc %t/thin-two.bc -o %t/thinlto-warm-again \
+; RUN: -r %t/thin-one.bc,_f3,px -r %t/thin-one.bc,_g,x \
+; RUN: -r %t/thin-two.bc,_f1,px -r %t/thin-two.bc,_f2,px -r %t/thin-two.bc,_g,x \
+; RUN: -codegen-data-thinlto-two-rounds -cache-dir %t.cache -debug-only=lto -thinlto-threads 1 > %t.log-warm-again.txt 2>&1
+; RUN: cat %t.log-warm-again.txt | FileCheck %s --check-prefix=WARM-AGAIN
+; RUN: diff %t/thinlto-warm-modified-all.1 %t/thinlto-warm-again.1
+; RUN: diff %t/thinlto-warm-modified-all.2 %t/thinlto-warm-again.2
+
+; WARM-AGAIN-NOT: Cache Miss
+
+;--- thin-one.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-darwin"
+
+declare i32 @g(i32, i32, i32)
+define i32 @f3() minsize {
+ %1 = call i32 @g(i32 30, i32 1, i32 2);
+ ret i32 %1
+}
+
+;--- thin-one-modified.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-darwin"
+
+declare i32 @g(i32, i32, i32)
+define i32 @f3() minsize {
+ %1 = call i32 @g(i32 31, i32 1, i32 2);
+ ret i32 %1
+}
+
+;--- thin-two.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-darwin"
+
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
+
+;--- thin-two-modified.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-darwin"
+
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
More information about the cfe-commits
mailing list