[llvm] [ThinLTO] Allow importing based on a workload definition (PR #74545)
Mircea Trofin via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 12 21:31:57 PST 2023
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/74545
>From 7e256dc28b3aa5f666ea2fa3c14dee2818f014e6 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Tue, 5 Dec 2023 16:58:48 -0800
Subject: [PATCH 1/8] [ThinLTO] Allow importing based on a workload definition
An example of a "workload definition" would be "the transitive closure
of functions actually called to satisfy a RPC request", i.e. a
(typically significantly) smaller subset of the transitive
closure (static + possible indirect call targets) of callees. This means
this workload definition is a type of flat dynamic profile.
Producing one is not in scope - it can be produced offline from traces,
or from sample-based profiles, etc.
This patch adds awareness to ThinLTO of such a concept. A workload is
defined as a root and a list of functions. All function references are
by-name (more readable than GUIDs). In the case of aliases, the
expectation is the list contains all the alternative names.
The workload definitions are presented to the linker as a json file,
containing a dictionary. The keys are the roots, the values are the list
of functions.
The import list for a module defining a root will be the functions
listed for it in the profile.
Using names this way assumes unique names for internal functions, i.e.
clang's `-funique-internal-linkage-names`.
Note that the behavior affects the entire module where a root is defined
(i.e. different workloads best be defined in different modules), and
does not affect modules that don't define roots.
---
llvm/lib/Transforms/IPO/FunctionImport.cpp | 233 ++++++++++++++++++++-
llvm/test/ThinLTO/X86/Inputs/workload1.ll | 25 +++
llvm/test/ThinLTO/X86/Inputs/workload2.ll | 22 ++
llvm/test/ThinLTO/X86/Inputs/workload3.ll | 9 +
llvm/test/ThinLTO/X86/workload.ll | 63 ++++++
5 files changed, 342 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/ThinLTO/X86/Inputs/workload1.ll
create mode 100644 llvm/test/ThinLTO/X86/Inputs/workload2.ll
create mode 100644 llvm/test/ThinLTO/X86/Inputs/workload3.ll
create mode 100644 llvm/test/ThinLTO/X86/workload.ll
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 9c546b531dff49..bcf141b42be16f 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -37,6 +37,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/JSON.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/Internalize.h"
@@ -138,6 +139,9 @@ static cl::opt<bool>
ImportAllIndex("import-all-index",
cl::desc("Import all external functions in index."));
+static cl::opt<std::string> WorkloadDefinitions("thinlto-workload-def",
+ cl::Hidden);
+
// Load lazily a module from \p FileName in \p Context.
static std::unique_ptr<Module> loadFile(const std::string &FileName,
LLVMContext &Context) {
@@ -369,14 +373,16 @@ class GlobalsImporter final {
}
};
+static const char *getFailureName(FunctionImporter::ImportFailureReason Reason);
+
/// Determine the list of imports and exports for each module.
-class ModuleImportsManager final {
+class ModuleImportsManager {
+protected:
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
IsPrevailing;
const ModuleSummaryIndex &Index;
DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
-public:
ModuleImportsManager(
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
IsPrevailing,
@@ -384,14 +390,221 @@ class ModuleImportsManager final {
DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = nullptr)
: IsPrevailing(IsPrevailing), Index(Index), ExportLists(ExportLists) {}
+public:
+ virtual ~ModuleImportsManager() = default;
+
/// Given the list of globals defined in a module, compute the list of imports
/// as well as the list of "exports", i.e. the list of symbols referenced from
/// another module (that may require promotion).
- void computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
- StringRef ModName,
- FunctionImporter::ImportMapTy &ImportList);
+ virtual void
+ computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
+ StringRef ModName,
+ FunctionImporter::ImportMapTy &ImportList);
+
+ static std::unique_ptr<ModuleImportsManager>
+ create(function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing,
+ const ModuleSummaryIndex &Index,
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists =
+ nullptr);
};
+class WorkloadImportsManager : public ModuleImportsManager {
+ // Keep a module name -> defined value infos association. We use it to
+ // determine if a module's import list should be done by the base
+ // ModuleImportsManager or by us.
+ StringMap<DenseSet<ValueInfo>> Workloads;
+
+ void
+ computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
+ StringRef ModName,
+ FunctionImporter::ImportMapTy &ImportList) override {
+ auto SetIter = Workloads.find(ModName);
+ if (SetIter == Workloads.end()) {
+ LLVM_DEBUG(dbgs() << "[Workload] " << ModName
+ << " does not contain the root of any context.\n");
+ return ModuleImportsManager::computeImportForModule(DefinedGVSummaries,
+ ModName, ImportList);
+ }
+ LLVM_DEBUG(dbgs() << "[Workload] " << ModName
+ << " contains the root(s) of context(s).\n");
+
+ GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
+ ExportLists);
+ auto &ValueInfos = SetIter->second;
+ SmallVector<EdgeInfo, 128> GlobWorklist;
+ for (auto &VI : llvm::make_early_inc_range(ValueInfos)) {
+ auto Candidates =
+ qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName);
+
+ const GlobalValueSummary *GVS = nullptr;
+ FunctionImporter::ImportFailureReason LastReason =
+ FunctionImporter::ImportFailureReason::None;
+ for (const auto &Candidate : Candidates) {
+ LastReason = Candidate.first;
+ if (Candidate.first == FunctionImporter::ImportFailureReason::None) {
+ const bool Prevailing = IsPrevailing(VI.getGUID(), Candidate.second);
+ if (Prevailing || !GVS) {
+ if (!GVS && !Prevailing)
+ LLVM_DEBUG(dbgs()
+ << "[Workload] Considering " << VI.name() << " from "
+ << Candidate.second->modulePath() << " with linkage "
+ << Candidate.second->linkage()
+ << " although it's not prevailing, but it's the "
+ "first available candidate.\n");
+ GVS = Candidate.second;
+ if (Prevailing) {
+ LLVM_DEBUG(dbgs()
+ << "[Workload] Considering " << VI.name() << " from "
+ << GVS->modulePath() << " with linkage "
+ << GVS->linkage() << " because it's prevailing.\n");
+ break;
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << "[Workload] Skipping " << VI.name() << " from "
+ << Candidate.second->modulePath()
+ << " with linkage " << Candidate.second->linkage()
+ << " because it's not prevailing\n");
+ }
+ }
+ }
+ if (!GVS) {
+ LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
+ << " because can't select Callee. Guid is: "
+ << Function::getGUID(VI.name())
+ << ". The reason was: " << getFailureName(LastReason)
+ << "\n");
+ continue;
+ }
+ const auto *CFS = cast<FunctionSummary>(GVS->getBaseObject());
+ auto ExportingModule = CFS->modulePath();
+ if (ExportingModule == ModName) {
+ LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
+ << " because its defining module is the same as the "
+ "current module\n");
+ continue;
+ }
+ if (!shouldImport(DefinedGVSummaries, VI.getGUID(), CFS)) {
+ LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
+ << " because we have a local copy.\n");
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
+ << ExportingModule << " : "
+ << Function::getGUID(VI.name()) << "\n");
+ ImportList[ExportingModule].insert(VI.getGUID());
+ GVI.onImportingSummary(*GVS);
+ if (ExportLists)
+ (*ExportLists)[ExportingModule].insert(VI);
+ }
+ LLVM_DEBUG(dbgs() << "[Workload] Done\n");
+ }
+
+ bool shouldImport(const GVSummaryMapTy &DefinedGVSummaries,
+ Function::GUID Guid, const GlobalValueSummary *Candidate) {
+ auto DefinedSummary = DefinedGVSummaries.find(Guid);
+ if (DefinedSummary == DefinedGVSummaries.end())
+ return true;
+
+ // See shouldImportGlobal for the justificaton of the isInterposableLinkage.
+ if (!IsPrevailing(Guid, DefinedSummary->second) &&
+ GlobalValue::isInterposableLinkage(DefinedSummary->second->linkage()) &&
+ IsPrevailing(Guid, Candidate)) {
+ LLVM_DEBUG(dbgs() << "[Workload] " << Guid
+ << ": local non-prevailing in module. Importing from "
+ << Candidate->modulePath() << "\n");
+ return true;
+ }
+ LLVM_DEBUG(dbgs() << "[Workload] " << Guid
+ << ": ignored! Target already in destination module.\n");
+ return false;
+ }
+
+public:
+ WorkloadImportsManager(
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing,
+ const ModuleSummaryIndex &Index,
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
+ : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
+ StringMap<ValueInfo> CtxGuidToValueInfo;
+ for (auto &I : Index) {
+ ValueInfo VI(Index.haveGVs(), &I);
+ CtxGuidToValueInfo[VI.name()] = VI;
+ }
+ std::error_code EC;
+ auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions);
+ if (std::error_code EC = BufferOrErr.getError()) {
+ report_fatal_error("Failed to open context file");
+ return;
+ }
+ auto Buffer = std::move(BufferOrErr.get());
+ std::map<std::string, std::vector<std::string>> WorkloadDefs;
+ json::Path::Root NullRoot;
+ auto Parsed = json::parse(Buffer->getBuffer());
+ if (!Parsed)
+ report_fatal_error(Parsed.takeError());
+ if (!json::fromJSON(*Parsed, WorkloadDefs, NullRoot))
+ report_fatal_error("Invalid thinlto contextual profile format.");
+ for (const auto &Workload : WorkloadDefs) {
+ const auto &Root = Workload.first;
+ LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n");
+ const auto &AllCallees = Workload.second;
+ auto RootIt = CtxGuidToValueInfo.find(Root);
+ if (RootIt == CtxGuidToValueInfo.end()) {
+ LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
+ << " not found in this linkage unit.\n");
+ continue;
+ }
+ auto RootVI = RootIt->second;
+ if (RootVI.getSummaryList().size() != 1) {
+ LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
+ << " should have exactly one summary, but has "
+ << RootVI.getSummaryList().size() << ". Skipping.\n");
+ continue;
+ }
+ StringRef RootDefiningModule =
+ RootVI.getSummaryList().front()->modulePath();
+ LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << Root
+ << " is : " << RootDefiningModule << "\n");
+ auto &Set = Workloads[RootDefiningModule];
+ for (const auto &Callee : AllCallees) {
+ LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n");
+ auto ElemIt = CtxGuidToValueInfo.find(Callee);
+ if (ElemIt == CtxGuidToValueInfo.end()) {
+ LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n");
+ continue;
+ }
+ Set.insert(ElemIt->second);
+ }
+ LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << " we have "
+ << Set.size() << " distinct callees.\n");
+ LLVM_DEBUG( //
+ for (const auto &VI
+ : Set) {
+ dbgs() << "[Workload] Root: " << Root
+ << " Would include: " << VI.getGUID() << "\n";
+ });
+ }
+ }
+};
+
+std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing,
+ const ModuleSummaryIndex &Index,
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
+ if (WorkloadDefinitions.empty()) {
+ LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
+ return std::unique_ptr<ModuleImportsManager>(
+ new ModuleImportsManager(IsPrevailing, Index, ExportLists));
+ }
+ LLVM_DEBUG(dbgs() << "[Workload] Using the contextual imports manager.\n");
+ return std::make_unique<WorkloadImportsManager>(IsPrevailing, Index,
+ ExportLists);
+}
+
static const char *
getFailureName(FunctionImporter::ImportFailureReason Reason) {
switch (Reason) {
@@ -732,14 +945,14 @@ void llvm::ComputeCrossModuleImport(
isPrevailing,
DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
- ModuleImportsManager MIS(isPrevailing, Index, &ExportLists);
+ auto MIS = ModuleImportsManager::create(isPrevailing, Index, &ExportLists);
// For each module that has function defined, compute the import/export lists.
for (const auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
auto &ImportList = ImportLists[DefinedGVSummaries.first];
LLVM_DEBUG(dbgs() << "Computing import for Module '"
<< DefinedGVSummaries.first << "'\n");
- MIS.computeImportForModule(DefinedGVSummaries.second,
- DefinedGVSummaries.first, ImportList);
+ MIS->computeImportForModule(DefinedGVSummaries.second,
+ DefinedGVSummaries.first, ImportList);
}
// When computing imports we only added the variables and functions being
@@ -855,8 +1068,8 @@ static void ComputeCrossModuleImportForModuleForTest(
// Compute the import list for this module.
LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
- ModuleImportsManager MIS(isPrevailing, Index);
- MIS.computeImportForModule(FunctionSummaryMap, ModulePath, ImportList);
+ auto MIS = ModuleImportsManager::create(isPrevailing, Index);
+ MIS->computeImportForModule(FunctionSummaryMap, ModulePath, ImportList);
#ifndef NDEBUG
dumpImportListForModule(Index, ModulePath, ImportList);
diff --git a/llvm/test/ThinLTO/X86/Inputs/workload1.ll b/llvm/test/ThinLTO/X86/Inputs/workload1.ll
new file mode 100644
index 00000000000000..0037232f9fec3a
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/Inputs/workload1.ll
@@ -0,0 +1,25 @@
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare void @m1_variant()
+
+define dso_local void @m1_f1() {
+ call void @m1_f2()
+ call void @noninterposable_f()
+ ret void
+}
+
+define internal void @m1_f2() {
+ call void @interposable_f()
+ ret void
+}
+
+define linkonce void @interposable_f() {
+ call void @m1_variant()
+ ret void
+}
+
+define linkonce_odr void @noninterposable_f() {
+ call void @m1_variant()
+ ret void
+}
\ No newline at end of file
diff --git a/llvm/test/ThinLTO/X86/Inputs/workload2.ll b/llvm/test/ThinLTO/X86/Inputs/workload2.ll
new file mode 100644
index 00000000000000..cbb51b6b11ad96
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/Inputs/workload2.ll
@@ -0,0 +1,22 @@
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare void @m2_variant()
+
+define dso_local void @m2_f1() {
+ call void @interposable_f()
+ call void @noninterposable_f()
+ ret void
+}
+
+ at m2_f1_alias = alias void (...), ptr @m2_f1
+
+define linkonce_odr void @interposable_f() {
+ call void @m2_variant()
+ ret void
+}
+
+define linkonce_odr void @noninterposable_f() {
+ call void @m2_variant()
+ ret void
+}
\ No newline at end of file
diff --git a/llvm/test/ThinLTO/X86/Inputs/workload3.ll b/llvm/test/ThinLTO/X86/Inputs/workload3.ll
new file mode 100644
index 00000000000000..01b79a4f334fdb
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/Inputs/workload3.ll
@@ -0,0 +1,9 @@
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare void @m1_f1()
+
+define dso_local void @m3_f1() {
+ call void @m1_f1()
+ ret void
+}
diff --git a/llvm/test/ThinLTO/X86/workload.ll b/llvm/test/ThinLTO/X86/workload.ll
new file mode 100644
index 00000000000000..d98ecf630de261
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/workload.ll
@@ -0,0 +1,63 @@
+; RUN: mkdir -p %t_baseline
+; RUN: echo '{"m1_f1":["m2_f1", "m2_f1_alias", "interposable_f", "noninterposable_f"], \
+; RUN: "m2_f1":["m1_f1", "m1_f2"]}' > %t/workload_defs.json
+; RUN: opt -module-summary %S/Inputs/workload1.ll -o %t_baseline/workload1.bc
+; RUN: opt -module-summary %S/Inputs/workload2.ll -o %t_baseline/workload2.bc
+; RUN: opt -module-summary %S/Inputs/workload3.ll -o %t_baseline/workload3.bc
+;
+; Normal run. The first module shouldn't get m2_f1
+;
+; RUN: llvm-lto2 run %t_baseline/workload1.bc %t_baseline/workload2.bc %t_baseline/workload3.bc \
+; RUN: -o %t_baseline/result.o -save-temps \
+; RUN: -r %t_baseline/workload1.bc,m1_f1,plx \
+; RUN: -r %t_baseline/workload1.bc,interposable_f \
+; RUN: -r %t_baseline/workload1.bc,noninterposable_f \
+; RUN: -r %t_baseline/workload1.bc,m1_variant \
+; RUN: -r %t_baseline/workload2.bc,m2_f1,plx \
+; RUN: -r %t_baseline/workload2.bc,m2_f1_alias \
+; RUN: -r %t_baseline/workload2.bc,interposable_f,p \
+; RUN: -r %t_baseline/workload2.bc,noninterposable_f,p \
+; RUN: -r %t_baseline/workload2.bc,m2_variant \
+; RUN: -r %t_baseline/workload3.bc,m1_f1 \
+; RUN: -r %t_baseline/workload3.bc,m3_f1,plx
+; RUN: llvm-dis %t_baseline/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=NOPROF
+;
+; NOPROF-NOT: m2_f1
+;
+; The run with workload definitions - same other options.
+;
+; RUN: llvm-lto2 run %t/workload1.bc %t/workload2.bc %t/workload3.bc \
+; RUN: -thinlto-workload-def=%t/workload_defs.json -o %t/result.o -save-temps \
+; RUN: -r %t/workload1.bc,m1_f1,plx \
+; RUN: -r %t/workload1.bc,interposable_f \
+; RUN: -r %t/workload1.bc,noninterposable_f \
+; RUN: -r %t/workload1.bc,m1_variant \
+; RUN: -r %t/workload2.bc,m2_f1,plx \
+; RUN: -r %t/workload2.bc,m2_f1_alias \
+; RUN: -r %t/workload2.bc,interposable_f,p \
+; RUN: -r %t/workload2.bc,noninterposable_f,p \
+; RUN: -r %t/workload2.bc,m2_variant \
+; RUN: -r %t/workload3.bc,m1_f1 \
+; RUN: -r %t/workload3.bc,m3_f1,plx
+; RUN: llvm-dis %t/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=FIRST
+; RUN: llvm-dis %t/result.o.2.3.import.bc -o - | FileCheck %s --check-prefix=SECOND
+; RUN: llvm-dis %t/result.o.3.3.import.bc -o - | FileCheck %s --check-prefix=THIRD
+;
+; The third module is bitwse-identical to the "normal" run, as the
+; RUN: diff %t_baseline/result.o.3.3.import.bc %t/result.o.3.3.import.bc
+;
+; This time, we expect m1 to have m2_f1 and the m2 variant of interposable_f,
+; while keeping its variant of noninterposable_f
+;
+; FIRST-LABEL: @m1_f1
+; FIRST-LABEL: @m1_f2
+; FIRST-LABEL: define available_externally void @noninterposable_f
+; FIRST-NEXT: call void @m1_variant
+; FIRST-LABEL: @m2_f1
+; FIRST-LABEL: define available_externally void @interposable_f
+; FIRST-NEXT: call void @m2_variant
+; FIRST-NOT: @m2_f1_alias
+; SECOND-LABEL: @m2_f1
+; SECOND-LABEL: @m1_f1
+; SECOND-LABEL: @m1_f2
+; THIRD-LABEL: define available_externally void @m1_f1
\ No newline at end of file
>From 37bc16a0cd984500d689c6dd9f7a4c90332d51a6 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Tue, 5 Dec 2023 22:16:45 -0800
Subject: [PATCH 2/8] Fix test - wasn't `mkdir`-ing the second dir.
---
llvm/test/ThinLTO/X86/workload.ll | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/test/ThinLTO/X86/workload.ll b/llvm/test/ThinLTO/X86/workload.ll
index d98ecf630de261..c30645c8bbd101 100644
--- a/llvm/test/ThinLTO/X86/workload.ll
+++ b/llvm/test/ThinLTO/X86/workload.ll
@@ -26,6 +26,7 @@
;
; The run with workload definitions - same other options.
;
+; RUN: mkdir -p %t
; RUN: llvm-lto2 run %t/workload1.bc %t/workload2.bc %t/workload3.bc \
; RUN: -thinlto-workload-def=%t/workload_defs.json -o %t/result.o -save-temps \
; RUN: -r %t/workload1.bc,m1_f1,plx \
>From f71bf9a80181ca503f93bea95902103566a81919 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Wed, 6 Dec 2023 11:19:11 -0800
Subject: [PATCH 3/8] actually fix dir creation.
---
llvm/test/ThinLTO/X86/workload.ll | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/llvm/test/ThinLTO/X86/workload.ll b/llvm/test/ThinLTO/X86/workload.ll
index c30645c8bbd101..d54a06d06140a5 100644
--- a/llvm/test/ThinLTO/X86/workload.ll
+++ b/llvm/test/ThinLTO/X86/workload.ll
@@ -1,6 +1,4 @@
; RUN: mkdir -p %t_baseline
-; RUN: echo '{"m1_f1":["m2_f1", "m2_f1_alias", "interposable_f", "noninterposable_f"], \
-; RUN: "m2_f1":["m1_f1", "m1_f2"]}' > %t/workload_defs.json
; RUN: opt -module-summary %S/Inputs/workload1.ll -o %t_baseline/workload1.bc
; RUN: opt -module-summary %S/Inputs/workload2.ll -o %t_baseline/workload2.bc
; RUN: opt -module-summary %S/Inputs/workload3.ll -o %t_baseline/workload3.bc
@@ -27,6 +25,11 @@
; The run with workload definitions - same other options.
;
; RUN: mkdir -p %t
+; RUN: opt -module-summary %S/Inputs/workload1.ll -o %t/workload1.bc
+; RUN: opt -module-summary %S/Inputs/workload2.ll -o %t/workload2.bc
+; RUN: opt -module-summary %S/Inputs/workload3.ll -o %t/workload3.bc
+; RUN: echo '{"m1_f1":["m2_f1", "m2_f1_alias", "interposable_f", "noninterposable_f"], \
+; RUN: "m2_f1":["m1_f1", "m1_f2"]}' > %t/workload_defs.json
; RUN: llvm-lto2 run %t/workload1.bc %t/workload2.bc %t/workload3.bc \
; RUN: -thinlto-workload-def=%t/workload_defs.json -o %t/result.o -save-temps \
; RUN: -r %t/workload1.bc,m1_f1,plx \
>From 72cf3dc7fa5503c23fd1685c689fbdc412c7b247 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Sat, 9 Dec 2023 20:48:48 -0800
Subject: [PATCH 4/8] addressed feedback in FunctionImport.cpp
---
llvm/lib/Transforms/IPO/FunctionImport.cpp | 93 +++++++++++++++++-----
1 file changed, 71 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index bcf141b42be16f..44268a1826817f 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -139,8 +139,22 @@ static cl::opt<bool>
ImportAllIndex("import-all-index",
cl::desc("Import all external functions in index."));
-static cl::opt<std::string> WorkloadDefinitions("thinlto-workload-def",
- cl::Hidden);
+/// Pass a workload description file - an example of workload would be the
+/// functions executed to satisfy a RPC request. A workload is defined by a root
+/// function and the list of functions that are (frequently) needed to satisfy
+/// it. The module that defines the root will have all those functions imported.
+/// The file contains a JSON dictionary. The keys are root functions, the values
+/// are lists of functions to import in the module defining the root. It is
+/// assumed -funique-internal-linkage-names was used, thus ensuring function
+/// names are unique even for local linkage ones.
+static cl::opt<std::string> WorkloadDefinitions(
+ "thinlto-workload-def",
+ cl::desc("Pass a workload definition. This is a file containing a JSON "
+ "dictionary. The keys are root functions, the values are lists of "
+ "functions to import in the module defining the root. It is "
+ "assumed -funique-internal-linkage-names was used, to ensure "
+ "local linkage functions have unique names."),
+ cl::Hidden);
// Load lazily a module from \p FileName in \p Context.
static std::unique_ptr<Module> loadFile(const std::string &FileName,
@@ -409,8 +423,11 @@ class ModuleImportsManager {
nullptr);
};
+/// A ModuleImportsManager that operates based on a workload definition (see
+/// -thinlto-workload-def). For modules that do not define workload roots, it
+/// applies the base ModuleImportsManager import policy.
class WorkloadImportsManager : public ModuleImportsManager {
- // Keep a module name -> defined value infos association. We use it to
+ // Keep a module name -> value infos to import association. We use it to
// determine if a module's import list should be done by the base
// ModuleImportsManager or by us.
StringMap<DenseSet<ValueInfo>> Workloads;
@@ -441,6 +458,19 @@ class WorkloadImportsManager : public ModuleImportsManager {
FunctionImporter::ImportFailureReason LastReason =
FunctionImporter::ImportFailureReason::None;
for (const auto &Candidate : Candidates) {
+ /// We will prefer importing the prevailing candidate, if not, we'll
+ /// still pick the first available candidate. The reason we want to make
+ /// sure we do import the prevailing candidate is because the goal of
+ /// workload-awareness is to enable optimizations specializing the call
+ /// graph of that workload. Suppose a function is already defined in the
+ /// module, but it's not the prevailing variant. Suppose also we do not
+ /// inline it, but we could specialize it to the workload in other ways.
+ /// However, the linker would drop it in the favor of the prevailing
+ /// copy. Instead, by importing the prevailing variant (assuming also
+ /// the use of `-avail-extern-to-local`), we keep the specialization. We
+ /// could alteranatively make the non-prevailing variant local, but the
+ /// prevailing one is also the one for which we would have previously
+ /// collected profiles, making it preferrable.
LastReason = Candidate.first;
if (Candidate.first == FunctionImporter::ImportFailureReason::None) {
const bool Prevailing = IsPrevailing(VI.getGUID(), Candidate.second);
@@ -470,7 +500,7 @@ class WorkloadImportsManager : public ModuleImportsManager {
}
if (!GVS) {
LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
- << " because can't select Callee. Guid is: "
+ << " because can't find eligible Callee. Guid is: "
<< Function::getGUID(VI.name())
<< ". The reason was: " << getFailureName(LastReason)
<< "\n");
@@ -484,9 +514,10 @@ class WorkloadImportsManager : public ModuleImportsManager {
"current module\n");
continue;
}
- if (!shouldImport(DefinedGVSummaries, VI.getGUID(), CFS)) {
- LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
- << " because we have a local copy.\n");
+ if (moduleAlreadyHasPreferredDef(DefinedGVSummaries, VI.getGUID(), CFS)) {
+ LLVM_DEBUG(
+ dbgs() << "[Workload] Not importing " << VI.name()
+ << " because we have a copy already in this module.\n");
continue;
}
@@ -501,24 +532,26 @@ class WorkloadImportsManager : public ModuleImportsManager {
LLVM_DEBUG(dbgs() << "[Workload] Done\n");
}
- bool shouldImport(const GVSummaryMapTy &DefinedGVSummaries,
- Function::GUID Guid, const GlobalValueSummary *Candidate) {
+ bool moduleAlreadyHasPreferredDef(const GVSummaryMapTy &DefinedGVSummaries,
+ Function::GUID Guid,
+ const FunctionSummary *Candidate) {
auto DefinedSummary = DefinedGVSummaries.find(Guid);
if (DefinedSummary == DefinedGVSummaries.end())
- return true;
+ return false;
// See shouldImportGlobal for the justificaton of the isInterposableLinkage.
if (!IsPrevailing(Guid, DefinedSummary->second) &&
GlobalValue::isInterposableLinkage(DefinedSummary->second->linkage()) &&
IsPrevailing(Guid, Candidate)) {
LLVM_DEBUG(dbgs() << "[Workload] " << Guid
- << ": local non-prevailing in module. Importing from "
+ << ": non-prevailing preexisting definition in module. "
+ "Importing from "
<< Candidate->modulePath() << "\n");
- return true;
+ return false;
}
LLVM_DEBUG(dbgs() << "[Workload] " << Guid
<< ": ignored! Target already in destination module.\n");
- return false;
+ return true;
}
public:
@@ -528,11 +561,23 @@ class WorkloadImportsManager : public ModuleImportsManager {
const ModuleSummaryIndex &Index,
DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
: ModuleImportsManager(IsPrevailing, Index, ExportLists) {
- StringMap<ValueInfo> CtxGuidToValueInfo;
+ // Since the workload def uses names, we need a quick lookup
+ // name->ValueInfo.
+ StringMap<ValueInfo> NameToValueInfo;
+ StringSet<> AmbiguousNames;
for (auto &I : Index) {
- ValueInfo VI(Index.haveGVs(), &I);
- CtxGuidToValueInfo[VI.name()] = VI;
+ ValueInfo VI = Index.getValueInfo(I);
+ if (!NameToValueInfo.insert(std::make_pair(VI.name(), VI)).second)
+ AmbiguousNames.insert(VI.name());
}
+ auto DbgReportIfAmbiguous = [&](StringRef Name) {
+ if (AmbiguousNames.count(Name) > 0)
+ LLVM_DEBUG(
+ dbgs()
+ << "[Workload] Function name " << Name
+ << " present in the workload definition is ambiguous. Consider "
+ "compiling with -funique-internal-linkage-names.");
+ };
std::error_code EC;
auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions);
if (std::error_code EC = BufferOrErr.getError()) {
@@ -542,6 +587,8 @@ class WorkloadImportsManager : public ModuleImportsManager {
auto Buffer = std::move(BufferOrErr.get());
std::map<std::string, std::vector<std::string>> WorkloadDefs;
json::Path::Root NullRoot;
+ // The JSON is supposed to contain a dictionary matching the type of
+ // WorkloadDefs.
auto Parsed = json::parse(Buffer->getBuffer());
if (!Parsed)
report_fatal_error(Parsed.takeError());
@@ -549,10 +596,11 @@ class WorkloadImportsManager : public ModuleImportsManager {
report_fatal_error("Invalid thinlto contextual profile format.");
for (const auto &Workload : WorkloadDefs) {
const auto &Root = Workload.first;
+ DbgReportIfAmbiguous(Root);
LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n");
const auto &AllCallees = Workload.second;
- auto RootIt = CtxGuidToValueInfo.find(Root);
- if (RootIt == CtxGuidToValueInfo.end()) {
+ auto RootIt = NameToValueInfo.find(Root);
+ if (RootIt == NameToValueInfo.end()) {
LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
<< " not found in this linkage unit.\n");
continue;
@@ -571,16 +619,17 @@ class WorkloadImportsManager : public ModuleImportsManager {
auto &Set = Workloads[RootDefiningModule];
for (const auto &Callee : AllCallees) {
LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n");
- auto ElemIt = CtxGuidToValueInfo.find(Callee);
- if (ElemIt == CtxGuidToValueInfo.end()) {
+ DbgReportIfAmbiguous(Callee);
+ auto ElemIt = NameToValueInfo.find(Callee);
+ if (ElemIt == NameToValueInfo.end()) {
LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n");
continue;
}
Set.insert(ElemIt->second);
}
- LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << " we have "
- << Set.size() << " distinct callees.\n");
LLVM_DEBUG( //
+ dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
+ << " distinct callees.\n";
for (const auto &VI
: Set) {
dbgs() << "[Workload] Root: " << Root
>From d090aa1801f3340e64a6b621b1cb4aecaa3002c3 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Mon, 11 Dec 2023 14:06:34 -0800
Subject: [PATCH 5/8] split tests, handling aliases
---
llvm/lib/Transforms/IPO/FunctionImport.cpp | 32 ++--
llvm/test/ThinLTO/X86/Inputs/workload1.ll | 25 ---
llvm/test/ThinLTO/X86/Inputs/workload2.ll | 22 ---
llvm/test/ThinLTO/X86/Inputs/workload3.ll | 9 --
llvm/test/ThinLTO/X86/workload.ll | 167 +++++++++++++++------
5 files changed, 135 insertions(+), 120 deletions(-)
delete mode 100644 llvm/test/ThinLTO/X86/Inputs/workload1.ll
delete mode 100644 llvm/test/ThinLTO/X86/Inputs/workload2.ll
delete mode 100644 llvm/test/ThinLTO/X86/Inputs/workload3.ll
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 44268a1826817f..00602ba8ea3c0f 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -464,11 +464,12 @@ class WorkloadImportsManager : public ModuleImportsManager {
/// workload-awareness is to enable optimizations specializing the call
/// graph of that workload. Suppose a function is already defined in the
/// module, but it's not the prevailing variant. Suppose also we do not
- /// inline it, but we could specialize it to the workload in other ways.
- /// However, the linker would drop it in the favor of the prevailing
- /// copy. Instead, by importing the prevailing variant (assuming also
- /// the use of `-avail-extern-to-local`), we keep the specialization. We
- /// could alteranatively make the non-prevailing variant local, but the
+ /// inline it (in fact, if it were interposable, we can't inline it),
+ /// but we could specialize it to the workload in other ways. However,
+ /// the linker would drop it in the favor of the prevailing copy.
+ /// Instead, by importing the prevailing variant (assuming also the use
+ /// of `-avail-extern-to-local`), we keep the specialization. We could
+ /// alteranatively make the non-prevailing variant local, but the
/// prevailing one is also the one for which we would have previously
/// collected profiles, making it preferrable.
LastReason = Candidate.first;
@@ -504,17 +505,17 @@ class WorkloadImportsManager : public ModuleImportsManager {
<< Function::getGUID(VI.name())
<< ". The reason was: " << getFailureName(LastReason)
<< "\n");
+ (void)LastReason; // Only used in LLVM_DEBUG above.
continue;
}
- const auto *CFS = cast<FunctionSummary>(GVS->getBaseObject());
- auto ExportingModule = CFS->modulePath();
+ auto ExportingModule = GVS->modulePath();
if (ExportingModule == ModName) {
LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
<< " because its defining module is the same as the "
"current module\n");
continue;
}
- if (moduleAlreadyHasPreferredDef(DefinedGVSummaries, VI.getGUID(), CFS)) {
+ if (moduleAlreadyHasPreferredDef(DefinedGVSummaries, VI.getGUID(), GVS)) {
LLVM_DEBUG(
dbgs() << "[Workload] Not importing " << VI.name()
<< " because we have a copy already in this module.\n");
@@ -534,7 +535,7 @@ class WorkloadImportsManager : public ModuleImportsManager {
bool moduleAlreadyHasPreferredDef(const GVSummaryMapTy &DefinedGVSummaries,
Function::GUID Guid,
- const FunctionSummary *Candidate) {
+ const GlobalValueSummary *Candidate) {
auto DefinedSummary = DefinedGVSummaries.find(Guid);
if (DefinedSummary == DefinedGVSummaries.end())
return false;
@@ -568,15 +569,14 @@ class WorkloadImportsManager : public ModuleImportsManager {
for (auto &I : Index) {
ValueInfo VI = Index.getValueInfo(I);
if (!NameToValueInfo.insert(std::make_pair(VI.name(), VI)).second)
- AmbiguousNames.insert(VI.name());
+ LLVM_DEBUG(AmbiguousNames.insert(VI.name()));
}
auto DbgReportIfAmbiguous = [&](StringRef Name) {
- if (AmbiguousNames.count(Name) > 0)
- LLVM_DEBUG(
- dbgs()
- << "[Workload] Function name " << Name
- << " present in the workload definition is ambiguous. Consider "
- "compiling with -funique-internal-linkage-names.");
+ LLVM_DEBUG(if (AmbiguousNames.count(Name) > 0) {
+ dbgs() << "[Workload] Function name " << Name
+ << " present in the workload definition is ambiguous. Consider "
+ "compiling with -funique-internal-linkage-names.";
+ });
};
std::error_code EC;
auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions);
diff --git a/llvm/test/ThinLTO/X86/Inputs/workload1.ll b/llvm/test/ThinLTO/X86/Inputs/workload1.ll
deleted file mode 100644
index 0037232f9fec3a..00000000000000
--- a/llvm/test/ThinLTO/X86/Inputs/workload1.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-linux-gnu"
-
-declare void @m1_variant()
-
-define dso_local void @m1_f1() {
- call void @m1_f2()
- call void @noninterposable_f()
- ret void
-}
-
-define internal void @m1_f2() {
- call void @interposable_f()
- ret void
-}
-
-define linkonce void @interposable_f() {
- call void @m1_variant()
- ret void
-}
-
-define linkonce_odr void @noninterposable_f() {
- call void @m1_variant()
- ret void
-}
\ No newline at end of file
diff --git a/llvm/test/ThinLTO/X86/Inputs/workload2.ll b/llvm/test/ThinLTO/X86/Inputs/workload2.ll
deleted file mode 100644
index cbb51b6b11ad96..00000000000000
--- a/llvm/test/ThinLTO/X86/Inputs/workload2.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-linux-gnu"
-
-declare void @m2_variant()
-
-define dso_local void @m2_f1() {
- call void @interposable_f()
- call void @noninterposable_f()
- ret void
-}
-
- at m2_f1_alias = alias void (...), ptr @m2_f1
-
-define linkonce_odr void @interposable_f() {
- call void @m2_variant()
- ret void
-}
-
-define linkonce_odr void @noninterposable_f() {
- call void @m2_variant()
- ret void
-}
\ No newline at end of file
diff --git a/llvm/test/ThinLTO/X86/Inputs/workload3.ll b/llvm/test/ThinLTO/X86/Inputs/workload3.ll
deleted file mode 100644
index 01b79a4f334fdb..00000000000000
--- a/llvm/test/ThinLTO/X86/Inputs/workload3.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-linux-gnu"
-
-declare void @m1_f1()
-
-define dso_local void @m3_f1() {
- call void @m1_f1()
- ret void
-}
diff --git a/llvm/test/ThinLTO/X86/workload.ll b/llvm/test/ThinLTO/X86/workload.ll
index d54a06d06140a5..7639e4b416999a 100644
--- a/llvm/test/ThinLTO/X86/workload.ll
+++ b/llvm/test/ThinLTO/X86/workload.ll
@@ -1,66 +1,137 @@
+; Set up
+; RUN: rm -rf %t
+; RUN: mkdir -p %t
+; RUN: split-file %s %t
+;
+;--- m1.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare void @m1_variant()
+declare void @m2_f1_alias()
+
+define dso_local void @m1_f1() {
+ call void @m1_f2()
+ call void @noninterposable_f()
+ ret void
+}
+
+define internal void @m1_f2() {
+ call void @interposable_f()
+ ret void
+}
+
+define linkonce void @interposable_f() {
+ call void @m1_variant()
+ ret void
+}
+
+define linkonce_odr void @noninterposable_f() {
+ call void @m1_variant()
+ ret void
+}
+;--- m2.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare void @m2_variant()
+
+define dso_local void @m2_f1() {
+ call void @interposable_f()
+ call void @noninterposable_f()
+ ret void
+}
+
+ at m2_f1_alias = alias void (...), ptr @m2_f1
+
+define external void @interposable_f() {
+ call void @m2_variant()
+ ret void
+}
+
+define linkonce_odr void @noninterposable_f() {
+ call void @m2_variant()
+ ret void
+}
+;--- m3.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare void @m1_f1()
+
+define dso_local void @m3_f1() {
+ call void @m1_f1()
+ ret void
+}
+;
+; RUN: opt -module-summary %t/m1.ll -o %t/m1.bc
+; RUN: opt -module-summary %t/m2.ll -o %t/m2.bc
+; RUN: opt -module-summary %t/m3.ll -o %t/m3.bc
+; RUN: rm -rf %t_baseline
+; RUN: rm -rf %t_exp
; RUN: mkdir -p %t_baseline
-; RUN: opt -module-summary %S/Inputs/workload1.ll -o %t_baseline/workload1.bc
-; RUN: opt -module-summary %S/Inputs/workload2.ll -o %t_baseline/workload2.bc
-; RUN: opt -module-summary %S/Inputs/workload3.ll -o %t_baseline/workload3.bc
+; RUN: mkdir -p %t_exp
;
-; Normal run. The first module shouldn't get m2_f1
+; Normal run. m1 shouldn't get m2_f1 because it's not referenced from there.
;
-; RUN: llvm-lto2 run %t_baseline/workload1.bc %t_baseline/workload2.bc %t_baseline/workload3.bc \
+; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc %t/m3.bc \
; RUN: -o %t_baseline/result.o -save-temps \
-; RUN: -r %t_baseline/workload1.bc,m1_f1,plx \
-; RUN: -r %t_baseline/workload1.bc,interposable_f \
-; RUN: -r %t_baseline/workload1.bc,noninterposable_f \
-; RUN: -r %t_baseline/workload1.bc,m1_variant \
-; RUN: -r %t_baseline/workload2.bc,m2_f1,plx \
-; RUN: -r %t_baseline/workload2.bc,m2_f1_alias \
-; RUN: -r %t_baseline/workload2.bc,interposable_f,p \
-; RUN: -r %t_baseline/workload2.bc,noninterposable_f,p \
-; RUN: -r %t_baseline/workload2.bc,m2_variant \
-; RUN: -r %t_baseline/workload3.bc,m1_f1 \
-; RUN: -r %t_baseline/workload3.bc,m3_f1,plx
+; RUN: -r %t/m1.bc,m1_f1,plx \
+; RUN: -r %t/m1.bc,interposable_f \
+; RUN: -r %t/m1.bc,noninterposable_f \
+; RUN: -r %t/m1.bc,m1_variant \
+; RUN: -r %t/m1.bc,m2_f1_alias \
+; RUN: -r %t/m2.bc,m2_f1,plx \
+; RUN: -r %t/m2.bc,m2_f1_alias,plx \
+; RUN: -r %t/m2.bc,interposable_f,p \
+; RUN: -r %t/m2.bc,noninterposable_f,p \
+; RUN: -r %t/m2.bc,m2_variant \
+; RUN: -r %t/m3.bc,m1_f1 \
+; RUN: -r %t/m3.bc,m3_f1,plx
; RUN: llvm-dis %t_baseline/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=NOPROF
;
-; NOPROF-NOT: m2_f1
+; NOPROF-NOT: m2_f1()
;
; The run with workload definitions - same other options.
;
-; RUN: mkdir -p %t
-; RUN: opt -module-summary %S/Inputs/workload1.ll -o %t/workload1.bc
-; RUN: opt -module-summary %S/Inputs/workload2.ll -o %t/workload2.bc
-; RUN: opt -module-summary %S/Inputs/workload3.ll -o %t/workload3.bc
; RUN: echo '{"m1_f1":["m2_f1", "m2_f1_alias", "interposable_f", "noninterposable_f"], \
-; RUN: "m2_f1":["m1_f1", "m1_f2"]}' > %t/workload_defs.json
-; RUN: llvm-lto2 run %t/workload1.bc %t/workload2.bc %t/workload3.bc \
-; RUN: -thinlto-workload-def=%t/workload_defs.json -o %t/result.o -save-temps \
-; RUN: -r %t/workload1.bc,m1_f1,plx \
-; RUN: -r %t/workload1.bc,interposable_f \
-; RUN: -r %t/workload1.bc,noninterposable_f \
-; RUN: -r %t/workload1.bc,m1_variant \
-; RUN: -r %t/workload2.bc,m2_f1,plx \
-; RUN: -r %t/workload2.bc,m2_f1_alias \
-; RUN: -r %t/workload2.bc,interposable_f,p \
-; RUN: -r %t/workload2.bc,noninterposable_f,p \
-; RUN: -r %t/workload2.bc,m2_variant \
-; RUN: -r %t/workload3.bc,m1_f1 \
-; RUN: -r %t/workload3.bc,m3_f1,plx
-; RUN: llvm-dis %t/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=FIRST
-; RUN: llvm-dis %t/result.o.2.3.import.bc -o - | FileCheck %s --check-prefix=SECOND
-; RUN: llvm-dis %t/result.o.3.3.import.bc -o - | FileCheck %s --check-prefix=THIRD
+; RUN: "m2_f1":["m1_f1", "m1_f2"]}' > %t_exp/workload_defs.json
+;
+; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc %t/m3.bc \
+; RUN: -o %t_exp/result.o -save-temps \
+; RUN: -thinlto-workload-def=%t_exp/workload_defs.json \
+; RUN: -r %t/m1.bc,m1_f1,plx \
+; RUN: -r %t/m1.bc,interposable_f \
+; RUN: -r %t/m1.bc,noninterposable_f \
+; RUN: -r %t/m1.bc,m1_variant \
+; RUN: -r %t/m1.bc,m2_f1_alias \
+; RUN: -r %t/m2.bc,m2_f1,plx \
+; RUN: -r %t/m2.bc,m2_f1_alias,plx \
+; RUN: -r %t/m2.bc,interposable_f,p \
+; RUN: -r %t/m2.bc,noninterposable_f,p \
+; RUN: -r %t/m2.bc,m2_variant \
+; RUN: -r %t/m3.bc,m1_f1 \
+; RUN: -r %t/m3.bc,m3_f1,plx
+; RUN: llvm-dis %t_exp/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=FIRST
+; RUN: llvm-dis %t_exp/result.o.2.3.import.bc -o - | FileCheck %s --check-prefix=SECOND
+; RUN: llvm-dis %t_exp/result.o.3.3.import.bc -o - | FileCheck %s --check-prefix=THIRD
+;
+; The third module is bitwse-identical to the "normal" run, as the workload
+; defintion doesn't mention it.
;
-; The third module is bitwse-identical to the "normal" run, as the
-; RUN: diff %t_baseline/result.o.3.3.import.bc %t/result.o.3.3.import.bc
+; RUN: diff %t_baseline/result.o.3.3.import.bc %t_exp/result.o.3.3.import.bc
;
; This time, we expect m1 to have m2_f1 and the m2 variant of interposable_f,
; while keeping its variant of noninterposable_f
;
-; FIRST-LABEL: @m1_f1
-; FIRST-LABEL: @m1_f2
-; FIRST-LABEL: define available_externally void @noninterposable_f
-; FIRST-NEXT: call void @m1_variant
-; FIRST-LABEL: @m2_f1
-; FIRST-LABEL: define available_externally void @interposable_f
-; FIRST-NEXT: call void @m2_variant
-; FIRST-NOT: @m2_f1_alias
+; FIRST-LABEL: @m1_f1
+; FIRST-LABEL: @m1_f2
+; FIRST-LABEL: define available_externally void @noninterposable_f
+; FIRST-NEXT: call void @m1_variant
+; FIRST-LABEL: @m2_f1
+; FIRST-LABEL: define available_externally void @interposable_f
+; FIRST-NEXT: call void @m2_variant
+; FIRST-LABEL: @m2_f1_alias
; SECOND-LABEL: @m2_f1
; SECOND-LABEL: @m1_f1
; SECOND-LABEL: @m1_f2
>From 569ed19d9068d227b2faf789556627e469556cba Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Tue, 12 Dec 2023 15:17:55 -0800
Subject: [PATCH 6/8] Handling interposable and non-interposable the same way.
---
llvm/lib/Transforms/IPO/FunctionImport.cpp | 132 +++++++++------------
llvm/test/ThinLTO/X86/workload.ll | 36 +++---
2 files changed, 73 insertions(+), 95 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 00602ba8ea3c0f..30fd5925c9cf51 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -451,77 +451,75 @@ class WorkloadImportsManager : public ModuleImportsManager {
auto &ValueInfos = SetIter->second;
SmallVector<EdgeInfo, 128> GlobWorklist;
for (auto &VI : llvm::make_early_inc_range(ValueInfos)) {
+ auto It = DefinedGVSummaries.find(VI.getGUID());
+ if (It != DefinedGVSummaries.end() &&
+ IsPrevailing(VI.getGUID(), It->second)) {
+ LLVM_DEBUG(
+ dbgs() << "[Workload] " << VI.name()
+ << " has the prevailing variant already in the module "
+ << ModName << ". No need to import\n");
+ continue;
+ }
auto Candidates =
qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName);
const GlobalValueSummary *GVS = nullptr;
- FunctionImporter::ImportFailureReason LastReason =
- FunctionImporter::ImportFailureReason::None;
- for (const auto &Candidate : Candidates) {
- /// We will prefer importing the prevailing candidate, if not, we'll
- /// still pick the first available candidate. The reason we want to make
- /// sure we do import the prevailing candidate is because the goal of
- /// workload-awareness is to enable optimizations specializing the call
- /// graph of that workload. Suppose a function is already defined in the
- /// module, but it's not the prevailing variant. Suppose also we do not
- /// inline it (in fact, if it were interposable, we can't inline it),
- /// but we could specialize it to the workload in other ways. However,
- /// the linker would drop it in the favor of the prevailing copy.
- /// Instead, by importing the prevailing variant (assuming also the use
- /// of `-avail-extern-to-local`), we keep the specialization. We could
- /// alteranatively make the non-prevailing variant local, but the
- /// prevailing one is also the one for which we would have previously
- /// collected profiles, making it preferrable.
- LastReason = Candidate.first;
- if (Candidate.first == FunctionImporter::ImportFailureReason::None) {
- const bool Prevailing = IsPrevailing(VI.getGUID(), Candidate.second);
- if (Prevailing || !GVS) {
- if (!GVS && !Prevailing)
- LLVM_DEBUG(dbgs()
- << "[Workload] Considering " << VI.name() << " from "
- << Candidate.second->modulePath() << " with linkage "
- << Candidate.second->linkage()
- << " although it's not prevailing, but it's the "
- "first available candidate.\n");
- GVS = Candidate.second;
- if (Prevailing) {
- LLVM_DEBUG(dbgs()
- << "[Workload] Considering " << VI.name() << " from "
- << GVS->modulePath() << " with linkage "
- << GVS->linkage() << " because it's prevailing.\n");
- break;
- }
- } else {
- LLVM_DEBUG(dbgs() << "[Workload] Skipping " << VI.name() << " from "
- << Candidate.second->modulePath()
- << " with linkage " << Candidate.second->linkage()
- << " because it's not prevailing\n");
- }
- }
- }
- if (!GVS) {
+ auto PotentialCandidates = llvm::map_range(
+ llvm::make_filter_range(
+ Candidates,
+ [&](const auto &Candidate) {
+ return Candidate.first ==
+ FunctionImporter::ImportFailureReason::None;
+ }),
+ [](const auto &Candidate) { return Candidate.second; });
+ if (PotentialCandidates.empty()) {
LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
<< " because can't find eligible Callee. Guid is: "
- << Function::getGUID(VI.name())
- << ". The reason was: " << getFailureName(LastReason)
- << "\n");
- (void)LastReason; // Only used in LLVM_DEBUG above.
+ << Function::getGUID(VI.name()) << "\n");
continue;
}
+ /// We will prefer importing the prevailing candidate, if not, we'll
+ /// still pick the first available candidate. The reason we want to make
+ /// sure we do import the prevailing candidate is because the goal of
+ /// workload-awareness is to enable optimizations specializing the call
+ /// graph of that workload. Suppose a function is already defined in the
+ /// module, but it's not the prevailing variant. Suppose also we do not
+ /// inline it (in fact, if it were interposable, we can't inline it),
+ /// but we could specialize it to the workload in other ways. However,
+ /// the linker would drop it in the favor of the prevailing copy.
+ /// Instead, by importing the prevailing variant (assuming also the use
+ /// of `-avail-extern-to-local`), we keep the specialization. We could
+ /// alteranatively make the non-prevailing variant local, but the
+ /// prevailing one is also the one for which we would have previously
+ /// collected profiles, making it preferrable.
+ auto PrevailingCandidates = llvm::make_filter_range(
+ PotentialCandidates, [&](const auto *Candidate) {
+ return IsPrevailing(VI.getGUID(), Candidate);
+ });
+ if (PrevailingCandidates.empty()) {
+ if (!llvm::hasSingleElement(PotentialCandidates))
+ LLVM_DEBUG(
+ dbgs()
+ << "[Workload] Found multiple non-prevailing candidates for "
+ << VI.name()
+ << ". This is unexpected. Are module paths passed to the "
+ "compiler unique for the modules passed to the linker?");
+ GVS = *PotentialCandidates.begin();
+ } else {
+ assert(llvm::hasSingleElement(PrevailingCandidates));
+ GVS = *PrevailingCandidates.begin();
+ }
+
auto ExportingModule = GVS->modulePath();
+ // We checked that for the prevailing case, but if we happen to have for
+ // example an internal that's defined in this module, it'd have no
+ // PrevailingCandidates.
if (ExportingModule == ModName) {
LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
<< " because its defining module is the same as the "
"current module\n");
continue;
}
- if (moduleAlreadyHasPreferredDef(DefinedGVSummaries, VI.getGUID(), GVS)) {
- LLVM_DEBUG(
- dbgs() << "[Workload] Not importing " << VI.name()
- << " because we have a copy already in this module.\n");
- continue;
- }
-
LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
<< ExportingModule << " : "
<< Function::getGUID(VI.name()) << "\n");
@@ -533,28 +531,6 @@ class WorkloadImportsManager : public ModuleImportsManager {
LLVM_DEBUG(dbgs() << "[Workload] Done\n");
}
- bool moduleAlreadyHasPreferredDef(const GVSummaryMapTy &DefinedGVSummaries,
- Function::GUID Guid,
- const GlobalValueSummary *Candidate) {
- auto DefinedSummary = DefinedGVSummaries.find(Guid);
- if (DefinedSummary == DefinedGVSummaries.end())
- return false;
-
- // See shouldImportGlobal for the justificaton of the isInterposableLinkage.
- if (!IsPrevailing(Guid, DefinedSummary->second) &&
- GlobalValue::isInterposableLinkage(DefinedSummary->second->linkage()) &&
- IsPrevailing(Guid, Candidate)) {
- LLVM_DEBUG(dbgs() << "[Workload] " << Guid
- << ": non-prevailing preexisting definition in module. "
- "Importing from "
- << Candidate->modulePath() << "\n");
- return false;
- }
- LLVM_DEBUG(dbgs() << "[Workload] " << Guid
- << ": ignored! Target already in destination module.\n");
- return true;
- }
-
public:
WorkloadImportsManager(
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
diff --git a/llvm/test/ThinLTO/X86/workload.ll b/llvm/test/ThinLTO/X86/workload.ll
index 7639e4b416999a..74b221180f57f5 100644
--- a/llvm/test/ThinLTO/X86/workload.ll
+++ b/llvm/test/ThinLTO/X86/workload.ll
@@ -21,7 +21,7 @@ define internal void @m1_f2() {
ret void
}
-define linkonce void @interposable_f() {
+define external void @interposable_f() {
call void @m1_variant()
ret void
}
@@ -44,12 +44,12 @@ define dso_local void @m2_f1() {
@m2_f1_alias = alias void (...), ptr @m2_f1
-define external void @interposable_f() {
+define linkonce void @interposable_f() {
call void @m2_variant()
ret void
}
-define linkonce_odr void @noninterposable_f() {
+define external void @noninterposable_f() {
call void @m2_variant()
ret void
}
@@ -77,13 +77,13 @@ define dso_local void @m3_f1() {
; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc %t/m3.bc \
; RUN: -o %t_baseline/result.o -save-temps \
; RUN: -r %t/m1.bc,m1_f1,plx \
-; RUN: -r %t/m1.bc,interposable_f \
+; RUN: -r %t/m1.bc,interposable_f,p \
; RUN: -r %t/m1.bc,noninterposable_f \
; RUN: -r %t/m1.bc,m1_variant \
; RUN: -r %t/m1.bc,m2_f1_alias \
; RUN: -r %t/m2.bc,m2_f1,plx \
; RUN: -r %t/m2.bc,m2_f1_alias,plx \
-; RUN: -r %t/m2.bc,interposable_f,p \
+; RUN: -r %t/m2.bc,interposable_f \
; RUN: -r %t/m2.bc,noninterposable_f,p \
; RUN: -r %t/m2.bc,m2_variant \
; RUN: -r %t/m3.bc,m1_f1 \
@@ -94,20 +94,20 @@ define dso_local void @m3_f1() {
;
; The run with workload definitions - same other options.
;
-; RUN: echo '{"m1_f1":["m2_f1", "m2_f1_alias", "interposable_f", "noninterposable_f"], \
-; RUN: "m2_f1":["m1_f1", "m1_f2"]}' > %t_exp/workload_defs.json
+; RUN: echo '{"m1_f1":["m1_f1", "m2_f1", "m2_f1_alias", "interposable_f", "noninterposable_f"], \
+; RUN: "m2_f1":["m1_f1", "m1_f2", "interposable_f"]}' > %t_exp/workload_defs.json
;
; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc %t/m3.bc \
; RUN: -o %t_exp/result.o -save-temps \
; RUN: -thinlto-workload-def=%t_exp/workload_defs.json \
; RUN: -r %t/m1.bc,m1_f1,plx \
-; RUN: -r %t/m1.bc,interposable_f \
-; RUN: -r %t/m1.bc,noninterposable_f \
+; RUN: -r %t/m1.bc,interposable_f,p \
+; RUN: -r %t/m1.bc,noninterposable_f,p \
; RUN: -r %t/m1.bc,m1_variant \
; RUN: -r %t/m1.bc,m2_f1_alias \
; RUN: -r %t/m2.bc,m2_f1,plx \
; RUN: -r %t/m2.bc,m2_f1_alias,plx \
-; RUN: -r %t/m2.bc,interposable_f,p \
+; RUN: -r %t/m2.bc,interposable_f \
; RUN: -r %t/m2.bc,noninterposable_f,p \
; RUN: -r %t/m2.bc,m2_variant \
; RUN: -r %t/m3.bc,m1_f1 \
@@ -121,18 +121,20 @@ define dso_local void @m3_f1() {
;
; RUN: diff %t_baseline/result.o.3.3.import.bc %t_exp/result.o.3.3.import.bc
;
-; This time, we expect m1 to have m2_f1 and the m2 variant of interposable_f,
-; while keeping its variant of noninterposable_f
+; This time, we expect m1 to have m2_f1 and the m2 variant of both interposable_f
+; and noninterposable_f
;
; FIRST-LABEL: @m1_f1
-; FIRST-LABEL: @m1_f2
-; FIRST-LABEL: define available_externally void @noninterposable_f
+; FIRST-LABEL: @m1_f2.llvm.0
+; FIRST-LABEL: define void @interposable_f
; FIRST-NEXT: call void @m1_variant
; FIRST-LABEL: @m2_f1
-; FIRST-LABEL: define available_externally void @interposable_f
+; FIRST-LABEL: define available_externally void @noninterposable_f
; FIRST-NEXT: call void @m2_variant
-; FIRST-LABEL: @m2_f1_alias
+; FIRST-LABEL: define available_externally void @m2_f1_alias
; SECOND-LABEL: @m2_f1
; SECOND-LABEL: @m1_f1
-; SECOND-LABEL: @m1_f2
+; SECOND-LABEL: define available_externally hidden void @m1_f2.llvm.0
+; SECOND-LABEL: define available_externally void @interposable_f
+; SECOND-NEXT: call void @m1_variant
; THIRD-LABEL: define available_externally void @m1_f1
\ No newline at end of file
>From 483e157b78448b585d46c7523392a18313961ff1 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Tue, 12 Dec 2023 15:24:45 -0800
Subject: [PATCH 7/8] some formatting
---
llvm/lib/Transforms/IPO/FunctionImport.cpp | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 30fd5925c9cf51..6252288eccfee0 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -603,14 +603,14 @@ class WorkloadImportsManager : public ModuleImportsManager {
}
Set.insert(ElemIt->second);
}
- LLVM_DEBUG( //
- dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
- << " distinct callees.\n";
- for (const auto &VI
- : Set) {
- dbgs() << "[Workload] Root: " << Root
- << " Would include: " << VI.getGUID() << "\n";
- });
+ LLVM_DEBUG({
+ dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
+ << " distinct callees.\n";
+ for (const auto &VI : Set) {
+ dbgs() << "[Workload] Root: " << Root
+ << " Would include: " << VI.getGUID() << "\n";
+ }
+ });
}
}
};
>From 4be9562c99cf3ed07f4190bc49fba0fe5da2aeae Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Tue, 12 Dec 2023 21:31:32 -0800
Subject: [PATCH 8/8] fix the test
---
llvm/lib/Transforms/IPO/FunctionImport.cpp | 4 ++++
llvm/test/ThinLTO/X86/workload.ll | 6 +++---
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 6252288eccfee0..6e6f7c47d2fec5 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -468,6 +468,10 @@ class WorkloadImportsManager : public ModuleImportsManager {
llvm::make_filter_range(
Candidates,
[&](const auto &Candidate) {
+ LLVM_DEBUG(dbgs() << "[Workflow] Candidate for " << VI.name()
+ << " from " << Candidate.second->modulePath()
+ << " ImportFailureReason: "
+ << getFailureName(Candidate.first) << "\n");
return Candidate.first ==
FunctionImporter::ImportFailureReason::None;
}),
diff --git a/llvm/test/ThinLTO/X86/workload.ll b/llvm/test/ThinLTO/X86/workload.ll
index 74b221180f57f5..b630af89ae39f4 100644
--- a/llvm/test/ThinLTO/X86/workload.ll
+++ b/llvm/test/ThinLTO/X86/workload.ll
@@ -44,12 +44,12 @@ define dso_local void @m2_f1() {
@m2_f1_alias = alias void (...), ptr @m2_f1
-define linkonce void @interposable_f() {
+define weak void @interposable_f() {
call void @m2_variant()
ret void
}
-define external void @noninterposable_f() {
+define linkonce_odr void @noninterposable_f() {
call void @m2_variant()
ret void
}
@@ -102,7 +102,7 @@ define dso_local void @m3_f1() {
; RUN: -thinlto-workload-def=%t_exp/workload_defs.json \
; RUN: -r %t/m1.bc,m1_f1,plx \
; RUN: -r %t/m1.bc,interposable_f,p \
-; RUN: -r %t/m1.bc,noninterposable_f,p \
+; RUN: -r %t/m1.bc,noninterposable_f \
; RUN: -r %t/m1.bc,m1_variant \
; RUN: -r %t/m1.bc,m2_f1_alias \
; RUN: -r %t/m2.bc,m2_f1,plx \
More information about the llvm-commits
mailing list