[llvm] [ctx_prof] Extend `WorkloadImportsManager` to use the contextual profile (PR #98682)
Mircea Trofin via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 29 12:32:25 PDT 2024
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/98682
>From 3ffabdfe77cee4c0769fafb18ed72c5fc5e41c36 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Fri, 12 Jul 2024 12:08:58 -0700
Subject: [PATCH 1/6] [ctx_prof] Extend `WorkloadImportsManager` to use the
contextual profile
Keeping the json-based input as it's useful for diagnostics or for driving
the import by other means than contextual composition.
The support for the contextual profile is just another modality for
constructing the import list (`WorkloadImportsManager::Workloads`).
Everything else - i.e. the actual importing logic - is already independent
from how that list was obtained.
---
llvm/lib/Transforms/IPO/FunctionImport.cpp | 76 +++++++++++++++++--
.../PGOCtxProfReaderWriterTest.cpp | 9 +++
2 files changed, 77 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 038785114a0cf..985e419c2bb5e 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitstream/BitstreamWriter.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -30,6 +31,7 @@
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/IRMover.h"
+#include "llvm/ProfileData/PGOCtxProfReader.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -174,6 +176,10 @@ static cl::opt<std::string> WorkloadDefinitions(
"}"),
cl::Hidden);
+static cl::opt<std::string>
+ ContextualProfile("thinlto-pgo-ctx-prof",
+ cl::desc("Path to a contextual profile."), cl::Hidden);
+
namespace llvm {
extern cl::opt<bool> EnableMemProfContextDisambiguation;
}
@@ -586,13 +592,7 @@ class WorkloadImportsManager : public ModuleImportsManager {
LLVM_DEBUG(dbgs() << "[Workload] Done\n");
}
-public:
- WorkloadImportsManager(
- function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
- IsPrevailing,
- const ModuleSummaryIndex &Index,
- DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
- : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
+ void loadFromJson() {
// Since the workload def uses names, we need a quick lookup
// name->ValueInfo.
StringMap<ValueInfo> NameToValueInfo;
@@ -672,6 +672,66 @@ class WorkloadImportsManager : public ModuleImportsManager {
});
}
}
+
+ void loadFromCtxProf() {
+ std::error_code EC;
+ auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(ContextualProfile);
+ if (std::error_code EC = BufferOrErr.getError()) {
+ report_fatal_error("Failed to open contextual profile file");
+ return;
+ }
+ auto Buffer = std::move(BufferOrErr.get());
+
+ BitstreamCursor Cursor(*Buffer);
+ PGOCtxProfileReader Reader(Cursor);
+ auto Ctx = Reader.loadContexts();
+ if (!Ctx) {
+ report_fatal_error("Failed to parse contextual profiles");
+ return;
+ }
+ const auto &CtxMap = *Ctx;
+ for (const auto &[RootGuid, Root] : CtxMap) {
+ auto RootVI = Index.getValueInfo(RootGuid);
+ if (!RootVI) {
+ LLVM_DEBUG(dbgs() << "[Workload] Root " << RootGuid
+ << " not found in this linkage unit.\n");
+ continue;
+ }
+ if (RootVI.getSummaryList().size() != 1) {
+ LLVM_DEBUG(dbgs() << "[Workload] Root " << RootGuid
+ << " should have exactly one summary, but has "
+ << RootVI.getSummaryList().size() << ". Skipping.\n");
+ continue;
+ }
+ StringRef RootDefiningModule =
+ RootVI.getSummaryList().front()->modulePath();
+ LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << RootGuid
+ << " is : " << RootDefiningModule << "\n");
+ DenseSet<GlobalValue::GUID> ContainedGUIDs;
+ auto &Set = Workloads[RootDefiningModule];
+ Root.getContainedGuids(ContainedGUIDs);
+ for (auto Guid : ContainedGUIDs)
+ if (auto VI = Index.getValueInfo(Guid))
+ Set.insert(VI);
+ }
+ }
+
+public:
+ WorkloadImportsManager(
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing,
+ const ModuleSummaryIndex &Index,
+ DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
+ : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
+ if (ContextualProfile.empty() == WorkloadDefinitions.empty()) {
+ report_fatal_error(
+ "Pass only one of: -thinlto-pgo-ctx-prof or -thinlto-workload-def");
+ return;
+ }
+ if (!ContextualProfile.empty())
+ loadFromCtxProf();
+ loadFromJson();
+ }
};
std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
@@ -679,7 +739,7 @@ std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
IsPrevailing,
const ModuleSummaryIndex &Index,
DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
- if (WorkloadDefinitions.empty()) {
+ if (WorkloadDefinitions.empty() && ContextualProfile.empty()) {
LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
return std::unique_ptr<ModuleImportsManager>(
new ModuleImportsManager(IsPrevailing, Index, ExportLists));
diff --git a/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp b/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp
index d2cdbb28e2fce..6c6798ded00b5 100644
--- a/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp
+++ b/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp
@@ -115,6 +115,15 @@ TEST_F(PGOCtxProfRWTest, RoundTrip) {
EXPECT_EQ(Ctxes.size(), 2U);
for (auto &[G, R] : roots())
checkSame(*R, Ctxes.find(G)->second);
+
+ DenseSet<GlobalValue::GUID> Guids;
+ Ctxes.at(1U).getContainedGuids(Guids);
+ EXPECT_THAT(Guids,
+ testing::WhenSorted(testing::ElementsAre(1U, 2U, 4U, 5U)));
+
+ Guids.clear();
+ Ctxes.at(3U).getContainedGuids(Guids);
+ EXPECT_THAT(Guids, testing::ElementsAre(3U));
}
}
>From 8e105503ce6060d51494f07cad37d535e4959522 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Fri, 12 Jul 2024 12:15:38 -0700
Subject: [PATCH 2/6] Removed spurious include of BitstreamWriter, as well as
(oportunistically) unused one to Constants.
---
llvm/lib/Transforms/IPO/FunctionImport.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 985e419c2bb5e..9656ff2eaa00a 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -18,9 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitstream/BitstreamWriter.h"
#include "llvm/IR/AutoUpgrade.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
>From b6479f0c4f387e1bf2f46b8f0ab9b3c060571558 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Fri, 12 Jul 2024 12:18:23 -0700
Subject: [PATCH 3/6] reuse the `ContainedGUIDs` container.
---
llvm/lib/Transforms/IPO/FunctionImport.cpp | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 9656ff2eaa00a..28e24e437972e 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -688,7 +688,12 @@ class WorkloadImportsManager : public ModuleImportsManager {
return;
}
const auto &CtxMap = *Ctx;
+ DenseSet<GlobalValue::GUID> ContainedGUIDs;
for (const auto &[RootGuid, Root] : CtxMap) {
+ // Avoid ContainedGUIDs to get in/out of scope. Reuse its memory for
+ // subsequent roots, but clear its contents.
+ ContainedGUIDs.clear();
+
auto RootVI = Index.getValueInfo(RootGuid);
if (!RootVI) {
LLVM_DEBUG(dbgs() << "[Workload] Root " << RootGuid
@@ -705,7 +710,6 @@ class WorkloadImportsManager : public ModuleImportsManager {
RootVI.getSummaryList().front()->modulePath();
LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << RootGuid
<< " is : " << RootDefiningModule << "\n");
- DenseSet<GlobalValue::GUID> ContainedGUIDs;
auto &Set = Workloads[RootDefiningModule];
Root.getContainedGuids(ContainedGUIDs);
for (auto Guid : ContainedGUIDs)
>From 157d38cb1be0f9728055eac3012b740959e5a99c Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Wed, 17 Jul 2024 13:31:42 -0700
Subject: [PATCH 4/6] llvm_dbgs
---
llvm/lib/Transforms/IPO/FunctionImport.cpp | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 28e24e437972e..d0bc98217f2ee 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -660,14 +660,6 @@ class WorkloadImportsManager : public ModuleImportsManager {
}
Set.insert(ElemIt->second);
}
- LLVM_DEBUG({
- dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
- << " distinct callees.\n";
- for (const auto &VI : Set) {
- dbgs() << "[Workload] Root: " << Root
- << " Would include: " << VI.getGUID() << "\n";
- }
- });
}
}
@@ -733,6 +725,16 @@ class WorkloadImportsManager : public ModuleImportsManager {
if (!ContextualProfile.empty())
loadFromCtxProf();
loadFromJson();
+ LLVM_DEBUG({
+ for (const auto &[Root, Set] : Workloads) {
+ dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
+ << " distinct callees.\n";
+ for (const auto &VI : Set) {
+ dbgs() << "[Workload] Root: " << Root
+ << " Would include: " << VI.getGUID() << "\n";
+ }
+ }
+ });
}
};
>From 7cab5929301ba409c77a13e11ec4987f55a996fd Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Mon, 29 Jul 2024 10:54:49 -0700
Subject: [PATCH 5/6] added test
---
llvm/lib/Transforms/IPO/FunctionImport.cpp | 6 +-
llvm/test/ThinLTO/X86/ctxprof.ll | 72 ++++++++++++++++++++++
2 files changed, 75 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/ThinLTO/X86/ctxprof.ll
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 5aa939899ddb3..6c4f4001aab35 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -690,8 +690,7 @@ class WorkloadImportsManager : public ModuleImportsManager {
}
auto Buffer = std::move(BufferOrErr.get());
- BitstreamCursor Cursor(*Buffer);
- PGOCtxProfileReader Reader(Cursor);
+ PGOCtxProfileReader Reader(Buffer->getBuffer());
auto Ctx = Reader.loadContexts();
if (!Ctx) {
report_fatal_error("Failed to parse contextual profiles");
@@ -742,7 +741,8 @@ class WorkloadImportsManager : public ModuleImportsManager {
}
if (!ContextualProfile.empty())
loadFromCtxProf();
- loadFromJson();
+ else
+ loadFromJson();
LLVM_DEBUG({
for (const auto &[Root, Set] : Workloads) {
dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
diff --git a/llvm/test/ThinLTO/X86/ctxprof.ll b/llvm/test/ThinLTO/X86/ctxprof.ll
new file mode 100644
index 0000000000000..4c19c3dcd6539
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/ctxprof.ll
@@ -0,0 +1,72 @@
+; Test workload based importing via -thinlto-pgo-ctx-prof
+; Use external linkage symbols so we don't depend on module paths which are
+; used when computing the GUIDs of internal linkage symbols.
+; The functionality is shared with what workload.ll tests, so here we only care
+; about testing the ctx profile is loaded and handled correctly.
+;
+; Set up
+; RUN: rm -rf %t
+; RUN: mkdir -p %t
+; RUN: split-file %s %t
+;
+; RUN: opt -module-summary %t/m1.ll -o %t/m1.bc
+; RUN: opt -module-summary %t/m2.ll -o %t/m2.bc
+; RUN: llvm-dis %t/m1.bc -o - | FileCheck %s --check-prefix=GUIDS-1
+; RUN: llvm-dis %t/m2.bc -o - | FileCheck %s --check-prefix=GUIDS-2
+;
+; GUIDS-1: name: "m1_f1"
+; GUIDS-1-SAME: guid = 6019442868614718803
+; GUIDS-2: name: "m2_f1"
+; GUIDS-2-SAME: guid = 15593096274670919754
+;
+; RUN: rm -rf %t_baseline
+; RUN: rm -rf %t_exp
+; RUN: mkdir -p %t_baseline
+; RUN: mkdir -p %t_exp
+;
+; Normal run. m1 shouldn't get m2_f1 because it's not referenced from there.
+;
+; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc \
+; RUN: -o %t_baseline/result.o -save-temps \
+; RUN: -r %t/m1.bc,m1_f1,plx \
+; RUN: -r %t/m2.bc,m2_f1,plx
+; RUN: llvm-dis %t_baseline/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=NOPROF-1
+; RUN: llvm-dis %t_baseline/result.o.2.3.import.bc -o - | FileCheck %s --check-prefix=NOPROF-2
+;
+; NOPROF-1-NOT: m2_f1()
+; NOPROF-2-NOT: m1_f1()
+;
+; The run with workload definitions - same other options.
+;
+; RUN: echo '[ \
+; RUN: {"Guid": 6019442868614718803, "Counters": [1], "Callsites": [[{"Guid": 15593096274670919754, "Counters": [1]}]]}, \
+; RUN: {"Guid": 15593096274670919754, "Counters": [1], "Callsites": [[{"Guid": 6019442868614718803, "Counters": [1]}]]} \
+; RUN: ]' > %t_exp/ctxprof.json
+; RUN: llvm-ctxprof-util fromJSON --input %t_exp/ctxprof.json --output %t_exp/ctxprof.bitstream
+; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc \
+; RUN: -o %t_exp/result.o -save-temps \
+; RUN: -thinlto-pgo-ctx-prof=%t_exp/ctxprof.bitstream \
+; RUN: -r %t/m1.bc,m1_f1,plx \
+; RUN: -r %t/m2.bc,m2_f1,plx
+; RUN: llvm-dis %t_exp/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=FIRST
+; RUN: llvm-dis %t_exp/result.o.2.3.import.bc -o - | FileCheck %s --check-prefix=SECOND
+;
+;
+; FIRST: m2_f1()
+; SECOND: m1_f1()
+;
+;--- m1.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+define dso_local void @m1_f1() {
+ ret void
+}
+
+;--- m2.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+define dso_local void @m2_f1() {
+ ret void
+}
>From e5f9ec4d8ed40834eb3e80c7e38c5a647a106222 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Mon, 29 Jul 2024 12:31:57 -0700
Subject: [PATCH 6/6] fixes
---
llvm/test/ThinLTO/X86/ctxprof.ll | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/test/ThinLTO/X86/ctxprof.ll b/llvm/test/ThinLTO/X86/ctxprof.ll
index 4c19c3dcd6539..4c86ec9f4c479 100644
--- a/llvm/test/ThinLTO/X86/ctxprof.ll
+++ b/llvm/test/ThinLTO/X86/ctxprof.ll
@@ -24,7 +24,8 @@
; RUN: mkdir -p %t_baseline
; RUN: mkdir -p %t_exp
;
-; Normal run. m1 shouldn't get m2_f1 because it's not referenced from there.
+; Normal run. m1 shouldn't get m2_f1 because it's not referenced from there, and
+; m1_f1 shouldn't go to m2.
;
; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc \
; RUN: -o %t_baseline/result.o -save-temps \
More information about the llvm-commits
mailing list