[llvm] [InstrProf] Fix trace reservoir sampling (PR #152563)
Ellis Hoag via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 8 09:46:40 PDT 2025
https://github.com/ellishg updated https://github.com/llvm/llvm-project/pull/152563
>From 2f7c303260a829373f1bc8490bf9bd24b0e36e47 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Thu, 7 Aug 2025 10:52:47 -0700
Subject: [PATCH] [InstrProf] Fix trace reservoir sampling
---
.../llvm/ProfileData/InstrProfWriter.h | 2 -
llvm/lib/ProfileData/InstrProfWriter.cpp | 63 ++++++-------------
.../tools/llvm-profdata/merge-traces.proftext | 9 +++
.../tools/llvm-profdata/trace-limit.proftext | 2 +-
4 files changed, 28 insertions(+), 48 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index f339fe2c2a9eb..1b24425e68a9e 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -226,8 +226,6 @@ class InstrProfWriter {
void addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I,
uint64_t Weight, function_ref<void(Error)> Warn);
bool shouldEncodeData(const ProfilingData &PD);
- /// Add \p Trace using reservoir sampling.
- void addTemporalProfileTrace(TemporalProfTraceTy Trace);
/// Add a memprof record for a function identified by its \p Id.
void addMemProfRecord(const GlobalValue::GUID Id,
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 7ca26aa138012..df807fc02b910 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -331,61 +331,34 @@ void InstrProfWriter::addDataAccessProfData(
DataAccessProfileData = std::move(DataAccessProfDataIn);
}
-void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
- assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
- assert(!Trace.FunctionNameRefs.empty());
- if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) {
- // Simply append the trace if we have not yet hit our reservoir size limit.
- TemporalProfTraces.push_back(std::move(Trace));
- } else {
- // Otherwise, replace a random trace in the stream.
- std::uniform_int_distribution<uint64_t> Distribution(
- 0, TemporalProfTraceStreamSize);
- uint64_t RandomIndex = Distribution(RNG);
- if (RandomIndex < TemporalProfTraces.size())
- TemporalProfTraces[RandomIndex] = std::move(Trace);
- }
- ++TemporalProfTraceStreamSize;
-}
-
void InstrProfWriter::addTemporalProfileTraces(
SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) {
+ if (TemporalProfTraces.size() > TemporalProfTraceReservoirSize)
+ TemporalProfTraces.truncate(TemporalProfTraceReservoirSize);
for (auto &Trace : SrcTraces)
if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength)
Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength);
llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); });
- // Assume that the source has the same reservoir size as the destination to
- // avoid needing to record it in the indexed profile format.
- bool IsDestSampled =
- (TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize);
- bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize);
- if (!IsDestSampled && IsSrcSampled) {
- // If one of the traces are sampled, ensure that it belongs to Dest.
- std::swap(TemporalProfTraces, SrcTraces);
- std::swap(TemporalProfTraceStreamSize, SrcStreamSize);
- std::swap(IsDestSampled, IsSrcSampled);
- }
- if (!IsSrcSampled) {
- // If the source stream is not sampled, we add each source trace normally.
- for (auto &Trace : SrcTraces)
- addTemporalProfileTrace(std::move(Trace));
+ // If there are no source traces, it is probably because
+ // --temporal-profile-max-trace-length=0 was set to deliberately remove all
+ // traces. In that case, we do not want to increase the stream size
+ if (SrcTraces.empty())
return;
- }
- // Otherwise, we find the traces that would have been removed if we added
- // the whole source stream.
- SmallSetVector<uint64_t, 8> IndicesToReplace;
- for (uint64_t I = 0; I < SrcStreamSize; I++) {
- std::uniform_int_distribution<uint64_t> Distribution(
- 0, TemporalProfTraceStreamSize);
+ // Add traces until our reservoir is full or we run out of source traces
+ auto SrcTraceIt = SrcTraces.begin();
+ while (TemporalProfTraces.size() < TemporalProfTraceReservoirSize &&
+ SrcTraceIt < SrcTraces.end())
+ TemporalProfTraces.push_back(*SrcTraceIt++);
+ // Our reservoir is full, we need to sample the source stream
+ llvm::shuffle(SrcTraceIt, SrcTraces.end(), RNG);
+ for (uint64_t I = TemporalProfTraces.size();
+ I < SrcStreamSize && SrcTraceIt < SrcTraces.end(); I++) {
+ std::uniform_int_distribution<uint64_t> Distribution(0, I);
uint64_t RandomIndex = Distribution(RNG);
if (RandomIndex < TemporalProfTraces.size())
- IndicesToReplace.insert(RandomIndex);
- ++TemporalProfTraceStreamSize;
+ TemporalProfTraces[RandomIndex] = *SrcTraceIt++;
}
- // Then we insert a random sample of the source traces.
- llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG);
- for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces))
- TemporalProfTraces[Index] = std::move(Trace);
+ TemporalProfTraceStreamSize += SrcStreamSize;
}
void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
diff --git a/llvm/test/tools/llvm-profdata/merge-traces.proftext b/llvm/test/tools/llvm-profdata/merge-traces.proftext
index 57ea16cd72fdb..3512f33cd06a9 100644
--- a/llvm/test/tools/llvm-profdata/merge-traces.proftext
+++ b/llvm/test/tools/llvm-profdata/merge-traces.proftext
@@ -12,15 +12,24 @@
# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %t-2.profdata %s %s --text | FileCheck %s --check-prefixes=CHECK,SEEN4,SAMPLE2
# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %t-2.profdata %t-2.profdata --text | FileCheck %s --check-prefixes=CHECK,SEEN4,SAMPLE2
+# Test that we can increase the reservoir size, even if inputs are sampled
+# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %s %s %s %s -o %t-4.profdata
+# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=4 %t-4.profdata %t-4.profdata --text | FileCheck %s --check-prefixes=CHECK,SEEN8,SAMPLE4
+
+# Test that decreasing the reservoir size truncates traces
+# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=1 %t-4.profdata --text | FileCheck %s --check-prefixes=CHECK,SEEN4,SAMPLE1
+
# CHECK: :temporal_prof_traces
# CHECK: # Num Temporal Profile Traces:
# SAMPLE1: 1
# SAMPLE2: 2
+# SAMPLE4: 4
# CHECK: # Temporal Profile Trace Stream Size:
# SEEN1: 1
# SEEN2: 2
# SEEN3: 3
# SEEN4: 4
+# SEEN8: 8
# CHECK: a,b,c,
# Header
diff --git a/llvm/test/tools/llvm-profdata/trace-limit.proftext b/llvm/test/tools/llvm-profdata/trace-limit.proftext
index e246ee890ba31..6b4f974add169 100644
--- a/llvm/test/tools/llvm-profdata/trace-limit.proftext
+++ b/llvm/test/tools/llvm-profdata/trace-limit.proftext
@@ -11,7 +11,7 @@
# RUN: llvm-profdata merge --temporal-profile-max-trace-length=1000 %s -o %t.profdata
# RUN: llvm-profdata show --temporal-profile-traces %t.profdata | FileCheck %s --check-prefixes=CHECK,ALL
-# NONE: Temporal Profile Traces (samples=0
+# NONE: Temporal Profile Traces (samples=0 seen=0):
# CHECK: Temporal Profile Traces (samples=1 seen=1):
# SOME: Trace 0 (weight=1 count=2):
# ALL: Trace 0 (weight=1 count=3):
More information about the llvm-commits
mailing list