[llvm] [InstrProf] Fix trace reservoir sampling (PR #152563)

Ellis Hoag via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 8 09:46:40 PDT 2025


https://github.com/ellishg updated https://github.com/llvm/llvm-project/pull/152563

>From 2f7c303260a829373f1bc8490bf9bd24b0e36e47 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Thu, 7 Aug 2025 10:52:47 -0700
Subject: [PATCH] [InstrProf] Fix trace reservoir sampling

---
 .../llvm/ProfileData/InstrProfWriter.h        |  2 -
 llvm/lib/ProfileData/InstrProfWriter.cpp      | 63 ++++++-------------
 .../tools/llvm-profdata/merge-traces.proftext |  9 +++
 .../tools/llvm-profdata/trace-limit.proftext  |  2 +-
 4 files changed, 28 insertions(+), 48 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index f339fe2c2a9eb..1b24425e68a9e 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -226,8 +226,6 @@ class InstrProfWriter {
   void addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I,
                  uint64_t Weight, function_ref<void(Error)> Warn);
   bool shouldEncodeData(const ProfilingData &PD);
-  /// Add \p Trace using reservoir sampling.
-  void addTemporalProfileTrace(TemporalProfTraceTy Trace);
 
   /// Add a memprof record for a function identified by its \p Id.
   void addMemProfRecord(const GlobalValue::GUID Id,
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 7ca26aa138012..df807fc02b910 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -331,61 +331,34 @@ void InstrProfWriter::addDataAccessProfData(
   DataAccessProfileData = std::move(DataAccessProfDataIn);
 }
 
-void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
-  assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
-  assert(!Trace.FunctionNameRefs.empty());
-  if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) {
-    // Simply append the trace if we have not yet hit our reservoir size limit.
-    TemporalProfTraces.push_back(std::move(Trace));
-  } else {
-    // Otherwise, replace a random trace in the stream.
-    std::uniform_int_distribution<uint64_t> Distribution(
-        0, TemporalProfTraceStreamSize);
-    uint64_t RandomIndex = Distribution(RNG);
-    if (RandomIndex < TemporalProfTraces.size())
-      TemporalProfTraces[RandomIndex] = std::move(Trace);
-  }
-  ++TemporalProfTraceStreamSize;
-}
-
 void InstrProfWriter::addTemporalProfileTraces(
     SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) {
+  if (TemporalProfTraces.size() > TemporalProfTraceReservoirSize)
+    TemporalProfTraces.truncate(TemporalProfTraceReservoirSize);
   for (auto &Trace : SrcTraces)
     if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength)
       Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength);
   llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); });
-  // Assume that the source has the same reservoir size as the destination to
-  // avoid needing to record it in the indexed profile format.
-  bool IsDestSampled =
-      (TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize);
-  bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize);
-  if (!IsDestSampled && IsSrcSampled) {
-    // If one of the traces are sampled, ensure that it belongs to Dest.
-    std::swap(TemporalProfTraces, SrcTraces);
-    std::swap(TemporalProfTraceStreamSize, SrcStreamSize);
-    std::swap(IsDestSampled, IsSrcSampled);
-  }
-  if (!IsSrcSampled) {
-    // If the source stream is not sampled, we add each source trace normally.
-    for (auto &Trace : SrcTraces)
-      addTemporalProfileTrace(std::move(Trace));
+  // If there are no source traces, it is probably because
+  // --temporal-profile-max-trace-length=0 was set to deliberately remove all
+  // traces. In that case, we do not want to increase the stream size
+  if (SrcTraces.empty())
     return;
-  }
-  // Otherwise, we find the traces that would have been removed if we added
-  // the whole source stream.
-  SmallSetVector<uint64_t, 8> IndicesToReplace;
-  for (uint64_t I = 0; I < SrcStreamSize; I++) {
-    std::uniform_int_distribution<uint64_t> Distribution(
-        0, TemporalProfTraceStreamSize);
+  // Add traces until our reservoir is full or we run out of source traces
+  auto SrcTraceIt = SrcTraces.begin();
+  while (TemporalProfTraces.size() < TemporalProfTraceReservoirSize &&
+         SrcTraceIt < SrcTraces.end())
+    TemporalProfTraces.push_back(*SrcTraceIt++);
+  // Our reservoir is full, we need to sample the source stream
+  llvm::shuffle(SrcTraceIt, SrcTraces.end(), RNG);
+  for (uint64_t I = TemporalProfTraces.size();
+       I < SrcStreamSize && SrcTraceIt < SrcTraces.end(); I++) {
+    std::uniform_int_distribution<uint64_t> Distribution(0, I);
     uint64_t RandomIndex = Distribution(RNG);
     if (RandomIndex < TemporalProfTraces.size())
-      IndicesToReplace.insert(RandomIndex);
-    ++TemporalProfTraceStreamSize;
+      TemporalProfTraces[RandomIndex] = *SrcTraceIt++;
   }
-  // Then we insert a random sample of the source traces.
-  llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG);
-  for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces))
-    TemporalProfTraces[Index] = std::move(Trace);
+  TemporalProfTraceStreamSize += SrcStreamSize;
 }
 
 void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
diff --git a/llvm/test/tools/llvm-profdata/merge-traces.proftext b/llvm/test/tools/llvm-profdata/merge-traces.proftext
index 57ea16cd72fdb..3512f33cd06a9 100644
--- a/llvm/test/tools/llvm-profdata/merge-traces.proftext
+++ b/llvm/test/tools/llvm-profdata/merge-traces.proftext
@@ -12,15 +12,24 @@
 # RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %t-2.profdata %s %s --text | FileCheck %s --check-prefixes=CHECK,SEEN4,SAMPLE2
 # RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %t-2.profdata %t-2.profdata --text | FileCheck %s --check-prefixes=CHECK,SEEN4,SAMPLE2
 
+# Test that we can increase the reservoir size, even if inputs are sampled
+# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %s %s %s %s -o %t-4.profdata
+# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=4 %t-4.profdata %t-4.profdata --text | FileCheck %s --check-prefixes=CHECK,SEEN8,SAMPLE4
+
+# Test that decreasing the reservoir size truncates traces
+# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=1 %t-4.profdata --text | FileCheck %s --check-prefixes=CHECK,SEEN4,SAMPLE1
+
 # CHECK: :temporal_prof_traces
 # CHECK: # Num Temporal Profile Traces:
 # SAMPLE1: 1
 # SAMPLE2: 2
+# SAMPLE4: 4
 # CHECK: # Temporal Profile Trace Stream Size:
 # SEEN1: 1
 # SEEN2: 2
 # SEEN3: 3
 # SEEN4: 4
+# SEEN8: 8
 # CHECK: a,b,c,
 
 # Header
diff --git a/llvm/test/tools/llvm-profdata/trace-limit.proftext b/llvm/test/tools/llvm-profdata/trace-limit.proftext
index e246ee890ba31..6b4f974add169 100644
--- a/llvm/test/tools/llvm-profdata/trace-limit.proftext
+++ b/llvm/test/tools/llvm-profdata/trace-limit.proftext
@@ -11,7 +11,7 @@
 # RUN: llvm-profdata merge --temporal-profile-max-trace-length=1000 %s -o %t.profdata
 # RUN: llvm-profdata show --temporal-profile-traces %t.profdata | FileCheck %s --check-prefixes=CHECK,ALL
 
-# NONE: Temporal Profile Traces (samples=0
+# NONE: Temporal Profile Traces (samples=0 seen=0):
 # CHECK: Temporal Profile Traces (samples=1 seen=1):
 # SOME:   Trace 0 (weight=1 count=2):
 # ALL:    Trace 0 (weight=1 count=3):



More information about the llvm-commits mailing list