[llvm] 3b884db - [InstrProf] Fix trace reservoir sampling (#152563)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 8 10:21:32 PDT 2025


Author: Ellis Hoag
Date: 2025-08-08T10:21:29-07:00
New Revision: 3b884db6ef458148ddc71e8133b334f8e1794ff9

URL: https://github.com/llvm/llvm-project/commit/3b884db6ef458148ddc71e8133b334f8e1794ff9
DIFF: https://github.com/llvm/llvm-project/commit/3b884db6ef458148ddc71e8133b334f8e1794ff9.diff

LOG: [InstrProf] Fix trace reservoir sampling (#152563)

`InstrProfWriter::addTemporalProfileTraces()` did not correctly account
for when the sources traces are sampled, but the reservoir size is
larger than what it was before, meaning there is room for more traces.
Also, if the reservoir size decreased, meaning traces should be
truncated.

Depends on https://github.com/llvm/llvm-project/pull/152550 for the test
refactor

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/InstrProfWriter.h
    llvm/lib/ProfileData/InstrProfWriter.cpp
    llvm/test/tools/llvm-profdata/merge-traces.proftext
    llvm/test/tools/llvm-profdata/trace-limit.proftext

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index f339fe2c2a9eb..1b24425e68a9e 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -226,8 +226,6 @@ class InstrProfWriter {
   void addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I,
                  uint64_t Weight, function_ref<void(Error)> Warn);
   bool shouldEncodeData(const ProfilingData &PD);
-  /// Add \p Trace using reservoir sampling.
-  void addTemporalProfileTrace(TemporalProfTraceTy Trace);
 
   /// Add a memprof record for a function identified by its \p Id.
   void addMemProfRecord(const GlobalValue::GUID Id,

diff  --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 7ca26aa138012..df807fc02b910 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -331,61 +331,34 @@ void InstrProfWriter::addDataAccessProfData(
   DataAccessProfileData = std::move(DataAccessProfDataIn);
 }
 
-void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
-  assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
-  assert(!Trace.FunctionNameRefs.empty());
-  if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) {
-    // Simply append the trace if we have not yet hit our reservoir size limit.
-    TemporalProfTraces.push_back(std::move(Trace));
-  } else {
-    // Otherwise, replace a random trace in the stream.
-    std::uniform_int_distribution<uint64_t> Distribution(
-        0, TemporalProfTraceStreamSize);
-    uint64_t RandomIndex = Distribution(RNG);
-    if (RandomIndex < TemporalProfTraces.size())
-      TemporalProfTraces[RandomIndex] = std::move(Trace);
-  }
-  ++TemporalProfTraceStreamSize;
-}
-
 void InstrProfWriter::addTemporalProfileTraces(
     SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) {
+  if (TemporalProfTraces.size() > TemporalProfTraceReservoirSize)
+    TemporalProfTraces.truncate(TemporalProfTraceReservoirSize);
   for (auto &Trace : SrcTraces)
     if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength)
       Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength);
   llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); });
-  // Assume that the source has the same reservoir size as the destination to
-  // avoid needing to record it in the indexed profile format.
-  bool IsDestSampled =
-      (TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize);
-  bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize);
-  if (!IsDestSampled && IsSrcSampled) {
-    // If one of the traces are sampled, ensure that it belongs to Dest.
-    std::swap(TemporalProfTraces, SrcTraces);
-    std::swap(TemporalProfTraceStreamSize, SrcStreamSize);
-    std::swap(IsDestSampled, IsSrcSampled);
-  }
-  if (!IsSrcSampled) {
-    // If the source stream is not sampled, we add each source trace normally.
-    for (auto &Trace : SrcTraces)
-      addTemporalProfileTrace(std::move(Trace));
+  // If there are no source traces, it is probably because
+  // --temporal-profile-max-trace-length=0 was set to deliberately remove all
+  // traces. In that case, we do not want to increase the stream size
+  if (SrcTraces.empty())
     return;
-  }
-  // Otherwise, we find the traces that would have been removed if we added
-  // the whole source stream.
-  SmallSetVector<uint64_t, 8> IndicesToReplace;
-  for (uint64_t I = 0; I < SrcStreamSize; I++) {
-    std::uniform_int_distribution<uint64_t> Distribution(
-        0, TemporalProfTraceStreamSize);
+  // Add traces until our reservoir is full or we run out of source traces
+  auto SrcTraceIt = SrcTraces.begin();
+  while (TemporalProfTraces.size() < TemporalProfTraceReservoirSize &&
+         SrcTraceIt < SrcTraces.end())
+    TemporalProfTraces.push_back(*SrcTraceIt++);
+  // Our reservoir is full, we need to sample the source stream
+  llvm::shuffle(SrcTraceIt, SrcTraces.end(), RNG);
+  for (uint64_t I = TemporalProfTraces.size();
+       I < SrcStreamSize && SrcTraceIt < SrcTraces.end(); I++) {
+    std::uniform_int_distribution<uint64_t> Distribution(0, I);
     uint64_t RandomIndex = Distribution(RNG);
     if (RandomIndex < TemporalProfTraces.size())
-      IndicesToReplace.insert(RandomIndex);
-    ++TemporalProfTraceStreamSize;
+      TemporalProfTraces[RandomIndex] = *SrcTraceIt++;
   }
-  // Then we insert a random sample of the source traces.
-  llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG);
-  for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces))
-    TemporalProfTraces[Index] = std::move(Trace);
+  TemporalProfTraceStreamSize += SrcStreamSize;
 }
 
 void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,

diff  --git a/llvm/test/tools/llvm-profdata/merge-traces.proftext b/llvm/test/tools/llvm-profdata/merge-traces.proftext
index 57ea16cd72fdb..3512f33cd06a9 100644
--- a/llvm/test/tools/llvm-profdata/merge-traces.proftext
+++ b/llvm/test/tools/llvm-profdata/merge-traces.proftext
@@ -12,15 +12,24 @@
 # RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %t-2.profdata %s %s --text | FileCheck %s --check-prefixes=CHECK,SEEN4,SAMPLE2
 # RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %t-2.profdata %t-2.profdata --text | FileCheck %s --check-prefixes=CHECK,SEEN4,SAMPLE2
 
+# Test that we can increase the reservoir size, even if inputs are sampled
+# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %s %s %s %s -o %t-4.profdata
+# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=4 %t-4.profdata %t-4.profdata --text | FileCheck %s --check-prefixes=CHECK,SEEN8,SAMPLE4
+
+# Test that decreasing the reservoir size truncates traces
+# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=1 %t-4.profdata --text | FileCheck %s --check-prefixes=CHECK,SEEN4,SAMPLE1
+
 # CHECK: :temporal_prof_traces
 # CHECK: # Num Temporal Profile Traces:
 # SAMPLE1: 1
 # SAMPLE2: 2
+# SAMPLE4: 4
 # CHECK: # Temporal Profile Trace Stream Size:
 # SEEN1: 1
 # SEEN2: 2
 # SEEN3: 3
 # SEEN4: 4
+# SEEN8: 8
 # CHECK: a,b,c,
 
 # Header

diff  --git a/llvm/test/tools/llvm-profdata/trace-limit.proftext b/llvm/test/tools/llvm-profdata/trace-limit.proftext
index e246ee890ba31..6b4f974add169 100644
--- a/llvm/test/tools/llvm-profdata/trace-limit.proftext
+++ b/llvm/test/tools/llvm-profdata/trace-limit.proftext
@@ -11,7 +11,7 @@
 # RUN: llvm-profdata merge --temporal-profile-max-trace-length=1000 %s -o %t.profdata
 # RUN: llvm-profdata show --temporal-profile-traces %t.profdata | FileCheck %s --check-prefixes=CHECK,ALL
 
-# NONE: Temporal Profile Traces (samples=0
+# NONE: Temporal Profile Traces (samples=0 seen=0):
 # CHECK: Temporal Profile Traces (samples=1 seen=1):
 # SOME:   Trace 0 (weight=1 count=2):
 # ALL:    Trace 0 (weight=1 count=3):


        


More information about the llvm-commits mailing list