[llvm] 8a0406d - [llvm-profgen] Filter out invalid LBR ranges.

Thu Apr 7 21:42:09 PDT 2022

Author: Hongtao Yu
Date: 2022-04-07T21:42:01-07:00
New Revision: 8a0406dcc8ec601e5612638a9d5ca049637c9366

URL: https://github.com/llvm/llvm-project/commit/8a0406dcc8ec601e5612638a9d5ca049637c9366
DIFF: https://github.com/llvm/llvm-project/commit/8a0406dcc8ec601e5612638a9d5ca049637c9366.diff

LOG: [llvm-profgen] Filter out invalid LBR ranges.

The profiler can sometimes give us a LBR trace that implicates bogus code ranges. For example,

    0xc5acb56/0xc66c6c0 0xc628195/0xf31fbb0 0xc611261/0xc628130 0xc5c1a21/0xc6111c0 0x1f7edfd3/0xc5c3a50 0xc5c154f/0x1f7edec0 0xe8eed07/0xc5c11e0

, note that the first two pairs are supposed to form a linear execution range, in this case, it is [0xf31fbb0, 0xc5acb56] , which doesn't make sense.

Such bogus ranges should be ruled out to avoid generating a bad profile. I'm fixing this for both CS and non-CS cases.

Reviewed By: wenlei

Differential Revision: https://reviews.llvm.org/D123271

Added: 
    llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript
    llvm/test/tools/llvm-profgen/invalid-range.test

Modified: 
    llvm/tools/llvm-profgen/PerfReader.cpp
    llvm/tools/llvm-profgen/PerfReader.h

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript b/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript
new file mode 100644
index 0000000000000..d6f855301b7ae

--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript
@@ -0,0 +1,10 @@
+ PERF_RECORD_MMAP2 1243676/1243676: [0x201000(0x1000) @ 0 00:1d 224517108 1044165]: r-xp /home/noinline-cs-pseudoprobe.perfbin
+
+	          20179e
+	          2017f9
+	    7f83e84e7793
+	5541f689495641d7
+ 0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017d8/0x2017e3/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0
+
+
+// The consecutive pairs 0x2017bf/0x201760 and 0x2017d8/0x2017e3 form an invalid execution range [0x2017e3, 0x2017bf], should be ignored to avoid bogus instruction ranges.

diff  --git a/llvm/test/tools/llvm-profgen/invalid-range.test b/llvm/test/tools/llvm-profgen/invalid-range.test
new file mode 100644
index 0000000000000..6aa0ef1df941a
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/invalid-range.test
@@ -0,0 +1,68 @@
+; In the perfscript input, the consecutive branch pairs 0x2017bf/0x201760 and 0x2017d8/0x2017e3 form an invalid execution range [0x2017e3, 0x2017bf].
+; We are testing only the invalid range is dropped to avoid bogus instruction ranges. All other ranges and all branch samples should be kept.
+;
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/invalid-range.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t1 --skip-symbolization --ignore-stack-samples --use-offset=0
+; RUN: FileCheck %s --input-file %t1 --check-prefix=NOCS
+
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/invalid-range.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t2 --skip-symbolization --use-offset=0
+; RUN: FileCheck %s --input-file %t2 --check-prefix=CS
+
+
+; NOCS:      4
+; NOCS-NEXT: 201760-20177f:2
+; NOCS-NEXT: 20179e-2017bf:1
+; NOCS-NEXT: 2017c4-2017cf:1
+; NOCS-NEXT: 2017c4-2017d8:1
+; NOCS-NEXT: 4
+; NOCS-NEXT: 20177f->2017c4:2
+; NOCS-NEXT: 2017bf->201760:2
+; NOCS-NEXT: 2017cf->20179e:2
+; NOCS-NEXT: 2017d8->2017e3:1
+
+
+; CS:      []
+; CS-NEXT:  3
+; CS-NEXT:  201760-20177f:1
+; CS-NEXT:  20179e-2017bf:1
+; CS-NEXT:  2017c4-2017d8:1
+; CS-NEXT:  4
+; CS-NEXT:  20177f->2017c4:1
+; CS-NEXT:  2017bf->201760:1
+; CS-NEXT:  2017cf->20179e:1
+; CS-NEXT:  2017d8->2017e3:1
+; CS-NEXT: [0x7f4]
+; CS-NEXT:  1
+; CS-NEXT:  2017c4-2017cf:1
+; CS-NEXT:  2
+; CS-NEXT:  2017bf->201760:1
+; CS-NEXT:  2017cf->20179e:1
+; CS-NEXT: [0x7f4 @ 0x7bf]
+; CS-NEXT:  1
+; CS-NEXT:  201760-20177f:1
+; CS-NEXT:  1
+; CS-NEXT:  20177f->2017c4:1
+
+; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
+; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
+; -g test.c  -o a.out
+
+#include <stdio.h>
+
+int bar(int x, int y) {
+  if (x % 3) {
+    return x - y;
+  }
+  return x + y;
+}
+
+void foo() {
+  int s, i = 0;
+  while (i++ < 4000 * 4000)
+    if (i % 91) s = bar(i, s); else s += 30;
+  printf("sum is %d\n", s);
+}
+
+int main() {
+  foo();
+  return 0;
+}

diff  --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 02f653e68e899..ec4362c856747 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -100,6 +100,11 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
   InstructionPointer &IP = State.InstPtr;
   uint64_t Target = State.getCurrentLBRTarget();
   uint64_t End = IP.Address;
+  if (Target > End) {
+    // Skip unwinding the rest of LBR trace when a bogus range is seen.
+    State.setInvalid();
+    return;
+  }
   if (Binary->usePseudoProbes()) {
     // We don't need to top frame probe since it should be extracted
     // from the range.
@@ -303,19 +308,28 @@ bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {
       // extra frame when processing the return paired with this call.
       unwindCall(State);
     } else if (isReturnState(State)) {
-      // Unwind returns - check whether the IP is indeed at a return instruction
+      // Unwind returns - check whether the IP is indeed at a return
+      // instruction
       unwindReturn(State);
-    } else {
+    } else if (isValidState(State)) {
       // Unwind branches
-      // For regular intra function branches, we only need to record branch with
-      // context. For an artificial branch cross function boundaries, we got an
-      // issue with returning to external code. Take the two LBR enties for
-      // example: [foo:8(RETURN), ext:1] [ext:3(CALL), bar:1] After perf reader,
-      // we only get[foo:8(RETURN), bar:1], unwinder will be confused like foo
-      // return to bar. Here we detect and treat this case as BRANCH instead of
-      // RETURN which only update the source address.
+      // For regular intra function branches, we only need to record branch
+      // with context. For an artificial branch cross function boundaries, we
+      // got an issue with returning to external code. Take the two LBR enties
+      // for example: [foo:8(RETURN), ext:1] [ext:3(CALL), bar:1] After perf
+      // reader, we only get[foo:8(RETURN), bar:1], unwinder will be confused
+      // like foo return to bar. Here we detect and treat this case as BRANCH
+      // instead of RETURN which only update the source address.
       unwindBranch(State);
+    } else {
+      // Skip unwinding the rest of LBR trace. Reset the stack and update the
+      // state so that the rest of the trace can still be processed as if they
+      // do not have stack samples.
+      State.clearCallStack();
+      State.InstPtr.update(State.getCurrentLBRSource());
+      State.pushFrame(State.InstPtr.Address);
     }
+
     State.advanceLBR();
     // Record `branch` with calling context after unwinding.
     recordBranchCount(Branch, State, Repeat);
@@ -720,7 +734,9 @@ void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
   //          ... 0x4005c8/0x4005dc/P/-/-/0    # LBR Entries
   //
   std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>();
-
+#ifndef NDEBUG
+  Sample->Linenum = TraceIt.getLineNumber();
+#endif
   // Parsing call stack and populate into PerfSample.CallStack
   if (!extractCallstack(TraceIt, Sample->CallStack)) {
     // Skip the next LBR line matched current call stack
@@ -915,8 +931,10 @@ void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample,
     // If this not the first LBR, update the range count between TO of current
     // LBR and FROM of next LBR.
     uint64_t StartOffset = TargetOffset;
-    if (EndOffeset != 0)
-      Counter.recordRangeCount(StartOffset, EndOffeset, Repeat);
+    if (EndOffeset != 0) {
+      if (StartOffset <= EndOffeset)
+        Counter.recordRangeCount(StartOffset, EndOffeset, Repeat);
+    }
     EndOffeset = SourceOffset;
   }
 }
@@ -1161,41 +1179,55 @@ void PerfScriptReader::warnInvalidRange() {
   const char *RangeCrossFuncMsg =
       "Fall through range should not cross function boundaries, likely due to "
       "profile and binary mismatch.";
+  const char *BogusRangeMsg = "Range start is after range end.";
 
+  uint64_t TotalRangeNum = 0;
   uint64_t InstNotBoundary = 0;
   uint64_t UnmatchedRange = 0;
   uint64_t RangeCrossFunc = 0;
+  uint64_t BogusRange = 0;
 
   for (auto &I : Ranges) {
     uint64_t StartOffset = I.first.first;
     uint64_t EndOffset = I.first.second;
+    TotalRangeNum += I.second;
 
     if (!Binary->offsetIsCode(StartOffset) ||
         !Binary->offsetIsTransfer(EndOffset)) {
-      InstNotBoundary++;
+      InstNotBoundary += I.second;
       WarnInvalidRange(StartOffset, EndOffset, EndNotBoundaryMsg);
     }
 
     auto *FRange = Binary->findFuncRangeForOffset(StartOffset);
     if (!FRange) {
-      UnmatchedRange++;
+      UnmatchedRange += I.second;
       WarnInvalidRange(StartOffset, EndOffset, DanglingRangeMsg);
       continue;
     }
 
     if (EndOffset >= FRange->EndOffset) {
-      RangeCrossFunc++;
+      RangeCrossFunc += I.second;
       WarnInvalidRange(StartOffset, EndOffset, RangeCrossFuncMsg);
     }
+
+    if (StartOffset > EndOffset) {
+      BogusRange += I.second;
+      WarnInvalidRange(StartOffset, EndOffset, BogusRangeMsg);
+    }
   }
 
-  uint64_t TotalRangeNum = Ranges.size();
-  emitWarningSummary(InstNotBoundary, TotalRangeNum,
-                     "of profiled ranges are not on instruction boundary.");
-  emitWarningSummary(UnmatchedRange, TotalRangeNum,
-                     "of profiled ranges do not belong to any functions.");
-  emitWarningSummary(RangeCrossFunc, TotalRangeNum,
-                     "of profiled ranges do cross function boundaries.");
+  emitWarningSummary(
+      InstNotBoundary, TotalRangeNum,
+      "of samples are from ranges that are not on instruction boundary.");
+  emitWarningSummary(
+      UnmatchedRange, TotalRangeNum,
+      "of samples are from ranges that do not belong to any functions.");
+  emitWarningSummary(
+      RangeCrossFunc, TotalRangeNum,
+      "of samples are from ranges that do cross function boundaries.");
+  emitWarningSummary(
+      BogusRange, TotalRangeNum,
+      "of samples are from ranges that have range start after range end.");
 }
 
 void PerfScriptReader::parsePerfTraces() {

diff  --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h
index 021fb06fcbf43..052d1b4b3e8fa 100644
--- a/llvm/tools/llvm-profgen/PerfReader.h
+++ b/llvm/tools/llvm-profgen/PerfReader.h
@@ -197,7 +197,10 @@ struct PerfSample {
   }
 
 #ifndef NDEBUG
+  uint64_t Linenum = 0;
+
   void print() const {
+    dbgs() << "Line " << Linenum << "\n";
     dbgs() << "LBR stack\n";
     printLBRStack(LBRStack);
     dbgs() << "Call stack\n";
@@ -255,6 +258,9 @@ struct UnwindState {
   const SmallVector<LBREntry, 16> &LBRStack;
   // Used to iterate the address range
   InstructionPointer InstPtr;
+  // Indicate whether unwinding is currently in a bad state which requires to
+  // skip all subsequent unwinding.
+  bool Invalid = false;
   UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
       : Binary(Binary), LBRStack(Sample->LBRStack),
         InstPtr(Binary, Sample->CallStack.front()) {
@@ -284,6 +290,7 @@ struct UnwindState {
            "IP should align with context leaf");
   }
 
+  void setInvalid() { Invalid = true; }
   bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
   uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
   uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
@@ -476,10 +483,14 @@ class VirtualUnwinder {
   bool isCallState(UnwindState &State) const {
     // The tail call frame is always missing here in stack sample, we will
     // use a specific tail call tracker to infer it.
-    return Binary->addressIsCall(State.getCurrentLBRSource());
+    return isValidState(State) &&
+           Binary->addressIsCall(State.getCurrentLBRSource());
   }
 
   bool isReturnState(UnwindState &State) const {
+    if (!isValidState(State))
+      return false;
+
     // Simply check addressIsReturn, as ret is always reliable, both for
     // regular call and tail call.
     if (!Binary->addressIsReturn(State.getCurrentLBRSource()))
@@ -497,6 +508,8 @@ class VirtualUnwinder {
     return (CallAddr != 0);
   }
 
+  bool isValidState(UnwindState &State) const { return !State.Invalid; }
+
   void unwindCall(UnwindState &State);
   void unwindLinear(UnwindState &State, uint64_t Repeat);
   void unwindReturn(UnwindState &State);