[llvm] 5bf191a - [llvm-profgen] Fix index out of bounds error while using ip.advance

via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 5 18:39:50 PDT 2021


Author: wlei
Date: 2021-11-05T18:38:40-07:00
New Revision: 5bf191a381bba84edc791710e914147de23691dd

URL: https://github.com/llvm/llvm-project/commit/5bf191a381bba84edc791710e914147de23691dd
DIFF: https://github.com/llvm/llvm-project/commit/5bf191a381bba84edc791710e914147de23691dd.diff

LOG: [llvm-profgen] Fix index out of bounds error while using ip.advance

Previously we assume there're some non-executing sections at the bottom of the text section so that we won't hit the array's bound. But on BOLTed binary, it turned out .bolt section is at the bottom of text section which can be profiled, then it crash llvm-profgen. This change try to fix it.

Reviewed By: hoy, wenlei

Differential Revision: https://reviews.llvm.org/D113238

Added: 
    llvm/test/tools/llvm-profgen/Inputs/out-of-bounds.raw.prof

Modified: 
    llvm/test/tools/llvm-profgen/inline-noprobe.test
    llvm/tools/llvm-profgen/ProfileGenerator.cpp
    llvm/tools/llvm-profgen/ProfiledBinary.cpp
    llvm/tools/llvm-profgen/ProfiledBinary.h

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/llvm-profgen/Inputs/out-of-bounds.raw.prof b/llvm/test/tools/llvm-profgen/Inputs/out-of-bounds.raw.prof
new file mode 100644
index 0000000000000..b7c477f2eee62
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/out-of-bounds.raw.prof
@@ -0,0 +1,5 @@
+3
+0-0:1
+f-fff0:1
+ffff-ffff:1
+0

diff  --git a/llvm/test/tools/llvm-profgen/inline-noprobe.test b/llvm/test/tools/llvm-profgen/inline-noprobe.test
index 6ae2726b5a6aa..9d1473e097417 100644
--- a/llvm/test/tools/llvm-profgen/inline-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/inline-noprobe.test
@@ -9,6 +9,8 @@
 ; RUN: echo -e "0\n0" > %t
 ; RUN: llvm-profgen --format=text --unsymbolized-profile=%t --binary=%S/Inputs/inline-noprobe.perfbin --output=%t1 --fill-zero-for-all-funcs
 ; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-ALL-ZERO
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/out-of-bounds.raw.prof --binary=%S/Inputs/inline-noprobe.perfbin --output=%t1
+; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-OB
 
 CHECK: main:188:0
 CHECK:  0: 0
@@ -58,6 +60,33 @@ CHECK-RAW-PROFILE-NEXT: 2
 CHECK-RAW-PROFILE-NEXT: 677->650:21
 CHECK-RAW-PROFILE-NEXT: 691->669:43
 
+;CHECK-OB: foo:8:0
+;CHECK-OB:  0: 1
+;CHECK-OB:  2.1: 1
+;CHECK-OB:  3: 1
+;CHECK-OB:  3.2: 1
+;CHECK-OB:  4: 1
+;CHECK-OB:  3.1: bar:1
+;CHECK-OB:   1: 1
+;CHECK-OB:  3.2: bar:2
+;CHECK-OB:   1: 1
+;CHECK-OB:   7: 1
+;CHECK-OB: main:8:0
+;CHECK-OB:  0: 1
+;CHECK-OB:  2: 1
+;CHECK-OB:  1: foo:6
+;CHECK-OB:   2.1: 1
+;CHECK-OB:   3: 1
+;CHECK-OB:   3.2: 1
+;CHECK-OB:   4: 1
+;CHECK-OB:   3.1: bar:1
+;CHECK-OB:    1: 1
+;CHECK-OB:   3.2: bar:1
+;CHECK-OB:    1: 1
+;CHECK-OB: bar:2:0
+;CHECK-OB:  1: 1
+;CHECK-OB:  5: 1
+
 ; original code:
 ; clang -O3 -g -fdebug-info-for-profiling test.c -o a.out
 #include <stdio.h>

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 4c7a4a644e2f4..d0c074355629b 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -355,7 +355,7 @@ ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
   if (FillZeroForAllFuncs) {
     for (auto &FuncI : Binary->getAllBinaryFunctions()) {
       for (auto &R : FuncI.second.Ranges) {
-        Ranges[{R.first, R.second}] += 0;
+        Ranges[{R.first, R.second - 1}] += 0;
       }
     }
   } else {
@@ -385,7 +385,10 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
     // Disjoint ranges may have range in the middle of two instr,
     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
     // can be Addr1+1 to Addr2-1. We should ignore such range.
-    while (IP.Address <= RangeEnd) {
+    if (IP.Address > RangeEnd)
+      continue;
+
+    do {
       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
       const SampleContextFrameVector &FrameVec =
           Binary->getFrameLocationStack(Offset);
@@ -394,9 +397,7 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
                                             Count);
       }
-      // Move to next IP within the range.
-      IP.advance();
-    }
+    } while (IP.advance() && IP.Address <= RangeEnd);
   }
 }
 
@@ -538,17 +539,17 @@ void CSProfileGenerator::populateBodySamplesForFunction(
     // Disjoint ranges may have range in the middle of two instr,
     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
     // can be Addr1+1 to Addr2-1. We should ignore such range.
-    while (IP.Address <= RangeEnd) {
+    if (IP.Address > RangeEnd)
+      continue;
+
+    do {
       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
       if (LeafLoc.hasValue()) {
         // Recording body sample for this specific context
         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
       }
-
-      // Move to next IP within the range
-      IP.advance();
-    }
+    } while (IP.advance() && IP.Address <= RangeEnd);
   }
 }
 
@@ -714,14 +715,13 @@ void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
       continue;
 
     InstructionPointer IP(Binary, RangeBegin, true);
-
     // Disjoint ranges may have range in the middle of two instr,
     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
     // can be Addr1+1 to Addr2-1. We should ignore such range.
     if (IP.Address > RangeEnd)
       continue;
 
-    while (IP.Address <= RangeEnd) {
+    do {
       const AddressProbesMap &Address2ProbesMap =
           Binary->getAddress2ProbesMap();
       auto It = Address2ProbesMap.find(IP.Address);
@@ -732,9 +732,7 @@ void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter,
           ProbeCounter[&Probe] += Count;
         }
       }
-
-      IP.advance();
-    }
+    } while (IP.advance() && IP.Address <= RangeEnd);
   }
 }
 

diff  --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index cec28626291ef..4fe737eb96c68 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -657,13 +657,19 @@ SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
 
 void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset,
                                                        uint64_t EndOffset) {
-  uint32_t Index = getIndexForOffset(StartOffset);
-  if (CodeAddrOffsets[Index] != StartOffset)
+  uint64_t RangeBegin = offsetToVirtualAddr(StartOffset);
+  uint64_t RangeEnd = offsetToVirtualAddr(EndOffset);
+  InstructionPointer IP(this, RangeBegin, true);
+
+  if (IP.Address != RangeBegin)
     WithColor::warning() << "Invalid start instruction at "
-                         << format("%8" PRIx64, StartOffset) << "\n";
+                         << format("%8" PRIx64, RangeBegin) << "\n";
+
+  if (IP.Address >= RangeEnd)
+    return;
 
-  uint64_t Offset = CodeAddrOffsets[Index];
-  while (Offset < EndOffset) {
+  do {
+    uint64_t Offset = virtualAddrToOffset(IP.Address);
     const SampleContextFrameVector &SymbolizedCallStack =
         getFrameLocationStack(Offset, UsePseudoProbes);
     uint64_t Size = Offset2InstSizeMap[Offset];
@@ -671,8 +677,7 @@ void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset,
     // Record instruction size for the corresponding context
     FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);
 
-    Offset = CodeAddrOffsets[++Index];
-  }
+  } while (IP.advance() && IP.Address < RangeEnd);
 }
 
 InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
@@ -682,18 +687,31 @@ InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
   if (RoundToNext) {
     // we might get address which is not the code
     // it should round to the next valid address
-    this->Address = Binary->getAddressforIndex(Index);
+    if (Index >= Binary->getCodeOffsetsSize())
+      this->Address = UINT64_MAX;
+    else
+      this->Address = Binary->getAddressforIndex(Index);
   }
 }
 
-void InstructionPointer::advance() {
+bool InstructionPointer::advance() {
   Index++;
+  if (Index >= Binary->getCodeOffsetsSize()) {
+    Address = UINT64_MAX;
+    return false;
+  }
   Address = Binary->getAddressforIndex(Index);
+  return true;
 }
 
-void InstructionPointer::backward() {
+bool InstructionPointer::backward() {
+  if (Index == 0) {
+    Address = 0;
+    return false;
+  }
   Index--;
   Address = Binary->getAddressforIndex(Index);
+  return true;
 }
 
 void InstructionPointer::update(uint64_t Addr) {

diff  --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index b810a611a01a8..11a8f9b343cc4 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -64,8 +64,8 @@ struct InstructionPointer {
   uint64_t Index = 0;
   InstructionPointer(const ProfiledBinary *Binary, uint64_t Address,
                      bool RoundToNext = false);
-  void advance();
-  void backward();
+  bool advance();
+  bool backward();
   void update(uint64_t Addr);
 };
 
@@ -73,6 +73,7 @@ using RangesTy = std::vector<std::pair<uint64_t, uint64_t>>;
 
 struct BinaryFunction {
   StringRef FuncName;
+  // End of range is an exclusive bound.
   RangesTy Ranges;
 };
 
@@ -80,7 +81,7 @@ struct BinaryFunction {
 // non-continuous ranges, each range corresponds to one FuncRange.
 struct FuncRange {
   uint64_t StartOffset;
-  // EndOffset is a exclusive bound.
+  // EndOffset is an exclusive bound.
   uint64_t EndOffset;
   // Function the range belongs to
   BinaryFunction *Func;
@@ -105,7 +106,8 @@ struct PrologEpilogTracker {
     for (auto I : FuncStartOffsetMap) {
       PrologEpilogSet.insert(I.first);
       InstructionPointer IP(Binary, I.first);
-      IP.advance();
+      if (!IP.advance())
+        break;
       PrologEpilogSet.insert(IP.Offset);
     }
   }
@@ -115,7 +117,8 @@ struct PrologEpilogTracker {
     for (auto Addr : RetAddrs) {
       PrologEpilogSet.insert(Addr);
       InstructionPointer IP(Binary, Addr);
-      IP.backward();
+      if (!IP.backward())
+        break;
       PrologEpilogSet.insert(IP.Offset);
     }
   }
@@ -336,6 +339,8 @@ class ProfiledBinary {
     return offsetToVirtualAddr(CodeAddrOffsets[Index]);
   }
 
+  size_t getCodeOffsetsSize() const { return CodeAddrOffsets.size(); }
+
   bool usePseudoProbes() const { return UsePseudoProbes; }
   // Get the index in CodeAddrOffsets for the address
   // As we might get an address which is not the code


        


More information about the llvm-commits mailing list