[llvm] 55356c0 - [CSSPGO][llvm-profgen] Continue disassembling after illegal instruction is seen.

Hongtao Yu via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 3 10:14:19 PST 2021


Author: Hongtao Yu
Date: 2021-03-03T10:14:10-08:00
New Revision: 55356c011b60569668f60a89ddbe2ed76787b313

URL: https://github.com/llvm/llvm-project/commit/55356c011b60569668f60a89ddbe2ed76787b313
DIFF: https://github.com/llvm/llvm-project/commit/55356c011b60569668f60a89ddbe2ed76787b313.diff

LOG: [CSSPGO][llvm-profgen] Continue disassembling after illegal instruction is seen.

Previously we errored out when disassembling illegal instructions and there would be no profile generated. In fact illegal instructions are not uncommon and we'd better skip them and print "unknown" instead of erroring out. This matches the behavior of llvm-objdump (see disassembleObject in llvm-objdump.cpp).

Reviewed By: wlei, wenlei

Differential Revision: https://reviews.llvm.org/D97776

Added: 
    

Modified: 
    llvm/tools/llvm-profgen/ProfiledBinary.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index df6ef2a7699b..4c252943c1ac 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -217,14 +217,25 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
   if (ShowDisassemblyOnly)
     outs() << '<' << SymbolName << ">:\n";
 
+  auto WarnInvalidInsts = [](uint64_t Start, uint64_t End) {
+    WithColor::warning() << "Invalid instructions at "
+                         << format("%8" PRIx64, Start) << " - "
+                         << format("%8" PRIx64, End) << "\n";
+  };
+
   uint64_t Offset = StartOffset;
+  // Size of a consecutive invalid instruction range starting from Offset -1
+  // backwards.
+  uint64_t InvalidInstLength = 0;
   while (Offset < EndOffset) {
     MCInst Inst;
     uint64_t Size;
     // Disassemble an instruction.
-    if (!DisAsm->getInstruction(Inst, Size, Bytes.slice(Offset - SectionOffset),
-                                Offset + PreferredBaseAddress, nulls()))
-      return false;
+    bool Disassembled =
+        DisAsm->getInstruction(Inst, Size, Bytes.slice(Offset - SectionOffset),
+                               Offset + PreferredBaseAddress, nulls());
+    if (Size == 0)
+      Size = 1;
 
     if (ShowDisassemblyOnly) {
       if (ShowPseudoProbe) {
@@ -233,38 +244,52 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
       }
       outs() << format("%8" PRIx64 ":", Offset);
       size_t Start = outs().tell();
-      IPrinter->printInst(&Inst, Offset + Size, "", *STI.get(), outs());
+      if (Disassembled)
+        IPrinter->printInst(&Inst, Offset + Size, "", *STI.get(), outs());
+      else
+        outs() << "\t<unknown>";
       if (ShowSourceLocations) {
         unsigned Cur = outs().tell() - Start;
         if (Cur < 40)
           outs().indent(40 - Cur);
-        InstructionPointer Inst(this, Offset);
-        outs() << getReversedLocWithContext(symbolize(Inst));
+        InstructionPointer IP(this, Offset);
+        outs() << getReversedLocWithContext(symbolize(IP));
       }
       outs() << "\n";
     }
 
-    const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
-
-    // Populate a vector of the symbolized callsite at this location
-    // We don't need symbolized info for probe-based profile, just use an empty
-    // stack as an entry to indicate a valid binary offset
-    FrameLocationStack SymbolizedCallStack;
-    if (!UsePseudoProbes) {
-      InstructionPointer IP(this, Offset);
-      SymbolizedCallStack = symbolize(IP, true);
+    if (Disassembled) {
+      const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
+      // Populate a vector of the symbolized callsite at this location
+      // We don't need symbolized info for probe-based profile, just use an
+      // empty stack as an entry to indicate a valid binary offset
+      FrameLocationStack SymbolizedCallStack;
+      if (!UsePseudoProbes) {
+        InstructionPointer IP(this, Offset);
+        SymbolizedCallStack = symbolize(IP, true);
+      }
+      Offset2LocStackMap[Offset] = SymbolizedCallStack;
+      // Populate address maps.
+      CodeAddrs.push_back(Offset);
+      if (MCDesc.isCall())
+        CallAddrs.insert(Offset);
+      else if (MCDesc.isReturn())
+        RetAddrs.insert(Offset);
+
+      if (InvalidInstLength) {
+        WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
+        InvalidInstLength = 0;
+      }
+    } else {
+      InvalidInstLength += Size;
     }
-    Offset2LocStackMap[Offset] = SymbolizedCallStack;
-    // Populate address maps.
-    CodeAddrs.push_back(Offset);
-    if (MCDesc.isCall())
-      CallAddrs.insert(Offset);
-    else if (MCDesc.isReturn())
-      RetAddrs.insert(Offset);
 
     Offset += Size;
   }
 
+  if (InvalidInstLength)
+    WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
+
   if (ShowDisassemblyOnly)
     outs() << "\n";
 


        


More information about the llvm-commits mailing list