[llvm] 0196b45 - [CSSPGO][llvm-profgen] Instruction symbolization

via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 20 14:27:06 PST 2020


Author: wlei
Date: 2020-11-20T14:26:27-08:00
New Revision: 0196b45ceaf8784eae058e6af4fd943f16a2d071

URL: https://github.com/llvm/llvm-project/commit/0196b45ceaf8784eae058e6af4fd943f16a2d071
DIFF: https://github.com/llvm/llvm-project/commit/0196b45ceaf8784eae058e6af4fd943f16a2d071.diff

LOG: [CSSPGO][llvm-profgen] Instruction symbolization

This stack of changes introduces the `llvm-profgen` utility, which generates a profile data file from given perf script data files for sample-based PGO. It’s part of (but not limited to) the CSSPGO work. Specifically, to support context-sensitive profiles with or without pseudo probes, it implements a series of functionalities including perf trace parsing, instruction symbolization, LBR stack/call frame stack unwinding, pseudo probe decoding, etc. High throughput is achieved through multiple levels of sample aggregation, and a compatible profile format is generated in one stop at the end. Please refer to https://groups.google.com/g/llvm-dev/c/1p1rdYbL93s for the CSSPGO RFC.

This change adds support for instruction symbolization. Given the RVA of an instruction pointer, the full calling context can be printed side by side with the disassembled code.
E.g.
```
 Disassembly of section .text [0x0, 0x4a]:

 <funcA>:
     0:	mov	eax, edi                           funcA:0
     2:	mov	ecx, dword ptr [rip]               funcLeaf:2 @ funcA:1
     8:	lea	edx, [rcx + 3]                     fib:2 @ funcLeaf:2 @ funcA:1
     b:	cmp	ecx, 3                             fib:2 @ funcLeaf:2 @ funcA:1
     e:	cmovl	edx, ecx                           fib:2 @ funcLeaf:2 @ funcA:1
    11:	sub	eax, edx                           funcLeaf:2 @ funcA:1
    13:	ret                                        funcA:2
    14:	nop	word ptr cs:[rax + rax]
    1e:	nop

 <funcLeaf>:
    20:	mov	eax, edi                           funcLeaf:1
    22:	mov	ecx, dword ptr [rip]               funcLeaf:2
    28:	lea	edx, [rcx + 3]                     fib:2 @ funcLeaf:2
    2b:	cmp	ecx, 3                             fib:2 @ funcLeaf:2
    2e:	cmovl	edx, ecx                           fib:2 @ funcLeaf:2
    31:	sub	eax, edx                           funcLeaf:2
    33:	ret                                        funcLeaf:3
    34:	nop	word ptr cs:[rax + rax]
    3e:	nop

 <fib>:
    40:	lea	eax, [rdi + 3]                     fib:2
    43:	cmp	edi, 3                             fib:2
    46:	cmovl	eax, edi                           fib:2
    49:	ret                                        fib:8
```

Test Plan:
ninja check-llvm

Reviewed By: wenlei, wmi

Differential Revision: https://reviews.llvm.org/D89715

Added: 
    llvm/test/tools/llvm-profgen/symbolize.ll
    llvm/tools/llvm-profgen/CallContext.h

Modified: 
    llvm/tools/llvm-profgen/CMakeLists.txt
    llvm/tools/llvm-profgen/ProfiledBinary.cpp
    llvm/tools/llvm-profgen/ProfiledBinary.h

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/llvm-profgen/symbolize.ll b/llvm/test/tools/llvm-profgen/symbolize.ll
new file mode 100644
index 000000000000..2fbc59e3d00d
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/symbolize.ll
@@ -0,0 +1,126 @@
+; REQUIRES: x86-registered-target
+; RUN: llc -filetype=obj %s -o %t
+; RUN: llvm-profgen --binary=%t --perfscript=%s --output=%t1 --show-disassembly -x86-asm-syntax=intel --show-source-locations | FileCheck %s --match-full-lines
+
+; CHECK: Disassembly of section .text [0x0, 0x4a]:
+; CHECK: <funcA>:
+; CHECK:        0:	mov	eax, edi                         funcA:0
+; CHECK:        2:	mov	ecx, dword ptr [rip]             funcLeaf:2 @ funcA:1
+; CHECK:        8:	lea	edx, [rcx + 3]                   fib:2 @ funcLeaf:2 @ funcA:1
+; CHECK:        b:	cmp	ecx, 3                           fib:2 @ funcLeaf:2 @ funcA:1
+; CHECK:        e:	cmovl	edx, ecx                       fib:2 @ funcLeaf:2 @ funcA:1
+; CHECK:       11:	sub	eax, edx                         funcLeaf:2 @ funcA:1
+; CHECK:       13:	ret                                  funcA:2
+; CHECK:       14:	nop	word ptr cs:[rax + rax]
+; CHECK:       1e:	nop
+; CHECK: <funcLeaf>:
+; CHECK:      20:	mov	eax, edi                           funcLeaf:1
+; CHECK:      22:	mov	ecx, dword ptr [rip]               funcLeaf:2
+; CHECK:      28:	lea	edx, [rcx + 3]                     fib:2 @ funcLeaf:2
+; CHECK:      2b:	cmp	ecx, 3                             fib:2 @ funcLeaf:2
+; CHECK:      2e:	cmovl	edx, ecx                         fib:2 @ funcLeaf:2
+; CHECK:      31:	sub	eax, edx                           funcLeaf:2
+; CHECK:      33:	ret                                    funcLeaf:3
+; CHECK:      34:	nop	word ptr cs:[rax + rax]
+; CHECK:      3e:	nop
+; CHECK: <fib>:
+; CHECK:      40:	lea	eax, [rdi + 3]                     fib:2
+; CHECK:      43:	cmp	edi, 3                             fib:2
+; CHECK:      46:	cmovl	eax, edi                         fib:2
+; CHECK:      49:	ret                                    fib:8
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@factor = dso_local global i32 3
+
+define dso_local i32 @funcA(i32 %x) !dbg !12 {
+entry:
+  call void @llvm.dbg.value(metadata i32 %x, metadata !16, metadata !DIExpression()), !dbg !18
+  call void @llvm.dbg.value(metadata i32 %x, metadata !19, metadata !DIExpression()), !dbg !22
+  %0 = load volatile i32, i32* @factor, align 4, !dbg !24, !tbaa !25
+  call void @llvm.dbg.value(metadata i32 %0, metadata !29, metadata !DIExpression()), !dbg !32
+  %cmp.i.i = icmp slt i32 %0, 3, !dbg !34
+  %add.i.i = add nsw i32 %0, 3, !dbg !36
+  %retval.0.i.i = select i1 %cmp.i.i, i32 %0, i32 %add.i.i, !dbg !36
+  %sub.i = sub nsw i32 %x, %retval.0.i.i, !dbg !37
+  call void @llvm.dbg.value(metadata i32 %sub.i, metadata !19, metadata !DIExpression()), !dbg !22
+  call void @llvm.dbg.value(metadata i32 %sub.i, metadata !17, metadata !DIExpression()), !dbg !18
+  ret i32 %sub.i, !dbg !38
+}
+
+define dso_local i32 @funcLeaf(i32 %x) !dbg !20 {
+entry:
+  call void @llvm.dbg.value(metadata i32 %x, metadata !19, metadata !DIExpression()), !dbg !39
+  %0 = load volatile i32, i32* @factor, align 4, !dbg !40, !tbaa !25
+  call void @llvm.dbg.value(metadata i32 %0, metadata !29, metadata !DIExpression()), !dbg !41
+  %cmp.i = icmp slt i32 %0, 3, !dbg !43
+  %add.i = add nsw i32 %0, 3, !dbg !44
+  %retval.0.i = select i1 %cmp.i, i32 %0, i32 %add.i, !dbg !44
+  %sub = sub nsw i32 %x, %retval.0.i, !dbg !45
+  call void @llvm.dbg.value(metadata i32 %sub, metadata !19, metadata !DIExpression()), !dbg !39
+  ret i32 %sub, !dbg !46
+}
+
+define dso_local i32 @fib(i32 %x) !dbg !30 {
+entry:
+  call void @llvm.dbg.value(metadata i32 %x, metadata !29, metadata !DIExpression()), !dbg !47
+  %cmp = icmp slt i32 %x, 3, !dbg !48
+  %add = add nsw i32 %x, 3, !dbg !49
+  %retval.0 = select i1 %cmp, i32 %x, i32 %add, !dbg !49
+  ret i32 %retval.0, !dbg !50
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata) #2
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!8, !9, !10}
+
+!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 3, type: !6, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, emissionKind: FullDebug)
+!3 = !DIFile(filename: "test.c", directory: "test")
+!4 = !{}
+!6 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !7)
+!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!8 = !{i32 7, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{i32 1, !"wchar_size", i32 4}
+!12 = distinct !DISubprogram(name: "funcA", scope: !3, file: !3, line: 6, type: !13, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !15)
+!13 = !DISubroutineType(types: !14)
+!14 = !{!7, !7}
+!15 = !{!16, !17}
+!16 = !DILocalVariable(name: "x", arg: 1, scope: !12, file: !3, line: 6, type: !7)
+!17 = !DILocalVariable(name: "r", scope: !12, file: !3, line: 7, type: !7)
+!18 = !DILocation(line: 0, scope: !12)
+!19 = !DILocalVariable(name: "x", arg: 1, scope: !20, file: !3, line: 22, type: !7)
+!20 = distinct !DISubprogram(name: "funcLeaf", scope: !3, file: !3, line: 22, type: !13, scopeLine: 23, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
+!21 = !{!19}
+!22 = !DILocation(line: 0, scope: !20, inlinedAt: !23)
+!23 = distinct !DILocation(line: 7, column: 11, scope: !12)
+!24 = !DILocation(line: 24, column: 12, scope: !20, inlinedAt: !23)
+!25 = !{!26, !26, i64 0}
+!26 = !{!"int", !27, i64 0}
+!27 = !{!"omnipotent char", !28, i64 0}
+!28 = !{!"Simple C/C++ TBAA"}
+!29 = !DILocalVariable(name: "x", arg: 1, scope: !30, file: !3, line: 11, type: !7)
+!30 = distinct !DISubprogram(name: "fib", scope: !3, file: !3, line: 11, type: !13, scopeLine: 12, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !31)
+!31 = !{!29}
+!32 = !DILocation(line: 0, scope: !30, inlinedAt: !33)
+!33 = distinct !DILocation(line: 24, column: 8, scope: !20, inlinedAt: !23)
+!34 = !DILocation(line: 13, column: 9, scope: !35, inlinedAt: !33)
+!35 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 7)
+!36 = !DILocation(line: 13, column: 7, scope: !30, inlinedAt: !33)
+!37 = !DILocation(line: 24, column: 5, scope: !20, inlinedAt: !23)
+!38 = !DILocation(line: 8, column: 3, scope: !12)
+!39 = !DILocation(line: 0, scope: !20)
+!40 = !DILocation(line: 24, column: 12, scope: !20)
+!41 = !DILocation(line: 0, scope: !30, inlinedAt: !42)
+!42 = distinct !DILocation(line: 24, column: 8, scope: !20)
+!43 = !DILocation(line: 13, column: 9, scope: !35, inlinedAt: !42)
+!44 = !DILocation(line: 13, column: 7, scope: !30, inlinedAt: !42)
+!45 = !DILocation(line: 24, column: 5, scope: !20)
+!46 = !DILocation(line: 25, column: 3, scope: !20)
+!47 = !DILocation(line: 0, scope: !30)
+!48 = !DILocation(line: 13, column: 9, scope: !35)
+!49 = !DILocation(line: 13, column: 7, scope: !30)
+!50 = !DILocation(line: 19, column: 1, scope: !30)

diff  --git a/llvm/tools/llvm-profgen/CMakeLists.txt b/llvm/tools/llvm-profgen/CMakeLists.txt
index 6d13e10e3e51..ee15fb636062 100644
--- a/llvm/tools/llvm-profgen/CMakeLists.txt
+++ b/llvm/tools/llvm-profgen/CMakeLists.txt
@@ -10,6 +10,7 @@ set(LLVM_LINK_COMPONENTS
   MCDisassembler
   Object
   Support
+  Symbolize
   )
 
 add_llvm_tool(llvm-profgen

diff  --git a/llvm/tools/llvm-profgen/CallContext.h b/llvm/tools/llvm-profgen/CallContext.h
new file mode 100644
index 000000000000..85bf09185a6d
--- /dev/null
+++ b/llvm/tools/llvm-profgen/CallContext.h
@@ -0,0 +1,64 @@
+//===-- CallContext.h - Call Context Handler ---------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_PROFGEN_CALLCONTEXT_H
+#define LLVM_TOOLS_LLVM_PROFGEN_CALLCONTEXT_H
+
+#include "llvm/ProfileData/SampleProf.h"
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace sampleprof {
+
+// Function name, LineLocation
+typedef std::pair<std::string, LineLocation> FrameLocation;
+
+typedef SmallVector<FrameLocation, 4> FrameLocationStack;
+
+inline std::string getCallSite(const FrameLocation &Callsite) {
+  std::string CallsiteStr = Callsite.first;
+  CallsiteStr += ":";
+  CallsiteStr += Twine(Callsite.second.LineOffset).str();
+  if (Callsite.second.Discriminator > 0) {
+    CallsiteStr += ".";
+    CallsiteStr += Twine(Callsite.second.Discriminator).str();
+  }
+  return CallsiteStr;
+}
+
+// TODO: This operation is expensive. If it ever gets called multiple times we
+// may want to wrap it in a class that keeps internal state.
+inline std::string getLocWithContext(const FrameLocationStack &Context) {
+  std::ostringstream OContextStr;
+  for (const auto &Callsite : Context) {
+    if (OContextStr.str().size())
+      OContextStr << " @ ";
+    OContextStr << getCallSite(Callsite);
+  }
+  return OContextStr.str();
+}
+
+// Reverse call context, i.e., in the order of callee frames to caller frames,
+// is useful during instruction printing or pseudo probe printing.
+inline std::string
+getReversedLocWithContext(const FrameLocationStack &Context) {
+  std::ostringstream OContextStr;
+  for (const auto &Callsite : reverse(Context)) {
+    if (OContextStr.str().size())
+      OContextStr << " @ ";
+    OContextStr << getCallSite(Callsite);
+  }
+  return OContextStr.str();
+}
+
+} // end namespace sampleprof
+} // end namespace llvm
+
+#endif

diff  --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 97d9d8f55c03..a828b7ea3f0f 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -12,6 +12,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/Demangle/Demangle.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/X86TargetParser.h"
@@ -24,6 +25,11 @@ static cl::opt<bool> ShowDisassembly("show-disassembly", cl::ReallyHidden,
                                      cl::init(false), cl::ZeroOrMore,
                                      cl::desc("Print disassembled code."));
 
+static cl::opt<bool> ShowSourceLocations("show-source-locations",
+                                         cl::ReallyHidden, cl::init(false),
+                                         cl::ZeroOrMore,
+                                         cl::desc("Print source locations."));
+
 namespace llvm {
 namespace sampleprof {
 
@@ -137,7 +143,15 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
 
     if (ShowDisassembly) {
       outs() << format("%8" PRIx64 ":", Offset);
+      size_t Start = outs().tell();
       IP->printInst(&Inst, Offset + Size, "", *STI.get(), outs());
+      if (ShowSourceLocations) {
+        unsigned Cur = outs().tell() - Start;
+        if (Cur < 40)
+          outs().indent(40 - Cur);
+        InstructionPointer Inst(this, Offset);
+        outs() << getReversedLocWithContext(symbolize(Inst));
+      }
       outs() << "\n";
     }
 
@@ -259,5 +273,40 @@ void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
     }
   }
 }
+
+void ProfiledBinary::setupSymbolizer() {
+  symbolize::LLVMSymbolizer::Options SymbolizerOpts;
+  SymbolizerOpts.PrintFunctions =
+      DILineInfoSpecifier::FunctionNameKind::LinkageName;
+  SymbolizerOpts.Demangle = false;
+  SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
+  SymbolizerOpts.UseSymbolTable = false;
+  SymbolizerOpts.RelativeAddresses = false;
+  Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts);
+}
+
+FrameLocationStack ProfiledBinary::symbolize(const InstructionPointer &IP) {
+  assert(this == IP.Binary &&
+         "Binary should only symbolize its own instruction");
+  auto Addr = object::SectionedAddress{IP.Offset + PreferredBaseAddress,
+                                       object::SectionedAddress::UndefSection};
+  DIInliningInfo InlineStack =
+      unwrapOrError(Symbolizer->symbolizeInlinedCode(Path, Addr), getName());
+
+  FrameLocationStack CallStack;
+
+  for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
+    const auto &CallerFrame = InlineStack.getFrame(I);
+    if (CallerFrame.FunctionName == "<invalid>")
+      break;
+    LineLocation Line(CallerFrame.Line - CallerFrame.StartLine,
+                      CallerFrame.Discriminator);
+    FrameLocation Callsite(CallerFrame.FunctionName, Line);
+    CallStack.push_back(Callsite);
+  }
+
+  return CallStack;
+}
+
 } // end namespace sampleprof
 } // end namespace llvm

diff  --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 2950bf8fd482..e1745884ed94 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -8,7 +8,10 @@
 
 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
 #define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
+
+#include "CallContext.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -33,6 +36,21 @@ using namespace llvm::object;
 namespace llvm {
 namespace sampleprof {
 
+class ProfiledBinary;
+
+struct InstructionPointer {
+  ProfiledBinary *Binary;
+  // Offset to the base address of the executable segment of the binary.
+  uint64_t Offset;
+  // Index to the sorted code address array of the binary.
+  uint64_t Index;
+
+  InstructionPointer(ProfiledBinary *Binary, uint64_t Offset)
+      : Binary(Binary), Offset(Offset) {
+    Index = 0;
+  }
+};
+
 class ProfiledBinary {
   // Absolute path of the binary.
   std::string Path;
@@ -63,10 +81,14 @@ class ProfiledBinary {
   // A set of return instruction offsets. Used by virtual unwinding.
   std::unordered_set<uint64_t> RetAddrs;
 
+  // The symbolizer used to get inline context for an instruction.
+  std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
+
   void setPreferredBaseAddress(const ELFObjectFileBase *O);
 
   // Set up disassembler and related components.
   void setUpDisassembler(const ELFObjectFileBase *Obj);
+  void setupSymbolizer();
 
   /// Disassemble the text section and build various address maps.
   void disassemble(const ELFObjectFileBase *O);
@@ -74,6 +96,8 @@ class ProfiledBinary {
   /// Helper function to disassemble the symbol and extract info for unwinding
   bool dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
                           SectionSymbolsTy &Symbols, const SectionRef &Section);
+  /// Symbolize a given instruction pointer and return a full call context.
+  FrameLocationStack symbolize(const InstructionPointer &I);
 
   /// Decode the interesting parts of the binary and build internal data
   /// structures. On high level, the parts of interest are:
@@ -85,7 +109,10 @@ class ProfiledBinary {
   void load();
 
 public:
-  ProfiledBinary(StringRef Path) : Path(Path) { load(); }
+  ProfiledBinary(StringRef Path) : Path(Path) {
+    setupSymbolizer();
+    load();
+  }
 
   const StringRef getPath() const { return Path; }
   const StringRef getName() const { return llvm::sys::path::filename(Path); }


        


More information about the llvm-commits mailing list