[llvm] [BOLT] Extend calculateEmittedSize for Block Size Calculation (PR #73076)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 22 15:22:45 PST 2023


https://github.com/ShatianWang updated https://github.com/llvm/llvm-project/pull/73076

>From 5ee5d6577a008249c50221e11b0a20a0315af9a5 Mon Sep 17 00:00:00 2001
From: Shatian Wang <shatian at meta.com>
Date: Thu, 2 Nov 2023 12:26:49 -0700
Subject: [PATCH 1/3] [BOLT] Extend calculateEmittedSize for Block Size
 Calculation

This commit modifies BinaryContext::calculateEmittedSize to update the
BinaryBasicBlock::OutputAddressRange of each basic block in the input
BinaryFunction (BF). The update is done in place, so that
BB.OutputAddressRange.second minus BB.OutputAddressRange.first now gives
the emitted size of the basic block.
---
 bolt/include/bolt/Core/BinaryContext.h |  3 +++
 bolt/lib/Core/BinaryContext.cpp        | 35 +++++++++++++++++++++-----
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index ad1bf2baaeb5b1e..17e55a673e8b489 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -1230,6 +1230,9 @@ class BinaryContext {
   ///
   /// Return the pair where the first size is for the main part, and the second
   /// size is for the cold one.
+  /// Modify BinaryBasicBlock::OutputAddressRange for each basic block in the
+  /// function in place so that BB.OutputAddressRange.second less
+  /// BB.OutputAddressRange.first gives the emitted size of BB.
   std::pair<size_t, size_t> calculateEmittedSize(BinaryFunction &BF,
                                                  bool FixBranches = true);
 
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 06b68765909d20e..d04f00efd27ce04 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -2331,14 +2331,37 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
   MCAsmLayout Layout(Assembler);
   Assembler.layout(Layout);
 
+  // Obtain fragment sizes.
+  std::vector<uint64_t> FragmentSizes;
+  // Main fragment size.
   const uint64_t HotSize =
       Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
-  const uint64_t ColdSize =
-      std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL,
-                      [&](const uint64_t Accu, const LabelRange &Labels) {
-                        return Accu + Layout.getSymbolOffset(*Labels.second) -
-                               Layout.getSymbolOffset(*Labels.first);
-                      });
+  FragmentSizes.push_back(HotSize);
+  // Split fragment sizes.
+  uint64_t ColdSize = 0;
+  for (const auto &Labels : SplitLabels) {
+    uint64_t Size = Layout.getSymbolOffset(*Labels.second) -
+                    Layout.getSymbolOffset(*Labels.first);
+    FragmentSizes.push_back(Size);
+    ColdSize += Size;
+  }
+
+  // Populate new start and end offsets of each basic block.
+  BinaryBasicBlock *PrevBB = nullptr;
+  uint64_t FragmentIndex = 0;
+  for (FunctionFragment &FF : BF.getLayout().fragments()) {
+    for (BinaryBasicBlock *BB : FF) {
+      const uint64_t BBStartOffset = Layout.getSymbolOffset(*(BB->getLabel()));
+      BB->setOutputStartAddress(BBStartOffset);
+      if (PrevBB)
+        PrevBB->setOutputEndAddress(BBStartOffset);
+      PrevBB = BB;
+    }
+    if (PrevBB)
+      PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
+    FragmentIndex++;
+    PrevBB = nullptr;
+  }
 
   // Clean-up the effect of the code emission.
   for (const MCSymbol &Symbol : Assembler.symbols()) {

>From ac9dd53386aaa64db1bc8ec1fe56149753b3598d Mon Sep 17 00:00:00 2001
From: Shatian Wang <shatian at meta.com>
Date: Wed, 22 Nov 2023 13:13:45 -0800
Subject: [PATCH 2/3] Fix-up 1

---
 bolt/include/bolt/Core/BinaryContext.h | 4 ++--
 bolt/lib/Core/BinaryContext.cpp        | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 17e55a673e8b489..312678f475347a4 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -1231,8 +1231,8 @@ class BinaryContext {
   /// Return the pair where the first size is for the main part, and the second
   /// size is for the cold one.
   /// Modify BinaryBasicBlock::OutputAddressRange for each basic block in the
-  /// function in place so that BB.OutputAddressRange.second less
-  /// BB.OutputAddressRange.first gives the emitted size of BB.
+  /// function in place so that BinaryBasicBlock::getOutputSize() gives the
+  /// emitted size of the basic block.
   std::pair<size_t, size_t> calculateEmittedSize(BinaryFunction &BF,
                                                  bool FixBranches = true);
 
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index d04f00efd27ce04..7b78c9ba30a3032 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -2347,9 +2347,9 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
   }
 
   // Populate new start and end offsets of each basic block.
-  BinaryBasicBlock *PrevBB = nullptr;
   uint64_t FragmentIndex = 0;
   for (FunctionFragment &FF : BF.getLayout().fragments()) {
+    BinaryBasicBlock *PrevBB = nullptr;
     for (BinaryBasicBlock *BB : FF) {
       const uint64_t BBStartOffset = Layout.getSymbolOffset(*(BB->getLabel()));
       BB->setOutputStartAddress(BBStartOffset);
@@ -2360,7 +2360,6 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
     if (PrevBB)
       PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
     FragmentIndex++;
-    PrevBB = nullptr;
   }
 
   // Clean-up the effect of the code emission.

>From 5b10432d0e1a479d906f898747b08ac75b70dbd4 Mon Sep 17 00:00:00 2001
From: Shatian Wang <shatian at meta.com>
Date: Wed, 22 Nov 2023 15:20:44 -0800
Subject: [PATCH 3/3] fixup! Fix-up 2 -- adding a test

---
 bolt/lib/Core/BinaryFunction.cpp             |  13 +++
 bolt/test/X86/calculate-emitted-block-size.s | 114 +++++++++++++++++++
 2 files changed, 127 insertions(+)
 create mode 100644 bolt/test/X86/calculate-emitted-block-size.s

diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index e81d58ef0b1047b..0a7adf41f9a6d40 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -108,6 +108,13 @@ cl::opt<bool>
                             cl::desc("try to preserve basic block alignment"),
                             cl::cat(BoltOptCategory));
 
+// Hidden flag: BinaryFunction::print also dumps each block's output range.
+static cl::opt<bool> PrintOutputAddressRange(
+    "print-output-address-range",
+    cl::desc("print output address range for each basic block in the function "
+             "when BinaryFunction::print is called"),
+    cl::Hidden, cl::cat(BoltOptCategory));
+
 cl::opt<bool>
 PrintDynoStats("dyno-stats",
   cl::desc("print execution info based on profile"),
@@ -510,6 +517,12 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
       OS << BB->getName() << " (" << BB->size()
          << " instructions, align : " << BB->getAlignment() << ")\n";
 
+      if (opts::PrintOutputAddressRange)
+        OS << "  Output Start Address: " << BB->getOutputAddressRange().first
+           << "\n"
+           << "  Output End Address: " << BB->getOutputAddressRange().second
+           << "\n";
+
       if (isEntryPoint(*BB)) {
         if (MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB))
           OS << "  Secondary Entry Point: " << EntrySymbol->getName() << '\n';
diff --git a/bolt/test/X86/calculate-emitted-block-size.s b/bolt/test/X86/calculate-emitted-block-size.s
new file mode 100644
index 000000000000000..90f8b62390e1490
--- /dev/null
+++ b/bolt/test/X86/calculate-emitted-block-size.s
@@ -0,0 +1,114 @@
+# Test that BinaryContext::calculateEmittedSize updates
+# BinaryBasicBlock::OutputAddressRange in place, so that the emitted size
+# of each basic block is given by BinaryBasicBlock::getOutputSize().
+
+# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o
+# RUN: link_fdata %s %t.o %t.fdata
+# RUN: llvm-strip --strip-unneeded %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt --split-functions --split-strategy=all \
+# RUN:         --print-split --print-only=chain --print-output-address-range \
+# RUN:         --data=%t.fdata --reorder-blocks=ext-tsp \
+# RUN:     2>&1 | FileCheck --check-prefix=SPLITALL %s
+# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o
+# RUN: link_fdata %s %t.o %t.fdata
+# RUN: llvm-strip --strip-unneeded %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt --split-functions --print-split \
+# RUN:         --print-only=chain --print-output-address-range \
+# RUN:         --data=%t.fdata --reorder-blocks=ext-tsp \
+# RUN:     2>&1 | FileCheck --check-prefix=SPLITHOTCOLD %s
+
+# SPLITALL: Binary Function "chain" after split-functions
+# SPLITALL: {{^\.LBB00}}
+# SPLITALL: Output Start Address: 0
+# SPLITALL: Output End Address: 18
+# SPLITALL: {{^\.LFT0}}
+# SPLITALL: Output Start Address: 0
+# SPLITALL: Output End Address: 10
+# SPLITALL: {{^\.Ltmp1}}
+# SPLITALL: Output Start Address: 0
+# SPLITALL: Output End Address: 2
+# SPLITALL: {{^\.Ltmp0}}
+# SPLITALL: Output Start Address: 0
+# SPLITALL: Output End Address: 16
+# SPLITALL: {{^\.Ltmp2}}
+# SPLITALL: Output Start Address: 0
+# SPLITALL: Output End Address: 8
+# SPLITALL: {{^\.LFT1}}
+# SPLITALL: Output Start Address: 0
+# SPLITALL: Output End Address: 8
+
+# SPLITHOTCOLD: {{^\.LBB00}}
+# SPLITHOTCOLD: Output Start Address: 0
+# SPLITHOTCOLD: Output End Address: 9
+# SPLITHOTCOLD: {{^\.LFT0}}
+# SPLITHOTCOLD: Output Start Address: 9
+# SPLITHOTCOLD: Output End Address: 14
+# SPLITHOTCOLD: {{^\.Ltmp1}}
+# SPLITHOTCOLD: Output Start Address: 14
+# SPLITHOTCOLD: Output End Address: 16
+# SPLITHOTCOLD: {{^\.Ltmp0}}
+# SPLITHOTCOLD: Output Start Address: 16
+# SPLITHOTCOLD: Output End Address: 27
+# SPLITHOTCOLD: {{^\.Ltmp2}}
+# SPLITHOTCOLD: Output Start Address: 27
+# SPLITHOTCOLD: Output End Address: 32
+# SPLITHOTCOLD: {{^\.LFT1}}
+# SPLITHOTCOLD: Output Start Address: 0
+# SPLITHOTCOLD: Output End Address: 8
+
+        .text
+        .globl  chain
+        .type   chain, @function
+chain:
+        pushq   %rbp
+        movq    %rsp, %rbp
+        cmpl    $2, %edi
+LLentry_LLchain_start:
+        jge     LLchain_start
+# FDATA: 1 chain #LLentry_LLchain_start# 1 chain #LLchain_start# 0 10
+# FDATA: 1 chain #LLentry_LLchain_start# 1 chain #LLfast# 0 500
+LLfast:
+        movl    $5, %eax
+LLfast_LLexit:
+        jmp     LLexit
+# FDATA: 1 chain #LLfast_LLexit# 1 chain #LLexit# 0 500
+LLchain_start:
+        movl    $10, %eax
+LLchain_start_LLchain1:
+        jge     LLchain1
+# FDATA: 1 chain #LLchain_start_LLchain1# 1 chain #LLchain1# 0 10
+# FDATA: 1 chain #LLchain_start_LLchain1# 1 chain #LLcold# 0 0
+LLcold:
+        addl    $1, %eax
+LLchain1:
+        addl    $1, %eax
+LLchain1_LLexit:
+        jmp     LLexit
+# FDATA: 1 chain #LLchain1_LLexit# 1 chain #LLexit# 0 10
+LLexit:
+        popq    %rbp
+        ret
+LLchain_end:
+        .size   chain, LLchain_end-chain
+
+
+        .globl  main
+        .type   main, @function
+main:
+        pushq   %rbp
+        movq    %rsp, %rbp
+        movl    $1, %edi
+LLmain_chain1:
+        call    chain
+# FDATA: 1 main #LLmain_chain1# 1 chain 0 0 500
+        movl    $4, %edi
+LLmain_chain2:
+        call    chain
+# FDATA: 1 main #LLmain_chain2# 1 chain 0 0 10
+        xorl    %eax, %eax
+        popq    %rbp
+        retq
+.Lmain_end:
+        .size   main, .Lmain_end-main



More information about the llvm-commits mailing list