[llvm-branch-commits] [BOLT] Drop macro-fusion alignment (PR #97358)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Jul 1 21:25:12 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Amir Ayupov (aaupov)

<details>
<summary>Changes</summary>

9d0754ada5dbbc0c009bcc2f7824488419cc5530 dropped MC support required for
macro-fusion alignment in BOLT. Remove the support in BOLT.

Test Plan:
macro-fusion alignment was never upstreamed, so no upstream tests are
affected.


---
Full diff: https://github.com/llvm/llvm-project/pull/97358.diff


11 Files Affected:

- (modified) bolt/include/bolt/Core/BinaryBasicBlock.h (-9) 
- (modified) bolt/include/bolt/Core/BinaryContext.h (-4) 
- (modified) bolt/include/bolt/Core/BinaryFunction.h (-4) 
- (modified) bolt/include/bolt/Core/MCPlusBuilder.h (-7) 
- (modified) bolt/lib/Core/BinaryBasicBlock.cpp (-39) 
- (modified) bolt/lib/Core/BinaryEmitter.cpp (-37) 
- (modified) bolt/lib/Core/BinaryFunction.cpp (-25) 
- (modified) bolt/lib/Passes/BinaryPasses.cpp (-20) 
- (modified) bolt/lib/Rewrite/RewriteInstance.cpp (-22) 
- (modified) bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp (-4) 
- (modified) bolt/lib/Target/X86/X86MCPlusBuilder.cpp (-34) 


``````````diff
diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h
index a57b70714fe38..9a9d7b8735d71 100644
--- a/bolt/include/bolt/Core/BinaryBasicBlock.h
+++ b/bolt/include/bolt/Core/BinaryBasicBlock.h
@@ -842,15 +842,6 @@ class BinaryBasicBlock {
   bool analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
                      MCInst *&CondBranch, MCInst *&UncondBranch);
 
-  /// Return true if iterator \p I is pointing to the first instruction in
-  /// a pair that could be macro-fused.
-  bool isMacroOpFusionPair(const_iterator I) const;
-
-  /// If the basic block has a pair of instructions suitable for macro-fusion,
-  /// return iterator to the first instruction of the pair.
-  /// Otherwise return end().
-  const_iterator getMacroOpFusionPair() const;
-
   /// Printer required for printing dominator trees.
   void printAsOperand(raw_ostream &OS, bool PrintType = true) {
     if (PrintType)
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 4ec3de3da1bf8..73932c4ca2fb3 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -698,10 +698,6 @@ class BinaryContext {
 
   /// Binary-wide aggregated stats.
   struct BinaryStats {
-    /// Stats for macro-fusion.
-    uint64_t MissedMacroFusionPairs{0};
-    uint64_t MissedMacroFusionExecCount{0};
-
     /// Stats for stale profile matching:
     ///   the total number of basic blocks in the profile
     uint32_t NumStaleBlocks{0};
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 3c641581e247a..d318dfbcbabe7 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -835,10 +835,6 @@ class BinaryFunction {
   /// them.
   void calculateLoopInfo();
 
-  /// Calculate missed macro-fusion opportunities and update BinaryContext
-  /// stats.
-  void calculateMacroOpFusionStats();
-
   /// Returns if BinaryDominatorTree has been constructed for this function.
   bool hasDomTree() const { return BDT != nullptr; }
 
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index a5fb3901428d9..584848a8601fd 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -917,13 +917,6 @@ class MCPlusBuilder {
   /// Return true if the instruction is encoded using EVEX (AVX-512).
   virtual bool hasEVEXEncoding(const MCInst &Inst) const { return false; }
 
-  /// Return true if a pair of instructions represented by \p Insts
-  /// could be fused into a single uop.
-  virtual bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const {
-    llvm_unreachable("not implemented");
-    return false;
-  }
-
   struct X86MemOperand {
     unsigned BaseRegNum;
     int64_t ScaleImm;
diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp
index a4b9a7f558cd8..2a2192b79bb4b 100644
--- a/bolt/lib/Core/BinaryBasicBlock.cpp
+++ b/bolt/lib/Core/BinaryBasicBlock.cpp
@@ -404,45 +404,6 @@ bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
                             CondBranch, UncondBranch);
 }
 
-bool BinaryBasicBlock::isMacroOpFusionPair(const_iterator I) const {
-  auto &MIB = Function->getBinaryContext().MIB;
-  ArrayRef<MCInst> Insts = Instructions;
-  return MIB->isMacroOpFusionPair(Insts.slice(I - begin()));
-}
-
-BinaryBasicBlock::const_iterator
-BinaryBasicBlock::getMacroOpFusionPair() const {
-  if (!Function->getBinaryContext().isX86())
-    return end();
-
-  if (getNumNonPseudos() < 2 || succ_size() != 2)
-    return end();
-
-  auto RI = getLastNonPseudo();
-  assert(RI != rend() && "cannot have an empty block with 2 successors");
-
-  BinaryContext &BC = Function->getBinaryContext();
-
-  // Skip instruction if it's an unconditional branch following
-  // a conditional one.
-  if (BC.MIB->isUnconditionalBranch(*RI))
-    ++RI;
-
-  if (!BC.MIB->isConditionalBranch(*RI))
-    return end();
-
-  // Start checking with instruction preceding the conditional branch.
-  ++RI;
-  if (RI == rend())
-    return end();
-
-  auto II = std::prev(RI.base()); // convert to a forward iterator
-  if (isMacroOpFusionPair(II))
-    return II;
-
-  return end();
-}
-
 MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) {
   BinaryContext &BC = Function->getBinaryContext();
   auto Itr = rbegin();
diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp
index 5793963f9b80d..f6dfa249f9a9f 100644
--- a/bolt/lib/Core/BinaryEmitter.cpp
+++ b/bolt/lib/Core/BinaryEmitter.cpp
@@ -38,19 +38,6 @@ extern cl::opt<bool> PreserveBlocksAlignment;
 cl::opt<bool> AlignBlocks("align-blocks", cl::desc("align basic blocks"),
                           cl::cat(BoltOptCategory));
 
-cl::opt<MacroFusionType>
-AlignMacroOpFusion("align-macro-fusion",
-  cl::desc("fix instruction alignment for macro-fusion (x86 relocation mode)"),
-  cl::init(MFT_HOT),
-  cl::values(clEnumValN(MFT_NONE, "none",
-               "do not insert alignment no-ops for macro-fusion"),
-             clEnumValN(MFT_HOT, "hot",
-               "only insert alignment no-ops on hot execution paths (default)"),
-             clEnumValN(MFT_ALL, "all",
-               "always align instructions to allow macro-fusion")),
-  cl::ZeroOrMore,
-  cl::cat(BoltRelocCategory));
-
 static cl::list<std::string>
 BreakFunctionNames("break-funcs",
   cl::CommaSeparated,
@@ -453,20 +440,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
         Streamer.emitLabel(EntrySymbol);
     }
 
-    // Check if special alignment for macro-fusion is needed.
-    bool MayNeedMacroFusionAlignment =
-        (opts::AlignMacroOpFusion == MFT_ALL) ||
-        (opts::AlignMacroOpFusion == MFT_HOT && BB->getKnownExecutionCount());
-    BinaryBasicBlock::const_iterator MacroFusionPair;
-    if (MayNeedMacroFusionAlignment) {
-      MacroFusionPair = BB->getMacroOpFusionPair();
-      if (MacroFusionPair == BB->end())
-        MayNeedMacroFusionAlignment = false;
-    }
-
     SMLoc LastLocSeen;
-    // Remember if the last instruction emitted was a prefix.
-    bool LastIsPrefix = false;
     for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
       MCInst &Instr = *I;
 
@@ -479,16 +453,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
         continue;
       }
 
-      // Handle macro-fusion alignment. If we emitted a prefix as
-      // the last instruction, we should've already emitted the associated
-      // alignment hint, so don't emit it twice.
-      if (MayNeedMacroFusionAlignment && !LastIsPrefix &&
-          I == MacroFusionPair) {
-        // This assumes the second instruction in the macro-op pair will get
-        // assigned to its own MCRelaxableFragment. Since all JCC instructions
-        // are relaxable, we should be safe.
-      }
-
       if (!EmitCodeOnly) {
         // A symbol to be emitted before the instruction to mark its location.
         MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr);
@@ -525,7 +489,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
       }
 
       Streamer.emitInstruction(Instr, *BC.STI);
-      LastIsPrefix = BC.MIB->isPrefix(Instr);
     }
   }
 
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index d13e28999a05c..a316b8ee802ad 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -2276,8 +2276,6 @@ void BinaryFunction::postProcessCFG() {
     postProcessBranches();
   }
 
-  calculateMacroOpFusionStats();
-
   // The final cleanup of intermediate structures.
   clearList(IgnoredBranches);
 
@@ -2294,29 +2292,6 @@ void BinaryFunction::postProcessCFG() {
          "invalid CFG detected after post-processing");
 }
 
-void BinaryFunction::calculateMacroOpFusionStats() {
-  if (!getBinaryContext().isX86())
-    return;
-  for (const BinaryBasicBlock &BB : blocks()) {
-    auto II = BB.getMacroOpFusionPair();
-    if (II == BB.end())
-      continue;
-
-    // Check offset of the second instruction.
-    // FIXME: arch-specific.
-    const uint32_t Offset = BC.MIB->getOffsetWithDefault(*std::next(II), 0);
-    if (!Offset || (getAddress() + Offset) % 64)
-      continue;
-
-    LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
-                      << Twine::utohexstr(getAddress() + Offset)
-                      << " in function " << *this << "; executed "
-                      << BB.getKnownExecutionCount() << " times.\n");
-    ++BC.Stats.MissedMacroFusionPairs;
-    BC.Stats.MissedMacroFusionExecCount += BB.getKnownExecutionCount();
-  }
-}
-
 void BinaryFunction::removeTagsFromProfile() {
   for (BinaryBasicBlock *BB : BasicBlocks) {
     if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index 2810f723719d0..64aa312fd5114 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -44,7 +44,6 @@ namespace opts {
 extern cl::OptionCategory BoltCategory;
 extern cl::OptionCategory BoltOptCategory;
 
-extern cl::opt<bolt::MacroFusionType> AlignMacroOpFusion;
 extern cl::opt<unsigned> Verbosity;
 extern cl::opt<bool> EnableBAT;
 extern cl::opt<unsigned> ExecutionCountThreshold;
@@ -1635,25 +1634,6 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
     }
   }
 
-  // Print information on missed macro-fusion opportunities seen on input.
-  if (BC.Stats.MissedMacroFusionPairs) {
-    BC.outs() << format(
-        "BOLT-INFO: the input contains %zu (dynamic count : %zu)"
-        " opportunities for macro-fusion optimization",
-        BC.Stats.MissedMacroFusionPairs, BC.Stats.MissedMacroFusionExecCount);
-    switch (opts::AlignMacroOpFusion) {
-    case MFT_NONE:
-      BC.outs() << ". Use -align-macro-fusion to fix.\n";
-      break;
-    case MFT_HOT:
-      BC.outs() << ". Will fix instances on a hot path.\n";
-      break;
-    case MFT_ALL:
-      BC.outs() << " that are going to be fixed\n";
-      break;
-    }
-  }
-
   // Collect and print information about suboptimal code layout on input.
   if (opts::ReportBadLayout) {
     std::vector<BinaryFunction *> SuboptimalFuncs;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 1a3a8af21d81b..e2925f4093746 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -76,7 +76,6 @@ extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;
 
 namespace opts {
 
-extern cl::opt<MacroFusionType> AlignMacroOpFusion;
 extern cl::list<std::string> HotTextMoveSections;
 extern cl::opt<bool> Hugify;
 extern cl::opt<bool> Instrument;
@@ -1972,12 +1971,6 @@ void RewriteInstance::adjustCommandLineOptions() {
   if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
     RtLibrary->adjustCommandLineOptions(*BC);
 
-  if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) {
-    BC->outs()
-        << "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n";
-    opts::AlignMacroOpFusion = MFT_NONE;
-  }
-
   if (BC->isX86() && BC->MAB->allowAutoPadding()) {
     if (!BC->HasRelocations) {
       BC->errs()
@@ -1988,13 +1981,6 @@ void RewriteInstance::adjustCommandLineOptions() {
     BC->outs()
         << "BOLT-WARNING: using mitigation for Intel JCC erratum, layout "
            "may take several minutes\n";
-    opts::AlignMacroOpFusion = MFT_NONE;
-  }
-
-  if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) {
-    BC->outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation "
-                  "mode\n";
-    opts::AlignMacroOpFusion = MFT_NONE;
   }
 
   if (opts::SplitEH && !BC->HasRelocations) {
@@ -2016,14 +2002,6 @@ void RewriteInstance::adjustCommandLineOptions() {
     opts::StrictMode = true;
   }
 
-  if (BC->isX86() && BC->HasRelocations &&
-      opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) {
-    BC->outs()
-        << "BOLT-INFO: enabling -align-macro-fusion=all since no profile "
-           "was specified\n";
-    opts::AlignMacroOpFusion = MFT_ALL;
-  }
-
   if (!BC->HasRelocations &&
       opts::ReorderFunctions != ReorderFunctions::RT_NONE) {
     BC->errs() << "BOLT-ERROR: function reordering only works when "
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index a74eda8e4a566..a21cf8f1cc244 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -141,10 +141,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
                                  *AArch64ExprB.getSubExpr(), Comp);
   }
 
-  bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
-    return false;
-  }
-
   bool shortenInstruction(MCInst &, const MCSubtargetInfo &) const override {
     return false;
   }
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index e350e701c7b7b..263e642e692eb 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -661,40 +661,6 @@ class X86MCPlusBuilder : public MCPlusBuilder {
     return (Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX;
   }
 
-  bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
-    const auto *I = Insts.begin();
-    while (I != Insts.end() && isPrefix(*I))
-      ++I;
-    if (I == Insts.end())
-      return false;
-
-    const MCInst &FirstInst = *I;
-    ++I;
-    while (I != Insts.end() && isPrefix(*I))
-      ++I;
-    if (I == Insts.end())
-      return false;
-    const MCInst &SecondInst = *I;
-
-    if (!isConditionalBranch(SecondInst))
-      return false;
-    // Cannot fuse if the first instruction uses RIP-relative memory.
-    if (hasPCRelOperand(FirstInst))
-      return false;
-
-    const X86::FirstMacroFusionInstKind CmpKind =
-        X86::classifyFirstOpcodeInMacroFusion(FirstInst.getOpcode());
-    if (CmpKind == X86::FirstMacroFusionInstKind::Invalid)
-      return false;
-
-    X86::CondCode CC = static_cast<X86::CondCode>(getCondCode(SecondInst));
-    X86::SecondMacroFusionInstKind BranchKind =
-        X86::classifySecondCondCodeInMacroFusion(CC);
-    if (BranchKind == X86::SecondMacroFusionInstKind::Invalid)
-      return false;
-    return X86::isMacroFused(CmpKind, BranchKind);
-  }
-
   std::optional<X86MemOperand>
   evaluateX86MemoryOperand(const MCInst &Inst) const override {
     int MemOpNo = getMemoryOperandNo(Inst);

``````````

</details>


https://github.com/llvm/llvm-project/pull/97358


More information about the llvm-branch-commits mailing list