[llvm] [BOLT][AArch64] Transform {cmpbr ~> cmp + br} when inversion not possible (PR #185731)

Alexandros Lamprineas via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 2 10:09:58 PDT 2026


https://github.com/labrinea updated https://github.com/llvm/llvm-project/pull/185731

>From 96f1ccd93053b6c19026bb17b451e08ace19ce31 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Thu, 2 Apr 2026 16:54:12 +0100
Subject: [PATCH] [BOLT][AArch64] Transform cmpbr ~> cmp + br when inversion
 not possible

When reordering blocks we may have to invert branches. Sometimes this isn't
possible for compare-and-branch instructions because the immediate value
would overflow/underflow after the adjustment. In such cases I am splitting
the instruction into a compare followed by a branch. For this to be legal
we should be sure that the condition flags are not being clobbered.
Liveness analysis may help here.
---
 bolt/include/bolt/Core/BinaryFunction.h       |   4 +-
 bolt/include/bolt/Core/MCPlusBuilder.h        |   7 +-
 bolt/include/bolt/Passes/LongJmp.h            |   4 +-
 bolt/include/bolt/Utils/CommandLineOpts.h     |   4 +
 bolt/lib/Core/BinaryFunction.cpp              |   7 +-
 bolt/lib/Passes/BinaryPasses.cpp              |  16 +-
 bolt/lib/Passes/LongJmp.cpp                   |  26 +++-
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  86 +++++++++--
 bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp  |   3 +-
 bolt/lib/Target/X86/X86MCPlusBuilder.cpp      |   3 +-
 bolt/lib/Utils/CommandLineOpts.cpp            |   5 +
 .../AArch64/compare-and-branch-inversion.S    |  44 ++++--
 bolt/unittests/Core/MCPlusBuilder.cpp         | 140 ++++++++++++++++--
 13 files changed, 299 insertions(+), 50 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 0fdfcc5d76597..8a789421cde0f 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -64,6 +64,8 @@ class DWARFUnit;
 
 namespace bolt {
 
+class DataflowInfoManager;
+
 using InputOffsetToAddressMapTy = std::unordered_multimap<uint64_t, uint64_t>;
 
 /// Types of macro-fusion alignment corrections.
@@ -2452,7 +2454,7 @@ class BinaryFunction {
   /// while the second successor - false/fall-through branch.
   ///
   /// When we reverse the branch condition, the CFG is updated accordingly.
-  void fixBranches();
+  void fixBranches(DataflowInfoManager *DIM = nullptr);
 
   /// Mark function as finalized. No further optimizations are permitted.
   void setFinalized() { CurrentState = State::CFG_Finalized; }
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 0f6076688b65d..eecf01102def1 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -52,6 +52,7 @@ namespace bolt {
 class BinaryBasicBlock;
 class BinaryContext;
 class BinaryFunction;
+class DataflowInfoManager;
 
 /// Different types of indirect branches encountered during disassembly.
 enum class IndirectBranchType : char {
@@ -477,7 +478,8 @@ class MCPlusBuilder {
   }
 
   /// Check whether this conditional branch can be reversed
-  virtual bool isReversibleBranch(const MCInst &Inst) const {
+  virtual bool isReversibleBranch(const MCInst &Inst,
+                                  DataflowInfoManager *DIM = nullptr) const {
     assert(!isUnsupportedInstruction(Inst) && isConditionalBranch(Inst) &&
            "Instruction is not known conditional branch");
 
@@ -2135,7 +2137,8 @@ class MCPlusBuilder {
   }
 
   /// Reverses the branch condition in Inst and update its taken target to TBB.
-  virtual void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
+  virtual void reverseBranchCondition(BinaryBasicBlock *Parent, MCInst &Inst,
+                                      const MCSymbol *TBB,
                                       MCContext *Ctx) const {
     llvm_unreachable("not implemented");
   }
diff --git a/bolt/include/bolt/Passes/LongJmp.h b/bolt/include/bolt/Passes/LongJmp.h
index 4633d30104d43..ccdebf9e1ed0a 100644
--- a/bolt/include/bolt/Passes/LongJmp.h
+++ b/bolt/include/bolt/Passes/LongJmp.h
@@ -14,6 +14,8 @@
 namespace llvm {
 namespace bolt {
 
+class DataflowInfoManager;
+
 /// LongJmp is veneer-insertion pass originally written for AArch64 that
 /// compensates for its short-range branches, typically done during linking. We
 /// pull this pass inside BOLT because here we can do a better job at stub
@@ -74,7 +76,7 @@ class LongJmpPass : public BinaryFunctionPass {
   /// Relax all internal function branches including those between fragments.
   /// Assume that fragments are placed in different sections but are within
   /// 128MB of each other.
-  void relaxLocalBranches(BinaryFunction &BF);
+  void relaxLocalBranches(BinaryFunction &BF, DataflowInfoManager *DIM);
 
   ///                 -- Layout estimation methods --
   /// Try to do layout before running the emitter, by looking at BinaryFunctions
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index fcea952919f11..d7f2b27ced738 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -124,6 +124,10 @@ extern llvm::cl::opt<bool> UpdateDebugSections;
 // dbgs() for output within DEBUG().
 extern llvm::cl::opt<unsigned> Verbosity;
 
+// Option to control whether liveness analysis should be used by
+// FixupBranches and LongJmpPass.
+extern llvm::cl::opt<bool> LivenessAnalysis;
+
 /// Return true if we should process all functions in the binary.
 bool processAllFunctions();
 
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index c5aefe685de34..7431c753c0ee4 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -3620,7 +3620,7 @@ bool BinaryFunction::validateCFG() const {
   return true;
 }
 
-void BinaryFunction::fixBranches() {
+void BinaryFunction::fixBranches(DataflowInfoManager *DIM) {
   assert(isSimple() && "Expected function with valid CFG.");
 
   auto &MIB = BC.MIB;
@@ -3679,7 +3679,7 @@ void BinaryFunction::fixBranches() {
 
       // Reverse branch condition and swap successors.
       auto swapSuccessors = [&]() {
-        if (!MIB->isReversibleBranch(*CondBranch)) {
+        if (!MIB->isReversibleBranch(*CondBranch, DIM)) {
           if (opts::Verbosity) {
             BC.outs() << "BOLT-INFO: unable to swap successors in " << *this
                       << '\n';
@@ -3689,7 +3689,8 @@ void BinaryFunction::fixBranches() {
         std::swap(TSuccessor, FSuccessor);
         BB->swapConditionalSuccessors();
         auto L = BC.scopeLock();
-        MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx);
+        MIB->reverseBranchCondition(BB, *CondBranch, TSuccessor->getLabel(),
+                                    Ctx);
         return true;
       };
 
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index 480d0cef58f43..98be39233b7b2 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -11,8 +11,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "bolt/Passes/BinaryPasses.h"
+#include "bolt/Core/BinaryFunctionCallGraph.h"
 #include "bolt/Core/FunctionLayout.h"
 #include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Passes/DataflowInfoManager.h"
 #include "bolt/Passes/ReorderAlgorithm.h"
 #include "bolt/Passes/ReorderFunctions.h"
 #include "bolt/Utils/CommandLineOpts.h"
@@ -544,12 +546,22 @@ bool ReorderBasicBlocks::modifyFunctionLayout(BinaryFunction &BF,
 }
 
 Error FixupBranches::runOnFunctions(BinaryContext &BC) {
+  std::unique_ptr<BinaryFunctionCallGraph> CG;
+  std::unique_ptr<RegAnalysis> RA;
+  std::unique_ptr<DataflowInfoManager> DIM;
+
+  if (opts::LivenessAnalysis) {
+    CG = std::make_unique<BinaryFunctionCallGraph>(buildCallGraph(BC));
+    RA = std::make_unique<RegAnalysis>(BC, &BC.getBinaryFunctions(), CG.get());
+  }
   for (auto &It : BC.getBinaryFunctions()) {
     BinaryFunction &Function = It.second;
     if (!BC.shouldEmit(Function) || !Function.isSimple())
       continue;
 
-    Function.fixBranches();
+    if (opts::LivenessAnalysis)
+      DIM = std::make_unique<DataflowInfoManager>(Function, RA.get(), nullptr);
+    Function.fixBranches(DIM.get());
   }
   return Error::success();
 }
@@ -960,7 +972,7 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryFunction &BF) {
       uint64_t Count = 0;
       if (CondSucc != BB) {
         // Patch the new target address into the conditional branch.
-        MIB->reverseBranchCondition(*CondBranch, CalleeSymbol, Ctx);
+        MIB->reverseBranchCondition(PredBB, *CondBranch, CalleeSymbol, Ctx);
         // Since we reversed the condition on the branch we need to change
         // the target for the unconditional branch or add a unconditional
         // branch to the old target.  This has to be done manually since
diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
index 7744cf08defa9..1317f88fed627 100644
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -11,7 +11,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "bolt/Passes/LongJmp.h"
+#include "bolt/Core/BinaryFunctionCallGraph.h"
 #include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Passes/DataflowInfoManager.h"
 #include "bolt/Utils/CommandLineOpts.h"
 #include "llvm/Support/MathExtras.h"
 
@@ -651,7 +653,8 @@ Error LongJmpPass::relax(BinaryFunction &Func, bool &Modified) {
   return Error::success();
 }
 
-void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
+void LongJmpPass::relaxLocalBranches(BinaryFunction &BF,
+                                     DataflowInfoManager *DIM) {
   BinaryContext &BC = BF.getBinaryContext();
   auto &MIB = BC.MIB;
 
@@ -821,7 +824,7 @@ void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
       // If the other successor is a fall-through, invert the condition code.
       BinaryBasicBlock *NextBB =
           BF->getLayout().getBasicBlockAfter(BB, /*IgnoreSplits*/ false);
-      bool IsReversibleBranch = MIB->isReversibleBranch(Inst);
+      bool IsReversibleBranch = MIB->isReversibleBranch(Inst, DIM);
       bool ShouldReverseBranch = BB->getConditionalSuccessor(false) == NextBB;
 
       // Create a trampoline basic block for the fall-through target of the
@@ -839,7 +842,7 @@ void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
       if (ShouldReverseBranch && IsReversibleBranch) {
         BB->swapConditionalSuccessors();
         auto L = BC.scopeLock();
-        MIB->reverseBranchCondition(Inst, NextBB->getLabel(), BC.Ctx.get());
+        MIB->reverseBranchCondition(BB, Inst, NextBB->getLabel(), BC.Ctx.get());
       } else {
         auto L = BC.scopeLock();
         MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());
@@ -924,12 +927,23 @@ Error LongJmpPass::runOnFunctions(BinaryContext &BC) {
           opts::SplitStrategy != opts::SplitFunctionsStrategy::CDSplit) &&
          "LongJmp cannot work with functions split in more than two fragments");
 
+  std::unique_ptr<BinaryFunctionCallGraph> CG;
+  std::unique_ptr<RegAnalysis> RA;
+  std::unique_ptr<DataflowInfoManager> DIM;
+
+  if (opts::LivenessAnalysis) {
+    CG = std::make_unique<BinaryFunctionCallGraph>(buildCallGraph(BC));
+    RA = std::make_unique<RegAnalysis>(BC, &BC.getBinaryFunctions(), CG.get());
+  }
+
   if (opts::CompactCodeModel) {
     BC.outs()
         << "BOLT-INFO: relaxing branches for compact code model (<128MB)\n";
 
     ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
-      relaxLocalBranches(BF);
+      if (opts::LivenessAnalysis)
+        DIM = std::make_unique<DataflowInfoManager>(BF, RA.get(), nullptr);
+      relaxLocalBranches(BF, DIM.get());
     };
 
     ParallelUtilities::PredicateTy SkipPredicate =
@@ -954,12 +968,14 @@ Error LongJmpPass::runOnFunctions(BinaryContext &BC) {
     tentativeLayout(BC, Sorted);
     updateStubGroups();
     for (BinaryFunction *Func : Sorted) {
+      if (opts::LivenessAnalysis)
+        DIM = std::make_unique<DataflowInfoManager>(*Func, RA.get(), nullptr);
       if (auto E = relax(*Func, Modified))
         return Error(std::move(E));
       // Don't ruin non-simple functions, they can't afford to have the layout
       // changed.
       if (Modified && Func->isSimple())
-        Func->fixBranches();
+        Func->fixBranches(DIM.get());
     }
   } while (Modified);
   BC.outs() << "BOLT-INFO: Inserted " << NumHotStubs
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index b4c08cf1f6153..2bbcd963c6b94 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -21,6 +21,7 @@
 #include "bolt/Core/BinaryFunction.h"
 #include "bolt/Core/MCInstUtils.h"
 #include "bolt/Core/MCPlusBuilder.h"
+#include "bolt/Passes/DataflowInfoManager.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -1971,6 +1972,25 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     exit(1);
   }
 
+  unsigned getInvertedCC(unsigned Opcode) const {
+    // clang-format off
+    switch (Opcode) {
+    default:
+      llvm_unreachable("Failed to invert condition code");
+      return Opcode;
+    // Compare register with immediate and branch.
+    case AArch64::CBGTWri:  return AArch64CC::LE;
+    case AArch64::CBGTXri:  return AArch64CC::LE;
+    case AArch64::CBLTWri:  return AArch64CC::GE;
+    case AArch64::CBLTXri:  return AArch64CC::GE;
+    case AArch64::CBHIWri:  return AArch64CC::LS;
+    case AArch64::CBHIXri:  return AArch64CC::LS;
+    case AArch64::CBLOWri:  return AArch64CC::HS;
+    case AArch64::CBLOXri:  return AArch64CC::HS;
+    }
+    // clang-format on
+  }
+
   unsigned getInvertedBranchOpcode(unsigned Opcode) const {
     // clang-format off
     switch (Opcode) {
@@ -2097,38 +2117,74 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     }
   }
 
-  bool isReversibleBranch(const MCInst &Inst) const override {
+  bool is32BitVariant(unsigned Opcode) const {
+    switch (Opcode) {
+    default:
+      return false;
+    case AArch64::CBGTWri:
+    case AArch64::CBLTWri:
+    case AArch64::CBHIWri:
+    case AArch64::CBLOWri:
+      return true;
+    }
+  }
+
+  bool isReversibleBranch(const MCInst &Inst,
+                          DataflowInfoManager *DIM = nullptr) const override {
     if (isCompAndBranch(Inst)) {
+      bool IsReversible =
+          DIM ? !DIM->getLivenessAnalysis().getLiveIn(Inst).test(getFlagsReg())
+              : false;
       unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());
       if (needsImmDec(InvertedOpcode) && Inst.getOperand(1).getImm() == 0)
-        return false;
+        return IsReversible;
       if (needsImmInc(InvertedOpcode) && Inst.getOperand(1).getImm() == 63)
-        return false;
+        return IsReversible;
     }
     return MCPlusBuilder::isReversibleBranch(Inst);
   }
 
-  void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
+  void reverseBranchCondition(BinaryBasicBlock *Parent, MCInst &Inst,
+                              const MCSymbol *TBB,
                               MCContext *Ctx) const override {
-    if (!isReversibleBranch(Inst)) {
-      errs() << "BOLT-ERROR: Cannot reverse branch " << Inst << "\n";
-      exit(1);
-    }
-
     if (isTB(Inst) || isCB(Inst) || isCompAndBranch(Inst)) {
+      bool ImmediateOutOfBounds = false;
       unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());
-      Inst.setOpcode(InvertedOpcode);
-      assert(Inst.getOpcode() != 0 && "Invalid branch instruction");
+      assert(InvertedOpcode != 0 && "Invalid branch instruction");
       // The FEAT_CMPBR compare-and-branch instructions cannot encode all
       // the possible condition codes, therefore we either have to adjust
       // the immediate value by +-1, or to swap the register operands
       // when reversing the branch condition.
       if (needsRegSwap(InvertedOpcode))
         std::swap(Inst.getOperand(0), Inst.getOperand(1));
-      else if (needsImmDec(InvertedOpcode))
-        Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() - 1);
-      else if (needsImmInc(InvertedOpcode))
-        Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() + 1);
+      else if (needsImmDec(InvertedOpcode)) {
+        if (Inst.getOperand(1).getImm() == 0)
+          ImmediateOutOfBounds = true;
+        else
+          Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() - 1);
+      } else if (needsImmInc(InvertedOpcode)) {
+        if (Inst.getOperand(1).getImm() == 63)
+          ImmediateOutOfBounds = true;
+        else
+          Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() + 1);
+      }
+      if (ImmediateOutOfBounds) {
+        InstructionListType Code;
+        MCInstBuilder Cmp =
+            is32BitVariant(InvertedOpcode)
+                ? MCInstBuilder(AArch64::SUBSWri).addReg(AArch64::WZR)
+                : MCInstBuilder(AArch64::SUBSXri).addReg(AArch64::XZR);
+        Cmp.addReg(Inst.getOperand(0).getReg())
+            .addImm(Inst.getOperand(1).getImm())
+            .addImm(0);
+        Code.emplace_back(std::move(Cmp));
+        Code.emplace_back(MCInstBuilder(AArch64::Bcc)
+                              .addImm(getInvertedCC(Inst.getOpcode()))
+                              .addExpr(MCSymbolRefExpr::create(TBB, *Ctx)));
+        Parent->replaceInstruction(Parent->findInstruction(&Inst), Code);
+        return;
+      }
+      Inst.setOpcode(InvertedOpcode);
     } else if (Inst.getOpcode() == AArch64::Bcc) {
       Inst.getOperand(0).setImm(AArch64CC::getInvertedCondCode(
           static_cast<AArch64CC::CondCode>(Inst.getOperand(0).getImm())));
diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
index 2f0291f3c8175..90d9bf07223ee 100644
--- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
+++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
@@ -154,7 +154,8 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
     }
   }
 
-  void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
+  void reverseBranchCondition(BinaryBasicBlock *Parent, MCInst &Inst,
+                              const MCSymbol *TBB,
                               MCContext *Ctx) const override {
     auto Opcode = getInvertedBranchOpcode(Inst.getOpcode());
     Inst.setOpcode(Opcode);
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index aa1a70e40669a..441564f22e572 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -2806,7 +2806,8 @@ class X86MCPlusBuilder : public MCPlusBuilder {
     Inst.addOperand(MCOperand::createImm(CC));
   }
 
-  void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
+  void reverseBranchCondition(BinaryBasicBlock *Parent, MCInst &Inst,
+                              const MCSymbol *TBB,
                               MCContext *Ctx) const override {
     unsigned InvCC = getInvertedCondCode(getCondCode(Inst));
     assert(InvCC != X86::COND_INVALID && "invalid branch instruction");
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index b7eb209af8aca..fef2e4e3b8c70 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -308,6 +308,11 @@ cl::opt<unsigned>
               cl::init(0), cl::ZeroOrMore, cl::cat(BoltCategory),
               cl::sub(cl::SubCommand::getAll()));
 
+cl::opt<bool> LivenessAnalysis(
+    "liveness-analysis",
+    cl::desc("use liveness analysis in FixupBranches and LongJmpPass"),
+    cl::init(false), cl::cat(BoltCategory));
+
 bool processAllFunctions() {
   if (opts::AggregateOnly)
     return false;
diff --git a/bolt/test/AArch64/compare-and-branch-inversion.S b/bolt/test/AArch64/compare-and-branch-inversion.S
index 28167416c31cb..37ea8d504f744 100644
--- a/bolt/test/AArch64/compare-and-branch-inversion.S
+++ b/bolt/test/AArch64/compare-and-branch-inversion.S
@@ -1,7 +1,8 @@
 # This test checks that branch inversion works when reordering blocks which
 # contain short range conditional branches. Handles edge cases, like when
 # the immediate value is the upper or lower allowed value in which case the
-# transformation bails.
+# transformation bails. If liveness analysis proves that the condition flags
+# are dead we can replace the branch with cmp + b.cc
 
 # REQUIRES: system-linux, asserts
 
@@ -9,7 +10,7 @@
 # RUN: link_fdata --no-lbr %s %t %t.fdata
 # RUN: llvm-strip --strip-unneeded %t
 # RUN: llvm-bolt -v=1 %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp --compact-code-model \
-# RUN:   | FileCheck %s --check-prefix=BOLT-INFO
+# RUN:   --liveness-analysis | FileCheck %s --check-prefix=BOLT-INFO
 # RUN: llvm-objdump -d %t.bolt | FileCheck %s
 
 # CHECK: Disassembly of section .text:
@@ -80,29 +81,54 @@ register_swap:
 # CHECK-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
 # CHECK-NEXT:            {{.*}} ret
 
-  .globl irreversible
-  .type irreversible, %function
-irreversible:
+  .globl immediate_overflow
+  .type immediate_overflow, %function
+immediate_overflow:
 .entry3:
-# FDATA: 1 irreversible #.entry3# 10
+# FDATA: 1 immediate_overflow #.entry3# 10
     cbgt x0, #63, .exit3
 .cold3:
-# FDATA: 1 irreversible #.cold3# 1
+# FDATA: 1 immediate_overflow #.cold3# 1
     mov x0, #1
     ret
 .exit3:
-# FDATA: 1 irreversible #.exit3# 10
+# FDATA: 1 immediate_overflow #.exit3# 10
+    mov x0, #2
+    ret
+
+# CHECK: <immediate_overflow>:
+# CHECK-NEXT:            {{.*}} cmp  x0, #0x3f
+# CHECK-NEXT:            {{.*}} b.le 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT:            {{.*}} mov  x0, #0x2 // =2
+# CHECK-NEXT:            {{.*}} ret
+# CHECK-NEXT: [[ADDR3]]: {{.*}} mov x0, #0x1 // =1
+# CHECK-NEXT:            {{.*}} ret
+
+  .globl irreversible
+  .type irreversible, %function
+irreversible:
+.entry4:
+# FDATA: 1 irreversible #.entry4# 10
+    cmp x0, #63
+    cbgt x0, #63, .exit4
+.cold4:
+# FDATA: 1 irreversible #.cold4# 1
+    csel x0, x1, x2, le
+    ret
+.exit4:
+# FDATA: 1 irreversible #.exit4# 10
     mov x0, #2
     ret
 
 # BOLT-INFO: unable to swap successors in irreversible
 
 # CHECK: <irreversible>:
+# CHECK-NEXT:            {{.*}} cmp  x0, #0x3f
 # CHECK-NEXT:            {{.*}} cbgt x0, #0x3f, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
 # CHECK-NEXT:            {{.*}} b               0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
 # CHECK-NEXT: [[ADDR3]]: {{.*}} mov  x0, #0x2 // =2
 # CHECK-NEXT:            {{.*}} ret
-# CHECK-NEXT: [[ADDR4]]: {{.*}} mov x0, #0x1 // =1
+# CHECK-NEXT: [[ADDR4]]: {{.*}} csel x0, x1, x2, le
 # CHECK-NEXT:            {{.*}} ret
 
 ## Force relocation mode.
diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp b/bolt/unittests/Core/MCPlusBuilder.cpp
index 7073b41f524f9..64b03ae6dad50 100644
--- a/bolt/unittests/Core/MCPlusBuilder.cpp
+++ b/bolt/unittests/Core/MCPlusBuilder.cpp
@@ -18,6 +18,9 @@
 
 #include "bolt/Core/BinaryBasicBlock.h"
 #include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/BinaryFunctionCallGraph.h"
+#include "bolt/Passes/BinaryPasses.h"
+#include "bolt/Passes/DataflowInfoManager.h"
 #include "bolt/Rewrite/RewriteInstance.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
@@ -136,8 +139,8 @@ TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch) {
                            .addExpr(MCSymbolRefExpr::create(
                                TargetBB->getLabel(), *BC->Ctx.get()));
   ASSERT_TRUE(BC->MIB->isReversibleBranch(NeedsImmInc));
-  BC->MIB->reverseBranchCondition(NeedsImmInc, TargetBB->getLabel(),
-                                  BC->Ctx.get());
+  BC->MIB->reverseBranchCondition(/*ParentBB*/ nullptr, NeedsImmInc,
+                                  TargetBB->getLabel(), BC->Ctx.get());
   ASSERT_EQ(NeedsImmInc.getOpcode(), AArch64::CBLTXri);
   ASSERT_EQ(NeedsImmInc.getOperand(1).getImm(), 1);
 
@@ -150,8 +153,8 @@ TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch) {
                            .addExpr(MCSymbolRefExpr::create(
                                TargetBB->getLabel(), *BC->Ctx.get()));
   ASSERT_TRUE(BC->MIB->isReversibleBranch(NeedsImmDec));
-  BC->MIB->reverseBranchCondition(NeedsImmDec, TargetBB->getLabel(),
-                                  BC->Ctx.get());
+  BC->MIB->reverseBranchCondition(/*ParentBB*/ nullptr, NeedsImmDec,
+                                  TargetBB->getLabel(), BC->Ctx.get());
   ASSERT_EQ(NeedsImmDec.getOpcode(), AArch64::CBHIXri);
   ASSERT_EQ(NeedsImmDec.getOperand(1).getImm(), 0);
 
@@ -164,8 +167,8 @@ TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch) {
                                    .addExpr(MCSymbolRefExpr::create(
                                        TargetBB->getLabel(), *BC->Ctx.get()));
   ASSERT_TRUE(BC->MIB->isReversibleBranch(CompRegNeedsRegSwap));
-  BC->MIB->reverseBranchCondition(CompRegNeedsRegSwap, TargetBB->getLabel(),
-                                  BC->Ctx.get());
+  BC->MIB->reverseBranchCondition(/*ParentBB*/ nullptr, CompRegNeedsRegSwap,
+                                  TargetBB->getLabel(), BC->Ctx.get());
   ASSERT_EQ(CompRegNeedsRegSwap.getOpcode(), AArch64::CBGTXrr);
   ASSERT_EQ(CompRegNeedsRegSwap.getOperand(0).getReg(), AArch64::X1);
   ASSERT_EQ(CompRegNeedsRegSwap.getOperand(1).getReg(), AArch64::X0);
@@ -179,8 +182,8 @@ TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch) {
                                     .addExpr(MCSymbolRefExpr::create(
                                         TargetBB->getLabel(), *BC->Ctx.get()));
   ASSERT_TRUE(BC->MIB->isReversibleBranch(CompByteNeedsRegSwap));
-  BC->MIB->reverseBranchCondition(CompByteNeedsRegSwap, TargetBB->getLabel(),
-                                  BC->Ctx.get());
+  BC->MIB->reverseBranchCondition(/*ParentBB*/ nullptr, CompByteNeedsRegSwap,
+                                  TargetBB->getLabel(), BC->Ctx.get());
   ASSERT_EQ(CompByteNeedsRegSwap.getOpcode(), AArch64::CBBHSWrr);
   ASSERT_EQ(CompByteNeedsRegSwap.getOperand(0).getReg(), AArch64::W1);
   ASSERT_EQ(CompByteNeedsRegSwap.getOperand(1).getReg(), AArch64::W0);
@@ -194,8 +197,8 @@ TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch) {
                                     .addExpr(MCSymbolRefExpr::create(
                                         TargetBB->getLabel(), *BC->Ctx.get()));
   ASSERT_TRUE(BC->MIB->isReversibleBranch(CompHalfNeedsRegSwap));
-  BC->MIB->reverseBranchCondition(CompHalfNeedsRegSwap, TargetBB->getLabel(),
-                                  BC->Ctx.get());
+  BC->MIB->reverseBranchCondition(/*ParentBB*/ nullptr, CompHalfNeedsRegSwap,
+                                  TargetBB->getLabel(), BC->Ctx.get());
   ASSERT_EQ(CompHalfNeedsRegSwap.getOpcode(), AArch64::CBHHIWrr);
   ASSERT_EQ(CompHalfNeedsRegSwap.getOperand(0).getReg(), AArch64::W1);
   ASSERT_EQ(CompHalfNeedsRegSwap.getOperand(1).getReg(), AArch64::W0);
@@ -221,6 +224,123 @@ TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch) {
   ASSERT_FALSE(BC->MIB->isReversibleBranch(Overflows));
 }
 
+TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch_Underflows) {
+  if (GetParam() != Triple::aarch64)
+    GTEST_SKIP();
+
+  BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+  BinaryBasicBlock *EntryBB = BF->addBasicBlock();
+  BinaryBasicBlock *FallThroughBB = BF->addBasicBlock();
+  BinaryBasicBlock *TargetBB = BF->addBasicBlock();
+  BF->addEntryPoint(*EntryBB);
+  EntryBB->addSuccessor(TargetBB);
+  EntryBB->addSuccessor(FallThroughBB);
+
+  // Inversion requires expansion, immediate value underflows.
+  // cblt x0, #0, target ~> cmp x0, #0
+  //                        b.ge target
+  auto I =
+      EntryBB->addInstruction(MCInstBuilder(AArch64::CBLTXri)
+                                  .addReg(AArch64::X0)
+                                  .addImm(0)
+                                  .addExpr(MCSymbolRefExpr::create(
+                                      TargetBB->getLabel(), *BC->Ctx.get())));
+  BinaryFunctionCallGraph CG(buildCallGraph(*BC.get()));
+  RegAnalysis RA(*BC.get(), &BC->getBinaryFunctions(), &CG);
+  DataflowInfoManager DIM(*BF, &RA, nullptr);
+
+  ASSERT_TRUE(BC->MIB->isReversibleBranch(*I, &DIM));
+  BC->MIB->reverseBranchCondition(EntryBB, *I, TargetBB->getLabel(),
+                                  BC->Ctx.get());
+  auto R = EntryBB->rbegin();
+  ASSERT_EQ(R->getOpcode(), AArch64::Bcc);
+  ASSERT_EQ(R->getOperand(0).getImm(), AArch64CC::GE);
+  R++;
+  ASSERT_EQ(R->getOpcode(), AArch64::SUBSXri);
+  ASSERT_EQ(R->getOperand(0).getReg(), AArch64::XZR);
+  ASSERT_EQ(R->getOperand(1).getReg(), AArch64::X0);
+  ASSERT_EQ(R->getOperand(2).getImm(), 0);
+  ASSERT_EQ(R->getOperand(3).getImm(), 0);
+}
+
+TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch_Overflows) {
+  if (GetParam() != Triple::aarch64)
+    GTEST_SKIP();
+
+  BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+  BinaryBasicBlock *EntryBB = BF->addBasicBlock();
+  BinaryBasicBlock *FallThroughBB = BF->addBasicBlock();
+  BinaryBasicBlock *TargetBB = BF->addBasicBlock();
+  BF->addEntryPoint(*EntryBB);
+  EntryBB->addSuccessor(TargetBB);
+  EntryBB->addSuccessor(FallThroughBB);
+
+  // Inversion requires expansion, immediate value overflows.
+  // cbhi w0, #63, target ~> cmp w0, #63
+  //                         b.ls target
+  auto I =
+      EntryBB->addInstruction(MCInstBuilder(AArch64::CBHIWri)
+                                  .addReg(AArch64::W0)
+                                  .addImm(63)
+                                  .addExpr(MCSymbolRefExpr::create(
+                                      TargetBB->getLabel(), *BC->Ctx.get())));
+  BinaryFunctionCallGraph CG(buildCallGraph(*BC.get()));
+  RegAnalysis RA(*BC.get(), &BC->getBinaryFunctions(), &CG);
+  DataflowInfoManager DIM(*BF, &RA, nullptr);
+
+  ASSERT_TRUE(BC->MIB->isReversibleBranch(*I, &DIM));
+  BC->MIB->reverseBranchCondition(EntryBB, *I, TargetBB->getLabel(),
+                                  BC->Ctx.get());
+  auto R = EntryBB->rbegin();
+  ASSERT_EQ(R->getOpcode(), AArch64::Bcc);
+  ASSERT_EQ(R->getOperand(0).getImm(), AArch64CC::LS);
+  R++;
+  ASSERT_EQ(R->getOpcode(), AArch64::SUBSWri);
+  ASSERT_EQ(R->getOperand(0).getReg(), AArch64::WZR);
+  ASSERT_EQ(R->getOperand(1).getReg(), AArch64::W0);
+  ASSERT_EQ(R->getOperand(2).getImm(), 63);
+  ASSERT_EQ(R->getOperand(3).getImm(), 0);
+}
+
+TEST_P(MCPlusBuilderTester, AArch64_IsReversibleBranch_LiveCondFlags) {
+  if (GetParam() != Triple::aarch64)
+    GTEST_SKIP();
+
+  BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+  BinaryBasicBlock *EntryBB = BF->addBasicBlock();
+  BinaryBasicBlock *FallThroughBB = BF->addBasicBlock();
+  BinaryBasicBlock *TargetBB = BF->addBasicBlock();
+  BF->addEntryPoint(*EntryBB);
+  EntryBB->addSuccessor(TargetBB);
+  EntryBB->addSuccessor(FallThroughBB);
+
+  // cmp x0, #63
+  EntryBB->addInstruction(MCInstBuilder(AArch64::SUBSXri)
+                              .addReg(AArch64::XZR)
+                              .addReg(AArch64::X0)
+                              .addImm(63)
+                              .addImm(0));
+  // cbgt x0, #63, target
+  auto I =
+      EntryBB->addInstruction(MCInstBuilder(AArch64::CBGTXri)
+                                  .addReg(AArch64::X0)
+                                  .addImm(63)
+                                  .addExpr(MCSymbolRefExpr::create(
+                                      TargetBB->getLabel(), *BC->Ctx.get())));
+  // csel x0, x1, x2, le
+  FallThroughBB->addInstruction(MCInstBuilder(AArch64::CSELXr)
+                                    .addReg(AArch64::X0)
+                                    .addReg(AArch64::X1)
+                                    .addReg(AArch64::X2)
+                                    .addImm(13));
+
+  BinaryFunctionCallGraph CG(buildCallGraph(*BC.get()));
+  RegAnalysis RA(*BC.get(), &BC->getBinaryFunctions(), &CG);
+  DataflowInfoManager DIM(*BF, &RA, nullptr);
+
+  ASSERT_FALSE(BC->MIB->isReversibleBranch(*I, &DIM));
+}
+
 TEST_P(MCPlusBuilderTester, AArch64_CmpJE) {
   if (GetParam() != Triple::aarch64)
     GTEST_SKIP();



More information about the llvm-commits mailing list