[llvm] [BOLT][AArch64] Transform {cmpbr ~> cmp + br} when inversion not possible (PR #185731)
Alexandros Lamprineas via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 2 10:09:58 PDT 2026
https://github.com/labrinea updated https://github.com/llvm/llvm-project/pull/185731
>From 96f1ccd93053b6c19026bb17b451e08ace19ce31 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Thu, 2 Apr 2026 16:54:12 +0100
Subject: [PATCH] [BOLT][AArch64] Transform cmpbr ~> cmp + br when inversion
not possible
When reordering blocks we may have to invert branches. Sometimes this isn't
possible for compare-and-branch instructions because the immediate value
would overflow/underflow after the adjustment. In such cases the instruction
is split into a compare followed by a branch. For this transformation to be
legal, the condition flags must be dead at that point (since the compare
clobbers them); liveness analysis is used to prove this.
---
bolt/include/bolt/Core/BinaryFunction.h | 4 +-
bolt/include/bolt/Core/MCPlusBuilder.h | 7 +-
bolt/include/bolt/Passes/LongJmp.h | 4 +-
bolt/include/bolt/Utils/CommandLineOpts.h | 4 +
bolt/lib/Core/BinaryFunction.cpp | 7 +-
bolt/lib/Passes/BinaryPasses.cpp | 16 +-
bolt/lib/Passes/LongJmp.cpp | 26 +++-
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 86 +++++++++--
bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp | 3 +-
bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 3 +-
bolt/lib/Utils/CommandLineOpts.cpp | 5 +
.../AArch64/compare-and-branch-inversion.S | 44 ++++--
bolt/unittests/Core/MCPlusBuilder.cpp | 140 ++++++++++++++++--
13 files changed, 299 insertions(+), 50 deletions(-)
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 0fdfcc5d76597..8a789421cde0f 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -64,6 +64,8 @@ class DWARFUnit;
namespace bolt {
+class DataflowInfoManager;
+
using InputOffsetToAddressMapTy = std::unordered_multimap<uint64_t, uint64_t>;
/// Types of macro-fusion alignment corrections.
@@ -2452,7 +2454,7 @@ class BinaryFunction {
/// while the second successor - false/fall-through branch.
///
/// When we reverse the branch condition, the CFG is updated accordingly.
- void fixBranches();
+ void fixBranches(DataflowInfoManager *DIM = nullptr);
/// Mark function as finalized. No further optimizations are permitted.
void setFinalized() { CurrentState = State::CFG_Finalized; }
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 0f6076688b65d..eecf01102def1 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -52,6 +52,7 @@ namespace bolt {
class BinaryBasicBlock;
class BinaryContext;
class BinaryFunction;
+class DataflowInfoManager;
/// Different types of indirect branches encountered during disassembly.
enum class IndirectBranchType : char {
@@ -477,7 +478,8 @@ class MCPlusBuilder {
}
/// Check whether this conditional branch can be reversed
- virtual bool isReversibleBranch(const MCInst &Inst) const {
+ virtual bool isReversibleBranch(const MCInst &Inst,
+ DataflowInfoManager *DIM = nullptr) const {
assert(!isUnsupportedInstruction(Inst) && isConditionalBranch(Inst) &&
"Instruction is not known conditional branch");
@@ -2135,7 +2137,8 @@ class MCPlusBuilder {
}
/// Reverses the branch condition in Inst and update its taken target to TBB.
- virtual void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
+ virtual void reverseBranchCondition(BinaryBasicBlock *Parent, MCInst &Inst,
+ const MCSymbol *TBB,
MCContext *Ctx) const {
llvm_unreachable("not implemented");
}
diff --git a/bolt/include/bolt/Passes/LongJmp.h b/bolt/include/bolt/Passes/LongJmp.h
index 4633d30104d43..ccdebf9e1ed0a 100644
--- a/bolt/include/bolt/Passes/LongJmp.h
+++ b/bolt/include/bolt/Passes/LongJmp.h
@@ -14,6 +14,8 @@
namespace llvm {
namespace bolt {
+class DataflowInfoManager;
+
/// LongJmp is veneer-insertion pass originally written for AArch64 that
/// compensates for its short-range branches, typically done during linking. We
/// pull this pass inside BOLT because here we can do a better job at stub
@@ -74,7 +76,7 @@ class LongJmpPass : public BinaryFunctionPass {
/// Relax all internal function branches including those between fragments.
/// Assume that fragments are placed in different sections but are within
/// 128MB of each other.
- void relaxLocalBranches(BinaryFunction &BF);
+ void relaxLocalBranches(BinaryFunction &BF, DataflowInfoManager *DIM);
/// -- Layout estimation methods --
/// Try to do layout before running the emitter, by looking at BinaryFunctions
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index fcea952919f11..d7f2b27ced738 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -124,6 +124,10 @@ extern llvm::cl::opt<bool> UpdateDebugSections;
// dbgs() for output within DEBUG().
extern llvm::cl::opt<unsigned> Verbosity;
+// Option to control whether liveness analysis should be used by
+// FixupBranches and LongJmpPass.
+extern llvm::cl::opt<bool> LivenessAnalysis;
+
/// Return true if we should process all functions in the binary.
bool processAllFunctions();
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index c5aefe685de34..7431c753c0ee4 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -3620,7 +3620,7 @@ bool BinaryFunction::validateCFG() const {
return true;
}
-void BinaryFunction::fixBranches() {
+void BinaryFunction::fixBranches(DataflowInfoManager *DIM) {
assert(isSimple() && "Expected function with valid CFG.");
auto &MIB = BC.MIB;
@@ -3679,7 +3679,7 @@ void BinaryFunction::fixBranches() {
// Reverse branch condition and swap successors.
auto swapSuccessors = [&]() {
- if (!MIB->isReversibleBranch(*CondBranch)) {
+ if (!MIB->isReversibleBranch(*CondBranch, DIM)) {
if (opts::Verbosity) {
BC.outs() << "BOLT-INFO: unable to swap successors in " << *this
<< '\n';
@@ -3689,7 +3689,8 @@ void BinaryFunction::fixBranches() {
std::swap(TSuccessor, FSuccessor);
BB->swapConditionalSuccessors();
auto L = BC.scopeLock();
- MIB->reverseBranchCondition(*CondBranch, TSuccessor->getLabel(), Ctx);
+ MIB->reverseBranchCondition(BB, *CondBranch, TSuccessor->getLabel(),
+ Ctx);
return true;
};
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index 480d0cef58f43..98be39233b7b2 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -11,8 +11,10 @@
//===----------------------------------------------------------------------===//
#include "bolt/Passes/BinaryPasses.h"
+#include "bolt/Core/BinaryFunctionCallGraph.h"
#include "bolt/Core/FunctionLayout.h"
#include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Passes/DataflowInfoManager.h"
#include "bolt/Passes/ReorderAlgorithm.h"
#include "bolt/Passes/ReorderFunctions.h"
#include "bolt/Utils/CommandLineOpts.h"
@@ -544,12 +546,22 @@ bool ReorderBasicBlocks::modifyFunctionLayout(BinaryFunction &BF,
}
Error FixupBranches::runOnFunctions(BinaryContext &BC) {
+ std::unique_ptr<BinaryFunctionCallGraph> CG;
+ std::unique_ptr<RegAnalysis> RA;
+ std::unique_ptr<DataflowInfoManager> DIM;
+
+ if (opts::LivenessAnalysis) {
+ CG = std::make_unique<BinaryFunctionCallGraph>(buildCallGraph(BC));
+ RA = std::make_unique<RegAnalysis>(BC, &BC.getBinaryFunctions(), CG.get());
+ }
for (auto &It : BC.getBinaryFunctions()) {
BinaryFunction &Function = It.second;
if (!BC.shouldEmit(Function) || !Function.isSimple())
continue;
- Function.fixBranches();
+ if (opts::LivenessAnalysis)
+ DIM = std::make_unique<DataflowInfoManager>(Function, RA.get(), nullptr);
+ Function.fixBranches(DIM.get());
}
return Error::success();
}
@@ -960,7 +972,7 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryFunction &BF) {
uint64_t Count = 0;
if (CondSucc != BB) {
// Patch the new target address into the conditional branch.
- MIB->reverseBranchCondition(*CondBranch, CalleeSymbol, Ctx);
+ MIB->reverseBranchCondition(PredBB, *CondBranch, CalleeSymbol, Ctx);
// Since we reversed the condition on the branch we need to change
// the target for the unconditional branch or add a unconditional
// branch to the old target. This has to be done manually since
diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
index 7744cf08defa9..1317f88fed627 100644
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -11,7 +11,9 @@
//===----------------------------------------------------------------------===//
#include "bolt/Passes/LongJmp.h"
+#include "bolt/Core/BinaryFunctionCallGraph.h"
#include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Passes/DataflowInfoManager.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/Support/MathExtras.h"
@@ -651,7 +653,8 @@ Error LongJmpPass::relax(BinaryFunction &Func, bool &Modified) {
return Error::success();
}
-void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
+void LongJmpPass::relaxLocalBranches(BinaryFunction &BF,
+ DataflowInfoManager *DIM) {
BinaryContext &BC = BF.getBinaryContext();
auto &MIB = BC.MIB;
@@ -821,7 +824,7 @@ void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
// If the other successor is a fall-through, invert the condition code.
BinaryBasicBlock *NextBB =
BF->getLayout().getBasicBlockAfter(BB, /*IgnoreSplits*/ false);
- bool IsReversibleBranch = MIB->isReversibleBranch(Inst);
+ bool IsReversibleBranch = MIB->isReversibleBranch(Inst, DIM);
bool ShouldReverseBranch = BB->getConditionalSuccessor(false) == NextBB;
// Create a trampoline basic block for the fall-through target of the
@@ -839,7 +842,7 @@ void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
if (ShouldReverseBranch && IsReversibleBranch) {
BB->swapConditionalSuccessors();
auto L = BC.scopeLock();
- MIB->reverseBranchCondition(Inst, NextBB->getLabel(), BC.Ctx.get());
+ MIB->reverseBranchCondition(BB, Inst, NextBB->getLabel(), BC.Ctx.get());
} else {
auto L = BC.scopeLock();
MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());
@@ -924,12 +927,23 @@ Error LongJmpPass::runOnFunctions(BinaryContext &BC) {
opts::SplitStrategy != opts::SplitFunctionsStrategy::CDSplit) &&
"LongJmp cannot work with functions split in more than two fragments");
+ std::unique_ptr<BinaryFunctionCallGraph> CG;
+ std::unique_ptr<RegAnalysis> RA;
+ std::unique_ptr<DataflowInfoManager> DIM;
+
+ if (opts::LivenessAnalysis) {
+ CG = std::make_unique<BinaryFunctionCallGraph>(buildCallGraph(BC));
+ RA = std::make_unique<RegAnalysis>(BC, &BC.getBinaryFunctions(), CG.get());
+ }
+
if (opts::CompactCodeModel) {
BC.outs()
<< "BOLT-INFO: relaxing branches for compact code model (<128MB)\n";
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
- relaxLocalBranches(BF);
+ if (opts::LivenessAnalysis)
+ DIM = std::make_unique<DataflowInfoManager>(BF, RA.get(), nullptr);
+ relaxLocalBranches(BF, DIM.get());
};
ParallelUtilities::PredicateTy SkipPredicate =
@@ -954,12 +968,14 @@ Error LongJmpPass::runOnFunctions(BinaryContext &BC) {
tentativeLayout(BC, Sorted);
updateStubGroups();
for (BinaryFunction *Func : Sorted) {
+ if (opts::LivenessAnalysis)
+ DIM = std::make_unique<DataflowInfoManager>(*Func, RA.get(), nullptr);
if (auto E = relax(*Func, Modified))
return Error(std::move(E));
// Don't ruin non-simple functions, they can't afford to have the layout
// changed.
if (Modified && Func->isSimple())
- Func->fixBranches();
+ Func->fixBranches(DIM.get());
}
} while (Modified);
BC.outs() << "BOLT-INFO: Inserted " << NumHotStubs
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index b4c08cf1f6153..2bbcd963c6b94 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -21,6 +21,7 @@
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/MCInstUtils.h"
#include "bolt/Core/MCPlusBuilder.h"
+#include "bolt/Passes/DataflowInfoManager.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -1971,6 +1972,25 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
exit(1);
}
+ unsigned getInvertedCC(unsigned Opcode) const {
+ // clang-format off
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Failed to invert condition code");
+ return Opcode;
+ // Compare register with immediate and branch.
+ case AArch64::CBGTWri: return AArch64CC::LE;
+ case AArch64::CBGTXri: return AArch64CC::LE;
+ case AArch64::CBLTWri: return AArch64CC::GE;
+ case AArch64::CBLTXri: return AArch64CC::GE;
+ case AArch64::CBHIWri: return AArch64CC::LS;
+ case AArch64::CBHIXri: return AArch64CC::LS;
+ case AArch64::CBLOWri: return AArch64CC::HS;
+ case AArch64::CBLOXri: return AArch64CC::HS;
+ }
+ // clang-format on
+ }
+
unsigned getInvertedBranchOpcode(unsigned Opcode) const {
// clang-format off
switch (Opcode) {
@@ -2097,38 +2117,74 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
}
- bool isReversibleBranch(const MCInst &Inst) const override {
+ bool is32BitVariant(unsigned Opcode) const {
+ switch (Opcode) {
+ default:
+ return false;
+ case AArch64::CBGTWri:
+ case AArch64::CBLTWri:
+ case AArch64::CBHIWri:
+ case AArch64::CBLOWri:
+ return true;
+ }
+ }
+
+ bool isReversibleBranch(const MCInst &Inst,
+ DataflowInfoManager *DIM = nullptr) const override {
if (isCompAndBranch(Inst)) {
+ bool IsReversible =
+ DIM ? !DIM->getLivenessAnalysis().getLiveIn(Inst).test(getFlagsReg())
+ : false;
unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());
if (needsImmDec(InvertedOpcode) && Inst.getOperand(1).getImm() == 0)
- return false;
+ return IsReversible;
if (needsImmInc(InvertedOpcode) && Inst.getOperand(1).getImm() == 63)
- return false;
+ return IsReversible;
}
return MCPlusBuilder::isReversibleBranch(Inst);
}
- void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
+ void reverseBranchCondition(BinaryBasicBlock *Parent, MCInst &Inst,
+ const MCSymbol *TBB,
MCContext *Ctx) const override {
- if (!isReversibleBranch(Inst)) {
- errs() << "BOLT-ERROR: Cannot reverse branch " << Inst << "\n";
- exit(1);
- }
-
if (isTB(Inst) || isCB(Inst) || isCompAndBranch(Inst)) {
+ bool ImmediateOutOfBounds = false;
unsigned InvertedOpcode = getInvertedBranchOpcode(Inst.getOpcode());
- Inst.setOpcode(InvertedOpcode);
- assert(Inst.getOpcode() != 0 && "Invalid branch instruction");
+ assert(InvertedOpcode != 0 && "Invalid branch instruction");
// The FEAT_CMPBR compare-and-branch instructions cannot encode all
// the possible condition codes, therefore we either have to adjust
// the immediate value by +-1, or to swap the register operands
// when reversing the branch condition.
if (needsRegSwap(InvertedOpcode))
std::swap(Inst.getOperand(0), Inst.getOperand(1));
- else if (needsImmDec(InvertedOpcode))
- Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() - 1);
- else if (needsImmInc(InvertedOpcode))
- Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() + 1);
+ else if (needsImmDec(InvertedOpcode)) {
+ if (Inst.getOperand(1).getImm() == 0)
+ ImmediateOutOfBounds = true;
+ else
+ Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() - 1);
+ } else if (needsImmInc(InvertedOpcode)) {
+ if (Inst.getOperand(1).getImm() == 63)
+ ImmediateOutOfBounds = true;
+ else
+ Inst.getOperand(1).setImm(Inst.getOperand(1).getImm() + 1);
+ }
+ if (ImmediateOutOfBounds) {
+ InstructionListType Code;
+ MCInstBuilder Cmp =
+ is32BitVariant(InvertedOpcode)
+ ? MCInstBuilder(AArch64::SUBSWri).addReg(AArch64::WZR)
+ : MCInstBuilder(AArch64::SUBSXri).addReg(AArch64::XZR);
+ Cmp.addReg(Inst.getOperand(0).getReg())
+ .addImm(Inst.getOperand(1).getImm())
+ .addImm(0);
+ Code.emplace_back(std::move(Cmp));
+ Code.emplace_back(MCInstBuilder(AArch64::Bcc)
+ .addImm(getInvertedCC(Inst.getOpcode()))
+ .addExpr(MCSymbolRefExpr::create(TBB, *Ctx)));
+ Parent->replaceInstruction(Parent->findInstruction(&Inst), Code);
+ return;
+ }
+ Inst.setOpcode(InvertedOpcode);
} else if (Inst.getOpcode() == AArch64::Bcc) {
Inst.getOperand(0).setImm(AArch64CC::getInvertedCondCode(
static_cast<AArch64CC::CondCode>(Inst.getOperand(0).getImm())));
diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
index 2f0291f3c8175..90d9bf07223ee 100644
--- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
+++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
@@ -154,7 +154,8 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
}
}
- void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
+ void reverseBranchCondition(BinaryBasicBlock *Parent, MCInst &Inst,
+ const MCSymbol *TBB,
MCContext *Ctx) const override {
auto Opcode = getInvertedBranchOpcode(Inst.getOpcode());
Inst.setOpcode(Opcode);
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index aa1a70e40669a..441564f22e572 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -2806,7 +2806,8 @@ class X86MCPlusBuilder : public MCPlusBuilder {
Inst.addOperand(MCOperand::createImm(CC));
}
- void reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
+ void reverseBranchCondition(BinaryBasicBlock *Parent, MCInst &Inst,
+ const MCSymbol *TBB,
MCContext *Ctx) const override {
unsigned InvCC = getInvertedCondCode(getCondCode(Inst));
assert(InvCC != X86::COND_INVALID && "invalid branch instruction");
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index b7eb209af8aca..fef2e4e3b8c70 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -308,6 +308,11 @@ cl::opt<unsigned>
cl::init(0), cl::ZeroOrMore, cl::cat(BoltCategory),
cl::sub(cl::SubCommand::getAll()));
+cl::opt<bool> LivenessAnalysis(
+ "liveness-analysis",
+ cl::desc("use liveness analysis in FixupBranches and LongJmpPass"),
+ cl::init(false), cl::cat(BoltCategory));
+
bool processAllFunctions() {
if (opts::AggregateOnly)
return false;
diff --git a/bolt/test/AArch64/compare-and-branch-inversion.S b/bolt/test/AArch64/compare-and-branch-inversion.S
index 28167416c31cb..37ea8d504f744 100644
--- a/bolt/test/AArch64/compare-and-branch-inversion.S
+++ b/bolt/test/AArch64/compare-and-branch-inversion.S
@@ -1,7 +1,8 @@
# This test checks that branch inversion works when reordering blocks which
# contain short range conditional branches. Handles edge cases, like when
# the immediate value is the upper or lower allowed value in which case the
-# transformation bails.
+# transformation bails. If liveness analysis proves that the condition flags
+# are dead we can replace the branch with cmp + b.cc
# REQUIRES: system-linux, asserts
@@ -9,7 +10,7 @@
# RUN: link_fdata --no-lbr %s %t %t.fdata
# RUN: llvm-strip --strip-unneeded %t
# RUN: llvm-bolt -v=1 %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp --compact-code-model \
-# RUN: | FileCheck %s --check-prefix=BOLT-INFO
+# RUN: --liveness-analysis | FileCheck %s --check-prefix=BOLT-INFO
# RUN: llvm-objdump -d %t.bolt | FileCheck %s
# CHECK: Disassembly of section .text:
@@ -80,29 +81,54 @@ register_swap:
# CHECK-NEXT: [[ADDR2]]: {{.*}} mov x0, #0x1 // =1
# CHECK-NEXT: {{.*}} ret
- .globl irreversible
- .type irreversible, %function
-irreversible:
+ .globl immediate_overflow
+ .type immediate_overflow, %function
+immediate_overflow:
.entry3:
-# FDATA: 1 irreversible #.entry3# 10
+# FDATA: 1 immediate_overflow #.entry3# 10
cbgt x0, #63, .exit3
.cold3:
-# FDATA: 1 irreversible #.cold3# 1
+# FDATA: 1 immediate_overflow #.cold3# 1
mov x0, #1
ret
.exit3:
-# FDATA: 1 irreversible #.exit3# 10
+# FDATA: 1 immediate_overflow #.exit3# 10
+ mov x0, #2
+ ret
+
+# CHECK: <immediate_overflow>:
+# CHECK-NEXT: {{.*}} cmp x0, #0x3f
+# CHECK-NEXT: {{.*}} b.le 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
+# CHECK-NEXT: {{.*}} mov x0, #0x2 // =2
+# CHECK-NEXT: {{.*}} ret
+# CHECK-NEXT: [[ADDR3]]: {{.*}} mov x0, #0x1 // =1
+# CHECK-NEXT: {{.*}} ret
+
+ .globl irreversible
+ .type irreversible, %function
+irreversible:
+.entry4:
+# FDATA: 1 irreversible #.entry4# 10
+ cmp x0, #63
+ cbgt x0, #63, .exit4
+.cold4:
+# FDATA: 1 irreversible #.cold4# 1
+ csel x0, x1, x2, le
+ ret
+.exit4:
+# FDATA: 1 irreversible #.exit4# 10
mov x0, #2
ret
# BOLT-INFO: unable to swap successors in irreversible
# CHECK: <irreversible>:
+# CHECK-NEXT: {{.*}} cmp x0, #0x3f
# CHECK-NEXT: {{.*}} cbgt x0, #0x3f, 0x[[ADDR3:[0-9a-f]+]] <{{.*}}>
# CHECK-NEXT: {{.*}} b 0x[[ADDR4:[0-9a-f]+]] <{{.*}}>
# CHECK-NEXT: [[ADDR3]]: {{.*}} mov x0, #0x2 // =2
# CHECK-NEXT: {{.*}} ret
-# CHECK-NEXT: [[ADDR4]]: {{.*}} mov x0, #0x1 // =1
+# CHECK-NEXT: [[ADDR4]]: {{.*}} csel x0, x1, x2, le
# CHECK-NEXT: {{.*}} ret
## Force relocation mode.
diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp b/bolt/unittests/Core/MCPlusBuilder.cpp
index 7073b41f524f9..64b03ae6dad50 100644
--- a/bolt/unittests/Core/MCPlusBuilder.cpp
+++ b/bolt/unittests/Core/MCPlusBuilder.cpp
@@ -18,6 +18,9 @@
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/BinaryFunctionCallGraph.h"
+#include "bolt/Passes/BinaryPasses.h"
+#include "bolt/Passes/DataflowInfoManager.h"
#include "bolt/Rewrite/RewriteInstance.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
@@ -136,8 +139,8 @@ TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch) {
.addExpr(MCSymbolRefExpr::create(
TargetBB->getLabel(), *BC->Ctx.get()));
ASSERT_TRUE(BC->MIB->isReversibleBranch(NeedsImmInc));
- BC->MIB->reverseBranchCondition(NeedsImmInc, TargetBB->getLabel(),
- BC->Ctx.get());
+ BC->MIB->reverseBranchCondition(/*ParentBB*/ nullptr, NeedsImmInc,
+ TargetBB->getLabel(), BC->Ctx.get());
ASSERT_EQ(NeedsImmInc.getOpcode(), AArch64::CBLTXri);
ASSERT_EQ(NeedsImmInc.getOperand(1).getImm(), 1);
@@ -150,8 +153,8 @@ TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch) {
.addExpr(MCSymbolRefExpr::create(
TargetBB->getLabel(), *BC->Ctx.get()));
ASSERT_TRUE(BC->MIB->isReversibleBranch(NeedsImmDec));
- BC->MIB->reverseBranchCondition(NeedsImmDec, TargetBB->getLabel(),
- BC->Ctx.get());
+ BC->MIB->reverseBranchCondition(/*ParentBB*/ nullptr, NeedsImmDec,
+ TargetBB->getLabel(), BC->Ctx.get());
ASSERT_EQ(NeedsImmDec.getOpcode(), AArch64::CBHIXri);
ASSERT_EQ(NeedsImmDec.getOperand(1).getImm(), 0);
@@ -164,8 +167,8 @@ TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch) {
.addExpr(MCSymbolRefExpr::create(
TargetBB->getLabel(), *BC->Ctx.get()));
ASSERT_TRUE(BC->MIB->isReversibleBranch(CompRegNeedsRegSwap));
- BC->MIB->reverseBranchCondition(CompRegNeedsRegSwap, TargetBB->getLabel(),
- BC->Ctx.get());
+ BC->MIB->reverseBranchCondition(/*ParentBB*/ nullptr, CompRegNeedsRegSwap,
+ TargetBB->getLabel(), BC->Ctx.get());
ASSERT_EQ(CompRegNeedsRegSwap.getOpcode(), AArch64::CBGTXrr);
ASSERT_EQ(CompRegNeedsRegSwap.getOperand(0).getReg(), AArch64::X1);
ASSERT_EQ(CompRegNeedsRegSwap.getOperand(1).getReg(), AArch64::X0);
@@ -179,8 +182,8 @@ TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch) {
.addExpr(MCSymbolRefExpr::create(
TargetBB->getLabel(), *BC->Ctx.get()));
ASSERT_TRUE(BC->MIB->isReversibleBranch(CompByteNeedsRegSwap));
- BC->MIB->reverseBranchCondition(CompByteNeedsRegSwap, TargetBB->getLabel(),
- BC->Ctx.get());
+ BC->MIB->reverseBranchCondition(/*ParentBB*/ nullptr, CompByteNeedsRegSwap,
+ TargetBB->getLabel(), BC->Ctx.get());
ASSERT_EQ(CompByteNeedsRegSwap.getOpcode(), AArch64::CBBHSWrr);
ASSERT_EQ(CompByteNeedsRegSwap.getOperand(0).getReg(), AArch64::W1);
ASSERT_EQ(CompByteNeedsRegSwap.getOperand(1).getReg(), AArch64::W0);
@@ -194,8 +197,8 @@ TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch) {
.addExpr(MCSymbolRefExpr::create(
TargetBB->getLabel(), *BC->Ctx.get()));
ASSERT_TRUE(BC->MIB->isReversibleBranch(CompHalfNeedsRegSwap));
- BC->MIB->reverseBranchCondition(CompHalfNeedsRegSwap, TargetBB->getLabel(),
- BC->Ctx.get());
+ BC->MIB->reverseBranchCondition(/*ParentBB*/ nullptr, CompHalfNeedsRegSwap,
+ TargetBB->getLabel(), BC->Ctx.get());
ASSERT_EQ(CompHalfNeedsRegSwap.getOpcode(), AArch64::CBHHIWrr);
ASSERT_EQ(CompHalfNeedsRegSwap.getOperand(0).getReg(), AArch64::W1);
ASSERT_EQ(CompHalfNeedsRegSwap.getOperand(1).getReg(), AArch64::W0);
@@ -221,6 +224,123 @@ TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch) {
ASSERT_FALSE(BC->MIB->isReversibleBranch(Overflows));
}
+TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch_Underflows) {
+ if (GetParam() != Triple::aarch64)
+ GTEST_SKIP();
+
+ BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+ BinaryBasicBlock *EntryBB = BF->addBasicBlock();
+ BinaryBasicBlock *FallThroughBB = BF->addBasicBlock();
+ BinaryBasicBlock *TargetBB = BF->addBasicBlock();
+ BF->addEntryPoint(*EntryBB);
+ EntryBB->addSuccessor(TargetBB);
+ EntryBB->addSuccessor(FallThroughBB);
+
+ // Inversion requires expansion, immediate value underflows.
+ // cblt x0, #0, target ~> cmp x0, #0
+ // b.ge target
+ auto I =
+ EntryBB->addInstruction(MCInstBuilder(AArch64::CBLTXri)
+ .addReg(AArch64::X0)
+ .addImm(0)
+ .addExpr(MCSymbolRefExpr::create(
+ TargetBB->getLabel(), *BC->Ctx.get())));
+ BinaryFunctionCallGraph CG(buildCallGraph(*BC.get()));
+ RegAnalysis RA(*BC.get(), &BC->getBinaryFunctions(), &CG);
+ DataflowInfoManager DIM(*BF, &RA, nullptr);
+
+ ASSERT_TRUE(BC->MIB->isReversibleBranch(*I, &DIM));
+ BC->MIB->reverseBranchCondition(EntryBB, *I, TargetBB->getLabel(),
+ BC->Ctx.get());
+ auto R = EntryBB->rbegin();
+ ASSERT_EQ(R->getOpcode(), AArch64::Bcc);
+ ASSERT_EQ(R->getOperand(0).getImm(), AArch64CC::GE);
+ R++;
+ ASSERT_EQ(R->getOpcode(), AArch64::SUBSXri);
+ ASSERT_EQ(R->getOperand(0).getReg(), AArch64::XZR);
+ ASSERT_EQ(R->getOperand(1).getReg(), AArch64::X0);
+ ASSERT_EQ(R->getOperand(2).getImm(), 0);
+ ASSERT_EQ(R->getOperand(3).getImm(), 0);
+}
+
+TEST_P(MCPlusBuilderTester, AArch64_ReverseCompAndBranch_Overflows) {
+ if (GetParam() != Triple::aarch64)
+ GTEST_SKIP();
+
+ BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+ BinaryBasicBlock *EntryBB = BF->addBasicBlock();
+ BinaryBasicBlock *FallThroughBB = BF->addBasicBlock();
+ BinaryBasicBlock *TargetBB = BF->addBasicBlock();
+ BF->addEntryPoint(*EntryBB);
+ EntryBB->addSuccessor(TargetBB);
+ EntryBB->addSuccessor(FallThroughBB);
+
+ // Inversion requires expansion, immediate value overflows.
+ // cbhi w0, #63, target ~> cmp w0, #63
+ // b.ls target
+ auto I =
+ EntryBB->addInstruction(MCInstBuilder(AArch64::CBHIWri)
+ .addReg(AArch64::W0)
+ .addImm(63)
+ .addExpr(MCSymbolRefExpr::create(
+ TargetBB->getLabel(), *BC->Ctx.get())));
+ BinaryFunctionCallGraph CG(buildCallGraph(*BC.get()));
+ RegAnalysis RA(*BC.get(), &BC->getBinaryFunctions(), &CG);
+ DataflowInfoManager DIM(*BF, &RA, nullptr);
+
+ ASSERT_TRUE(BC->MIB->isReversibleBranch(*I, &DIM));
+ BC->MIB->reverseBranchCondition(EntryBB, *I, TargetBB->getLabel(),
+ BC->Ctx.get());
+ auto R = EntryBB->rbegin();
+ ASSERT_EQ(R->getOpcode(), AArch64::Bcc);
+ ASSERT_EQ(R->getOperand(0).getImm(), AArch64CC::LS);
+ R++;
+ ASSERT_EQ(R->getOpcode(), AArch64::SUBSWri);
+ ASSERT_EQ(R->getOperand(0).getReg(), AArch64::WZR);
+ ASSERT_EQ(R->getOperand(1).getReg(), AArch64::W0);
+ ASSERT_EQ(R->getOperand(2).getImm(), 63);
+ ASSERT_EQ(R->getOperand(3).getImm(), 0);
+}
+
+TEST_P(MCPlusBuilderTester, AArch64_IsReversibleBranch_LiveCondFlags) {
+ if (GetParam() != Triple::aarch64)
+ GTEST_SKIP();
+
+ BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+ BinaryBasicBlock *EntryBB = BF->addBasicBlock();
+ BinaryBasicBlock *FallThroughBB = BF->addBasicBlock();
+ BinaryBasicBlock *TargetBB = BF->addBasicBlock();
+ BF->addEntryPoint(*EntryBB);
+ EntryBB->addSuccessor(TargetBB);
+ EntryBB->addSuccessor(FallThroughBB);
+
+ // cmp x0, #63
+ EntryBB->addInstruction(MCInstBuilder(AArch64::SUBSXri)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::X0)
+ .addImm(63)
+ .addImm(0));
+ // cbgt x0, #63, target
+ auto I =
+ EntryBB->addInstruction(MCInstBuilder(AArch64::CBGTXri)
+ .addReg(AArch64::X0)
+ .addImm(63)
+ .addExpr(MCSymbolRefExpr::create(
+ TargetBB->getLabel(), *BC->Ctx.get())));
+ // csel x0, x1, x2, le
+ FallThroughBB->addInstruction(MCInstBuilder(AArch64::CSELXr)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X1)
+ .addReg(AArch64::X2)
+ .addImm(13));
+
+ BinaryFunctionCallGraph CG(buildCallGraph(*BC.get()));
+ RegAnalysis RA(*BC.get(), &BC->getBinaryFunctions(), &CG);
+ DataflowInfoManager DIM(*BF, &RA, nullptr);
+
+ ASSERT_FALSE(BC->MIB->isReversibleBranch(*I, &DIM));
+}
+
TEST_P(MCPlusBuilderTester, AArch64_CmpJE) {
if (GetParam() != Triple::aarch64)
GTEST_SKIP();
More information about the llvm-commits
mailing list