[llvm] Add option for two-way branch optimization. (PR #161419)
Rahman Lavaee via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 30 11:40:15 PDT 2025
https://github.com/rlavaee created https://github.com/llvm/llvm-project/pull/161419
Our internal experiments show that in highly-optimized code, reversing the current compiler strategy for two-way branches can be beneficial (neutral to 0.2% win). Specifically, if we form a fallthrough (through the subsequent jmp) to the most likely successor, it can benefit static branch prediction since branches are initially assumed not-taken by most modern processors. This is especially important for binaries with split functions where the function is split into multiple code regions and speculative wrong predictions can incur high iTLB and icache misses.
Though our experiments are still ongoing (specifically to analyze the impact on ARM and different types of PGO), we want to support controlling the optimization via a flag `two-way-branch-opt`, which takes one of three values: none, hot-fallthrough, and cold-fallthrough. The current compiler strategy is cold-fallthrough and will remain intact.
>From 22e299d0f0a6b87a00b1b156ce9f11e651fd2f10 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Fri, 26 Sep 2025 21:23:32 +0000
Subject: [PATCH] Add option for two-way branch layout.
---
llvm/lib/CodeGen/MachineBlockPlacement.cpp | 45 ++++++++++--
.../X86/code_placement_2_way_branch.ll | 70 +++++++++++++++++++
2 files changed, 111 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/code_placement_2_way_branch.ll
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index e9c75f0753f89..d0b5f5145f384 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -153,6 +153,36 @@ static cl::opt<unsigned> MisfetchCost(
static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
cl::desc("Cost of jump instructions."),
cl::init(1), cl::Hidden);
+
+// This enum controls how to optimize two-way branches (a conditional branch
+// immediately followed by an unconditional one). The goal is to optimize for
+// branch prediction and instruction cache efficiency.
+enum class TwoWayBranchOptStrategy {
+ // Do not reverse the condition. Leave the branch code as is.
+ None,
+ // For a two-way branch, make the hot path the fallthrough path. This is more
+ // friendly to static branch prediction (predict not-taken).
+ HotPathFallthrough,
+ // For a two-way branch, make the cold path the fallthrough path. This
+ // improves i-cache efficiency as the unconditional branch is fetched
+ // less often.
+ ColdPathFallthrough
+};
+
+static cl::opt<TwoWayBranchOptStrategy> TwoWayBranchOpt(
+ "two-way-branch-opt", cl::Hidden,
+ cl::desc("Select the optimization strategy for two-way conditional branches:"),
+ cl::values(
+ clEnumValN(TwoWayBranchOptStrategy::None, "none",
+ "Avoid optimizing the two-way branches."),
+ clEnumValN(
+ TwoWayBranchOptStrategy::HotPathFallthrough, "hot-fallthrough",
+ "Make the hot path the fallthrough path for two-way branches"),
+ clEnumValN(
+ TwoWayBranchOptStrategy::ColdPathFallthrough, "cold-fallthrough",
+ "Make the cold path the fallthrough path for two-way branches")),
+ cl::init(TwoWayBranchOptStrategy::ColdPathFallthrough));
+
static cl::opt<bool>
TailDupPlacement("tail-dup-placement",
cl::desc("Perform tail duplication during placement. "
@@ -2979,10 +3009,17 @@ void MachineBlockPlacement::optimizeBranches() {
// instructions which will benefit ICF.
if (llvm::shouldOptimizeForSize(ChainBB, PSI, MBFI.get()))
continue;
- // If ChainBB has a two-way branch, try to re-order the branches
- // such that we branch to the successor with higher probability first.
- if (MBPI->getEdgeProbability(ChainBB, TBB) >=
- MBPI->getEdgeProbability(ChainBB, FBB))
+ // ChainBB has a two-way branch. Reorder the branch based on
+ // `-two-way-branch-opt`.
+ auto TBBProb = MBPI->getEdgeProbability(ChainBB, TBB);
+ auto FBBProb = MBPI->getEdgeProbability(ChainBB, FBB);
+ bool ReverseBranch =
+ (TwoWayBranchOpt ==
+ TwoWayBranchOptStrategy::ColdPathFallthrough &&
+ (FBBProb > TBBProb)) ||
+ (TwoWayBranchOpt == TwoWayBranchOptStrategy::HotPathFallthrough &&
+ (TBBProb > FBBProb));
+ if (!ReverseBranch)
continue;
if (TII->reverseBranchCondition(Cond))
continue;
diff --git a/llvm/test/CodeGen/X86/code_placement_2_way_branch.ll b/llvm/test/CodeGen/X86/code_placement_2_way_branch.ll
new file mode 100644
index 0000000000000..3afa793e71ec7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/code_placement_2_way_branch.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mtriple=x86_64-linux -verify-machineinstrs -two-way-branch-opt=cold-fallthrough < %s | FileCheck %s --check-prefixes=CHECK,COLD-FT
+; RUN: llc -mtriple=x86_64-linux -verify-machineinstrs -two-way-branch-opt=none < %s | FileCheck %s --check-prefixes=CHECK,COLD-FT
+; RUN: llc -mtriple=x86_64-linux -verify-machineinstrs -two-way-branch-opt=hot-fallthrough < %s | FileCheck %s --check-prefixes=CHECK,HOT-FT
+
+define void @foo() !prof !1 {
+; Test that two-way branches are optimized based on `-two-way-branch-opt`.
+;
+; +--------+ 5 +--------+
+; | if.then| <---- | entry |
+; +--------+ +--------+
+; | | |
+; | | | 10
+; | | v
+; | | +--------+
+; | | | if.else|
+; | | +--------+
+; | | |
+; | | | 10
+; | | v
+; | | 4 +--------+
+; | +---------> | if.end |
+; | +--------+
+; | |
+; | | 14
+; | v
+; | 1 +--------+
+; +------------> | end |
+; +--------+
+;
+; CHECK-LABEL: foo:
+; CHECK: if.else
+; CHECK: .LBB0_3: # %if.end
+; CHECK: .LBB0_4: # %end
+; CHECK: if.then
+; COLD-FT: jne .LBB0_3
+; HOT-FT: je .LBB0_4
+; COLD-FT: jmp .LBB0_4
+; HOT-FT: jmp .LBB0_3
+
+entry:
+ call void @e()
+ %call1 = call zeroext i1 @a()
+ br i1 %call1, label %if.then, label %if.else, !prof !2
+
+if.then:
+ call void @f()
+ %call2 = call zeroext i1 @a()
+ br i1 %call2, label %if.end, label %end, !prof !3
+
+if.else:
+ call void @g()
+ br label %if.end
+
+if.end:
+ call void @h()
+ br label %end
+
+end:
+ ret void
+}
+
+declare zeroext i1 @a()
+declare void @e()
+declare void @g()
+declare void @f()
+declare void @h()
+
+!1 = !{!"function_entry_count", i64 15}
+!2 = !{!"branch_weights", i32 5, i32 10}
+!3 = !{!"branch_weights", i32 4, i32 1}
More information about the llvm-commits
mailing list