[llvm] [BranchFolding] Fold fallthroughs into conditional tailcalls if profitable (PR #140476)
via llvm-commits
llvm-commits at lists.llvm.org
Sun May 18 13:31:54 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Nabeel Omer (omern1)
This patch makes BranchFolding take branch probability information into
account when creating conditional tail calls.
It also enables folding fallthrough blocks into conditional tail calls
when doing so is profitable.
This should fix https://github.com/llvm/llvm-project/issues/126363.
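
For context, the new profitability check boils down to the decision
sketched below. This is a minimal C++ sketch with hypothetical stand-in
types and names (PredEdge, shouldFold), not the LLVM API; isEdgeHot is
the real MachineBranchProbabilityInfo query the patch uses.

```cpp
#include <cstdio>

// Hypothetical model of the check added in BranchFolder::OptimizeBlock().
struct PredEdge {
  bool AnyHotSuccEdge; // some Pred->successor edge is hot => profile data present
  bool EdgeHot;        // is the Pred->MBB edge itself hot?
  bool IsTaken;        // MBB is the taken target (PredTBB)
  bool IsFallThrough;  // MBB is the fallthrough target
};

bool shouldFold(const PredEdge &E) {
  bool PGOAvailable = E.AnyHotSuccEdge;
  bool EdgeCold = !E.EdgeHot;
  // With profile data, fold only cold edges: folding a hot fallthrough
  // would hide it behind a conditional branch. Without profile data,
  // fold only the taken block, matching the previous behaviour.
  bool CanFoldFallThrough = PGOAvailable && EdgeCold && E.IsFallThrough;
  bool CanFoldTakenBlock = E.IsTaken && (PGOAvailable ? EdgeCold : true);
  return CanFoldTakenBlock || CanFoldFallThrough;
}

int main() {
  PredEdge ColdFallThrough{/*AnyHotSuccEdge=*/true, /*EdgeHot=*/false,
                           /*IsTaken=*/false, /*IsFallThrough=*/true};
  PredEdge HotFallThrough{true, true, false, true};
  std::printf("cold fallthrough: %d\n", shouldFold(ColdFallThrough)); // 1: fold
  std::printf("hot fallthrough:  %d\n", shouldFold(HotFallThrough));  // 0: keep
}
```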
---
Full diff: https://github.com/llvm/llvm-project/pull/140476.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/BranchFolding.cpp (+38-15)
- (modified) llvm/test/CodeGen/X86/conditional-tailcall.ll (+187)
``````````diff
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 6f5afbd2a996a..af2c40005081e 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/BranchFoldingPass.h"
#include "llvm/CodeGen/MBFIWrapper.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -1547,32 +1548,54 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
if (TII->isUnconditionalTailCall(TailCall)) {
SmallVector<MachineBasicBlock *> PredsChanged;
- for (auto &Pred : MBB->predecessors()) {
+ for (auto *Pred : MBB->predecessors()) {
+ bool IsPGOInfoAvailable = false;
+ for (MachineBasicBlock *const PredSucc : Pred->successors()) {
+ IsPGOInfoAvailable |= MBPI.isEdgeHot(Pred, PredSucc);
+ }
+
MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
bool PredAnalyzable =
!TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
- // Only eliminate if MBB == TBB (Taken Basic Block)
- if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB &&
- PredTBB != PredFBB) {
- // The predecessor has a conditional branch to this block which
- // consists of only a tail call. Try to fold the tail call into the
- // conditional branch.
+ bool IsEdgeCold = !MBPI.isEdgeHot(Pred, MBB);
+ bool CanFoldFallThrough =
+ IsPGOInfoAvailable && IsEdgeCold &&
+ (MBB == PredFBB ||
+ (PredFBB == nullptr && Pred->getFallThrough() == MBB));
+ bool CanFoldTakenBlock =
+ (MBB == PredTBB && (IsPGOInfoAvailable ? IsEdgeCold : true));
+
+ // When we have PGO (or equivalent) information, we want to fold the
+ // fallthrough if it's cold. Folding a fallthrough puts it behind a
+ // conditional branch which isn't desirable if it's hot. When there
+ // isn't any PGO information available we want to fold the taken block
+ // if it's possible and we never want to fold the fallthrough as we
+ // don't know if that is desirable.
+ if (PredAnalyzable && !PredCond.empty() && PredTBB != PredFBB &&
+ (CanFoldTakenBlock || CanFoldFallThrough)) {
+ SmallVector<MachineOperand, 4> ReversedCond(PredCond);
+ if (CanFoldFallThrough) {
+ DebugLoc Dl = MBB->findBranchDebugLoc();
+ TII->reverseBranchCondition(ReversedCond);
+ TII->removeBranch(*Pred);
+ TII->insertBranch(*Pred, MBB, PredTBB, ReversedCond, Dl);
+ }
+
+ PredAnalyzable =
+ !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
+
if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
- // TODO: It would be nice if analyzeBranch() could provide a pointer
- // to the branch instruction so replaceBranchWithTailCall() doesn't
- // have to search for it.
+ // TODO: It would be nice if analyzeBranch() could provide a
+ // pointer to the branch instruction so
+ // replaceBranchWithTailCall() doesn't have to search for it.
TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
PredsChanged.push_back(Pred);
}
}
- // If the predecessor is falling through to this block, we could reverse
- // the branch condition and fold the tail call into that. However, after
- // that we might have to re-arrange the CFG to fall through to the other
- // block and there is a high risk of regressing code size rather than
- // improving it.
}
+
if (!PredsChanged.empty()) {
NumTailCalls += PredsChanged.size();
for (auto &Pred : PredsChanged)
diff --git a/llvm/test/CodeGen/X86/conditional-tailcall.ll b/llvm/test/CodeGen/X86/conditional-tailcall.ll
index 4c990d81810be..b851840e167da 100644
--- a/llvm/test/CodeGen/X86/conditional-tailcall.ll
+++ b/llvm/test/CodeGen/X86/conditional-tailcall.ll
@@ -597,3 +597,190 @@ cleanup.thread: ; preds = %cleanup.thread.loop
%6 = phi i1 [ %cmp37, %5 ], [ %call34, %if.else28 ], [ false, %cleanup.thread.loopexit ]
ret i1 %6
}
+
+define void @true_likely(i1 noundef zeroext %0) {
+; CHECK32-LABEL: true_likely:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x04,0x00]
+; CHECK32-NEXT: je func_false # TAILCALL
+; CHECK32-NEXT: # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.1:
+; CHECK32-NEXT: jmp func_true # TAILCALL
+; CHECK32-NEXT: # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: true_likely:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
+; CHECK64-NEXT: je func_false # TAILCALL
+; CHECK64-NEXT: # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.1:
+; CHECK64-NEXT: jmp func_true # TAILCALL
+; CHECK64-NEXT: # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: true_likely:
+; WIN64: # %bb.0:
+; WIN64-NEXT: testb %cl, %cl # encoding: [0x84,0xc9]
+; WIN64-NEXT: je func_false # TAILCALL
+; WIN64-NEXT: # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.1:
+; WIN64-NEXT: jmp func_true # TAILCALL
+; WIN64-NEXT: # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+ br i1 %0, label %2, label %3, !prof !6
+
+2:
+ tail call void @func_true()
+ br label %4
+
+3:
+ tail call void @func_false()
+ br label %4
+
+4:
+ ret void
+}
+
+define void @false_likely(i1 noundef zeroext %0) {
+; CHECK32-LABEL: false_likely:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x04,0x00]
+; CHECK32-NEXT: jne func_true # TAILCALL
+; CHECK32-NEXT: # encoding: [0x75,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.1:
+; CHECK32-NEXT: jmp func_false # TAILCALL
+; CHECK32-NEXT: # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: false_likely:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
+; CHECK64-NEXT: jne func_true # TAILCALL
+; CHECK64-NEXT: # encoding: [0x75,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.1:
+; CHECK64-NEXT: jmp func_false # TAILCALL
+; CHECK64-NEXT: # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: false_likely:
+; WIN64: # %bb.0:
+; WIN64-NEXT: testb %cl, %cl # encoding: [0x84,0xc9]
+; WIN64-NEXT: jne func_true # TAILCALL
+; WIN64-NEXT: # encoding: [0x75,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.1:
+; WIN64-NEXT: jmp func_false # TAILCALL
+; WIN64-NEXT: # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+ br i1 %0, label %2, label %3, !prof !7
+
+2:
+ tail call void @func_true()
+ br label %4
+
+3:
+ tail call void @func_false()
+ br label %4
+
+4:
+ ret void
+}
+
+
+define void @edge_is_hot_but_not_fallthrough(i1 noundef zeroext %0) {
+; CHECK32-LABEL: edge_is_hot_but_not_fallthrough:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: subl $12, %esp # encoding: [0x83,0xec,0x0c]
+; CHECK32-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-NEXT: cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x10,0x00]
+; CHECK32-NEXT: je .LBB6_1 # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.3:
+; CHECK32-NEXT: addl $12, %esp # encoding: [0x83,0xc4,0x0c]
+; CHECK32-NEXT: .cfi_def_cfa_offset 4
+; CHECK32-NEXT: retl # encoding: [0xc3]
+; CHECK32-NEXT: .LBB6_1:
+; CHECK32-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-NEXT: calll func_true # encoding: [0xe8,A,A,A,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_true-4, kind: FK_PCRel_4
+; CHECK32-NEXT: .p2align 4
+; CHECK32-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1
+; CHECK32-NEXT: calll func_false # encoding: [0xe8,A,A,A,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_false-4, kind: FK_PCRel_4
+; CHECK32-NEXT: jmp .LBB6_2 # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: edge_is_hot_but_not_fallthrough:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: pushq %rax # encoding: [0x50]
+; CHECK64-NEXT: .cfi_def_cfa_offset 16
+; CHECK64-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
+; CHECK64-NEXT: je .LBB6_1 # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.3:
+; CHECK64-NEXT: popq %rax # encoding: [0x58]
+; CHECK64-NEXT: .cfi_def_cfa_offset 8
+; CHECK64-NEXT: retq # encoding: [0xc3]
+; CHECK64-NEXT: .LBB6_1:
+; CHECK64-NEXT: .cfi_def_cfa_offset 16
+; CHECK64-NEXT: callq func_true # encoding: [0xe8,A,A,A,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_true-4, kind: reloc_branch_4byte_pcrel
+; CHECK64-NEXT: .p2align 4
+; CHECK64-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1
+; CHECK64-NEXT: callq func_false # encoding: [0xe8,A,A,A,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_false-4, kind: reloc_branch_4byte_pcrel
+; CHECK64-NEXT: jmp .LBB6_2 # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: edge_is_hot_but_not_fallthrough:
+; WIN64: # %bb.0:
+; WIN64-NEXT: subq $40, %rsp # encoding: [0x48,0x83,0xec,0x28]
+; WIN64-NEXT: .seh_stackalloc 40
+; WIN64-NEXT: .seh_endprologue
+; WIN64-NEXT: testb %cl, %cl # encoding: [0x84,0xc9]
+; WIN64-NEXT: je .LBB6_1 # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.3:
+; WIN64-NEXT: .seh_startepilogue
+; WIN64-NEXT: addq $40, %rsp # encoding: [0x48,0x83,0xc4,0x28]
+; WIN64-NEXT: .seh_endepilogue
+; WIN64-NEXT: retq # encoding: [0xc3]
+; WIN64-NEXT: .LBB6_1:
+; WIN64-NEXT: callq func_true # encoding: [0xe8,A,A,A,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_true-4, kind: reloc_branch_4byte_pcrel
+; WIN64-NEXT: .p2align 4
+; WIN64-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1
+; WIN64-NEXT: callq func_false # encoding: [0xe8,A,A,A,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_false-4, kind: reloc_branch_4byte_pcrel
+; WIN64-NEXT: jmp .LBB6_2 # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
+; WIN64-NEXT: .seh_endproc
+ br i1 %0, label %2, label %3, !prof !6
+2:
+ %and6 = and i1 %0, 1
+ br label %5
+
+3:
+ tail call void @func_true()
+ br label %4
+
+4:
+ tail call void @func_false()
+ br label %4
+
+5:
+ ret void
+}
+
+!6 = !{!"branch_weights", !"expected", i32 2000, i32 1}
+!7 = !{!"branch_weights", !"expected", i32 1, i32 2000}
+
+
+declare dso_local void @func_true()
+declare dso_local void @func_false()
``````````
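For readers skimming the diff: the fallthrough case works by reversing
the predecessor's branch condition so that the tail-call block becomes
the taken target, after which the pre-existing taken-block path performs
the replacement. A toy C++ sketch of that rewrite follows; PredBlock,
reversed and foldFallThrough are hypothetical stand-ins, while the real
hooks are TII->reverseBranchCondition(), removeBranch(), insertBranch()
and replaceBranchWithTailCall().

```cpp
#include <cassert>
#include <string>
#include <utility>

// Toy model: Pred ends in "jCC Taken" and falls through to Next, where
// Next holds an unconditional tail call.
struct PredBlock {
  std::string Cond;  // branch condition, e.g. "ne"
  std::string Taken; // conditional target (PredTBB)
  std::string Next;  // fallthrough target (MBB with the tail call)
};

std::string reversed(const std::string &C) { return C == "ne" ? "e" : "ne"; }

// Reverse the condition and swap targets so the tail-call block becomes
// the taken target, then emit the conditional tail call in its place.
std::string foldFallThrough(PredBlock &P, const std::string &Callee) {
  P.Cond = reversed(P.Cond);
  std::swap(P.Taken, P.Next);
  return "j" + P.Cond + " " + Callee + " # TAILCALL";
}

int main() {
  // Before: "jne hot_block", falling through to a cold tail-call block.
  PredBlock P{"ne", "hot_block", "cold_tailcall"};
  assert(foldFallThrough(P, "func_false") == "je func_false # TAILCALL");
  assert(P.Next == "hot_block"); // the hot block now falls through
}
```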
https://github.com/llvm/llvm-project/pull/140476