[llvm] [BranchFolding] Fold fallthroughs into conditional tailcalls if profitable (PR #140476)
Nabeel Omer via llvm-commits
llvm-commits at lists.llvm.org
Mon May 19 05:30:03 PDT 2025
https://github.com/omern1 updated https://github.com/llvm/llvm-project/pull/140476
>From 8948b7244be63d32b3f3fae19160672e094f2014 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Fri, 16 May 2025 16:24:04 +0100
Subject: [PATCH 1/5] [BranchFolding] Fold fallthroughs into conditional
tailcalls if profitable
This patch makes BranchFolding take branch probability information
(MBPI.isEdgeHot) into account when creating conditional tailcalls.
It also enables folding fallthrough blocks into conditional tailcalls
when that's profitable, i.e. when the predecessor has a hot successor
edge and its edge to the fallthrough block is cold.
This should fix #126363.
---
llvm/lib/CodeGen/BranchFolding.cpp | 53 +++--
llvm/test/CodeGen/X86/conditional-tailcall.ll | 187 ++++++++++++++++++
2 files changed, 225 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 6f5afbd2a996a..af2c40005081e 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/BranchFoldingPass.h"
#include "llvm/CodeGen/MBFIWrapper.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -1547,32 +1548,54 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
if (TII->isUnconditionalTailCall(TailCall)) {
SmallVector<MachineBasicBlock *> PredsChanged;
- for (auto &Pred : MBB->predecessors()) {
+ for (auto *Pred : MBB->predecessors()) {
+ bool IsPGOInfoAvailable = false;
+ for (MachineBasicBlock *const PredSucc : Pred->successors()) {
+ IsPGOInfoAvailable |= MBPI.isEdgeHot(Pred, PredSucc);
+ }
+
MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
bool PredAnalyzable =
!TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
- // Only eliminate if MBB == TBB (Taken Basic Block)
- if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB &&
- PredTBB != PredFBB) {
- // The predecessor has a conditional branch to this block which
- // consists of only a tail call. Try to fold the tail call into the
- // conditional branch.
+ bool IsEdgeCold = !MBPI.isEdgeHot(Pred, MBB);
+ bool CanFoldFallThrough =
+ IsPGOInfoAvailable && IsEdgeCold &&
+ (MBB == PredFBB ||
+ (PredFBB == nullptr && Pred->getFallThrough() == MBB));
+ bool CanFoldTakenBlock =
+ (MBB == PredTBB && (IsPGOInfoAvailable ? IsEdgeCold : true));
+
+ // When we have PGO (or equivalent) information, we want to fold the
+ // fallthrough if it's cold. Folding a fallthrough puts it behind a
+ // conditional branch which isn't desirable if it's hot. When there
+ // isn't any PGO information available we want to fold the taken block
+ // if it's possible and we never want to fold the fallthrough as we
+ // don't know if that is desirable.
+ if (PredAnalyzable && !PredCond.empty() && PredTBB != PredFBB &&
+ (CanFoldTakenBlock || CanFoldFallThrough)) {
+ SmallVector<MachineOperand, 4> ReversedCond(PredCond);
+ if (CanFoldFallThrough) {
+ DebugLoc Dl = MBB->findBranchDebugLoc();
+ TII->reverseBranchCondition(ReversedCond);
+ TII->removeBranch(*Pred);
+ TII->insertBranch(*Pred, MBB, PredTBB, ReversedCond, Dl);
+ }
+
+ PredAnalyzable =
+ !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
+
if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
- // TODO: It would be nice if analyzeBranch() could provide a pointer
- // to the branch instruction so replaceBranchWithTailCall() doesn't
- // have to search for it.
+ // TODO: It would be nice if analyzeBranch() could provide a
+ // pointer to the branch instruction so
+ // replaceBranchWithTailCall() doesn't have to search for it.
TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
PredsChanged.push_back(Pred);
}
}
- // If the predecessor is falling through to this block, we could reverse
- // the branch condition and fold the tail call into that. However, after
- // that we might have to re-arrange the CFG to fall through to the other
- // block and there is a high risk of regressing code size rather than
- // improving it.
}
+
if (!PredsChanged.empty()) {
NumTailCalls += PredsChanged.size();
for (auto &Pred : PredsChanged)
diff --git a/llvm/test/CodeGen/X86/conditional-tailcall.ll b/llvm/test/CodeGen/X86/conditional-tailcall.ll
index 4c990d81810be..b851840e167da 100644
--- a/llvm/test/CodeGen/X86/conditional-tailcall.ll
+++ b/llvm/test/CodeGen/X86/conditional-tailcall.ll
@@ -597,3 +597,190 @@ cleanup.thread: ; preds = %cleanup.thread.loop
%6 = phi i1 [ %cmp37, %5 ], [ %call34, %if.else28 ], [ false, %cleanup.thread.loopexit ]
ret i1 %6
}
+
+define void @true_likely(i1 noundef zeroext %0) {
+; CHECK32-LABEL: true_likely:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x04,0x00]
+; CHECK32-NEXT: je func_false # TAILCALL
+; CHECK32-NEXT: # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.1:
+; CHECK32-NEXT: jmp func_true # TAILCALL
+; CHECK32-NEXT: # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: true_likely:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
+; CHECK64-NEXT: je func_false # TAILCALL
+; CHECK64-NEXT: # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.1:
+; CHECK64-NEXT: jmp func_true # TAILCALL
+; CHECK64-NEXT: # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: true_likely:
+; WIN64: # %bb.0:
+; WIN64-NEXT: testb %cl, %cl # encoding: [0x84,0xc9]
+; WIN64-NEXT: je func_false # TAILCALL
+; WIN64-NEXT: # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.1:
+; WIN64-NEXT: jmp func_true # TAILCALL
+; WIN64-NEXT: # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+ br i1 %0, label %2, label %3, !prof !6
+
+2:
+ tail call void @func_true()
+ br label %4
+
+3:
+ tail call void @func_false()
+ br label %4
+
+4:
+ ret void
+}
+
+define void @false_likely(i1 noundef zeroext %0) {
+; CHECK32-LABEL: false_likely:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x04,0x00]
+; CHECK32-NEXT: jne func_true # TAILCALL
+; CHECK32-NEXT: # encoding: [0x75,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.1:
+; CHECK32-NEXT: jmp func_false # TAILCALL
+; CHECK32-NEXT: # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: false_likely:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
+; CHECK64-NEXT: jne func_true # TAILCALL
+; CHECK64-NEXT: # encoding: [0x75,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.1:
+; CHECK64-NEXT: jmp func_false # TAILCALL
+; CHECK64-NEXT: # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: false_likely:
+; WIN64: # %bb.0:
+; WIN64-NEXT: testb %cl, %cl # encoding: [0x84,0xc9]
+; WIN64-NEXT: jne func_true # TAILCALL
+; WIN64-NEXT: # encoding: [0x75,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.1:
+; WIN64-NEXT: jmp func_false # TAILCALL
+; WIN64-NEXT: # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+ br i1 %0, label %2, label %3, !prof !7
+
+2:
+ tail call void @func_true()
+ br label %4
+
+3:
+ tail call void @func_false()
+ br label %4
+
+4:
+ ret void
+}
+
+
+define void @edge_is_hot_but_not_fallthrough(i1 noundef zeroext %0) {
+; CHECK32-LABEL: edge_is_hot_but_not_fallthrough:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: subl $12, %esp # encoding: [0x83,0xec,0x0c]
+; CHECK32-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-NEXT: cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x10,0x00]
+; CHECK32-NEXT: je .LBB6_1 # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.3:
+; CHECK32-NEXT: addl $12, %esp # encoding: [0x83,0xc4,0x0c]
+; CHECK32-NEXT: .cfi_def_cfa_offset 4
+; CHECK32-NEXT: retl # encoding: [0xc3]
+; CHECK32-NEXT: .LBB6_1:
+; CHECK32-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-NEXT: calll func_true # encoding: [0xe8,A,A,A,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_true-4, kind: FK_PCRel_4
+; CHECK32-NEXT: .p2align 4
+; CHECK32-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1
+; CHECK32-NEXT: calll func_false # encoding: [0xe8,A,A,A,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_false-4, kind: FK_PCRel_4
+; CHECK32-NEXT: jmp .LBB6_2 # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: edge_is_hot_but_not_fallthrough:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: pushq %rax # encoding: [0x50]
+; CHECK64-NEXT: .cfi_def_cfa_offset 16
+; CHECK64-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
+; CHECK64-NEXT: je .LBB6_1 # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.3:
+; CHECK64-NEXT: popq %rax # encoding: [0x58]
+; CHECK64-NEXT: .cfi_def_cfa_offset 8
+; CHECK64-NEXT: retq # encoding: [0xc3]
+; CHECK64-NEXT: .LBB6_1:
+; CHECK64-NEXT: .cfi_def_cfa_offset 16
+; CHECK64-NEXT: callq func_true # encoding: [0xe8,A,A,A,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_true-4, kind: reloc_branch_4byte_pcrel
+; CHECK64-NEXT: .p2align 4
+; CHECK64-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1
+; CHECK64-NEXT: callq func_false # encoding: [0xe8,A,A,A,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_false-4, kind: reloc_branch_4byte_pcrel
+; CHECK64-NEXT: jmp .LBB6_2 # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: edge_is_hot_but_not_fallthrough:
+; WIN64: # %bb.0:
+; WIN64-NEXT: subq $40, %rsp # encoding: [0x48,0x83,0xec,0x28]
+; WIN64-NEXT: .seh_stackalloc 40
+; WIN64-NEXT: .seh_endprologue
+; WIN64-NEXT: testb %cl, %cl # encoding: [0x84,0xc9]
+; WIN64-NEXT: je .LBB6_1 # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.3:
+; WIN64-NEXT: .seh_startepilogue
+; WIN64-NEXT: addq $40, %rsp # encoding: [0x48,0x83,0xc4,0x28]
+; WIN64-NEXT: .seh_endepilogue
+; WIN64-NEXT: retq # encoding: [0xc3]
+; WIN64-NEXT: .LBB6_1:
+; WIN64-NEXT: callq func_true # encoding: [0xe8,A,A,A,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_true-4, kind: reloc_branch_4byte_pcrel
+; WIN64-NEXT: .p2align 4
+; WIN64-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1
+; WIN64-NEXT: callq func_false # encoding: [0xe8,A,A,A,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_false-4, kind: reloc_branch_4byte_pcrel
+; WIN64-NEXT: jmp .LBB6_2 # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
+; WIN64-NEXT: .seh_endproc
+ br i1 %0, label %2, label %3, !prof !6
+2:
+ %and6 = and i1 %0, 1
+ br label %5
+
+3:
+ tail call void @func_true()
+ br label %4
+
+4:
+ tail call void @func_false()
+ br label %4
+
+5:
+ ret void
+}
+
+!6 = !{!"branch_weights", !"expected", i32 2000, i32 1}
+!7 = !{!"branch_weights", !"expected", i32 1, i32 2000}
+
+
+declare dso_local void @func_true()
+declare dso_local void @func_false()
>From cd4084f4885b5e0f60c4af1ce8b6d28724188833 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Sun, 18 May 2025 21:34:48 +0100
Subject: [PATCH 2/5] Fix formatting
---
llvm/lib/CodeGen/BranchFolding.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index af2c40005081e..17c6fd095e687 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1582,7 +1582,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
TII->removeBranch(*Pred);
TII->insertBranch(*Pred, MBB, PredTBB, ReversedCond, Dl);
}
-
+
PredAnalyzable =
!TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
>From 9b7c90a7c13da2d334da5a12e990c102ee2c4a8e Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Sun, 18 May 2025 21:37:29 +0100
Subject: [PATCH 3/5] Update tests
---
llvm/test/CodeGen/X86/conditional-tailcall.ll | 86 -------------------
1 file changed, 86 deletions(-)
diff --git a/llvm/test/CodeGen/X86/conditional-tailcall.ll b/llvm/test/CodeGen/X86/conditional-tailcall.ll
index b851840e167da..f477495ecf37b 100644
--- a/llvm/test/CodeGen/X86/conditional-tailcall.ll
+++ b/llvm/test/CodeGen/X86/conditional-tailcall.ll
@@ -692,92 +692,6 @@ define void @false_likely(i1 noundef zeroext %0) {
ret void
}
-
-define void @edge_is_hot_but_not_fallthrough(i1 noundef zeroext %0) {
-; CHECK32-LABEL: edge_is_hot_but_not_fallthrough:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: subl $12, %esp # encoding: [0x83,0xec,0x0c]
-; CHECK32-NEXT: .cfi_def_cfa_offset 16
-; CHECK32-NEXT: cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x10,0x00]
-; CHECK32-NEXT: je .LBB6_1 # encoding: [0x74,A]
-; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
-; CHECK32-NEXT: # %bb.3:
-; CHECK32-NEXT: addl $12, %esp # encoding: [0x83,0xc4,0x0c]
-; CHECK32-NEXT: .cfi_def_cfa_offset 4
-; CHECK32-NEXT: retl # encoding: [0xc3]
-; CHECK32-NEXT: .LBB6_1:
-; CHECK32-NEXT: .cfi_def_cfa_offset 16
-; CHECK32-NEXT: calll func_true # encoding: [0xe8,A,A,A,A]
-; CHECK32-NEXT: # fixup A - offset: 1, value: func_true-4, kind: FK_PCRel_4
-; CHECK32-NEXT: .p2align 4
-; CHECK32-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1
-; CHECK32-NEXT: calll func_false # encoding: [0xe8,A,A,A,A]
-; CHECK32-NEXT: # fixup A - offset: 1, value: func_false-4, kind: FK_PCRel_4
-; CHECK32-NEXT: jmp .LBB6_2 # encoding: [0xeb,A]
-; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
-;
-; CHECK64-LABEL: edge_is_hot_but_not_fallthrough:
-; CHECK64: # %bb.0:
-; CHECK64-NEXT: pushq %rax # encoding: [0x50]
-; CHECK64-NEXT: .cfi_def_cfa_offset 16
-; CHECK64-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
-; CHECK64-NEXT: je .LBB6_1 # encoding: [0x74,A]
-; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
-; CHECK64-NEXT: # %bb.3:
-; CHECK64-NEXT: popq %rax # encoding: [0x58]
-; CHECK64-NEXT: .cfi_def_cfa_offset 8
-; CHECK64-NEXT: retq # encoding: [0xc3]
-; CHECK64-NEXT: .LBB6_1:
-; CHECK64-NEXT: .cfi_def_cfa_offset 16
-; CHECK64-NEXT: callq func_true # encoding: [0xe8,A,A,A,A]
-; CHECK64-NEXT: # fixup A - offset: 1, value: func_true-4, kind: reloc_branch_4byte_pcrel
-; CHECK64-NEXT: .p2align 4
-; CHECK64-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1
-; CHECK64-NEXT: callq func_false # encoding: [0xe8,A,A,A,A]
-; CHECK64-NEXT: # fixup A - offset: 1, value: func_false-4, kind: reloc_branch_4byte_pcrel
-; CHECK64-NEXT: jmp .LBB6_2 # encoding: [0xeb,A]
-; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
-;
-; WIN64-LABEL: edge_is_hot_but_not_fallthrough:
-; WIN64: # %bb.0:
-; WIN64-NEXT: subq $40, %rsp # encoding: [0x48,0x83,0xec,0x28]
-; WIN64-NEXT: .seh_stackalloc 40
-; WIN64-NEXT: .seh_endprologue
-; WIN64-NEXT: testb %cl, %cl # encoding: [0x84,0xc9]
-; WIN64-NEXT: je .LBB6_1 # encoding: [0x74,A]
-; WIN64-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
-; WIN64-NEXT: # %bb.3:
-; WIN64-NEXT: .seh_startepilogue
-; WIN64-NEXT: addq $40, %rsp # encoding: [0x48,0x83,0xc4,0x28]
-; WIN64-NEXT: .seh_endepilogue
-; WIN64-NEXT: retq # encoding: [0xc3]
-; WIN64-NEXT: .LBB6_1:
-; WIN64-NEXT: callq func_true # encoding: [0xe8,A,A,A,A]
-; WIN64-NEXT: # fixup A - offset: 1, value: func_true-4, kind: reloc_branch_4byte_pcrel
-; WIN64-NEXT: .p2align 4
-; WIN64-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1
-; WIN64-NEXT: callq func_false # encoding: [0xe8,A,A,A,A]
-; WIN64-NEXT: # fixup A - offset: 1, value: func_false-4, kind: reloc_branch_4byte_pcrel
-; WIN64-NEXT: jmp .LBB6_2 # encoding: [0xeb,A]
-; WIN64-NEXT: # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
-; WIN64-NEXT: .seh_endproc
- br i1 %0, label %2, label %3, !prof !6
-2:
- %and6 = and i1 %0, 1
- br label %5
-
-3:
- tail call void @func_true()
- br label %4
-
-4:
- tail call void @func_false()
- br label %4
-
-5:
- ret void
-}
-
!6 = !{!"branch_weights", !"expected", i32 2000, i32 1}
!7 = !{!"branch_weights", !"expected", i32 1, i32 2000}
>From bcf809d779a036fe863b1e64414f4f25a9752ccf Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Mon, 19 May 2025 13:29:33 +0100
Subject: [PATCH 4/5] Update name
---
llvm/lib/CodeGen/BranchFolding.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 17c6fd095e687..cea460043310f 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1549,9 +1549,9 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
if (TII->isUnconditionalTailCall(TailCall)) {
SmallVector<MachineBasicBlock *> PredsChanged;
for (auto *Pred : MBB->predecessors()) {
- bool IsPGOInfoAvailable = false;
+ bool PredHasHotSuccessor = false;
for (MachineBasicBlock *const PredSucc : Pred->successors()) {
- IsPGOInfoAvailable |= MBPI.isEdgeHot(Pred, PredSucc);
+ PredHasHotSuccessor |= MBPI.isEdgeHot(Pred, PredSucc);
}
MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
@@ -1561,11 +1561,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool IsEdgeCold = !MBPI.isEdgeHot(Pred, MBB);
bool CanFoldFallThrough =
- IsPGOInfoAvailable && IsEdgeCold &&
+ PredHasHotSuccessor && IsEdgeCold &&
(MBB == PredFBB ||
(PredFBB == nullptr && Pred->getFallThrough() == MBB));
bool CanFoldTakenBlock =
- (MBB == PredTBB && (IsPGOInfoAvailable ? IsEdgeCold : true));
+ (MBB == PredTBB && (PredHasHotSuccessor ? IsEdgeCold : true));
// When we have PGO (or equivalent) information, we want to fold the
// fallthrough if it's cold. Folding a fallthrough puts it behind a
>From 028746447d3ff2090d09c27c91e98d482c71f86f Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Mon, 19 May 2025 13:29:50 +0100
Subject: [PATCH 5/5] Update tests
---
llvm/test/CodeGen/X86/segmented-stacks.ll | 60 +++++++++++++++--------
llvm/test/CodeGen/X86/switch-bt.ll | 8 ++-
2 files changed, 46 insertions(+), 22 deletions(-)
diff --git a/llvm/test/CodeGen/X86/segmented-stacks.ll b/llvm/test/CodeGen/X86/segmented-stacks.ll
index f8627ff56a1f9..cd406385d6d68 100644
--- a/llvm/test/CodeGen/X86/segmented-stacks.ll
+++ b/llvm/test/CodeGen/X86/segmented-stacks.ll
@@ -1732,8 +1732,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X86-Linux-LABEL: test_sibling_call_empty_frame:
; X86-Linux: # %bb.0:
; X86-Linux-NEXT: cmpl %gs:48, %esp
-; X86-Linux-NEXT: ja callee at PLT # TAILCALL
-; X86-Linux-NEXT: # %bb.1:
+; X86-Linux-NEXT: jbe .LBB8_1
+; X86-Linux-NEXT: # %bb.2:
+; X86-Linux-NEXT: jmp callee at PLT # TAILCALL
+; X86-Linux-NEXT: .LBB8_1:
; X86-Linux-NEXT: pushl $4
; X86-Linux-NEXT: pushl $0
; X86-Linux-NEXT: calll __morestack
@@ -1743,8 +1745,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X64-Linux-LABEL: test_sibling_call_empty_frame:
; X64-Linux: # %bb.0:
; X64-Linux-NEXT: cmpq %fs:112, %rsp
-; X64-Linux-NEXT: ja callee at PLT # TAILCALL
-; X64-Linux-NEXT: # %bb.1:
+; X64-Linux-NEXT: jbe .LBB8_1
+; X64-Linux-NEXT: # %bb.2:
+; X64-Linux-NEXT: jmp callee at PLT # TAILCALL
+; X64-Linux-NEXT: .LBB8_1:
; X64-Linux-NEXT: movl $0, %r10d
; X64-Linux-NEXT: movl $0, %r11d
; X64-Linux-NEXT: callq __morestack
@@ -1769,8 +1773,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X32ABI-LABEL: test_sibling_call_empty_frame:
; X32ABI: # %bb.0:
; X32ABI-NEXT: cmpl %fs:64, %esp
-; X32ABI-NEXT: ja callee at PLT # TAILCALL
-; X32ABI-NEXT: # %bb.1:
+; X32ABI-NEXT: jbe .LBB8_1
+; X32ABI-NEXT: # %bb.2:
+; X32ABI-NEXT: jmp callee at PLT # TAILCALL
+; X32ABI-NEXT: .LBB8_1:
; X32ABI-NEXT: movl $0, %r10d
; X32ABI-NEXT: movl $0, %r11d
; X32ABI-NEXT: callq __morestack
@@ -1781,8 +1787,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X86-Darwin: ## %bb.0:
; X86-Darwin-NEXT: movl $432, %ecx ## imm = 0x1B0
; X86-Darwin-NEXT: cmpl %gs:(%ecx), %esp
-; X86-Darwin-NEXT: ja _callee ## TAILCALL
-; X86-Darwin-NEXT: ## %bb.1:
+; X86-Darwin-NEXT: jbe LBB8_1
+; X86-Darwin-NEXT: ## %bb.2:
+; X86-Darwin-NEXT: jmp _callee ## TAILCALL
+; X86-Darwin-NEXT: LBB8_1:
; X86-Darwin-NEXT: pushl $4
; X86-Darwin-NEXT: pushl $0
; X86-Darwin-NEXT: calll ___morestack
@@ -1792,8 +1800,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X64-Darwin-LABEL: test_sibling_call_empty_frame:
; X64-Darwin: ## %bb.0:
; X64-Darwin-NEXT: cmpq %gs:816, %rsp
-; X64-Darwin-NEXT: ja _callee ## TAILCALL
-; X64-Darwin-NEXT: ## %bb.1:
+; X64-Darwin-NEXT: jbe LBB8_1
+; X64-Darwin-NEXT: ## %bb.2:
+; X64-Darwin-NEXT: jmp _callee ## TAILCALL
+; X64-Darwin-NEXT: LBB8_1:
; X64-Darwin-NEXT: movl $0, %r10d
; X64-Darwin-NEXT: movl $0, %r11d
; X64-Darwin-NEXT: callq ___morestack
@@ -1803,8 +1813,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X86-MinGW-LABEL: test_sibling_call_empty_frame:
; X86-MinGW: # %bb.0:
; X86-MinGW-NEXT: cmpl %fs:20, %esp
-; X86-MinGW-NEXT: ja _callee # TAILCALL
-; X86-MinGW-NEXT: # %bb.1:
+; X86-MinGW-NEXT: jbe LBB8_1
+; X86-MinGW-NEXT: # %bb.2:
+; X86-MinGW-NEXT: jmp _callee # TAILCALL
+; X86-MinGW-NEXT: LBB8_1:
; X86-MinGW-NEXT: pushl $4
; X86-MinGW-NEXT: pushl $0
; X86-MinGW-NEXT: calll ___morestack
@@ -1814,8 +1826,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X64-FreeBSD-LABEL: test_sibling_call_empty_frame:
; X64-FreeBSD: # %bb.0:
; X64-FreeBSD-NEXT: cmpq %fs:24, %rsp
-; X64-FreeBSD-NEXT: ja callee at PLT # TAILCALL
-; X64-FreeBSD-NEXT: # %bb.1:
+; X64-FreeBSD-NEXT: jbe .LBB8_1
+; X64-FreeBSD-NEXT: # %bb.2:
+; X64-FreeBSD-NEXT: jmp callee at PLT # TAILCALL
+; X64-FreeBSD-NEXT: .LBB8_1:
; X64-FreeBSD-NEXT: movl $0, %r10d
; X64-FreeBSD-NEXT: movl $0, %r11d
; X64-FreeBSD-NEXT: callq __morestack
@@ -1825,8 +1839,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X86-DFlyBSD-LABEL: test_sibling_call_empty_frame:
; X86-DFlyBSD: # %bb.0:
; X86-DFlyBSD-NEXT: cmpl %fs:16, %esp
-; X86-DFlyBSD-NEXT: ja callee at PLT # TAILCALL
-; X86-DFlyBSD-NEXT: # %bb.1:
+; X86-DFlyBSD-NEXT: jbe .LBB8_1
+; X86-DFlyBSD-NEXT: # %bb.2:
+; X86-DFlyBSD-NEXT: jmp callee at PLT # TAILCALL
+; X86-DFlyBSD-NEXT: .LBB8_1:
; X86-DFlyBSD-NEXT: pushl $4
; X86-DFlyBSD-NEXT: pushl $0
; X86-DFlyBSD-NEXT: calll __morestack
@@ -1836,8 +1852,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X64-DFlyBSD-LABEL: test_sibling_call_empty_frame:
; X64-DFlyBSD: # %bb.0:
; X64-DFlyBSD-NEXT: cmpq %fs:32, %rsp
-; X64-DFlyBSD-NEXT: ja callee at PLT # TAILCALL
-; X64-DFlyBSD-NEXT: # %bb.1:
+; X64-DFlyBSD-NEXT: jbe .LBB8_1
+; X64-DFlyBSD-NEXT: # %bb.2:
+; X64-DFlyBSD-NEXT: jmp callee at PLT # TAILCALL
+; X64-DFlyBSD-NEXT: .LBB8_1:
; X64-DFlyBSD-NEXT: movl $0, %r10d
; X64-DFlyBSD-NEXT: movl $0, %r11d
; X64-DFlyBSD-NEXT: callq __morestack
@@ -1847,8 +1865,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X64-MinGW-LABEL: test_sibling_call_empty_frame:
; X64-MinGW: # %bb.0:
; X64-MinGW-NEXT: cmpq %gs:40, %rsp
-; X64-MinGW-NEXT: ja callee # TAILCALL
-; X64-MinGW-NEXT: # %bb.1:
+; X64-MinGW-NEXT: jbe .LBB8_1
+; X64-MinGW-NEXT: # %bb.2:
+; X64-MinGW-NEXT: jmp callee # TAILCALL
+; X64-MinGW-NEXT: .LBB8_1:
; X64-MinGW-NEXT: movl $0, %r10d
; X64-MinGW-NEXT: movl $32, %r11d
; X64-MinGW-NEXT: callq __morestack
diff --git a/llvm/test/CodeGen/X86/switch-bt.ll b/llvm/test/CodeGen/X86/switch-bt.ll
index 2bf7c46e67e18..b1ec2d59a6a76 100644
--- a/llvm/test/CodeGen/X86/switch-bt.ll
+++ b/llvm/test/CodeGen/X86/switch-bt.ll
@@ -86,7 +86,9 @@ define void @test2(i32 %x) nounwind ssp {
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movl $91, %eax
; CHECK-NEXT: btl %edi, %eax
-; CHECK-NEXT: jb bar at PLT # TAILCALL
+; CHECK-NEXT: jae .LBB1_2
+; CHECK-NEXT: # %bb.3: # %if.then
+; CHECK-NEXT: jmp bar at PLT # TAILCALL
; CHECK-NEXT: .LBB1_2: # %if.end
; CHECK-NEXT: retq
@@ -116,7 +118,9 @@ define void @test3(i32 %x) nounwind {
; CHECK-NEXT: ja .LBB2_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: cmpl $4, %edi
-; CHECK-NEXT: jne bar at PLT # TAILCALL
+; CHECK-NEXT: je .LBB2_2
+; CHECK-NEXT: # %bb.3: # %if.then
+; CHECK-NEXT: jmp bar at PLT # TAILCALL
; CHECK-NEXT: .LBB2_2: # %if.end
; CHECK-NEXT: retq
switch i32 %x, label %if.end [
More information about the llvm-commits
mailing list