[llvm] [BranchFolding] Fold fallthroughs into conditional tailcalls if profitable (PR #140476)
Nabeel Omer via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 3 09:05:27 PDT 2025
https://github.com/omern1 updated https://github.com/llvm/llvm-project/pull/140476
>From c58375b16477eda03885f904c3cecb261424eaa8 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Fri, 16 May 2025 16:24:04 +0100
Subject: [PATCH 1/6] [BranchFolding] Fold fallthroughs into conditional
tailcalls if profitable
This patch makes BranchFolding take branch probability information (edge
hotness) into account when creating conditional tailcalls.
It also enables folding fallthrough blocks into conditional tailcalls
when that's profitable, i.e. when the predecessor has a hot successor and
the edge to the fallthrough block is cold.
This should fix #126363.
---
llvm/lib/CodeGen/BranchFolding.cpp | 53 +++--
llvm/test/CodeGen/X86/conditional-tailcall.ll | 187 ++++++++++++++++++
2 files changed, 225 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 7292bc2be0df2..82f7738236b05 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/BranchFoldingPass.h"
#include "llvm/CodeGen/MBFIWrapper.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -1553,32 +1554,54 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
if (TII->isUnconditionalTailCall(TailCall)) {
SmallVector<MachineBasicBlock *> PredsChanged;
- for (auto &Pred : MBB->predecessors()) {
+ for (auto *Pred : MBB->predecessors()) {
+ bool IsPGOInfoAvailable = false;
+ for (MachineBasicBlock *const PredSucc : Pred->successors()) {
+ IsPGOInfoAvailable |= MBPI.isEdgeHot(Pred, PredSucc);
+ }
+
MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
bool PredAnalyzable =
!TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
- // Only eliminate if MBB == TBB (Taken Basic Block)
- if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB &&
- PredTBB != PredFBB) {
- // The predecessor has a conditional branch to this block which
- // consists of only a tail call. Try to fold the tail call into the
- // conditional branch.
+ bool IsEdgeCold = !MBPI.isEdgeHot(Pred, MBB);
+ bool CanFoldFallThrough =
+ IsPGOInfoAvailable && IsEdgeCold &&
+ (MBB == PredFBB ||
+ (PredFBB == nullptr && Pred->getFallThrough() == MBB));
+ bool CanFoldTakenBlock =
+ (MBB == PredTBB && (IsPGOInfoAvailable ? IsEdgeCold : true));
+
+ // When we have PGO (or equivalent) information, we want to fold the
+ // fallthrough if it's cold. Folding a fallthrough puts it behind a
+ // conditional branch which isn't desirable if it's hot. When there
+ // isn't any PGO information available we want to fold the taken block
+ // if it's possible and we never want to fold the fallthrough as we
+ // don't know if that is desirable.
+ if (PredAnalyzable && !PredCond.empty() && PredTBB != PredFBB &&
+ (CanFoldTakenBlock || CanFoldFallThrough)) {
+ SmallVector<MachineOperand, 4> ReversedCond(PredCond);
+ if (CanFoldFallThrough) {
+ DebugLoc Dl = MBB->findBranchDebugLoc();
+ TII->reverseBranchCondition(ReversedCond);
+ TII->removeBranch(*Pred);
+ TII->insertBranch(*Pred, MBB, PredTBB, ReversedCond, Dl);
+ }
+
+ PredAnalyzable =
+ !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
+
if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
- // TODO: It would be nice if analyzeBranch() could provide a pointer
- // to the branch instruction so replaceBranchWithTailCall() doesn't
- // have to search for it.
+ // TODO: It would be nice if analyzeBranch() could provide a
+ // pointer to the branch instruction so
+ // replaceBranchWithTailCall() doesn't have to search for it.
TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
PredsChanged.push_back(Pred);
}
}
- // If the predecessor is falling through to this block, we could reverse
- // the branch condition and fold the tail call into that. However, after
- // that we might have to re-arrange the CFG to fall through to the other
- // block and there is a high risk of regressing code size rather than
- // improving it.
}
+
if (!PredsChanged.empty()) {
NumTailCalls += PredsChanged.size();
for (auto &Pred : PredsChanged)
diff --git a/llvm/test/CodeGen/X86/conditional-tailcall.ll b/llvm/test/CodeGen/X86/conditional-tailcall.ll
index 2859a87db3d56..5954de585fe6c 100644
--- a/llvm/test/CodeGen/X86/conditional-tailcall.ll
+++ b/llvm/test/CodeGen/X86/conditional-tailcall.ll
@@ -596,3 +596,190 @@ cleanup.thread: ; preds = %cleanup.thread.loop
%6 = phi i1 [ %cmp37, %5 ], [ %call34, %if.else28 ], [ false, %cleanup.thread.loopexit ]
ret i1 %6
}
+
+define void @true_likely(i1 noundef zeroext %0) {
+; CHECK32-LABEL: true_likely:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x04,0x00]
+; CHECK32-NEXT: je func_false # TAILCALL
+; CHECK32-NEXT: # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.1:
+; CHECK32-NEXT: jmp func_true # TAILCALL
+; CHECK32-NEXT: # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: true_likely:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
+; CHECK64-NEXT: je func_false # TAILCALL
+; CHECK64-NEXT: # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.1:
+; CHECK64-NEXT: jmp func_true # TAILCALL
+; CHECK64-NEXT: # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: true_likely:
+; WIN64: # %bb.0:
+; WIN64-NEXT: testb %cl, %cl # encoding: [0x84,0xc9]
+; WIN64-NEXT: je func_false # TAILCALL
+; WIN64-NEXT: # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.1:
+; WIN64-NEXT: jmp func_true # TAILCALL
+; WIN64-NEXT: # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+ br i1 %0, label %2, label %3, !prof !6
+
+2:
+ tail call void @func_true()
+ br label %4
+
+3:
+ tail call void @func_false()
+ br label %4
+
+4:
+ ret void
+}
+
+define void @false_likely(i1 noundef zeroext %0) {
+; CHECK32-LABEL: false_likely:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x04,0x00]
+; CHECK32-NEXT: jne func_true # TAILCALL
+; CHECK32-NEXT: # encoding: [0x75,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.1:
+; CHECK32-NEXT: jmp func_false # TAILCALL
+; CHECK32-NEXT: # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: false_likely:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
+; CHECK64-NEXT: jne func_true # TAILCALL
+; CHECK64-NEXT: # encoding: [0x75,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.1:
+; CHECK64-NEXT: jmp func_false # TAILCALL
+; CHECK64-NEXT: # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: false_likely:
+; WIN64: # %bb.0:
+; WIN64-NEXT: testb %cl, %cl # encoding: [0x84,0xc9]
+; WIN64-NEXT: jne func_true # TAILCALL
+; WIN64-NEXT: # encoding: [0x75,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.1:
+; WIN64-NEXT: jmp func_false # TAILCALL
+; WIN64-NEXT: # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+ br i1 %0, label %2, label %3, !prof !7
+
+2:
+ tail call void @func_true()
+ br label %4
+
+3:
+ tail call void @func_false()
+ br label %4
+
+4:
+ ret void
+}
+
+
+define void @edge_is_hot_but_not_fallthrough(i1 noundef zeroext %0) {
+; CHECK32-LABEL: edge_is_hot_but_not_fallthrough:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: subl $12, %esp # encoding: [0x83,0xec,0x0c]
+; CHECK32-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-NEXT: cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x10,0x00]
+; CHECK32-NEXT: je .LBB6_1 # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.3:
+; CHECK32-NEXT: addl $12, %esp # encoding: [0x83,0xc4,0x0c]
+; CHECK32-NEXT: .cfi_def_cfa_offset 4
+; CHECK32-NEXT: retl # encoding: [0xc3]
+; CHECK32-NEXT: .LBB6_1:
+; CHECK32-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-NEXT: calll func_true # encoding: [0xe8,A,A,A,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_true-4, kind: FK_PCRel_4
+; CHECK32-NEXT: .p2align 4
+; CHECK32-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1
+; CHECK32-NEXT: calll func_false # encoding: [0xe8,A,A,A,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_false-4, kind: FK_PCRel_4
+; CHECK32-NEXT: jmp .LBB6_2 # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: edge_is_hot_but_not_fallthrough:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: pushq %rax # encoding: [0x50]
+; CHECK64-NEXT: .cfi_def_cfa_offset 16
+; CHECK64-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
+; CHECK64-NEXT: je .LBB6_1 # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.3:
+; CHECK64-NEXT: popq %rax # encoding: [0x58]
+; CHECK64-NEXT: .cfi_def_cfa_offset 8
+; CHECK64-NEXT: retq # encoding: [0xc3]
+; CHECK64-NEXT: .LBB6_1:
+; CHECK64-NEXT: .cfi_def_cfa_offset 16
+; CHECK64-NEXT: callq func_true # encoding: [0xe8,A,A,A,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_true-4, kind: reloc_branch_4byte_pcrel
+; CHECK64-NEXT: .p2align 4
+; CHECK64-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1
+; CHECK64-NEXT: callq func_false # encoding: [0xe8,A,A,A,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_false-4, kind: reloc_branch_4byte_pcrel
+; CHECK64-NEXT: jmp .LBB6_2 # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: edge_is_hot_but_not_fallthrough:
+; WIN64: # %bb.0:
+; WIN64-NEXT: subq $40, %rsp # encoding: [0x48,0x83,0xec,0x28]
+; WIN64-NEXT: .seh_stackalloc 40
+; WIN64-NEXT: .seh_endprologue
+; WIN64-NEXT: testb %cl, %cl # encoding: [0x84,0xc9]
+; WIN64-NEXT: je .LBB6_1 # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.3:
+; WIN64-NEXT: .seh_startepilogue
+; WIN64-NEXT: addq $40, %rsp # encoding: [0x48,0x83,0xc4,0x28]
+; WIN64-NEXT: .seh_endepilogue
+; WIN64-NEXT: retq # encoding: [0xc3]
+; WIN64-NEXT: .LBB6_1:
+; WIN64-NEXT: callq func_true # encoding: [0xe8,A,A,A,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_true-4, kind: reloc_branch_4byte_pcrel
+; WIN64-NEXT: .p2align 4
+; WIN64-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1
+; WIN64-NEXT: callq func_false # encoding: [0xe8,A,A,A,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: func_false-4, kind: reloc_branch_4byte_pcrel
+; WIN64-NEXT: jmp .LBB6_2 # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
+; WIN64-NEXT: .seh_endproc
+ br i1 %0, label %2, label %3, !prof !6
+2:
+ %and6 = and i1 %0, 1
+ br label %5
+
+3:
+ tail call void @func_true()
+ br label %4
+
+4:
+ tail call void @func_false()
+ br label %4
+
+5:
+ ret void
+}
+
+!6 = !{!"branch_weights", !"expected", i32 2000, i32 1}
+!7 = !{!"branch_weights", !"expected", i32 1, i32 2000}
+
+
+declare dso_local void @func_true()
+declare dso_local void @func_false()
>From 4f292c164082aaca7837cb215b23dec77cd46d3e Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Sun, 18 May 2025 21:34:48 +0100
Subject: [PATCH 2/6] Fix formatting
---
llvm/lib/CodeGen/BranchFolding.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 82f7738236b05..75354e62e33b9 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1588,7 +1588,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
TII->removeBranch(*Pred);
TII->insertBranch(*Pred, MBB, PredTBB, ReversedCond, Dl);
}
-
+
PredAnalyzable =
!TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
>From 19f7fd4c2b37a9ac9b8075669177b176d71d60d9 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Sun, 18 May 2025 21:37:29 +0100
Subject: [PATCH 3/6] Update tests
---
llvm/test/CodeGen/X86/conditional-tailcall.ll | 86 -------------------
1 file changed, 86 deletions(-)
diff --git a/llvm/test/CodeGen/X86/conditional-tailcall.ll b/llvm/test/CodeGen/X86/conditional-tailcall.ll
index 5954de585fe6c..19980cbcd6ce8 100644
--- a/llvm/test/CodeGen/X86/conditional-tailcall.ll
+++ b/llvm/test/CodeGen/X86/conditional-tailcall.ll
@@ -691,92 +691,6 @@ define void @false_likely(i1 noundef zeroext %0) {
ret void
}
-
-define void @edge_is_hot_but_not_fallthrough(i1 noundef zeroext %0) {
-; CHECK32-LABEL: edge_is_hot_but_not_fallthrough:
-; CHECK32: # %bb.0:
-; CHECK32-NEXT: subl $12, %esp # encoding: [0x83,0xec,0x0c]
-; CHECK32-NEXT: .cfi_def_cfa_offset 16
-; CHECK32-NEXT: cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x10,0x00]
-; CHECK32-NEXT: je .LBB6_1 # encoding: [0x74,A]
-; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
-; CHECK32-NEXT: # %bb.3:
-; CHECK32-NEXT: addl $12, %esp # encoding: [0x83,0xc4,0x0c]
-; CHECK32-NEXT: .cfi_def_cfa_offset 4
-; CHECK32-NEXT: retl # encoding: [0xc3]
-; CHECK32-NEXT: .LBB6_1:
-; CHECK32-NEXT: .cfi_def_cfa_offset 16
-; CHECK32-NEXT: calll func_true # encoding: [0xe8,A,A,A,A]
-; CHECK32-NEXT: # fixup A - offset: 1, value: func_true-4, kind: FK_PCRel_4
-; CHECK32-NEXT: .p2align 4
-; CHECK32-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1
-; CHECK32-NEXT: calll func_false # encoding: [0xe8,A,A,A,A]
-; CHECK32-NEXT: # fixup A - offset: 1, value: func_false-4, kind: FK_PCRel_4
-; CHECK32-NEXT: jmp .LBB6_2 # encoding: [0xeb,A]
-; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
-;
-; CHECK64-LABEL: edge_is_hot_but_not_fallthrough:
-; CHECK64: # %bb.0:
-; CHECK64-NEXT: pushq %rax # encoding: [0x50]
-; CHECK64-NEXT: .cfi_def_cfa_offset 16
-; CHECK64-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
-; CHECK64-NEXT: je .LBB6_1 # encoding: [0x74,A]
-; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
-; CHECK64-NEXT: # %bb.3:
-; CHECK64-NEXT: popq %rax # encoding: [0x58]
-; CHECK64-NEXT: .cfi_def_cfa_offset 8
-; CHECK64-NEXT: retq # encoding: [0xc3]
-; CHECK64-NEXT: .LBB6_1:
-; CHECK64-NEXT: .cfi_def_cfa_offset 16
-; CHECK64-NEXT: callq func_true # encoding: [0xe8,A,A,A,A]
-; CHECK64-NEXT: # fixup A - offset: 1, value: func_true-4, kind: reloc_branch_4byte_pcrel
-; CHECK64-NEXT: .p2align 4
-; CHECK64-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1
-; CHECK64-NEXT: callq func_false # encoding: [0xe8,A,A,A,A]
-; CHECK64-NEXT: # fixup A - offset: 1, value: func_false-4, kind: reloc_branch_4byte_pcrel
-; CHECK64-NEXT: jmp .LBB6_2 # encoding: [0xeb,A]
-; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
-;
-; WIN64-LABEL: edge_is_hot_but_not_fallthrough:
-; WIN64: # %bb.0:
-; WIN64-NEXT: subq $40, %rsp # encoding: [0x48,0x83,0xec,0x28]
-; WIN64-NEXT: .seh_stackalloc 40
-; WIN64-NEXT: .seh_endprologue
-; WIN64-NEXT: testb %cl, %cl # encoding: [0x84,0xc9]
-; WIN64-NEXT: je .LBB6_1 # encoding: [0x74,A]
-; WIN64-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
-; WIN64-NEXT: # %bb.3:
-; WIN64-NEXT: .seh_startepilogue
-; WIN64-NEXT: addq $40, %rsp # encoding: [0x48,0x83,0xc4,0x28]
-; WIN64-NEXT: .seh_endepilogue
-; WIN64-NEXT: retq # encoding: [0xc3]
-; WIN64-NEXT: .LBB6_1:
-; WIN64-NEXT: callq func_true # encoding: [0xe8,A,A,A,A]
-; WIN64-NEXT: # fixup A - offset: 1, value: func_true-4, kind: reloc_branch_4byte_pcrel
-; WIN64-NEXT: .p2align 4
-; WIN64-NEXT: .LBB6_2: # =>This Inner Loop Header: Depth=1
-; WIN64-NEXT: callq func_false # encoding: [0xe8,A,A,A,A]
-; WIN64-NEXT: # fixup A - offset: 1, value: func_false-4, kind: reloc_branch_4byte_pcrel
-; WIN64-NEXT: jmp .LBB6_2 # encoding: [0xeb,A]
-; WIN64-NEXT: # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
-; WIN64-NEXT: .seh_endproc
- br i1 %0, label %2, label %3, !prof !6
-2:
- %and6 = and i1 %0, 1
- br label %5
-
-3:
- tail call void @func_true()
- br label %4
-
-4:
- tail call void @func_false()
- br label %4
-
-5:
- ret void
-}
-
!6 = !{!"branch_weights", !"expected", i32 2000, i32 1}
!7 = !{!"branch_weights", !"expected", i32 1, i32 2000}
>From 15526b9b303c2902bb1dafa22a0bdd33c90e3af5 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Mon, 19 May 2025 13:29:33 +0100
Subject: [PATCH 4/6] Update name
---
llvm/lib/CodeGen/BranchFolding.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 75354e62e33b9..7a21c41e8d1ff 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1555,9 +1555,9 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
if (TII->isUnconditionalTailCall(TailCall)) {
SmallVector<MachineBasicBlock *> PredsChanged;
for (auto *Pred : MBB->predecessors()) {
- bool IsPGOInfoAvailable = false;
+ bool PredHasHotSuccessor = false;
for (MachineBasicBlock *const PredSucc : Pred->successors()) {
- IsPGOInfoAvailable |= MBPI.isEdgeHot(Pred, PredSucc);
+ PredHasHotSuccessor |= MBPI.isEdgeHot(Pred, PredSucc);
}
MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
@@ -1567,11 +1567,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool IsEdgeCold = !MBPI.isEdgeHot(Pred, MBB);
bool CanFoldFallThrough =
- IsPGOInfoAvailable && IsEdgeCold &&
+ PredHasHotSuccessor && IsEdgeCold &&
(MBB == PredFBB ||
(PredFBB == nullptr && Pred->getFallThrough() == MBB));
bool CanFoldTakenBlock =
- (MBB == PredTBB && (IsPGOInfoAvailable ? IsEdgeCold : true));
+ (MBB == PredTBB && (PredHasHotSuccessor ? IsEdgeCold : true));
// When we have PGO (or equivalent) information, we want to fold the
// fallthrough if it's cold. Folding a fallthrough puts it behind a
>From e9db1c815fbf2ae0f904c20376650098f75e0ee8 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Mon, 19 May 2025 13:29:50 +0100
Subject: [PATCH 5/6] Update tests
---
llvm/test/CodeGen/X86/segmented-stacks.ll | 60 +++++++++++++++--------
llvm/test/CodeGen/X86/switch-bt.ll | 8 ++-
2 files changed, 46 insertions(+), 22 deletions(-)
diff --git a/llvm/test/CodeGen/X86/segmented-stacks.ll b/llvm/test/CodeGen/X86/segmented-stacks.ll
index f8627ff56a1f9..cd406385d6d68 100644
--- a/llvm/test/CodeGen/X86/segmented-stacks.ll
+++ b/llvm/test/CodeGen/X86/segmented-stacks.ll
@@ -1732,8 +1732,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X86-Linux-LABEL: test_sibling_call_empty_frame:
; X86-Linux: # %bb.0:
; X86-Linux-NEXT: cmpl %gs:48, %esp
-; X86-Linux-NEXT: ja callee at PLT # TAILCALL
-; X86-Linux-NEXT: # %bb.1:
+; X86-Linux-NEXT: jbe .LBB8_1
+; X86-Linux-NEXT: # %bb.2:
+; X86-Linux-NEXT: jmp callee at PLT # TAILCALL
+; X86-Linux-NEXT: .LBB8_1:
; X86-Linux-NEXT: pushl $4
; X86-Linux-NEXT: pushl $0
; X86-Linux-NEXT: calll __morestack
@@ -1743,8 +1745,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X64-Linux-LABEL: test_sibling_call_empty_frame:
; X64-Linux: # %bb.0:
; X64-Linux-NEXT: cmpq %fs:112, %rsp
-; X64-Linux-NEXT: ja callee at PLT # TAILCALL
-; X64-Linux-NEXT: # %bb.1:
+; X64-Linux-NEXT: jbe .LBB8_1
+; X64-Linux-NEXT: # %bb.2:
+; X64-Linux-NEXT: jmp callee at PLT # TAILCALL
+; X64-Linux-NEXT: .LBB8_1:
; X64-Linux-NEXT: movl $0, %r10d
; X64-Linux-NEXT: movl $0, %r11d
; X64-Linux-NEXT: callq __morestack
@@ -1769,8 +1773,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X32ABI-LABEL: test_sibling_call_empty_frame:
; X32ABI: # %bb.0:
; X32ABI-NEXT: cmpl %fs:64, %esp
-; X32ABI-NEXT: ja callee at PLT # TAILCALL
-; X32ABI-NEXT: # %bb.1:
+; X32ABI-NEXT: jbe .LBB8_1
+; X32ABI-NEXT: # %bb.2:
+; X32ABI-NEXT: jmp callee at PLT # TAILCALL
+; X32ABI-NEXT: .LBB8_1:
; X32ABI-NEXT: movl $0, %r10d
; X32ABI-NEXT: movl $0, %r11d
; X32ABI-NEXT: callq __morestack
@@ -1781,8 +1787,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X86-Darwin: ## %bb.0:
; X86-Darwin-NEXT: movl $432, %ecx ## imm = 0x1B0
; X86-Darwin-NEXT: cmpl %gs:(%ecx), %esp
-; X86-Darwin-NEXT: ja _callee ## TAILCALL
-; X86-Darwin-NEXT: ## %bb.1:
+; X86-Darwin-NEXT: jbe LBB8_1
+; X86-Darwin-NEXT: ## %bb.2:
+; X86-Darwin-NEXT: jmp _callee ## TAILCALL
+; X86-Darwin-NEXT: LBB8_1:
; X86-Darwin-NEXT: pushl $4
; X86-Darwin-NEXT: pushl $0
; X86-Darwin-NEXT: calll ___morestack
@@ -1792,8 +1800,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X64-Darwin-LABEL: test_sibling_call_empty_frame:
; X64-Darwin: ## %bb.0:
; X64-Darwin-NEXT: cmpq %gs:816, %rsp
-; X64-Darwin-NEXT: ja _callee ## TAILCALL
-; X64-Darwin-NEXT: ## %bb.1:
+; X64-Darwin-NEXT: jbe LBB8_1
+; X64-Darwin-NEXT: ## %bb.2:
+; X64-Darwin-NEXT: jmp _callee ## TAILCALL
+; X64-Darwin-NEXT: LBB8_1:
; X64-Darwin-NEXT: movl $0, %r10d
; X64-Darwin-NEXT: movl $0, %r11d
; X64-Darwin-NEXT: callq ___morestack
@@ -1803,8 +1813,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X86-MinGW-LABEL: test_sibling_call_empty_frame:
; X86-MinGW: # %bb.0:
; X86-MinGW-NEXT: cmpl %fs:20, %esp
-; X86-MinGW-NEXT: ja _callee # TAILCALL
-; X86-MinGW-NEXT: # %bb.1:
+; X86-MinGW-NEXT: jbe LBB8_1
+; X86-MinGW-NEXT: # %bb.2:
+; X86-MinGW-NEXT: jmp _callee # TAILCALL
+; X86-MinGW-NEXT: LBB8_1:
; X86-MinGW-NEXT: pushl $4
; X86-MinGW-NEXT: pushl $0
; X86-MinGW-NEXT: calll ___morestack
@@ -1814,8 +1826,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X64-FreeBSD-LABEL: test_sibling_call_empty_frame:
; X64-FreeBSD: # %bb.0:
; X64-FreeBSD-NEXT: cmpq %fs:24, %rsp
-; X64-FreeBSD-NEXT: ja callee at PLT # TAILCALL
-; X64-FreeBSD-NEXT: # %bb.1:
+; X64-FreeBSD-NEXT: jbe .LBB8_1
+; X64-FreeBSD-NEXT: # %bb.2:
+; X64-FreeBSD-NEXT: jmp callee at PLT # TAILCALL
+; X64-FreeBSD-NEXT: .LBB8_1:
; X64-FreeBSD-NEXT: movl $0, %r10d
; X64-FreeBSD-NEXT: movl $0, %r11d
; X64-FreeBSD-NEXT: callq __morestack
@@ -1825,8 +1839,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X86-DFlyBSD-LABEL: test_sibling_call_empty_frame:
; X86-DFlyBSD: # %bb.0:
; X86-DFlyBSD-NEXT: cmpl %fs:16, %esp
-; X86-DFlyBSD-NEXT: ja callee at PLT # TAILCALL
-; X86-DFlyBSD-NEXT: # %bb.1:
+; X86-DFlyBSD-NEXT: jbe .LBB8_1
+; X86-DFlyBSD-NEXT: # %bb.2:
+; X86-DFlyBSD-NEXT: jmp callee at PLT # TAILCALL
+; X86-DFlyBSD-NEXT: .LBB8_1:
; X86-DFlyBSD-NEXT: pushl $4
; X86-DFlyBSD-NEXT: pushl $0
; X86-DFlyBSD-NEXT: calll __morestack
@@ -1836,8 +1852,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X64-DFlyBSD-LABEL: test_sibling_call_empty_frame:
; X64-DFlyBSD: # %bb.0:
; X64-DFlyBSD-NEXT: cmpq %fs:32, %rsp
-; X64-DFlyBSD-NEXT: ja callee at PLT # TAILCALL
-; X64-DFlyBSD-NEXT: # %bb.1:
+; X64-DFlyBSD-NEXT: jbe .LBB8_1
+; X64-DFlyBSD-NEXT: # %bb.2:
+; X64-DFlyBSD-NEXT: jmp callee at PLT # TAILCALL
+; X64-DFlyBSD-NEXT: .LBB8_1:
; X64-DFlyBSD-NEXT: movl $0, %r10d
; X64-DFlyBSD-NEXT: movl $0, %r11d
; X64-DFlyBSD-NEXT: callq __morestack
@@ -1847,8 +1865,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 {
; X64-MinGW-LABEL: test_sibling_call_empty_frame:
; X64-MinGW: # %bb.0:
; X64-MinGW-NEXT: cmpq %gs:40, %rsp
-; X64-MinGW-NEXT: ja callee # TAILCALL
-; X64-MinGW-NEXT: # %bb.1:
+; X64-MinGW-NEXT: jbe .LBB8_1
+; X64-MinGW-NEXT: # %bb.2:
+; X64-MinGW-NEXT: jmp callee # TAILCALL
+; X64-MinGW-NEXT: .LBB8_1:
; X64-MinGW-NEXT: movl $0, %r10d
; X64-MinGW-NEXT: movl $32, %r11d
; X64-MinGW-NEXT: callq __morestack
diff --git a/llvm/test/CodeGen/X86/switch-bt.ll b/llvm/test/CodeGen/X86/switch-bt.ll
index 2bf7c46e67e18..b1ec2d59a6a76 100644
--- a/llvm/test/CodeGen/X86/switch-bt.ll
+++ b/llvm/test/CodeGen/X86/switch-bt.ll
@@ -86,7 +86,9 @@ define void @test2(i32 %x) nounwind ssp {
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movl $91, %eax
; CHECK-NEXT: btl %edi, %eax
-; CHECK-NEXT: jb bar at PLT # TAILCALL
+; CHECK-NEXT: jae .LBB1_2
+; CHECK-NEXT: # %bb.3: # %if.then
+; CHECK-NEXT: jmp bar at PLT # TAILCALL
; CHECK-NEXT: .LBB1_2: # %if.end
; CHECK-NEXT: retq
@@ -116,7 +118,9 @@ define void @test3(i32 %x) nounwind {
; CHECK-NEXT: ja .LBB2_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: cmpl $4, %edi
-; CHECK-NEXT: jne bar at PLT # TAILCALL
+; CHECK-NEXT: je .LBB2_2
+; CHECK-NEXT: # %bb.3: # %if.then
+; CHECK-NEXT: jmp bar at PLT # TAILCALL
; CHECK-NEXT: .LBB2_2: # %if.end
; CHECK-NEXT: retq
switch i32 %x, label %if.end [
>From 8a4c2bf40a58ab2d7c69fdc562175ba51e9a1b4d Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Fri, 3 Oct 2025 16:58:26 +0100
Subject: [PATCH 6/6] Update test to address unrelated changes
---
llvm/test/CodeGen/X86/conditional-tailcall.ll | 24 +++++++++----------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/test/CodeGen/X86/conditional-tailcall.ll b/llvm/test/CodeGen/X86/conditional-tailcall.ll
index 19980cbcd6ce8..7233dfd44c605 100644
--- a/llvm/test/CodeGen/X86/conditional-tailcall.ll
+++ b/llvm/test/CodeGen/X86/conditional-tailcall.ll
@@ -603,33 +603,33 @@ define void @true_likely(i1 noundef zeroext %0) {
; CHECK32-NEXT: cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x04,0x00]
; CHECK32-NEXT: je func_false # TAILCALL
; CHECK32-NEXT: # encoding: [0x74,A]
-; CHECK32-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_false, kind: FK_PCRel_1
; CHECK32-NEXT: # %bb.1:
; CHECK32-NEXT: jmp func_true # TAILCALL
; CHECK32-NEXT: # encoding: [0xeb,A]
-; CHECK32-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_true, kind: FK_PCRel_1
;
; CHECK64-LABEL: true_likely:
; CHECK64: # %bb.0:
; CHECK64-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
; CHECK64-NEXT: je func_false # TAILCALL
; CHECK64-NEXT: # encoding: [0x74,A]
-; CHECK64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_false, kind: FK_PCRel_1
; CHECK64-NEXT: # %bb.1:
; CHECK64-NEXT: jmp func_true # TAILCALL
; CHECK64-NEXT: # encoding: [0xeb,A]
-; CHECK64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_true, kind: FK_PCRel_1
;
; WIN64-LABEL: true_likely:
; WIN64: # %bb.0:
; WIN64-NEXT: testb %cl, %cl # encoding: [0x84,0xc9]
; WIN64-NEXT: je func_false # TAILCALL
; WIN64-NEXT: # encoding: [0x74,A]
-; WIN64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; WIN64-NEXT: # fixup A - offset: 1, value: func_false, kind: FK_PCRel_1
; WIN64-NEXT: # %bb.1:
; WIN64-NEXT: jmp func_true # TAILCALL
; WIN64-NEXT: # encoding: [0xeb,A]
-; WIN64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; WIN64-NEXT: # fixup A - offset: 1, value: func_true, kind: FK_PCRel_1
br i1 %0, label %2, label %3, !prof !6
2:
@@ -650,33 +650,33 @@ define void @false_likely(i1 noundef zeroext %0) {
; CHECK32-NEXT: cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x04,0x00]
; CHECK32-NEXT: jne func_true # TAILCALL
; CHECK32-NEXT: # encoding: [0x75,A]
-; CHECK32-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_true, kind: FK_PCRel_1
; CHECK32-NEXT: # %bb.1:
; CHECK32-NEXT: jmp func_false # TAILCALL
; CHECK32-NEXT: # encoding: [0xeb,A]
-; CHECK32-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # fixup A - offset: 1, value: func_false, kind: FK_PCRel_1
;
; CHECK64-LABEL: false_likely:
; CHECK64: # %bb.0:
; CHECK64-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
; CHECK64-NEXT: jne func_true # TAILCALL
; CHECK64-NEXT: # encoding: [0x75,A]
-; CHECK64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_true, kind: FK_PCRel_1
; CHECK64-NEXT: # %bb.1:
; CHECK64-NEXT: jmp func_false # TAILCALL
; CHECK64-NEXT: # encoding: [0xeb,A]
-; CHECK64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # fixup A - offset: 1, value: func_false, kind: FK_PCRel_1
;
; WIN64-LABEL: false_likely:
; WIN64: # %bb.0:
; WIN64-NEXT: testb %cl, %cl # encoding: [0x84,0xc9]
; WIN64-NEXT: jne func_true # TAILCALL
; WIN64-NEXT: # encoding: [0x75,A]
-; WIN64-NEXT: # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; WIN64-NEXT: # fixup A - offset: 1, value: func_true, kind: FK_PCRel_1
; WIN64-NEXT: # %bb.1:
; WIN64-NEXT: jmp func_false # TAILCALL
; WIN64-NEXT: # encoding: [0xeb,A]
-; WIN64-NEXT: # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; WIN64-NEXT: # fixup A - offset: 1, value: func_false, kind: FK_PCRel_1
br i1 %0, label %2, label %3, !prof !7
2:
More information about the llvm-commits
mailing list