[llvm] [BranchFolding] Fold fallthroughs into conditional tailcalls if profitable (PR #140476)

via llvm-commits llvm-commits at lists.llvm.org
Sun May 18 13:31:54 PDT 2025


llvmbot wrote:



@llvm/pr-subscribers-backend-x86

Author: Nabeel Omer (omern1)

Changes:

This patch makes BranchFolding take branch probability information into
account when creating conditional tail calls.

It also enables folding a fallthrough block into a conditional tail call
when doing so is profitable.

This should fix https://github.com/llvm/llvm-project/issues/126363.
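
For readers unfamiliar with the transformation, here is a rough before/after sketch at the x86 level (hand-written for illustration; the labels and callee name are made up, not taken from the patch's tests). The fallthrough successor consists of nothing but a tail call and its edge is cold according to profile data, so the branch condition is reversed and the tail call is folded into a conditional jump; after layout the hot block can then become the fallthrough:

```asm
# Before: the cold fallthrough block is a bare tail call.
        testl   %edi, %edi
        jne     .LBB0_1          # hot taken edge
        jmp     cold_callee      # fallthrough block: TAILCALL
.LBB0_1:
        # ... hot code ...

# After: condition reversed, tail call folded into the branch.
        testl   %edi, %edi
        je      cold_callee      # TAILCALL, taken only on the cold path
        # ... hot code, now reached by fallthrough ...
```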

---
Full diff: https://github.com/llvm/llvm-project/pull/140476.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/BranchFolding.cpp (+38-15) 
- (modified) llvm/test/CodeGen/X86/conditional-tailcall.ll (+187) 


``````````diff
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 6f5afbd2a996a..af2c40005081e 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -26,6 +26,7 @@
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/BranchFoldingPass.h"
 #include "llvm/CodeGen/MBFIWrapper.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -1547,32 +1548,54 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
     MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
     if (TII->isUnconditionalTailCall(TailCall)) {
       SmallVector<MachineBasicBlock *> PredsChanged;
-      for (auto &Pred : MBB->predecessors()) {
+      for (auto *Pred : MBB->predecessors()) {
+        bool IsPGOInfoAvailable = false;
+        for (MachineBasicBlock *const PredSucc : Pred->successors()) {
+          IsPGOInfoAvailable |= MBPI.isEdgeHot(Pred, PredSucc);
+        }
+
         MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
         SmallVector<MachineOperand, 4> PredCond;
         bool PredAnalyzable =
             !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
 
-        // Only eliminate if MBB == TBB (Taken Basic Block)
-        if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB &&
-            PredTBB != PredFBB) {
-          // The predecessor has a conditional branch to this block which
-          // consists of only a tail call. Try to fold the tail call into the
-          // conditional branch.
+        bool IsEdgeCold = !MBPI.isEdgeHot(Pred, MBB);
+        bool CanFoldFallThrough =
+            IsPGOInfoAvailable && IsEdgeCold &&
+            (MBB == PredFBB ||
+             (PredFBB == nullptr && Pred->getFallThrough() == MBB));
+        bool CanFoldTakenBlock =
+            (MBB == PredTBB && (IsPGOInfoAvailable ? IsEdgeCold : true));
+
+        // When PGO (or equivalent) information is available, only fold the
+        // fallthrough block if its edge is cold: folding puts the block
+        // behind a conditional branch, which is undesirable for a hot edge.
+        // Without profile information, fold the taken block whenever
+        // possible and never fold the fallthrough, since there is no way to
+        // tell whether doing so would be profitable.
+        if (PredAnalyzable && !PredCond.empty() && PredTBB != PredFBB &&
+            (CanFoldTakenBlock || CanFoldFallThrough)) {
+          SmallVector<MachineOperand, 4> ReversedCond(PredCond);
+          if (CanFoldFallThrough) {
+            DebugLoc Dl = MBB->findBranchDebugLoc();
+            TII->reverseBranchCondition(ReversedCond);
+            TII->removeBranch(*Pred);
+            TII->insertBranch(*Pred, MBB, PredTBB, ReversedCond, Dl);
+          }
+
+          PredAnalyzable =
+              !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
+
           if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
-            // TODO: It would be nice if analyzeBranch() could provide a pointer
-            // to the branch instruction so replaceBranchWithTailCall() doesn't
-            // have to search for it.
+            // TODO: It would be nice if analyzeBranch() could provide a
+            // pointer to the branch instruction so
+            // replaceBranchWithTailCall() doesn't have to search for it.
             TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
             PredsChanged.push_back(Pred);
           }
         }
-        // If the predecessor is falling through to this block, we could reverse
-        // the branch condition and fold the tail call into that. However, after
-        // that we might have to re-arrange the CFG to fall through to the other
-        // block and there is a high risk of regressing code size rather than
-        // improving it.
       }
+
       if (!PredsChanged.empty()) {
         NumTailCalls += PredsChanged.size();
         for (auto &Pred : PredsChanged)
diff --git a/llvm/test/CodeGen/X86/conditional-tailcall.ll b/llvm/test/CodeGen/X86/conditional-tailcall.ll
index 4c990d81810be..b851840e167da 100644
--- a/llvm/test/CodeGen/X86/conditional-tailcall.ll
+++ b/llvm/test/CodeGen/X86/conditional-tailcall.ll
@@ -597,3 +597,190 @@ cleanup.thread:                                   ; preds = %cleanup.thread.loop
   %6 = phi i1 [ %cmp37, %5 ], [ %call34, %if.else28 ], [ false, %cleanup.thread.loopexit ]
   ret i1 %6
 }
+
+define void @true_likely(i1 noundef zeroext %0) {
+; CHECK32-LABEL: true_likely:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x04,0x00]
+; CHECK32-NEXT:    je func_false # TAILCALL
+; CHECK32-NEXT:    # encoding: [0x74,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; CHECK32-NEXT:  # %bb.1:
+; CHECK32-NEXT:    jmp func_true # TAILCALL
+; CHECK32-NEXT:    # encoding: [0xeb,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: true_likely:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    testl %edi, %edi # encoding: [0x85,0xff]
+; CHECK64-NEXT:    je func_false # TAILCALL
+; CHECK64-NEXT:    # encoding: [0x74,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; CHECK64-NEXT:  # %bb.1:
+; CHECK64-NEXT:    jmp func_true # TAILCALL
+; CHECK64-NEXT:    # encoding: [0xeb,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: true_likely:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    testb %cl, %cl # encoding: [0x84,0xc9]
+; WIN64-NEXT:    je func_false # TAILCALL
+; WIN64-NEXT:    # encoding: [0x74,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+; WIN64-NEXT:  # %bb.1:
+; WIN64-NEXT:    jmp func_true # TAILCALL
+; WIN64-NEXT:    # encoding: [0xeb,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+  br i1 %0, label %2, label %3, !prof !6
+
+2:
+  tail call void @func_true()
+  br label %4
+
+3:
+  tail call void @func_false()
+  br label %4
+
+4:
+  ret void
+}
+
+define void @false_likely(i1 noundef zeroext %0) {
+; CHECK32-LABEL: false_likely:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x04,0x00]
+; CHECK32-NEXT:    jne func_true # TAILCALL
+; CHECK32-NEXT:    # encoding: [0x75,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; CHECK32-NEXT:  # %bb.1:
+; CHECK32-NEXT:    jmp func_false # TAILCALL
+; CHECK32-NEXT:    # encoding: [0xeb,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: false_likely:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    testl %edi, %edi # encoding: [0x85,0xff]
+; CHECK64-NEXT:    jne func_true # TAILCALL
+; CHECK64-NEXT:    # encoding: [0x75,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; CHECK64-NEXT:  # %bb.1:
+; CHECK64-NEXT:    jmp func_false # TAILCALL
+; CHECK64-NEXT:    # encoding: [0xeb,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: false_likely:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    testb %cl, %cl # encoding: [0x84,0xc9]
+; WIN64-NEXT:    jne func_true # TAILCALL
+; WIN64-NEXT:    # encoding: [0x75,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: func_true-1, kind: FK_PCRel_1
+; WIN64-NEXT:  # %bb.1:
+; WIN64-NEXT:    jmp func_false # TAILCALL
+; WIN64-NEXT:    # encoding: [0xeb,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: func_false-1, kind: FK_PCRel_1
+  br i1 %0, label %2, label %3, !prof !7
+
+2:
+  tail call void @func_true()
+  br label %4
+
+3:
+  tail call void @func_false()
+  br label %4
+
+4:
+  ret void
+}
+
+
+define void @edge_is_hot_but_not_fallthrough(i1 noundef zeroext %0) {
+; CHECK32-LABEL: edge_is_hot_but_not_fallthrough:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    subl $12, %esp # encoding: [0x83,0xec,0x0c]
+; CHECK32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK32-NEXT:    cmpb $0, {{[0-9]+}}(%esp) # encoding: [0x80,0x7c,0x24,0x10,0x00]
+; CHECK32-NEXT:    je .LBB6_1 # encoding: [0x74,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
+; CHECK32-NEXT:  # %bb.3:
+; CHECK32-NEXT:    addl $12, %esp # encoding: [0x83,0xc4,0x0c]
+; CHECK32-NEXT:    .cfi_def_cfa_offset 4
+; CHECK32-NEXT:    retl # encoding: [0xc3]
+; CHECK32-NEXT:  .LBB6_1:
+; CHECK32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK32-NEXT:    calll func_true # encoding: [0xe8,A,A,A,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: func_true-4, kind: FK_PCRel_4
+; CHECK32-NEXT:    .p2align 4
+; CHECK32-NEXT:  .LBB6_2: # =>This Inner Loop Header: Depth=1
+; CHECK32-NEXT:    calll func_false # encoding: [0xe8,A,A,A,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: func_false-4, kind: FK_PCRel_4
+; CHECK32-NEXT:    jmp .LBB6_2 # encoding: [0xeb,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: edge_is_hot_but_not_fallthrough:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    pushq %rax # encoding: [0x50]
+; CHECK64-NEXT:    .cfi_def_cfa_offset 16
+; CHECK64-NEXT:    testl %edi, %edi # encoding: [0x85,0xff]
+; CHECK64-NEXT:    je .LBB6_1 # encoding: [0x74,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
+; CHECK64-NEXT:  # %bb.3:
+; CHECK64-NEXT:    popq %rax # encoding: [0x58]
+; CHECK64-NEXT:    .cfi_def_cfa_offset 8
+; CHECK64-NEXT:    retq # encoding: [0xc3]
+; CHECK64-NEXT:  .LBB6_1:
+; CHECK64-NEXT:    .cfi_def_cfa_offset 16
+; CHECK64-NEXT:    callq func_true # encoding: [0xe8,A,A,A,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: func_true-4, kind: reloc_branch_4byte_pcrel
+; CHECK64-NEXT:    .p2align 4
+; CHECK64-NEXT:  .LBB6_2: # =>This Inner Loop Header: Depth=1
+; CHECK64-NEXT:    callq func_false # encoding: [0xe8,A,A,A,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: func_false-4, kind: reloc_branch_4byte_pcrel
+; CHECK64-NEXT:    jmp .LBB6_2 # encoding: [0xeb,A]
+; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: edge_is_hot_but_not_fallthrough:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $40, %rsp # encoding: [0x48,0x83,0xec,0x28]
+; WIN64-NEXT:    .seh_stackalloc 40
+; WIN64-NEXT:    .seh_endprologue
+; WIN64-NEXT:    testb %cl, %cl # encoding: [0x84,0xc9]
+; WIN64-NEXT:    je .LBB6_1 # encoding: [0x74,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
+; WIN64-NEXT:  # %bb.3:
+; WIN64-NEXT:    .seh_startepilogue
+; WIN64-NEXT:    addq $40, %rsp # encoding: [0x48,0x83,0xc4,0x28]
+; WIN64-NEXT:    .seh_endepilogue
+; WIN64-NEXT:    retq # encoding: [0xc3]
+; WIN64-NEXT:  .LBB6_1:
+; WIN64-NEXT:    callq func_true # encoding: [0xe8,A,A,A,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: func_true-4, kind: reloc_branch_4byte_pcrel
+; WIN64-NEXT:    .p2align 4
+; WIN64-NEXT:  .LBB6_2: # =>This Inner Loop Header: Depth=1
+; WIN64-NEXT:    callq func_false # encoding: [0xe8,A,A,A,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: func_false-4, kind: reloc_branch_4byte_pcrel
+; WIN64-NEXT:    jmp .LBB6_2 # encoding: [0xeb,A]
+; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB6_2-1, kind: FK_PCRel_1
+; WIN64-NEXT:    .seh_endproc
+  br i1 %0, label %2, label %3, !prof !6
+2:
+  %and6 = and i1 %0, 1
+  br label %5
+
+3:
+  tail call void @func_true()
+  br label %4
+
+4:
+  tail call void @func_false()
+  br label %4
+
+5:
+  ret void
+}
+
+!6 = !{!"branch_weights", !"expected", i32 2000, i32 1}
+!7 = !{!"branch_weights", !"expected", i32 1, i32 2000}
+
+
+declare dso_local void @func_true()
+declare dso_local void @func_false()

``````````
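
For comparison, the case the pass already handled (and the only one it performs when no profile information is available) folds the *taken* successor and needs no condition reversal. A minimal sketch under the same illustrative naming:

```asm
# Before: the taken successor is a bare tail call.
        testl   %edi, %edi
        jne     .LBB0_1          # taken edge leads to the tail-call block
        # ... fallthrough code ...
.LBB0_1:
        jmp     callee           # TAILCALL

# After: the tail call is folded directly into the conditional branch.
        testl   %edi, %edi
        jne     callee           # TAILCALL
        # ... fallthrough code unchanged ...
```

With this patch, when profile information is available, the taken-block fold is additionally gated on the taken edge being cold (see `CanFoldTakenBlock` in the diff).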



https://github.com/llvm/llvm-project/pull/140476

