[llvm] [MachineBasicBlock] Don't split loop header successor if the terminator is unanalyzable (PR #170146)

Hongyu Chen via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 1 21:05:56 PST 2025


https://github.com/XChy updated https://github.com/llvm/llvm-project/pull/170146

>From 21ed39b275ad24e8b958c9f84952b2340640dde8 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Mon, 1 Dec 2025 22:25:04 +0800
Subject: [PATCH 1/3] [MachineBasicBlock] Don't split loop header successor
 if the terminator is unanalyzable

---
 llvm/lib/CodeGen/MachineBasicBlock.cpp | 12 ++---
 llvm/test/CodeGen/NVPTX/switch.ll      | 73 ++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/NVPTX/switch.ll

diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index ba0b025167307..10bf18b7fcb6d 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1425,14 +1425,13 @@ bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ,
   // where both sides of the branches are always executed.
 
   if (MF->getTarget().requiresStructuredCFG()) {
+    if (!MLI)
+      return false;
+    const MachineLoop *L = MLI->getLoopFor(Succ);
     // If `Succ` is a loop header, splitting the critical edge will not
     // break structured CFG.
-    if (MLI) {
-      const MachineLoop *L = MLI->getLoopFor(Succ);
-      return L && L->getHeader() == Succ;
-    }
-
-    return false;
+    if (!L || L->getHeader() != Succ)
+      return false;
   }
 
   // Do we have an Indirect jump with a jumptable that we can rewrite?
@@ -1459,6 +1458,7 @@ bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ,
                       << printMBBReference(*this) << '\n');
     return false;
   }
+
   return true;
 }
 
diff --git a/llvm/test/CodeGen/NVPTX/switch.ll b/llvm/test/CodeGen/NVPTX/switch.ll
new file mode 100644
index 0000000000000..7fcfcfbb85d00
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/switch.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+
+target triple = "nvptx64-unknown-nvidiacl"
+
+define void @pr170051(i32 %cond) {
+; CHECK-LABEL: pr170051(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    mov.b32 %r2, 0;
+; CHECK-NEXT:    ld.param.b32 %r1, [pr170051_param_0];
+; CHECK-NEXT:    setp.gt.u32 %p1, %r1, 6;
+; CHECK-NEXT:    bra.uni $L__BB0_3;
+; CHECK-NEXT:  $L__BB0_1: // %BS_LABEL_2
+; CHECK-NEXT:    // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    or.b32 %r3, %r2, 1;
+; CHECK-NEXT:  $L__BB0_2: // %for.cond4
+; CHECK-NEXT:    // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    mov.b32 %r2, %r3;
+; CHECK-NEXT:  $L__BB0_3: // %BS_LABEL_1
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    @%p1 bra $L__BB0_5;
+; CHECK-NEXT:  // %bb.4: // %BS_LABEL_1
+; CHECK-NEXT:    // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    mov.b32 %r3, %r1;
+; CHECK-NEXT:    $L_brx_0: .branchtargets
+; CHECK-NEXT:     $L__BB0_2,
+; CHECK-NEXT:     $L__BB0_3,
+; CHECK-NEXT:     $L__BB0_5,
+; CHECK-NEXT:     $L__BB0_5,
+; CHECK-NEXT:     $L__BB0_1,
+; CHECK-NEXT:     $L__BB0_5,
+; CHECK-NEXT:     $L__BB0_3;
+; CHECK-NEXT:    brx.idx %r1, $L_brx_0;
+; CHECK-NEXT:  $L__BB0_5: // %unreachable
+; CHECK-NEXT:    // begin inline asm
+; CHECK-NEXT:    exit;
+; CHECK-NEXT:    // end inline asm
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond4.for.cond_crit_edge, %BS_LABEL_1, %BS_LABEL_1, %entry
+  %p_2218_0.1 = phi i32 [ 0, %entry ], [ %p_2218_0.3, %BS_LABEL_1 ], [ %p_2218_0.3, %BS_LABEL_1 ], [ poison, %for.cond4.for.cond_crit_edge ]
+  br label %BS_LABEL_1
+
+BS_LABEL_2:                                       ; preds = %BS_LABEL_1
+  %sub = or i32 %p_2218_0.3, 1
+  br label %for.cond4
+
+for.cond4:                                        ; preds = %BS_LABEL_1, %BS_LABEL_2
+  %p_2218_0.2 = phi i32 [ 0, %BS_LABEL_1 ], [ %sub, %BS_LABEL_2 ]
+  br i1 false, label %for.cond4.for.cond_crit_edge, label %BS_LABEL_1
+
+for.cond4.for.cond_crit_edge:                     ; preds = %for.cond4
+  br label %for.cond
+
+BS_LABEL_1:                                       ; preds = %for.cond4, %for.cond
+  %p_2218_0.3 = phi i32 [ %p_2218_0.2, %for.cond4 ], [ %p_2218_0.1, %for.cond ]
+  switch i32 %cond, label %unreachable [
+    i32 0, label %for.cond4
+    i32 4, label %BS_LABEL_2
+    i32 1, label %for.cond
+    i32 6, label %for.cond
+  ]
+
+unreachable:                                      ; preds = %BS_LABEL_1
+  unreachable
+}
+
+

>From a3bd7e4aa5bf504b89c563662cfe18f0cfd92174 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Mon, 1 Dec 2025 22:36:10 +0800
Subject: [PATCH 2/3] format

---
 llvm/lib/CodeGen/MachineBasicBlock.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 10bf18b7fcb6d..f3fa52150bee9 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1458,7 +1458,6 @@ bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ,
                       << printMBBReference(*this) << '\n');
     return false;
   }
-
   return true;
 }
 

>From 23cc659e44ad84bcc0c91485e26a26a03b24a049 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 2 Dec 2025 13:05:23 +0800
Subject: [PATCH 3/3] add mir test

---
 .../test/CodeGen/NVPTX/switch-loop-header.mir | 182 ++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100644 llvm/test/CodeGen/NVPTX/switch-loop-header.mir

diff --git a/llvm/test/CodeGen/NVPTX/switch-loop-header.mir b/llvm/test/CodeGen/NVPTX/switch-loop-header.mir
new file mode 100644
index 0000000000000..4d86bb879f18f
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/switch-loop-header.mir
@@ -0,0 +1,182 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -o - %s -passes="require<machine-loops>,require<live-vars>,phi-node-elimination" | FileCheck %s
+
+--- |
+  target datalayout = "e-p6:32:32-i64:64-i128:128-i256:256-v16:16-v32:32-n16:32:64"
+  target triple = "nvptx64-unknown-nvidiacl"
+
+  define void @func_26(i32 %BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.) {
+  entry:
+    br label %for.cond
+
+  for.cond:                                         ; preds = %BS_LABEL_1, %BS_LABEL_1, %entry
+    %p_2218_0.1 = phi i32 [ 0, %entry ], [ %p_2218_0.3, %BS_LABEL_1 ], [ %p_2218_0.3, %BS_LABEL_1 ]
+    br label %BS_LABEL_1
+
+  BS_LABEL_2:                                       ; preds = %BS_LABEL_1
+    %sub = or i32 %p_2218_0.3, 1
+    br label %for.cond4
+
+  for.cond4:                                        ; preds = %BS_LABEL_1, %BS_LABEL_2
+    %p_2218_0.2 = phi i32 [ %BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.BS_COND_16.0., %BS_LABEL_1 ], [ %sub, %BS_LABEL_2 ]
+    br label %BS_LABEL_1
+
+  BS_LABEL_1:                                       ; preds = %for.cond4, %for.cond
+    %p_2218_0.3 = phi i32 [ %p_2218_0.2, %for.cond4 ], [ %p_2218_0.1, %for.cond ]
+    switch i32 %BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.BS_COND_16.0., label %unreachable [
+      i32 0, label %for.cond4
+      i32 4, label %BS_LABEL_2
+      i32 1, label %for.cond
+      i32 6, label %for.cond
+    ]
+
+  unreachable:                                      ; preds = %BS_LABEL_1
+    call void asm sideeffect "exit;", ""()
+    unreachable
+  }
+...
+---
+name:            func_26
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+noPhis:          false
+isSSA:           true
+noVRegs:         false
+hasFakeUses:     false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: b32, preferred-register: '', flags: [  ] }
+  - { id: 1, class: b32, preferred-register: '', flags: [  ] }
+  - { id: 2, class: b32, preferred-register: '', flags: [  ] }
+  - { id: 3, class: b32, preferred-register: '', flags: [  ] }
+  - { id: 4, class: b32, preferred-register: '', flags: [  ] }
+  - { id: 5, class: b32, preferred-register: '', flags: [  ] }
+  - { id: 6, class: b32, preferred-register: '', flags: [  ] }
+  - { id: 7, class: b1, preferred-register: '', flags: [  ] }
+  - { id: 8, class: b32, preferred-register: '', flags: [  ] }
+  - { id: 9, class: b1, preferred-register: '', flags: [  ] }
+  - { id: 10, class: b32, preferred-register: '', flags: [  ] }
+  - { id: 11, class: b1, preferred-register: '', flags: [  ] }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+fixedStack:      []
+stack:           []
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+jumpTable:
+  kind:            inline
+  entries:
+    - id:              0
+      blocks:          [ '%bb.3', '%bb.1', '%bb.6', '%bb.6', '%bb.2', '%bb.6',
+                         '%bb.1' ]
+body:             |
+  ; CHECK-LABEL: name: func_26
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead [[DEF:%[0-9]+]]:b32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead [[DEF1:%[0-9]+]]:b1 = IMPLICIT_DEF
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead [[DEF2:%[0-9]+]]:b32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   GOTO %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.6(0x00000000), %bb.5(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   CBranch undef [[DEF1]], %bb.6
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5:
+  ; CHECK-NEXT:   successors: %bb.3(0x3e000000), %bb.1(0x04000000), %bb.6(0x00000000), %bb.2(0x3e000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   BRX_START 0
+  ; CHECK-NEXT:   BRX_ITEM %bb.3
+  ; CHECK-NEXT:   BRX_ITEM %bb.1
+  ; CHECK-NEXT:   BRX_ITEM %bb.6
+  ; CHECK-NEXT:   BRX_ITEM %bb.6
+  ; CHECK-NEXT:   BRX_ITEM %bb.2
+  ; CHECK-NEXT:   BRX_ITEM %bb.6
+  ; CHECK-NEXT:   BRX_END %bb.1, undef [[DEF]], 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.6:
+  bb.0:
+    successors: %bb.1(0x80000000)
+
+    %10:b32 = IMPLICIT_DEF
+    %11:b1 = IMPLICIT_DEF
+
+  bb.1:
+    successors: %bb.4(0x80000000)
+
+    %0:b32 = PHI undef %10, %bb.0, undef %0, %bb.5
+    GOTO %bb.4
+
+  bb.2:
+    successors: %bb.3(0x80000000)
+
+  bb.3:
+    successors: %bb.4(0x80000000)
+
+  bb.4:
+    successors: %bb.6(0x00000000), %bb.5(0x80000000)
+
+    CBranch undef %11, %bb.6
+
+  bb.5:
+    successors: %bb.3(0x3e000000), %bb.1(0x04000000), %bb.6(0x00000000), %bb.2(0x3e000000)
+
+    BRX_START 0
+    BRX_ITEM %bb.3
+    BRX_ITEM %bb.1
+    BRX_ITEM %bb.6
+    BRX_ITEM %bb.6
+    BRX_ITEM %bb.2
+    BRX_ITEM %bb.6
+    BRX_END %bb.1, undef %10, 0
+
+  bb.6:
+...



More information about the llvm-commits mailing list