[llvm] bd21095 - [MachineBasicBlock] Don't split loop header successor if the terminator is unanalyzable (#170146)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 3 07:56:04 PST 2025
Author: Hongyu Chen
Date: 2025-12-03T15:55:59Z
New Revision: bd21095d8ba0bff04f5718096601638ecf9270db
URL: https://github.com/llvm/llvm-project/commit/bd21095d8ba0bff04f5718096601638ecf9270db
DIFF: https://github.com/llvm/llvm-project/commit/bd21095d8ba0bff04f5718096601638ecf9270db.diff
LOG: [MachineBasicBlock] Don't split loop header successor if the terminator is unanalyzable (#170146)
Fixes https://github.com/llvm/llvm-project/issues/170051
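On targets that require a structured CFG, the previous implementation of
`canSplitCriticalEdge` allowed splitting the critical edge whenever the
successor was a loop header, regardless of whether the terminator of `this`
was analyzable. The loop-header case now falls through to the existing
terminator checks instead of returning early.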
Added:
llvm/test/CodeGen/NVPTX/switch-loop-header.mir
llvm/test/CodeGen/NVPTX/switch.ll
Modified:
llvm/lib/CodeGen/MachineBasicBlock.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index ba0b025167307..be94e1e6d25b6 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1425,14 +1425,14 @@ bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ,
// where both sides of the branches are always executed.
if (MF->getTarget().requiresStructuredCFG()) {
- // If `Succ` is a loop header, splitting the critical edge will not
- // break structured CFG.
- if (MLI) {
- const MachineLoop *L = MLI->getLoopFor(Succ);
- return L && L->getHeader() == Succ;
- }
-
- return false;
+ if (!MLI)
+ return false;
+ const MachineLoop *L = MLI->getLoopFor(Succ);
+ // Only if `Succ` is a loop header, splitting the critical edge will not
+ // break structured CFG. And fallthrough to check if this's terminator is
+ // analyzable.
+ if (!L || L->getHeader() != Succ)
+ return false;
}
// Do we have an Indirect jump with a jumptable that we can rewrite?
diff --git a/llvm/test/CodeGen/NVPTX/switch-loop-header.mir b/llvm/test/CodeGen/NVPTX/switch-loop-header.mir
new file mode 100644
index 0000000000000..4d86bb879f18f
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/switch-loop-header.mir
@@ -0,0 +1,182 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -o - %s -passes="require<machine-loops>,require<live-vars>,phi-node-elimination" | FileCheck %s
+
+--- |
+ target datalayout = "e-p6:32:32-i64:64-i128:128-i256:256-v16:16-v32:32-n16:32:64"
+ target triple = "nvptx64-unknown-nvidiacl"
+
+ define void @func_26(i32 %BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.) {
+ entry:
+ br label %for.cond
+
+ for.cond: ; preds = %BS_LABEL_1, %BS_LABEL_1, %entry
+ %p_2218_0.1 = phi i32 [ 0, %entry ], [ %p_2218_0.3, %BS_LABEL_1 ], [ %p_2218_0.3, %BS_LABEL_1 ]
+ br label %BS_LABEL_1
+
+ BS_LABEL_2: ; preds = %BS_LABEL_1
+ %sub = or i32 %p_2218_0.3, 1
+ br label %for.cond4
+
+ for.cond4: ; preds = %BS_LABEL_1, %BS_LABEL_2
+ %p_2218_0.2 = phi i32 [ %BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.BS_COND_16.0., %BS_LABEL_1 ], [ %sub, %BS_LABEL_2 ]
+ br label %BS_LABEL_1
+
+ BS_LABEL_1: ; preds = %for.cond4, %for.cond
+ %p_2218_0.3 = phi i32 [ %p_2218_0.2, %for.cond4 ], [ %p_2218_0.1, %for.cond ]
+ switch i32 %BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.BS_COND_16.0., label %unreachable [
+ i32 0, label %for.cond4
+ i32 4, label %BS_LABEL_2
+ i32 1, label %for.cond
+ i32 6, label %for.cond
+ ]
+
+ unreachable: ; preds = %BS_LABEL_1
+ call void asm sideeffect "exit;", ""()
+ unreachable
+ }
+...
+---
+name: func_26
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: false
+isSSA: true
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 1, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 2, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 3, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 4, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 5, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 6, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 7, class: b1, preferred-register: '', flags: [ ] }
+ - { id: 8, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 9, class: b1, preferred-register: '', flags: [ ] }
+ - { id: 10, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 11, class: b1, preferred-register: '', flags: [ ] }
+liveins: []
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+jumpTable:
+ kind: inline
+ entries:
+ - id: 0
+ blocks: [ '%bb.3', '%bb.1', '%bb.6', '%bb.6', '%bb.2', '%bb.6',
+ '%bb.1' ]
+body: |
+ ; CHECK-LABEL: name: func_26
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead [[DEF:%[0-9]+]]:b32 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:b1 = IMPLICIT_DEF
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead [[DEF2:%[0-9]+]]:b32 = IMPLICIT_DEF
+ ; CHECK-NEXT: GOTO %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.6(0x00000000), %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CBranch undef [[DEF1]], %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.3(0x3e000000), %bb.1(0x04000000), %bb.6(0x00000000), %bb.2(0x3e000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: BRX_START 0
+ ; CHECK-NEXT: BRX_ITEM %bb.3
+ ; CHECK-NEXT: BRX_ITEM %bb.1
+ ; CHECK-NEXT: BRX_ITEM %bb.6
+ ; CHECK-NEXT: BRX_ITEM %bb.6
+ ; CHECK-NEXT: BRX_ITEM %bb.2
+ ; CHECK-NEXT: BRX_ITEM %bb.6
+ ; CHECK-NEXT: BRX_END %bb.1, undef [[DEF]], 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ bb.0:
+ successors: %bb.1(0x80000000)
+
+ %10:b32 = IMPLICIT_DEF
+ %11:b1 = IMPLICIT_DEF
+
+ bb.1:
+ successors: %bb.4(0x80000000)
+
+ %0:b32 = PHI undef %10, %bb.0, undef %0, %bb.5
+ GOTO %bb.4
+
+ bb.2:
+ successors: %bb.3(0x80000000)
+
+ bb.3:
+ successors: %bb.4(0x80000000)
+
+ bb.4:
+ successors: %bb.6(0x00000000), %bb.5(0x80000000)
+
+ CBranch undef %11, %bb.6
+
+ bb.5:
+ successors: %bb.3(0x3e000000), %bb.1(0x04000000), %bb.6(0x00000000), %bb.2(0x3e000000)
+
+ BRX_START 0
+ BRX_ITEM %bb.3
+ BRX_ITEM %bb.1
+ BRX_ITEM %bb.6
+ BRX_ITEM %bb.6
+ BRX_ITEM %bb.2
+ BRX_ITEM %bb.6
+ BRX_END %bb.1, undef %10, 0
+
+ bb.6:
+...
diff --git a/llvm/test/CodeGen/NVPTX/switch.ll b/llvm/test/CodeGen/NVPTX/switch.ll
new file mode 100644
index 0000000000000..7fcfcfbb85d00
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/switch.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+
+target triple = "nvptx64-unknown-nvidiacl"
+
+define void @pr170051(i32 %cond) {
+; CHECK-LABEL: pr170051(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: mov.b32 %r2, 0;
+; CHECK-NEXT: ld.param.b32 %r1, [pr170051_param_0];
+; CHECK-NEXT: setp.gt.u32 %p1, %r1, 6;
+; CHECK-NEXT: bra.uni $L__BB0_3;
+; CHECK-NEXT: $L__BB0_1: // %BS_LABEL_2
+; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: or.b32 %r3, %r2, 1;
+; CHECK-NEXT: $L__BB0_2: // %for.cond4
+; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: mov.b32 %r2, %r3;
+; CHECK-NEXT: $L__BB0_3: // %BS_LABEL_1
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: @%p1 bra $L__BB0_5;
+; CHECK-NEXT: // %bb.4: // %BS_LABEL_1
+; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: mov.b32 %r3, %r1;
+; CHECK-NEXT: $L_brx_0: .branchtargets
+; CHECK-NEXT: $L__BB0_2,
+; CHECK-NEXT: $L__BB0_3,
+; CHECK-NEXT: $L__BB0_5,
+; CHECK-NEXT: $L__BB0_5,
+; CHECK-NEXT: $L__BB0_1,
+; CHECK-NEXT: $L__BB0_5,
+; CHECK-NEXT: $L__BB0_3;
+; CHECK-NEXT: brx.idx %r1, $L_brx_0;
+; CHECK-NEXT: $L__BB0_5: // %unreachable
+; CHECK-NEXT: // begin inline asm
+; CHECK-NEXT: exit;
+; CHECK-NEXT: // end inline asm
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.cond4.for.cond_crit_edge, %BS_LABEL_1, %BS_LABEL_1, %entry
+ %p_2218_0.1 = phi i32 [ 0, %entry ], [ %p_2218_0.3, %BS_LABEL_1 ], [ %p_2218_0.3, %BS_LABEL_1 ], [ poison, %for.cond4.for.cond_crit_edge ]
+ br label %BS_LABEL_1
+
+BS_LABEL_2: ; preds = %BS_LABEL_1
+ %sub = or i32 %p_2218_0.3, 1
+ br label %for.cond4
+
+for.cond4: ; preds = %BS_LABEL_1, %BS_LABEL_2
+ %p_2218_0.2 = phi i32 [ 0, %BS_LABEL_1 ], [ %sub, %BS_LABEL_2 ]
+ br i1 false, label %for.cond4.for.cond_crit_edge, label %BS_LABEL_1
+
+for.cond4.for.cond_crit_edge: ; preds = %for.cond4
+ br label %for.cond
+
+BS_LABEL_1: ; preds = %for.cond4, %for.cond
+ %p_2218_0.3 = phi i32 [ %p_2218_0.2, %for.cond4 ], [ %p_2218_0.1, %for.cond ]
+ switch i32 %cond, label %unreachable [
+ i32 0, label %for.cond4
+ i32 4, label %BS_LABEL_2
+ i32 1, label %for.cond
+ i32 6, label %for.cond
+ ]
+
+unreachable: ; preds = %BS_LABEL_1
+ unreachable
+}
+
+
More information about the llvm-commits
mailing list