[llvm] [MachineBasicBlock] Don't split loop header successor if the terminator is unanalyzable (PR #170146)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 1 06:31:35 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-nvptx

Author: Hongyu Chen (XChy)

<details>
<summary>Changes</summary>

Fixes https://github.com/llvm/llvm-project/issues/170051
The previous implementation allows splitting the successor if it's the loop header, regardless of whether the terminator of `this` is analyzable.

---
Full diff: https://github.com/llvm/llvm-project/pull/170146.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/MachineBasicBlock.cpp (+6-6) 
- (added) llvm/test/CodeGen/NVPTX/switch.ll (+73) 


``````````diff
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index ba0b025167307..10bf18b7fcb6d 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1425,14 +1425,13 @@ bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ,
   // where both sides of the branches are always executed.
 
   if (MF->getTarget().requiresStructuredCFG()) {
+    if (!MLI)
+      return false;
+    const MachineLoop *L = MLI->getLoopFor(Succ);
     // If `Succ` is a loop header, splitting the critical edge will not
     // break structured CFG.
-    if (MLI) {
-      const MachineLoop *L = MLI->getLoopFor(Succ);
-      return L && L->getHeader() == Succ;
-    }
-
-    return false;
+    if (!L || L->getHeader() != Succ)
+      return false;
   }
 
   // Do we have an Indirect jump with a jumptable that we can rewrite?
@@ -1459,6 +1458,7 @@ bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ,
                       << printMBBReference(*this) << '\n');
     return false;
   }
+
   return true;
 }
 
diff --git a/llvm/test/CodeGen/NVPTX/switch.ll b/llvm/test/CodeGen/NVPTX/switch.ll
new file mode 100644
index 0000000000000..7fcfcfbb85d00
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/switch.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+
+target triple = "nvptx64-unknown-nvidiacl"
+
+define void @pr170051(i32 %cond) {
+; CHECK-LABEL: pr170051(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<2>;
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    mov.b32 %r2, 0;
+; CHECK-NEXT:    ld.param.b32 %r1, [pr170051_param_0];
+; CHECK-NEXT:    setp.gt.u32 %p1, %r1, 6;
+; CHECK-NEXT:    bra.uni $L__BB0_3;
+; CHECK-NEXT:  $L__BB0_1: // %BS_LABEL_2
+; CHECK-NEXT:    // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    or.b32 %r3, %r2, 1;
+; CHECK-NEXT:  $L__BB0_2: // %for.cond4
+; CHECK-NEXT:    // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    mov.b32 %r2, %r3;
+; CHECK-NEXT:  $L__BB0_3: // %BS_LABEL_1
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    @%p1 bra $L__BB0_5;
+; CHECK-NEXT:  // %bb.4: // %BS_LABEL_1
+; CHECK-NEXT:    // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    mov.b32 %r3, %r1;
+; CHECK-NEXT:    $L_brx_0: .branchtargets
+; CHECK-NEXT:     $L__BB0_2,
+; CHECK-NEXT:     $L__BB0_3,
+; CHECK-NEXT:     $L__BB0_5,
+; CHECK-NEXT:     $L__BB0_5,
+; CHECK-NEXT:     $L__BB0_1,
+; CHECK-NEXT:     $L__BB0_5,
+; CHECK-NEXT:     $L__BB0_3;
+; CHECK-NEXT:    brx.idx %r1, $L_brx_0;
+; CHECK-NEXT:  $L__BB0_5: // %unreachable
+; CHECK-NEXT:    // begin inline asm
+; CHECK-NEXT:    exit;
+; CHECK-NEXT:    // end inline asm
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond4.for.cond_crit_edge, %BS_LABEL_1, %BS_LABEL_1, %entry
+  %p_2218_0.1 = phi i32 [ 0, %entry ], [ %p_2218_0.3, %BS_LABEL_1 ], [ %p_2218_0.3, %BS_LABEL_1 ], [ poison, %for.cond4.for.cond_crit_edge ]
+  br label %BS_LABEL_1
+
+BS_LABEL_2:                                       ; preds = %BS_LABEL_1
+  %sub = or i32 %p_2218_0.3, 1
+  br label %for.cond4
+
+for.cond4:                                        ; preds = %BS_LABEL_1, %BS_LABEL_2
+  %p_2218_0.2 = phi i32 [ 0, %BS_LABEL_1 ], [ %sub, %BS_LABEL_2 ]
+  br i1 false, label %for.cond4.for.cond_crit_edge, label %BS_LABEL_1
+
+for.cond4.for.cond_crit_edge:                     ; preds = %for.cond4
+  br label %for.cond
+
+BS_LABEL_1:                                       ; preds = %for.cond4, %for.cond
+  %p_2218_0.3 = phi i32 [ %p_2218_0.2, %for.cond4 ], [ %p_2218_0.1, %for.cond ]
+  switch i32 %cond, label %unreachable [
+    i32 0, label %for.cond4
+    i32 4, label %BS_LABEL_2
+    i32 1, label %for.cond
+    i32 6, label %for.cond
+  ]
+
+unreachable:                                      ; preds = %BS_LABEL_1
+  unreachable
+}
+
+

``````````

</details>


https://github.com/llvm/llvm-project/pull/170146


More information about the llvm-commits mailing list