[llvm] 745e1e6 - [CodeGen] Ignore requiresStructuredCFG check in canSplitCriticalEdge if successor is loop header (#154063)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 26 02:25:41 PDT 2025
Author: Wenju He
Date: 2025-09-26T17:25:37+08:00
New Revision: 745e1e6ad5d40ff8f1553e62c48554a61611ee76
URL: https://github.com/llvm/llvm-project/commit/745e1e6ad5d40ff8f1553e62c48554a61611ee76
DIFF: https://github.com/llvm/llvm-project/commit/745e1e6ad5d40ff8f1553e62c48554a61611ee76.diff
LOG: [CodeGen] Ignore requiresStructuredCFG check in canSplitCriticalEdge if successor is loop header (#154063)
This addresses a performance issue for our downstream GPU target that
sets requiresStructuredCFG to true. The issue is that the EarlyMachineLICM
pass does not hoist loop invariants because a critical edge is not
split.
The critical edge's destination is a loop header. Splitting the critical
edge will not break structured CFG.
Add an NVPTX test to demonstrate the issue since the target also
requires structured CFG.
---------
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
Added:
llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir
Modified:
llvm/include/llvm/CodeGen/MachineBasicBlock.h
llvm/lib/CodeGen/MachineBasicBlock.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 6d026796e93b7..71739278cf513 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -1040,7 +1040,9 @@ class MachineBasicBlock
/// Succ, can be split. If this returns true a subsequent call to
/// SplitCriticalEdge is guaranteed to return a valid basic block if
/// no changes occurred in the meantime.
- LLVM_ABI bool canSplitCriticalEdge(const MachineBasicBlock *Succ) const;
+ LLVM_ABI bool
+ canSplitCriticalEdge(const MachineBasicBlock *Succ,
+ const MachineLoopInfo *MLI = nullptr) const;
void pop_front() { Insts.pop_front(); }
void pop_back() { Insts.pop_back(); }
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index bc1df26db2684..1cb57a4fa4258 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1180,7 +1180,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
MachineBasicBlock *Succ, const SplitCriticalEdgeAnalyses &Analyses,
std::vector<SparseBitVector<>> *LiveInSets, MachineDomTreeUpdater *MDTU) {
- if (!canSplitCriticalEdge(Succ))
+ if (!canSplitCriticalEdge(Succ, Analyses.MLI))
return nullptr;
MachineFunction *MF = getParent();
@@ -1408,8 +1408,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
return NMBB;
}
-bool MachineBasicBlock::canSplitCriticalEdge(
- const MachineBasicBlock *Succ) const {
+bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ,
+ const MachineLoopInfo *MLI) const {
// Splitting the critical edge to a landing pad block is non-trivial. Don't do
// it in this generic function.
if (Succ->isEHPad())
@@ -1423,8 +1423,17 @@ bool MachineBasicBlock::canSplitCriticalEdge(
const MachineFunction *MF = getParent();
// Performance might be harmed on HW that implements branching using exec mask
// where both sides of the branches are always executed.
- if (MF->getTarget().requiresStructuredCFG())
+
+ if (MF->getTarget().requiresStructuredCFG()) {
+ // If `Succ` is a loop header, splitting the critical edge will not
+ // break structured CFG.
+ if (MLI) {
+ const MachineLoop *L = MLI->getLoopFor(Succ);
+ return L && L->getHeader() == Succ;
+ }
+
return false;
+ }
// Do we have an Indirect jump with a jumptable that we can rewrite?
int JTI = findJumpTableIndex(*this);
diff --git a/llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir b/llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir
new file mode 100644
index 0000000000000..0b2d85600a2ef
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir
@@ -0,0 +1,80 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=nvptx64 -mcpu=sm_20 -run-pass=early-machinelicm %s -o - | FileCheck %s
+
+# This test checks that the early-machineLICM pass successfully creates a new
+# loop preheader by splitting the critical edge and hoisting the loop invariant
+# value `%8` to the preheader.
+# Since the critical edge successor is a loop header, the splitting does not
+# break the structured CFG, which is a requirement for the NVPTX target.
+
+---
+name: test_hoist
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: b64, preferred-register: '', flags: [ ] }
+ - { id: 1, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 2, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 3, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 4, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 5, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 6, class: b64, preferred-register: '', flags: [ ] }
+ - { id: 7, class: b1, preferred-register: '', flags: [ ] }
+ - { id: 8, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 9, class: b1, preferred-register: '', flags: [ ] }
+body: |
+ ; CHECK-LABEL: name: test_hoist
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.3(0x50000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[LD_i32_:%[0-9]+]]:b32 = LD_i32 0, 0, 101, 3, 32, &test_hoist_param_1, 0 :: (dereferenceable invariant load (s32), addrspace 101)
+ ; CHECK-NEXT: [[LD_i64_:%[0-9]+]]:b64 = LD_i64 0, 0, 101, 3, 64, &test_hoist_param_0, 0 :: (dereferenceable invariant load (s64), addrspace 101)
+ ; CHECK-NEXT: [[ADD64ri:%[0-9]+]]:b64 = nuw ADD64ri killed [[LD_i64_]], 2
+ ; CHECK-NEXT: [[LD_i32_1:%[0-9]+]]:b32 = LD_i32 0, 0, 1, 3, 32, [[ADD64ri]], 0
+ ; CHECK-NEXT: [[SETP_i32ri:%[0-9]+]]:b1 = SETP_i32ri [[LD_i32_]], 0, 0
+ ; CHECK-NEXT: CBranch killed [[SETP_i32ri]], %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[ADD32ri:%[0-9]+]]:b32 = ADD32ri [[LD_i32_]], -1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:b32 = PHI [[LD_i32_1]], %bb.3, %3, %bb.1
+ ; CHECK-NEXT: [[SREM32rr:%[0-9]+]]:b32 = SREM32rr [[PHI]], [[ADD32ri]]
+ ; CHECK-NEXT: [[SETP_i32ri1:%[0-9]+]]:b1 = SETP_i32ri [[SREM32rr]], 0, 1
+ ; CHECK-NEXT: CBranch killed [[SETP_i32ri1]], %bb.1
+ ; CHECK-NEXT: GOTO %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:b32 = PHI [[LD_i32_1]], %bb.0, [[SREM32rr]], %bb.1
+ ; CHECK-NEXT: ST_i32 [[PHI1]], 0, 0, 1, 32, [[ADD64ri]], 0
+ ; CHECK-NEXT: Return
+ bb.0.entry:
+ successors: %bb.2(0x30000000), %bb.1(0x50000000)
+
+ %5:b32 = LD_i32 0, 0, 101, 3, 32, &test_hoist_param_1, 0 :: (dereferenceable invariant load (s32), addrspace 101)
+ %6:b64 = LD_i64 0, 0, 101, 3, 64, &test_hoist_param_0, 0 :: (dereferenceable invariant load (s64), addrspace 101)
+ %0:b64 = nuw ADD64ri killed %6, 2
+ %1:b32 = LD_i32 0, 0, 1, 3, 32, %0, 0
+ %7:b1 = SETP_i32ri %5, 0, 0
+ CBranch killed %7, %bb.2
+ GOTO %bb.1
+
+
+ bb.1:
+ successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+
+ %2:b32 = PHI %1, %bb.0, %3, %bb.1
+ %8:b32 = ADD32ri %5, -1
+ %3:b32 = SREM32rr %2, %8
+ %9:b1 = SETP_i32ri %3, 0, 1
+ CBranch killed %9, %bb.1
+ GOTO %bb.2
+
+ bb.2:
+ %4:b32 = PHI %1, %bb.0, %3, %bb.1
+ ST_i32 %4, 0, 0, 1, 32, %0, 0
+ Return
+...
More information about the llvm-commits
mailing list