[llvm] [CodeGen] Ignore requiresStructuredCFG check in canSplitCriticalEdge if successor is loop header (PR #154063)
Wenju He via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 23 22:28:34 PDT 2025
https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/154063
>From 1c2a9e2b97910e1d59852999ae5b2196d1976ebe Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Mon, 18 Aug 2025 06:37:03 +0200
Subject: [PATCH 1/5] [CodeGen] Ignore requiresStructuredCFG check in
canSplitCriticalEdge if successor is loop header
This addresses a performance issue for our downstream GPU target, which
sets requiresStructuredCFG to true. The issue is that the EarlyMachineLICM
pass does not hoist loop invariants because a critical edge is not split.
---
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 4 +++-
llvm/lib/CodeGen/MachineBasicBlock.cpp | 15 ++++++++++++---
2 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 9e3d9196cc184..78e5dd99eab06 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -1035,7 +1035,9 @@ class MachineBasicBlock
/// Succ, can be split. If this returns true a subsequent call to
/// SplitCriticalEdge is guaranteed to return a valid basic block if
/// no changes occurred in the meantime.
- LLVM_ABI bool canSplitCriticalEdge(const MachineBasicBlock *Succ) const;
+ LLVM_ABI bool
+ canSplitCriticalEdge(const MachineBasicBlock *Succ,
+ const SplitCriticalEdgeAnalyses &Analyses = {}) const;
void pop_front() { Insts.pop_front(); }
void pop_back() { Insts.pop_back(); }
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index c3c5a0f5102d7..82c852bb0f93b 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1160,7 +1160,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
MachineBasicBlock *Succ, const SplitCriticalEdgeAnalyses &Analyses,
std::vector<SparseBitVector<>> *LiveInSets, MachineDomTreeUpdater *MDTU) {
- if (!canSplitCriticalEdge(Succ))
+ if (!canSplitCriticalEdge(Succ, Analyses))
return nullptr;
MachineFunction *MF = getParent();
@@ -1389,7 +1389,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
}
bool MachineBasicBlock::canSplitCriticalEdge(
- const MachineBasicBlock *Succ) const {
+ const MachineBasicBlock *Succ,
+ const SplitCriticalEdgeAnalyses &Analyses) const {
// Splitting the critical edge to a landing pad block is non-trivial. Don't do
// it in this generic function.
if (Succ->isEHPad())
@@ -1403,7 +1404,15 @@ bool MachineBasicBlock::canSplitCriticalEdge(
const MachineFunction *MF = getParent();
// Performance might be harmed on HW that implements branching using exec mask
// where both sides of the branches are always executed.
- if (MF->getTarget().requiresStructuredCFG())
+ // However, if `Succ` is a loop header, splitting the critical edge will not
+ // break structured CFG.
+ auto SuccIsLoopHeader = [&]() {
+ if (MachineLoopInfo *MLI = Analyses.MLI)
+ if (MachineLoop *L = MLI->getLoopFor(Succ); L && L->getHeader() == Succ)
+ return true;
+ return false;
+ };
+ if (MF->getTarget().requiresStructuredCFG() && !SuccIsLoopHeader())
return false;
// Do we have an Indirect jump with a jumptable that we can rewrite?
>From 2a193599a5d8c0d506ef53a3d8aebc2c10ac9e00 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Mon, 1 Sep 2025 08:19:52 +0200
Subject: [PATCH 2/5] pass MLI as new arg
---
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 2 +-
llvm/lib/CodeGen/MachineBasicBlock.cpp | 9 ++++-----
2 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 78e5dd99eab06..7df34a76912dd 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -1037,7 +1037,7 @@ class MachineBasicBlock
/// no changes occurred in the meantime.
LLVM_ABI bool
canSplitCriticalEdge(const MachineBasicBlock *Succ,
- const SplitCriticalEdgeAnalyses &Analyses = {}) const;
+ const MachineLoopInfo *MLI = nullptr) const;
void pop_front() { Insts.pop_front(); }
void pop_back() { Insts.pop_back(); }
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 82c852bb0f93b..8c795f812df09 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1160,7 +1160,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
MachineBasicBlock *Succ, const SplitCriticalEdgeAnalyses &Analyses,
std::vector<SparseBitVector<>> *LiveInSets, MachineDomTreeUpdater *MDTU) {
- if (!canSplitCriticalEdge(Succ, Analyses))
+ if (!canSplitCriticalEdge(Succ, Analyses.MLI))
return nullptr;
MachineFunction *MF = getParent();
@@ -1388,9 +1388,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
return NMBB;
}
-bool MachineBasicBlock::canSplitCriticalEdge(
- const MachineBasicBlock *Succ,
- const SplitCriticalEdgeAnalyses &Analyses) const {
+bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ,
+ const MachineLoopInfo *MLI) const {
// Splitting the critical edge to a landing pad block is non-trivial. Don't do
// it in this generic function.
if (Succ->isEHPad())
@@ -1407,7 +1406,7 @@ bool MachineBasicBlock::canSplitCriticalEdge(
// However, if `Succ` is a loop header, splitting the critical edge will not
// break structured CFG.
auto SuccIsLoopHeader = [&]() {
- if (MachineLoopInfo *MLI = Analyses.MLI)
+ if (MLI)
if (MachineLoop *L = MLI->getLoopFor(Succ); L && L->getHeader() == Succ)
return true;
return false;
>From f9217abe32b0bb5e21aff8b9c22916bc0008c820 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Mon, 1 Sep 2025 10:18:04 +0200
Subject: [PATCH 3/5] add nvptx test
---
.../NVPTX/machinelicm-no-preheader.mir | 72 +++++++++++++++++++
1 file changed, 72 insertions(+)
create mode 100644 llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir
diff --git a/llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir b/llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir
new file mode 100644
index 0000000000000..f2f0ffdec8094
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir
@@ -0,0 +1,72 @@
+# RUN: llc -mtriple=nvptx64 -mcpu=sm_20 -run-pass=early-machinelicm %s -verify-machineinstrs -o - | FileCheck %s
+
+# This test checks that the early-machinelicm pass creates a new loop
+# preheader by splitting the critical edge, and then hoists the loop-invariant
+# value `%18` into that preheader.
+# Because the successor of the critical edge is a loop header, splitting the
+# edge does not break the structured CFG that the NVPTX target requires.
+
+---
+name: test_hoist
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: b64, preferred-register: '', flags: [ ] }
+ - { id: 1, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 2, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 3, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 4, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 5, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 6, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 7, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 8, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 9, class: b64, preferred-register: '', flags: [ ] }
+ - { id: 10, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 11, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 12, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 13, class: b64, preferred-register: '', flags: [ ] }
+ - { id: 14, class: b64, preferred-register: '', flags: [ ] }
+ - { id: 15, class: b64, preferred-register: '', flags: [ ] }
+ - { id: 16, class: b1, preferred-register: '', flags: [ ] }
+ - { id: 17, class: b1, preferred-register: '', flags: [ ] }
+ - { id: 18, class: b32, preferred-register: '', flags: [ ] }
+body: |
+ bb.0.entry:
+ successors: %bb.2(0x30000000), %bb.1(0x50000000)
+
+ %8:b32 = LD_i32 0, 0, 101, 3, 32, &test_hoist_param_2, 0 :: (dereferenceable invariant load (s32), addrspace 101)
+ %7:b32 = LD_i32 0, 0, 101, 3, 32, &test_hoist_param_1, 0 :: (dereferenceable invariant load (s32), addrspace 101)
+ %9:b64 = LD_i64 0, 0, 101, 3, 64, &test_hoist_param_0, 0 :: (dereferenceable invariant load (s64), addrspace 101)
+ %10:b32 = INT_PTX_SREG_CTAID_x
+ %11:b32 = INT_PTX_SREG_NTID_x
+ %12:b32 = INT_PTX_SREG_TID_x
+ %13:b64 = CVT_u64_u32 killed %12, 0
+ %14:b64 = nuw MAD_WIDE_U32rrr killed %11, killed %10, killed %13
+ %15:b64 = nuw nsw SHL64_ri killed %14, 2
+ %0:b64 = nuw ADD64rr killed %9, killed %15
+ %1:b32 = LD_i32 0, 0, 1, 3, 32, %0, 0
+ %16:b1 = SETP_i32ri %8, 0, 0
+ CBranch killed %16, %bb.2
+ GOTO %bb.1
+
+ ; CHECK: bb.3:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: %18:b32 = ADD32ri %7, -1
+ ; CHECK: bb.1:
+
+ bb.1:
+ successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+
+ %2:b32 = PHI %8, %bb.0, %5, %bb.1
+ %3:b32 = PHI %1, %bb.0, %4, %bb.1
+ %18:b32 = ADD32ri %7, -1
+ %4:b32 = SREM32rr %3, %18
+ %5:b32 = ADD32ri %2, -1
+ %17:b1 = SETP_i32ri %5, 0, 1
+ CBranch killed %17, %bb.1
+ GOTO %bb.2
+
+ bb.2:
+ %6:b32 = PHI %1, %bb.0, %4, %bb.1
+ ST_i32 %6, 0, 0, 1, 32, %0, 0
+ Return
+...
>From ced084c62f75c554ed0dd6087407f6203c11a1bb Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Wed, 24 Sep 2025 05:20:44 +0200
Subject: [PATCH 4/5] fix test
---
.../NVPTX/machinelicm-no-preheader.mir | 26 ++++++++++---------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir b/llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir
index f2f0ffdec8094..7a1fc6dda5472 100644
--- a/llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir
+++ b/llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir
@@ -22,13 +22,14 @@ registers:
- { id: 9, class: b64, preferred-register: '', flags: [ ] }
- { id: 10, class: b32, preferred-register: '', flags: [ ] }
- { id: 11, class: b32, preferred-register: '', flags: [ ] }
- - { id: 12, class: b32, preferred-register: '', flags: [ ] }
- - { id: 13, class: b64, preferred-register: '', flags: [ ] }
+ - { id: 12, class: b64, preferred-register: '', flags: [ ] }
+ - { id: 13, class: b32, preferred-register: '', flags: [ ] }
- { id: 14, class: b64, preferred-register: '', flags: [ ] }
- { id: 15, class: b64, preferred-register: '', flags: [ ] }
- - { id: 16, class: b1, preferred-register: '', flags: [ ] }
+ - { id: 16, class: b64, preferred-register: '', flags: [ ] }
- { id: 17, class: b1, preferred-register: '', flags: [ ] }
- { id: 18, class: b32, preferred-register: '', flags: [ ] }
+ - { id: 19, class: b1, preferred-register: '', flags: [ ] }
body: |
bb.0.entry:
successors: %bb.2(0x30000000), %bb.1(0x50000000)
@@ -38,14 +39,15 @@ body: |
%9:b64 = LD_i64 0, 0, 101, 3, 64, &test_hoist_param_0, 0 :: (dereferenceable invariant load (s64), addrspace 101)
%10:b32 = INT_PTX_SREG_CTAID_x
%11:b32 = INT_PTX_SREG_NTID_x
- %12:b32 = INT_PTX_SREG_TID_x
- %13:b64 = CVT_u64_u32 killed %12, 0
- %14:b64 = nuw MAD_WIDE_U32rrr killed %11, killed %10, killed %13
- %15:b64 = nuw nsw SHL64_ri killed %14, 2
- %0:b64 = nuw ADD64rr killed %9, killed %15
+ %12:b64 = MUL_WIDEu32_rr killed %11, killed %10
+ %13:b32 = INT_PTX_SREG_TID_x
+ %14:b64 = CVT_u64_u32 killed %13, 0
+ %15:b64 = nuw ADD64rr killed %12, killed %14
+ %16:b64 = nuw nsw SHL64_ri killed %15, 2
+ %0:b64 = nuw ADD64rr killed %9, killed %16
%1:b32 = LD_i32 0, 0, 1, 3, 32, %0, 0
- %16:b1 = SETP_i32ri %8, 0, 0
- CBranch killed %16, %bb.2
+ %17:b1 = SETP_i32ri %8, 0, 0
+ CBranch killed %17, %bb.2
GOTO %bb.1
; CHECK: bb.3:
@@ -61,8 +63,8 @@ body: |
%18:b32 = ADD32ri %7, -1
%4:b32 = SREM32rr %3, %18
%5:b32 = ADD32ri %2, -1
- %17:b1 = SETP_i32ri %5, 0, 1
- CBranch killed %17, %bb.1
+ %19:b1 = SETP_i32ri %5, 0, 1
+ CBranch killed %19, %bb.1
GOTO %bb.2
bb.2:
>From 865d70f72252f589f8232af8025c84ac4b387059 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Wed, 24 Sep 2025 07:27:16 +0200
Subject: [PATCH 5/5] remove lambda
---
llvm/lib/CodeGen/MachineBasicBlock.cpp | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index efc8ad50b53d7..87d7e8048dec6 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1405,13 +1405,12 @@ bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ,
// where both sides of the branches are always executed.
// However, if `Succ` is a loop header, splitting the critical edge will not
// break structured CFG.
- auto SuccIsLoopHeader = [&]() {
- if (MLI)
- if (MachineLoop *L = MLI->getLoopFor(Succ); L && L->getHeader() == Succ)
- return true;
- return false;
- };
- if (MF->getTarget().requiresStructuredCFG() && !SuccIsLoopHeader())
+ bool SuccIsLoopHeader = false;
+ if (MLI) {
+ const MachineLoop *L = MLI->getLoopFor(Succ);
+ SuccIsLoopHeader = L && L->getHeader() == Succ;
+ }
+ if (MF->getTarget().requiresStructuredCFG() && !SuccIsLoopHeader)
return false;
// Do we have an Indirect jump with a jumptable that we can rewrite?
More information about the llvm-commits
mailing list