[llvm] [StructurizeCFG] Order IF Else block using Heuristics (PR #139605)
Vigneshwar Jayakumar via llvm-commits
llvm-commits at lists.llvm.org
Mon May 12 11:53:39 PDT 2025
https://github.com/VigneshwarJ updated https://github.com/llvm/llvm-project/pull/139605
>From d7da7dd35211a3a4d94ed657ac64cfd682aefe15 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Mon, 12 May 2025 13:15:28 -0500
Subject: [PATCH 1/2] [StructurizeCFG] Order IF Else block using Heuristics
Then and Else block order in SCC is arbitrary. But based on the
order, after structurization there are cases where there might be
extra VGPR copies due to interference during register coelescing.
This patch introduces heuristics to order the then and else block
based on the potential VGPR copies to maximize coelescing.
---
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 80 +++++++++++
.../StructurizeCFG/order-if-else.ll | 129 ++++++++++++++++++
2 files changed, 209 insertions(+)
create mode 100644 llvm/test/Transforms/StructurizeCFG/order-if-else.ll
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index eb22b50532695..ec54f53d6165b 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -307,6 +307,8 @@ class StructurizeCFG {
RegionNode *PrevNode;
+ void reorderIfElseBlock(BasicBlock *BB, unsigned Idx);
+
void orderNodes();
void analyzeLoops(RegionNode *N);
@@ -409,6 +411,31 @@ class StructurizeCFGLegacyPass : public RegionPass {
} // end anonymous namespace
+/// Helper function for heuristics to order if else block
+/// Checks whether an instruction is potential vector copy instruction, if so,
+/// checks if the operands are from different BB. if so, returns True.
+// Then there's a possibility of coelescing without interference when ordered
+// first.
+static bool hasAffectingInstructions(Instruction *I, BasicBlock *BB) {
+
+ if (!I || I->getParent() != BB)
+ return true;
+
+ // If the instruction is not a poterntial copy instructoin, return true.
+ if (!isa<ExtractElementInst>(*I) && !isa<ExtractValueInst>(*I))
+ return false;
+
+ // Check if any operands are instructions defined in the same block.
+ for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
+ if (auto *OpI = dyn_cast<Instruction>(I->getOperand(i))) {
+ if (OpI->getParent() == BB)
+ return false;
+ }
+ }
+
+ return true;
+}
+
char StructurizeCFGLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
@@ -419,6 +446,58 @@ INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
"Structurize the CFG", false, false)
+/// Then and Else block order in SCC is arbitrary. But based on the
+/// order, after structurization there are cases where there might be extra
+/// VGPR copies due to interference during register coelescing.
+/// eg:- incoming phi values from Else block contains only vgpr copies and
+/// incoming phis in Then block has are some modification for the vgprs.
+/// after structurization, there would be interference when coelesing when Then
+/// block is ordered first. But those copies can be coelesced when Else is
+/// ordered first.
+///
+/// This function checks the incoming phi values in the merge block and
+/// orders based on the following heuristics of Then and Else block. Checks
+/// whether an incoming phi can be potential copy instructions and if so
+/// checks whether copy within the block or not.
+/// Increases score if its a potential copy from outside the block.
+/// the higher scored block is ordered first.
+void StructurizeCFG::reorderIfElseBlock(BasicBlock *BB, unsigned Idx) {
+ BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
+
+ if (Term && Term->isConditional()) {
+ BasicBlock *ThenBB = Term->getSuccessor(0);
+ BasicBlock *ElseBB = Term->getSuccessor(1);
+ BasicBlock *ThenSucc = ThenBB->getSingleSuccessor();
+
+ if (BB == ThenBB->getSinglePredecessor() &&
+ (ThenBB->getSinglePredecessor() == ElseBB->getSinglePredecessor()) &&
+ (ThenSucc && ThenSucc == ElseBB->getSingleSuccessor())) {
+ unsigned ThenScore = 0, ElseScore = 0;
+
+ for (PHINode &Phi : ThenSucc->phis()) {
+ Value *ThenVal = Phi.getIncomingValueForBlock(ThenBB);
+ Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
+
+ if (auto *Inst = dyn_cast<Instruction>(ThenVal))
+ ThenScore += hasAffectingInstructions(Inst, ThenBB);
+ if (auto *Inst = dyn_cast<Instruction>(ElseVal))
+ ElseScore += hasAffectingInstructions(Inst, ElseBB);
+ }
+
+ if (ThenScore != ElseScore) {
+ if (ThenScore < ElseScore)
+ std::swap(ThenBB, ElseBB);
+
+ // reorder the last two inserted elements in Order
+ if (Idx >= 2 && Order[Idx - 1]->getEntry() == ElseBB &&
+ Order[Idx - 2]->getEntry() == ThenBB) {
+ std::swap(Order[Idx - 1], Order[Idx - 2]);
+ }
+ }
+ }
+ }
+}
+
/// Build up the general order of nodes, by performing a topological sort of the
/// parent region's nodes, while ensuring that there is no outer cycle node
/// between any two inner cycle nodes.
@@ -452,6 +531,7 @@ void StructurizeCFG::orderNodes() {
// Add the SCC nodes to the Order array.
for (const auto &N : SCC) {
assert(I < E && "SCC size mismatch!");
+ reorderIfElseBlock(N.first->getEntry(), I);
Order[I++] = N.first;
}
}
diff --git a/llvm/test/Transforms/StructurizeCFG/order-if-else.ll b/llvm/test/Transforms/StructurizeCFG/order-if-else.ll
new file mode 100644
index 0000000000000..02641f405f3b4
--- /dev/null
+++ b/llvm/test/Transforms/StructurizeCFG/order-if-else.ll
@@ -0,0 +1,129 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
+; RUN: opt -S -passes=structurizecfg %s -o - | FileCheck %s
+
+define amdgpu_kernel void @test_extractelement_1(<4 x i32> %vec, i1 %cond, ptr %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @test_extractelement_1(
+; CHECK-SAME: <4 x i32> [[VEC:%.*]], i1 [[COND:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT: br i1 [[COND_INV]], label %[[ELSE:.*]], label %[[FLOW:.*]]
+; CHECK: [[FLOW]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[A:%.*]], %[[ELSE]] ], [ poison, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ true, %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[MERGE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[X:%.*]] = extractelement <4 x i32> [[VEC]], i32 0
+; CHECK-NEXT: [[Z:%.*]] = add i32 [[X]], 1
+; CHECK-NEXT: br label %[[MERGE]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[A]] = extractelement <4 x i32> [[VEC]], i32 1
+; CHECK-NEXT: br label %[[FLOW]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], %[[FLOW]] ], [ [[Z]], %[[THEN]] ]
+; CHECK-NEXT: store i32 [[PHI]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %x = extractelement <4 x i32> %vec, i32 0
+ %z = add i32 %x, 1
+ br label %merge
+
+else:
+ %a = extractelement <4 x i32> %vec, i32 1
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %z, %then ], [ %a, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
+
+define amdgpu_kernel void @test_extractelement_2(<4 x i32> %vec, i1 %cond, ptr %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @test_extractelement_2(
+; CHECK-SAME: <4 x i32> [[VEC:%.*]], i1 [[COND:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT: br i1 [[COND_INV]], label %[[ELSE:.*]], label %[[FLOW:.*]]
+; CHECK: [[FLOW]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[A:%.*]], %[[ELSE]] ], [ poison, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ true, %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[MERGE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[X:%.*]] = extractelement <4 x i32> [[VEC]], i32 1
+; CHECK-NEXT: [[Y:%.*]] = add i32 [[X]], 1
+; CHECK-NEXT: [[VEC1:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
+; CHECK-NEXT: [[Z:%.*]] = extractelement <4 x i32> [[VEC1]], i32 0
+; CHECK-NEXT: br label %[[MERGE]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[A]] = extractelement <4 x i32> [[VEC]], i32 1
+; CHECK-NEXT: br label %[[FLOW]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], %[[FLOW]] ], [ [[Z]], %[[THEN]] ]
+; CHECK-NEXT: store i32 [[PHI]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %x = extractelement <4 x i32> %vec, i32 1
+ %y = add i32 %x, 1
+ %vec1 = insertelement <4 x i32> poison, i32 %y, i32 0
+ %z = extractelement <4 x i32> %vec1, i32 0
+ br label %merge
+
+else:
+ %a = extractelement <4 x i32> %vec, i32 1
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %z, %then ], [ %a, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
+
+%pair = type { i32, i32 }
+define amdgpu_kernel void @test_extractvalue(ptr %ptr, i1 %cond) {
+; CHECK-LABEL: define amdgpu_kernel void @test_extractvalue(
+; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[FLOW:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
+; CHECK-NEXT: br label %[[FLOW]]
+; CHECK: [[FLOW]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[A_THEN]], %[[THEN]] ], [ poison, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[THEN]] ], [ true, %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[ELSE:.*]], label %[[MERGE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
+; CHECK-NEXT: [[SUM_ELSE:%.*]] = add i32 [[A_ELSE]], 1
+; CHECK-NEXT: br label %[[MERGE]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], %[[FLOW]] ], [ [[SUM_ELSE]], %[[ELSE]] ]
+; CHECK-NEXT: store i32 [[PHI]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %load_then = load %pair, ptr %ptr
+ br i1 %cond, label %then, label %else
+
+then:
+ %a_then = extractvalue %pair %load_then, 0
+ br label %merge
+
+else:
+ %a_else = extractvalue %pair %load_then, 0
+ %sum_else = add i32 %a_else, 1
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
>From 95c47d22998030dc79e9f0867160111a557d6d28 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Mon, 12 May 2025 13:52:53 -0500
Subject: [PATCH 2/2] format correction
---
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index ec54f53d6165b..af9efe9f4d160 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -447,7 +447,7 @@ INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
"Structurize the CFG", false, false)
/// Then and Else block order in SCC is arbitrary. But based on the
-/// order, after structurization there are cases where there might be extra
+/// order, after structurization there are cases where there might be extra
/// VGPR copies due to interference during register coelescing.
/// eg:- incoming phi values from Else block contains only vgpr copies and
/// incoming phis in Then block has are some modification for the vgprs.
@@ -457,7 +457,7 @@ INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
///
/// This function checks the incoming phi values in the merge block and
/// orders based on the following heuristics of Then and Else block. Checks
-/// whether an incoming phi can be potential copy instructions and if so
+/// whether an incoming phi can be potential copy instructions and if so
/// checks whether copy within the block or not.
/// Increases score if its a potential copy from outside the block.
/// the higher scored block is ordered first.
More information about the llvm-commits
mailing list