[llvm] [StructurizeCFG] Hoist and simplify zero-cost incoming else phi values (PR #139605)
Vigneshwar Jayakumar via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 23 22:12:10 PDT 2025
https://github.com/VigneshwarJ updated https://github.com/llvm/llvm-project/pull/139605
>From d7da7dd35211a3a4d94ed657ac64cfd682aefe15 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Mon, 12 May 2025 13:15:28 -0500
Subject: [PATCH 1/8] [StructurizeCFG] Order IF Else block using Heuristics
The order of the Then and Else blocks within an SCC is arbitrary. Depending
on that order, after structurization there are cases where extra VGPR
copies are introduced due to interference during register coalescing.
This patch introduces heuristics that order the Then and Else blocks
based on the potential VGPR copies, in order to maximize coalescing.
---
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 80 +++++++++++
.../StructurizeCFG/order-if-else.ll | 129 ++++++++++++++++++
2 files changed, 209 insertions(+)
create mode 100644 llvm/test/Transforms/StructurizeCFG/order-if-else.ll
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index eb22b50532695..ec54f53d6165b 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -307,6 +307,8 @@ class StructurizeCFG {
RegionNode *PrevNode;
+ void reorderIfElseBlock(BasicBlock *BB, unsigned Idx);
+
void orderNodes();
void analyzeLoops(RegionNode *N);
@@ -409,6 +411,31 @@ class StructurizeCFGLegacyPass : public RegionPass {
} // end anonymous namespace
+/// Helper function for heuristics to order if else block
+/// Checks whether an instruction is potential vector copy instruction, if so,
+/// checks if the operands are from different BB. if so, returns True.
+// Then there's a possibility of coelescing without interference when ordered
+// first.
+static bool hasAffectingInstructions(Instruction *I, BasicBlock *BB) {
+
+ if (!I || I->getParent() != BB)
+ return true;
+
+ // If the instruction is not a poterntial copy instructoin, return true.
+ if (!isa<ExtractElementInst>(*I) && !isa<ExtractValueInst>(*I))
+ return false;
+
+ // Check if any operands are instructions defined in the same block.
+ for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
+ if (auto *OpI = dyn_cast<Instruction>(I->getOperand(i))) {
+ if (OpI->getParent() == BB)
+ return false;
+ }
+ }
+
+ return true;
+}
+
char StructurizeCFGLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
@@ -419,6 +446,58 @@ INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
"Structurize the CFG", false, false)
+/// Then and Else block order in SCC is arbitrary. But based on the
+/// order, after structurization there are cases where there might be extra
+/// VGPR copies due to interference during register coelescing.
+/// eg:- incoming phi values from Else block contains only vgpr copies and
+/// incoming phis in Then block has are some modification for the vgprs.
+/// after structurization, there would be interference when coelesing when Then
+/// block is ordered first. But those copies can be coelesced when Else is
+/// ordered first.
+///
+/// This function checks the incoming phi values in the merge block and
+/// orders based on the following heuristics of Then and Else block. Checks
+/// whether an incoming phi can be potential copy instructions and if so
+/// checks whether copy within the block or not.
+/// Increases score if its a potential copy from outside the block.
+/// the higher scored block is ordered first.
+void StructurizeCFG::reorderIfElseBlock(BasicBlock *BB, unsigned Idx) {
+ BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
+
+ if (Term && Term->isConditional()) {
+ BasicBlock *ThenBB = Term->getSuccessor(0);
+ BasicBlock *ElseBB = Term->getSuccessor(1);
+ BasicBlock *ThenSucc = ThenBB->getSingleSuccessor();
+
+ if (BB == ThenBB->getSinglePredecessor() &&
+ (ThenBB->getSinglePredecessor() == ElseBB->getSinglePredecessor()) &&
+ (ThenSucc && ThenSucc == ElseBB->getSingleSuccessor())) {
+ unsigned ThenScore = 0, ElseScore = 0;
+
+ for (PHINode &Phi : ThenSucc->phis()) {
+ Value *ThenVal = Phi.getIncomingValueForBlock(ThenBB);
+ Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
+
+ if (auto *Inst = dyn_cast<Instruction>(ThenVal))
+ ThenScore += hasAffectingInstructions(Inst, ThenBB);
+ if (auto *Inst = dyn_cast<Instruction>(ElseVal))
+ ElseScore += hasAffectingInstructions(Inst, ElseBB);
+ }
+
+ if (ThenScore != ElseScore) {
+ if (ThenScore < ElseScore)
+ std::swap(ThenBB, ElseBB);
+
+ // reorder the last two inserted elements in Order
+ if (Idx >= 2 && Order[Idx - 1]->getEntry() == ElseBB &&
+ Order[Idx - 2]->getEntry() == ThenBB) {
+ std::swap(Order[Idx - 1], Order[Idx - 2]);
+ }
+ }
+ }
+ }
+}
+
/// Build up the general order of nodes, by performing a topological sort of the
/// parent region's nodes, while ensuring that there is no outer cycle node
/// between any two inner cycle nodes.
@@ -452,6 +531,7 @@ void StructurizeCFG::orderNodes() {
// Add the SCC nodes to the Order array.
for (const auto &N : SCC) {
assert(I < E && "SCC size mismatch!");
+ reorderIfElseBlock(N.first->getEntry(), I);
Order[I++] = N.first;
}
}
diff --git a/llvm/test/Transforms/StructurizeCFG/order-if-else.ll b/llvm/test/Transforms/StructurizeCFG/order-if-else.ll
new file mode 100644
index 0000000000000..02641f405f3b4
--- /dev/null
+++ b/llvm/test/Transforms/StructurizeCFG/order-if-else.ll
@@ -0,0 +1,129 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
+; RUN: opt -S -passes=structurizecfg %s -o - | FileCheck %s
+
+define amdgpu_kernel void @test_extractelement_1(<4 x i32> %vec, i1 %cond, ptr %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @test_extractelement_1(
+; CHECK-SAME: <4 x i32> [[VEC:%.*]], i1 [[COND:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT: br i1 [[COND_INV]], label %[[ELSE:.*]], label %[[FLOW:.*]]
+; CHECK: [[FLOW]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[A:%.*]], %[[ELSE]] ], [ poison, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ true, %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[MERGE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[X:%.*]] = extractelement <4 x i32> [[VEC]], i32 0
+; CHECK-NEXT: [[Z:%.*]] = add i32 [[X]], 1
+; CHECK-NEXT: br label %[[MERGE]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[A]] = extractelement <4 x i32> [[VEC]], i32 1
+; CHECK-NEXT: br label %[[FLOW]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], %[[FLOW]] ], [ [[Z]], %[[THEN]] ]
+; CHECK-NEXT: store i32 [[PHI]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %x = extractelement <4 x i32> %vec, i32 0
+ %z = add i32 %x, 1
+ br label %merge
+
+else:
+ %a = extractelement <4 x i32> %vec, i32 1
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %z, %then ], [ %a, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
+
+define amdgpu_kernel void @test_extractelement_2(<4 x i32> %vec, i1 %cond, ptr %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @test_extractelement_2(
+; CHECK-SAME: <4 x i32> [[VEC:%.*]], i1 [[COND:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT: br i1 [[COND_INV]], label %[[ELSE:.*]], label %[[FLOW:.*]]
+; CHECK: [[FLOW]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[A:%.*]], %[[ELSE]] ], [ poison, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ true, %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[MERGE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[X:%.*]] = extractelement <4 x i32> [[VEC]], i32 1
+; CHECK-NEXT: [[Y:%.*]] = add i32 [[X]], 1
+; CHECK-NEXT: [[VEC1:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
+; CHECK-NEXT: [[Z:%.*]] = extractelement <4 x i32> [[VEC1]], i32 0
+; CHECK-NEXT: br label %[[MERGE]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[A]] = extractelement <4 x i32> [[VEC]], i32 1
+; CHECK-NEXT: br label %[[FLOW]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], %[[FLOW]] ], [ [[Z]], %[[THEN]] ]
+; CHECK-NEXT: store i32 [[PHI]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %x = extractelement <4 x i32> %vec, i32 1
+ %y = add i32 %x, 1
+ %vec1 = insertelement <4 x i32> poison, i32 %y, i32 0
+ %z = extractelement <4 x i32> %vec1, i32 0
+ br label %merge
+
+else:
+ %a = extractelement <4 x i32> %vec, i32 1
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %z, %then ], [ %a, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
+
+%pair = type { i32, i32 }
+define amdgpu_kernel void @test_extractvalue(ptr %ptr, i1 %cond) {
+; CHECK-LABEL: define amdgpu_kernel void @test_extractvalue(
+; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[FLOW:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
+; CHECK-NEXT: br label %[[FLOW]]
+; CHECK: [[FLOW]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[A_THEN]], %[[THEN]] ], [ poison, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[THEN]] ], [ true, %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[ELSE:.*]], label %[[MERGE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
+; CHECK-NEXT: [[SUM_ELSE:%.*]] = add i32 [[A_ELSE]], 1
+; CHECK-NEXT: br label %[[MERGE]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], %[[FLOW]] ], [ [[SUM_ELSE]], %[[ELSE]] ]
+; CHECK-NEXT: store i32 [[PHI]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %load_then = load %pair, ptr %ptr
+ br i1 %cond, label %then, label %else
+
+then:
+ %a_then = extractvalue %pair %load_then, 0
+ br label %merge
+
+else:
+ %a_else = extractvalue %pair %load_then, 0
+ %sum_else = add i32 %a_else, 1
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
>From 95c47d22998030dc79e9f0867160111a557d6d28 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Mon, 12 May 2025 13:52:53 -0500
Subject: [PATCH 2/8] format correction
---
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index ec54f53d6165b..af9efe9f4d160 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -447,7 +447,7 @@ INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
"Structurize the CFG", false, false)
/// Then and Else block order in SCC is arbitrary. But based on the
-/// order, after structurization there are cases where there might be extra
+/// order, after structurization there are cases where there might be extra
/// VGPR copies due to interference during register coelescing.
/// eg:- incoming phi values from Else block contains only vgpr copies and
/// incoming phis in Then block has are some modification for the vgprs.
@@ -457,7 +457,7 @@ INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
///
/// This function checks the incoming phi values in the merge block and
/// orders based on the following heuristics of Then and Else block. Checks
-/// whether an incoming phi can be potential copy instructions and if so
+/// whether an incoming phi can be potential copy instructions and if so
/// checks whether copy within the block or not.
/// Increases score if its a potential copy from outside the block.
/// the higher scored block is ordered first.
>From e7c1f9c3a6eac55e09a1dd19ddc48b955f164c7d Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Mon, 12 May 2025 16:39:00 -0500
Subject: [PATCH 3/8] review changes
---
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 71 ++++++++++---------
1 file changed, 37 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index af9efe9f4d160..d39d66f950aaf 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -414,14 +414,14 @@ class StructurizeCFGLegacyPass : public RegionPass {
/// Helper function for heuristics to order if else block
/// Checks whether an instruction is potential vector copy instruction, if so,
/// checks if the operands are from different BB. if so, returns True.
-// Then there's a possibility of coelescing without interference when ordered
+// Then there's a possibility of coalescing without interference when ordered
// first.
static bool hasAffectingInstructions(Instruction *I, BasicBlock *BB) {
- if (!I || I->getParent() != BB)
+ if (I->getParent() != BB)
return true;
- // If the instruction is not a poterntial copy instructoin, return true.
+ // If the instruction is not a poterntial copy instruction, return true.
if (!isa<ExtractElementInst>(*I) && !isa<ExtractValueInst>(*I))
return false;
@@ -448,53 +448,56 @@ INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
/// Then and Else block order in SCC is arbitrary. But based on the
/// order, after structurization there are cases where there might be extra
-/// VGPR copies due to interference during register coelescing.
+/// VGPR copies due to interference during register coalescing.
/// eg:- incoming phi values from Else block contains only vgpr copies and
/// incoming phis in Then block has are some modification for the vgprs.
-/// after structurization, there would be interference when coelesing when Then
-/// block is ordered first. But those copies can be coelesced when Else is
+/// after structurization, there would be interference when coalesing when Then
+/// block is ordered first. But those copies can be coalesced when Else is
/// ordered first.
///
/// This function checks the incoming phi values in the merge block and
/// orders based on the following heuristics of Then and Else block. Checks
/// whether an incoming phi can be potential copy instructions and if so
-/// checks whether copy within the block or not.
-/// Increases score if its a potential copy from outside the block.
+/// checks whether copy within the block or not.
+/// Increases score if its a potential copy from outside the block.
/// the higher scored block is ordered first.
void StructurizeCFG::reorderIfElseBlock(BasicBlock *BB, unsigned Idx) {
BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
- if (Term && Term->isConditional()) {
- BasicBlock *ThenBB = Term->getSuccessor(0);
- BasicBlock *ElseBB = Term->getSuccessor(1);
- BasicBlock *ThenSucc = ThenBB->getSingleSuccessor();
+ if (!Term || !(Term->isConditional()))
+ return;
- if (BB == ThenBB->getSinglePredecessor() &&
- (ThenBB->getSinglePredecessor() == ElseBB->getSinglePredecessor()) &&
- (ThenSucc && ThenSucc == ElseBB->getSingleSuccessor())) {
- unsigned ThenScore = 0, ElseScore = 0;
+ BasicBlock *ThenBB = Term->getSuccessor(0);
+ BasicBlock *ElseBB = Term->getSuccessor(1);
+ BasicBlock *ThenSucc = ThenBB->getSingleSuccessor();
- for (PHINode &Phi : ThenSucc->phis()) {
- Value *ThenVal = Phi.getIncomingValueForBlock(ThenBB);
- Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
+ if (BB != ThenBB->getSinglePredecessor() || !ThenSucc ||
+ (ThenBB->getSinglePredecessor() != ElseBB->getSinglePredecessor()) ||
+ ThenSucc != ElseBB->getSingleSuccessor())
+ return;
- if (auto *Inst = dyn_cast<Instruction>(ThenVal))
- ThenScore += hasAffectingInstructions(Inst, ThenBB);
- if (auto *Inst = dyn_cast<Instruction>(ElseVal))
- ElseScore += hasAffectingInstructions(Inst, ElseBB);
- }
+ unsigned ThenScore = 0, ElseScore = 0;
- if (ThenScore != ElseScore) {
- if (ThenScore < ElseScore)
- std::swap(ThenBB, ElseBB);
+ for (PHINode &Phi : ThenSucc->phis()) {
+ Value *ThenVal = Phi.getIncomingValueForBlock(ThenBB);
+ Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
- // reorder the last two inserted elements in Order
- if (Idx >= 2 && Order[Idx - 1]->getEntry() == ElseBB &&
- Order[Idx - 2]->getEntry() == ThenBB) {
- std::swap(Order[Idx - 1], Order[Idx - 2]);
- }
- }
- }
+ if (auto *Inst = dyn_cast<Instruction>(ThenVal))
+ ThenScore += hasAffectingInstructions(Inst, ThenBB);
+ if (auto *Inst = dyn_cast<Instruction>(ElseVal))
+ ElseScore += hasAffectingInstructions(Inst, ElseBB);
+ }
+
+ if (ThenScore == ElseScore)
+ return;
+
+ if (ThenScore < ElseScore)
+ std::swap(ThenBB, ElseBB);
+
+ // reorder the last two inserted elements in Order
+ if (Idx >= 2 && Order[Idx - 1]->getEntry() == ElseBB &&
+ Order[Idx - 2]->getEntry() == ThenBB) {
+ std::swap(Order[Idx - 1], Order[Idx - 2]);
}
}
>From 05c24b8c76aef230228ca2065e10da6cf65fa13f Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Mon, 12 May 2025 17:30:13 -0500
Subject: [PATCH 4/8] update tests
---
llvm/test/CodeGen/AMDGPU/if-else.ll | 166 ++++++++++++++++++
.../StructurizeCFG/order-if-else.ll | 12 +-
2 files changed, 172 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/if-else.ll
diff --git a/llvm/test/CodeGen/AMDGPU/if-else.ll b/llvm/test/CodeGen/AMDGPU/if-else.ll
new file mode 100644
index 0000000000000..67907b120b362
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/if-else.ll
@@ -0,0 +1,166 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX900 %s
+
+define amdgpu_kernel void @test_extractelement_then_else(<4 x i32> %vec, i1 %cond, ptr %ptr) {
+; GFX900-LABEL: test_extractelement_then_else:
+; GFX900: ; %bb.0: ; %if
+; GFX900-NEXT: s_load_dword s6, s[4:5], 0x34
+; GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-NEXT: s_bitcmp0_b32 s6, 0
+; GFX900-NEXT: s_cbranch_scc0 .LBB0_2
+; GFX900-NEXT: ; %bb.1: ; %else
+; GFX900-NEXT: s_cbranch_execz .LBB0_3
+; GFX900-NEXT: s_branch .LBB0_4
+; GFX900-NEXT: .LBB0_2:
+; GFX900-NEXT: .LBB0_3: ; %then
+; GFX900-NEXT: s_add_i32 s1, s1, 1
+; GFX900-NEXT: .LBB0_4: ; %merge
+; GFX900-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x3c
+; GFX900-NEXT: v_mov_b32_e32 v2, s1
+; GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s2
+; GFX900-NEXT: v_mov_b32_e32 v1, s3
+; GFX900-NEXT: flat_store_dword v[0:1], v2
+; GFX900-NEXT: s_endpgm
+if:
+ br i1 %cond, label %then, label %else
+
+then:
+ %x = extractelement <4 x i32> %vec, i32 1
+ %y = add i32 %x, 1
+ %vec1 = insertelement <4 x i32> poison, i32 %y, i32 0
+ %z = extractelement <4 x i32> %vec1, i32 0
+ br label %merge
+
+else:
+ %a = extractelement <4 x i32> %vec, i32 1
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %z, %then ], [ %a, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
+
+define amdgpu_kernel void @test_extractelement_else_then(<4 x i32> %vec, i1 %cond, ptr %ptr) {
+; GFX900-LABEL: test_extractelement_else_then:
+; GFX900: ; %bb.0: ; %if
+; GFX900-NEXT: s_load_dword s6, s[4:5], 0x34
+; GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-NEXT: s_bitcmp0_b32 s6, 0
+; GFX900-NEXT: s_cbranch_scc1 .LBB1_2
+; GFX900-NEXT: ; %bb.1: ; %else
+; GFX900-NEXT: s_cbranch_execz .LBB1_3
+; GFX900-NEXT: s_branch .LBB1_4
+; GFX900-NEXT: .LBB1_2:
+; GFX900-NEXT: .LBB1_3: ; %then
+; GFX900-NEXT: s_add_i32 s1, s1, 1
+; GFX900-NEXT: .LBB1_4: ; %merge
+; GFX900-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x3c
+; GFX900-NEXT: v_mov_b32_e32 v2, s1
+; GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, s2
+; GFX900-NEXT: v_mov_b32_e32 v1, s3
+; GFX900-NEXT: flat_store_dword v[0:1], v2
+; GFX900-NEXT: s_endpgm
+if:
+ br i1 %cond, label %else, label %then
+
+else:
+ %a = extractelement <4 x i32> %vec, i32 1
+ br label %merge
+
+then:
+ %x = extractelement <4 x i32> %vec, i32 1
+ %y = add i32 %x, 1
+ %vec1 = insertelement <4 x i32> poison, i32 %y, i32 0
+ %z = extractelement <4 x i32> %vec1, i32 0
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %z, %then ], [ %a, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
+
+%pair = type { i32, i32 }
+
+define void @test_extractvalue_then_else(ptr %ptr, i1 %cond) {
+; GFX900-LABEL: test_extractvalue_then_else:
+; GFX900: ; %bb.0: ; %if
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: flat_load_dword v3, v[0:1]
+; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
+; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX900-NEXT: s_cbranch_execz .LBB2_2
+; GFX900-NEXT: ; %bb.1: ; %else
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
+; GFX900-NEXT: .LBB2_2: ; %merge
+; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: flat_store_dword v[0:1], v3
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+if:
+ %load_then = load %pair, ptr %ptr
+ br i1 %cond, label %then, label %else
+
+then:
+ %a_then = extractvalue %pair %load_then, 0
+ br label %merge
+
+else:
+ %a_else = extractvalue %pair %load_then, 0
+ %sum_else = add i32 %a_else, 1
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
+
+define void @test_extractvalue_else_then(ptr %ptr, i1 %cond) {
+; GFX900-LABEL: test_extractvalue_else_then:
+; GFX900: ; %bb.0: ; %if
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: flat_load_dword v3, v[0:1]
+; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
+; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX900-NEXT: s_cbranch_execz .LBB3_2
+; GFX900-NEXT: ; %bb.1: ; %else
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
+; GFX900-NEXT: .LBB3_2: ; %merge
+; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: flat_store_dword v[0:1], v3
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+if:
+ %load_then = load %pair, ptr %ptr
+ br i1 %cond, label %else, label %then
+
+else:
+ %a_else = extractvalue %pair %load_then, 0
+ %sum_else = add i32 %a_else, 1
+ br label %merge
+
+then:
+ %a_then = extractvalue %pair %load_then, 0
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
diff --git a/llvm/test/Transforms/StructurizeCFG/order-if-else.ll b/llvm/test/Transforms/StructurizeCFG/order-if-else.ll
index 02641f405f3b4..cfcf8e2e24e37 100644
--- a/llvm/test/Transforms/StructurizeCFG/order-if-else.ll
+++ b/llvm/test/Transforms/StructurizeCFG/order-if-else.ll
@@ -2,8 +2,8 @@
; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
; RUN: opt -S -passes=structurizecfg %s -o - | FileCheck %s
-define amdgpu_kernel void @test_extractelement_1(<4 x i32> %vec, i1 %cond, ptr %ptr) {
-; CHECK-LABEL: define amdgpu_kernel void @test_extractelement_1(
+define void @test_extractelement_1(<4 x i32> %vec, i1 %cond, ptr %ptr) {
+; CHECK-LABEL: define void @test_extractelement_1(
; CHECK-SAME: <4 x i32> [[VEC:%.*]], i1 [[COND:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
@@ -42,8 +42,8 @@ merge:
ret void
}
-define amdgpu_kernel void @test_extractelement_2(<4 x i32> %vec, i1 %cond, ptr %ptr) {
-; CHECK-LABEL: define amdgpu_kernel void @test_extractelement_2(
+define void @test_extractelement_2(<4 x i32> %vec, i1 %cond, ptr %ptr) {
+; CHECK-LABEL: define void @test_extractelement_2(
; CHECK-SAME: <4 x i32> [[VEC:%.*]], i1 [[COND:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
@@ -87,8 +87,8 @@ merge:
}
%pair = type { i32, i32 }
-define amdgpu_kernel void @test_extractvalue(ptr %ptr, i1 %cond) {
-; CHECK-LABEL: define amdgpu_kernel void @test_extractvalue(
+define void @test_extractvalue(ptr %ptr, i1 %cond) {
+; CHECK-LABEL: define void @test_extractvalue(
; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
>From d279104c1c21cd0dd6883cb28304c747fa871b7a Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Tue, 10 Jun 2025 16:58:44 -0500
Subject: [PATCH 5/8] zero cost instruction
---
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 50 ++++++++++++-------
1 file changed, 31 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index d39d66f950aaf..023cfdcfd5d34 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -287,6 +288,7 @@ class StructurizeCFG {
UniformityInfo *UA = nullptr;
DominatorTree *DT;
+ TargetTransformInfo *TTI;
SmallVector<RegionNode *, 8> Order;
BBSet Visited;
@@ -367,7 +369,7 @@ class StructurizeCFG {
public:
void init(Region *R);
- bool run(Region *R, DominatorTree *DT);
+ bool run(Region *R, DominatorTree *DT, TargetTransformInfo *TTI);
bool makeUniformRegion(Region *R, UniformityInfo &UA);
};
@@ -393,8 +395,11 @@ class StructurizeCFGLegacyPass : public RegionPass {
if (SCFG.makeUniformRegion(R, UA))
return false;
}
+ Function *F = R->getEntry()->getParent();
+ TargetTransformInfo *TTI =
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return SCFG.run(R, DT);
+ return SCFG.run(R, DT, TTI);
}
StringRef getPassName() const override { return "Structurize control flow"; }
@@ -402,6 +407,7 @@ class StructurizeCFGLegacyPass : public RegionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
if (SkipUniformRegions)
AU.addRequired<UniformityInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -411,18 +417,23 @@ class StructurizeCFGLegacyPass : public RegionPass {
} // end anonymous namespace
-/// Helper function for heuristics to order if else block
-/// Checks whether an instruction is potential vector copy instruction, if so,
-/// checks if the operands are from different BB. if so, returns True.
-// Then there's a possibility of coalescing without interference when ordered
-// first.
-static bool hasAffectingInstructions(Instruction *I, BasicBlock *BB) {
+/// Helper function for heuristics to order if else block.
+/// Checks whether an instruction is zero cost instruction and checks if the
+/// operands are from different BB. If so, this instruction can be coalesced
+/// when this block is ordered first. So, this returns true.
+static bool hasAffectingInstructions(Instruction *I, BasicBlock *BB,
+ TargetTransformInfo *TTI) {
if (I->getParent() != BB)
return true;
- // If the instruction is not a poterntial copy instruction, return true.
- if (!isa<ExtractElementInst>(*I) && !isa<ExtractValueInst>(*I))
+ // If the instruction is not a zero cost instruction, return false.
+ auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
+ InstructionCost::CostType CostVal =
+ Cost.isValid()
+ ? Cost.getValue()
+ : (InstructionCost::CostType)TargetTransformInfo::TCC_Expensive;
+ if (CostVal != 0)
return false;
// Check if any operands are instructions defined in the same block.
@@ -457,9 +468,9 @@ INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
///
/// This function checks the incoming phi values in the merge block and
/// orders based on the following heuristics of Then and Else block. Checks
-/// whether an incoming phi can be potential copy instructions and if so
-/// checks whether copy within the block or not.
-/// Increases score if its a potential copy from outside the block.
+/// whether an incoming phi is a zero cost instructions and if so
+/// checks whether operands are within the block or not.
+/// Increases score if its a operands from outside the block.
/// the higher scored block is ordered first.
void StructurizeCFG::reorderIfElseBlock(BasicBlock *BB, unsigned Idx) {
BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
@@ -483,9 +494,9 @@ void StructurizeCFG::reorderIfElseBlock(BasicBlock *BB, unsigned Idx) {
Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
if (auto *Inst = dyn_cast<Instruction>(ThenVal))
- ThenScore += hasAffectingInstructions(Inst, ThenBB);
+ ThenScore += hasAffectingInstructions(Inst, ThenBB, TTI);
if (auto *Inst = dyn_cast<Instruction>(ElseVal))
- ElseScore += hasAffectingInstructions(Inst, ElseBB);
+ ElseScore += hasAffectingInstructions(Inst, ElseBB, TTI);
}
if (ThenScore == ElseScore)
@@ -1390,12 +1401,13 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
}
/// Run the transformation for each region found
-bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
+bool StructurizeCFG::run(Region *R, DominatorTree *DT,
+ TargetTransformInfo *TTI) {
if (R->isTopLevelRegion())
return false;
this->DT = DT;
-
+ this->TTI = TTI;
Func = R->getEntry()->getParent();
assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
@@ -1457,7 +1469,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
bool Changed = false;
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
auto &RI = AM.getResult<RegionInfoAnalysis>(F);
-
+ TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
UniformityInfo *UI = nullptr;
if (SkipUniformRegions)
UI = &AM.getResult<UniformityInfoAnalysis>(F);
@@ -1476,7 +1488,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
continue;
}
- Changed |= SCFG.run(R, DT);
+ Changed |= SCFG.run(R, DT, TTI);
}
if (!Changed)
return PreservedAnalyses::all();
>From 44614f65d9ad5d554c1100c05825757617f11c0d Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Mon, 23 Jun 2025 20:37:04 -0500
Subject: [PATCH 6/8] changed to hoisting
---
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 136 +++++++------
llvm/test/CodeGen/AMDGPU/if-else.ll | 166 ----------------
.../CodeGen/AMDGPU/memintrinsic-unroll.ll | 6 +-
llvm/test/CodeGen/AMDGPU/structurize-hoist.ll | 180 ++++++++++++++++++
.../StructurizeCFG/hoist-zerocost.ll | 163 ++++++++++++++++
.../StructurizeCFG/order-if-else.ll | 129 -------------
6 files changed, 424 insertions(+), 356 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/if-else.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
create mode 100644 llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll
delete mode 100644 llvm/test/Transforms/StructurizeCFG/order-if-else.ll
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index a04905624e136..205da5d78d533 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -129,6 +129,7 @@ struct PredInfo {
using BBPredicates = DenseMap<BasicBlock *, PredInfo>;
using PredMap = DenseMap<BasicBlock *, BBPredicates>;
using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
+using Val2BBMap = DenseMap<Value *, BasicBlock *>;
// A traits type that is intended to be used in graph algorithms. The graph
// traits starts at an entry node, and traverses the RegionNodes that are in
@@ -280,13 +281,12 @@ class StructurizeCFG {
ConstantInt *BoolTrue;
ConstantInt *BoolFalse;
Value *BoolPoison;
-
+ TargetTransformInfo *TTI;
Function *Func;
Region *ParentRegion;
UniformityInfo *UA = nullptr;
DominatorTree *DT;
- TargetTransformInfo *TTI;
SmallVector<RegionNode *, 8> Order;
BBSet Visited;
@@ -303,10 +303,14 @@ class StructurizeCFG {
PredMap LoopPreds;
BranchVector LoopConds;
+ Val2BBMap HoistedValues;
+
RegionNode *PrevNode;
void reorderIfElseBlock(BasicBlock *BB, unsigned Idx);
+ void HoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
+
void orderNodes();
void analyzeLoops(RegionNode *N);
@@ -336,6 +340,8 @@ class StructurizeCFG {
void simplifyAffectedPhis();
+ void SimplifyHoistedPhis();
+
DebugLoc killTerminator(BasicBlock *BB);
void changeExit(RegionNode *Node, BasicBlock *NewExit,
@@ -403,6 +409,7 @@ class StructurizeCFGLegacyPass : public RegionPass {
AU.addRequired<UniformityInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
RegionPass::getAnalysisUsage(AU);
@@ -411,15 +418,14 @@ class StructurizeCFGLegacyPass : public RegionPass {
} // end anonymous namespace
-/// Helper function for heuristics to order if else block.
/// Checks whether an instruction is zero cost instruction and checks if the
/// operands are from different BB. If so, this instruction can be coalesced
-/// when this block is ordered first. So, this returns true.
-static bool hasAffectingInstructions(Instruction *I, BasicBlock *BB,
- TargetTransformInfo *TTI) {
+/// if it is hoisted to the predecessor block. So, this returns true.
+static bool isHoistableInstruction(Instruction *I, BasicBlock *BB,
+ TargetTransformInfo *TTI) {
if (I->getParent() != BB)
- return true;
+ return false;
// If the instruction is not a zero cost instruction, return false.
auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
@@ -451,58 +457,39 @@ INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
"Structurize the CFG", false, false)
-/// Then and Else block order in SCC is arbitrary. But based on the
-/// order, after structurization there are cases where there might be extra
-/// VGPR copies due to interference during register coalescing.
-/// eg:- incoming phi values from Else block contains only vgpr copies and
-/// incoming phis in Then block has are some modification for the vgprs.
-/// after structurization, there would be interference when coalesing when Then
-/// block is ordered first. But those copies can be coalesced when Else is
-/// ordered first.
+/// Because the SCC order of Then and Else blocks is arbitrary, structurization
+/// can introduce unnecessary VGPR copies due to register coalescing
+/// interference.
+/// For example, if the Else block has a zero-cost instruction and
+/// the Then block modifies the VGPR value, only one value is live at a time in
+/// merge block before structurization. After structurization, the coalescer may
+/// incorrectly treat the Then value as live in the Else block (via the path
+/// Then → Flow → Else), leading to unnecessary VGPR copies.
///
-/// This function checks the incoming phi values in the merge block and
-/// orders based on the following heuristics of Then and Else block. Checks
-/// whether an incoming phi is a zero cost instructions and if so
-/// checks whether operands are within the block or not.
-/// Increases score if its a operands from outside the block.
-/// the higher scored block is ordered first.
-void StructurizeCFG::reorderIfElseBlock(BasicBlock *BB, unsigned Idx) {
- BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
-
- if (!Term || !(Term->isConditional()))
+/// This function examines phi nodes whose incoming values are zero-cost
+/// instructions in the Else block. It identifies such values that can be safely
+/// hoisted and moves them to the nearest common dominator of Then and Else
+/// blocks. A follow-up function after setting PhiNodes assigns the hoisted
+/// value to poison phi nodes along the if→flow edge, aiding register coalescing
+/// and minimizing unnecessary live ranges.
+void StructurizeCFG::HoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
+ BasicBlock *ThenBB) {
+
+ BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
+ BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB);
+
+ if (!ElseSucc || !CommonDominator)
return;
-
- BasicBlock *ThenBB = Term->getSuccessor(0);
- BasicBlock *ElseBB = Term->getSuccessor(1);
- BasicBlock *ThenSucc = ThenBB->getSingleSuccessor();
-
- if (BB != ThenBB->getSinglePredecessor() || !ThenSucc ||
- (ThenBB->getSinglePredecessor() != ElseBB->getSinglePredecessor()) ||
- ThenSucc != ElseBB->getSingleSuccessor())
- return;
-
- unsigned ThenScore = 0, ElseScore = 0;
-
- for (PHINode &Phi : ThenSucc->phis()) {
- Value *ThenVal = Phi.getIncomingValueForBlock(ThenBB);
+ Instruction *Term = CommonDominator->getTerminator();
+ for (PHINode &Phi : ElseSucc->phis()) {
Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
-
- if (auto *Inst = dyn_cast<Instruction>(ThenVal))
- ThenScore += hasAffectingInstructions(Inst, ThenBB, TTI);
- if (auto *Inst = dyn_cast<Instruction>(ElseVal))
- ElseScore += hasAffectingInstructions(Inst, ElseBB, TTI);
- }
-
- if (ThenScore == ElseScore)
- return;
-
- if (ThenScore < ElseScore)
- std::swap(ThenBB, ElseBB);
-
- // reorder the last two inserted elements in Order
- if (Idx >= 2 && Order[Idx - 1]->getEntry() == ElseBB &&
- Order[Idx - 2]->getEntry() == ThenBB) {
- std::swap(Order[Idx - 1], Order[Idx - 2]);
+ if (auto *Inst = dyn_cast<Instruction>(ElseVal)) {
+ if (isHoistableInstruction(Inst, ElseBB, TTI)) {
+ Inst->removeFromParent();
+ Inst->insertInto(CommonDominator, Term->getIterator());
+ HoistedValues[Inst] = CommonDominator;
+ }
+ }
}
}
@@ -539,7 +526,6 @@ void StructurizeCFG::orderNodes() {
// Add the SCC nodes to the Order array.
for (const auto &N : SCC) {
assert(I < E && "SCC size mismatch!");
- reorderIfElseBlock(N.first->getEntry(), I);
Order[I++] = N.first;
}
}
@@ -629,7 +615,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
BasicBlock *Other = Term->getSuccessor(!i);
if (Visited.count(Other) && !Loops.count(Other) &&
!Pred.count(Other) && !Pred.count(P)) {
-
+ HoistZeroCostElseBlockPhiValues(Succ, Other);
Pred[Other] = {BoolFalse, std::nullopt};
Pred[P] = {BoolTrue, std::nullopt};
continue;
@@ -985,6 +971,39 @@ void StructurizeCFG::setPhiValues() {
AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
}
+/// Updates PHI nodes after hoisted zero cost instructions by replacing poison
+/// entries on Flow nodes with the appropriate hoisted values
+void StructurizeCFG::SimplifyHoistedPhis() {
+ for (WeakVH VH : AffectedPhis) {
+ if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
+ if (Phi->getNumIncomingValues() != 2)
+ continue;
+ for (int i = 0; i < 2; i++) {
+ Value *V = Phi->getIncomingValue(i);
+ if (HoistedValues.count(V)) {
+ Value *OtherV = Phi->getIncomingValue(!i);
+ if (PHINode *OtherPhi = dyn_cast<PHINode>(OtherV)) {
+ int PoisonValBBIdx = -1;
+ for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
+ if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i)))
+ continue;
+ PoisonValBBIdx = i;
+ break;
+ }
+
+ if (PoisonValBBIdx == -1 ||
+ !DT->dominates(HoistedValues[V],
+ OtherPhi->getIncomingBlock(PoisonValBBIdx)))
+ continue;
+ OtherPhi->setIncomingValue(PoisonValBBIdx, V);
+ Phi->setIncomingValue(i, OtherV);
+ }
+ }
+ }
+ }
+ }
+}
+
void StructurizeCFG::simplifyAffectedPhis() {
bool Changed;
do {
@@ -1395,6 +1414,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT,
insertConditions(false);
insertConditions(true);
setPhiValues();
+ SimplifyHoistedPhis();
simplifyConditions();
simplifyAffectedPhis();
rebuildSSA();
diff --git a/llvm/test/CodeGen/AMDGPU/if-else.ll b/llvm/test/CodeGen/AMDGPU/if-else.ll
deleted file mode 100644
index 67907b120b362..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/if-else.ll
+++ /dev/null
@@ -1,166 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX900 %s
-
-define amdgpu_kernel void @test_extractelement_then_else(<4 x i32> %vec, i1 %cond, ptr %ptr) {
-; GFX900-LABEL: test_extractelement_then_else:
-; GFX900: ; %bb.0: ; %if
-; GFX900-NEXT: s_load_dword s6, s[4:5], 0x34
-; GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: s_bitcmp0_b32 s6, 0
-; GFX900-NEXT: s_cbranch_scc0 .LBB0_2
-; GFX900-NEXT: ; %bb.1: ; %else
-; GFX900-NEXT: s_cbranch_execz .LBB0_3
-; GFX900-NEXT: s_branch .LBB0_4
-; GFX900-NEXT: .LBB0_2:
-; GFX900-NEXT: .LBB0_3: ; %then
-; GFX900-NEXT: s_add_i32 s1, s1, 1
-; GFX900-NEXT: .LBB0_4: ; %merge
-; GFX900-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x3c
-; GFX900-NEXT: v_mov_b32_e32 v2, s1
-; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v0, s2
-; GFX900-NEXT: v_mov_b32_e32 v1, s3
-; GFX900-NEXT: flat_store_dword v[0:1], v2
-; GFX900-NEXT: s_endpgm
-if:
- br i1 %cond, label %then, label %else
-
-then:
- %x = extractelement <4 x i32> %vec, i32 1
- %y = add i32 %x, 1
- %vec1 = insertelement <4 x i32> poison, i32 %y, i32 0
- %z = extractelement <4 x i32> %vec1, i32 0
- br label %merge
-
-else:
- %a = extractelement <4 x i32> %vec, i32 1
- br label %merge
-
-merge:
- %phi = phi i32 [ %z, %then ], [ %a, %else ]
- store i32 %phi, ptr %ptr
- ret void
-}
-
-define amdgpu_kernel void @test_extractelement_else_then(<4 x i32> %vec, i1 %cond, ptr %ptr) {
-; GFX900-LABEL: test_extractelement_else_then:
-; GFX900: ; %bb.0: ; %if
-; GFX900-NEXT: s_load_dword s6, s[4:5], 0x34
-; GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: s_bitcmp0_b32 s6, 0
-; GFX900-NEXT: s_cbranch_scc1 .LBB1_2
-; GFX900-NEXT: ; %bb.1: ; %else
-; GFX900-NEXT: s_cbranch_execz .LBB1_3
-; GFX900-NEXT: s_branch .LBB1_4
-; GFX900-NEXT: .LBB1_2:
-; GFX900-NEXT: .LBB1_3: ; %then
-; GFX900-NEXT: s_add_i32 s1, s1, 1
-; GFX900-NEXT: .LBB1_4: ; %merge
-; GFX900-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x3c
-; GFX900-NEXT: v_mov_b32_e32 v2, s1
-; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v0, s2
-; GFX900-NEXT: v_mov_b32_e32 v1, s3
-; GFX900-NEXT: flat_store_dword v[0:1], v2
-; GFX900-NEXT: s_endpgm
-if:
- br i1 %cond, label %else, label %then
-
-else:
- %a = extractelement <4 x i32> %vec, i32 1
- br label %merge
-
-then:
- %x = extractelement <4 x i32> %vec, i32 1
- %y = add i32 %x, 1
- %vec1 = insertelement <4 x i32> poison, i32 %y, i32 0
- %z = extractelement <4 x i32> %vec1, i32 0
- br label %merge
-
-merge:
- %phi = phi i32 [ %z, %then ], [ %a, %else ]
- store i32 %phi, ptr %ptr
- ret void
-}
-
-%pair = type { i32, i32 }
-
-define void @test_extractvalue_then_else(ptr %ptr, i1 %cond) {
-; GFX900-LABEL: test_extractvalue_then_else:
-; GFX900: ; %bb.0: ; %if
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: flat_load_dword v3, v[0:1]
-; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
-; GFX900-NEXT: s_cbranch_execz .LBB2_2
-; GFX900-NEXT: ; %bb.1: ; %else
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
-; GFX900-NEXT: .LBB2_2: ; %merge
-; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: flat_store_dword v[0:1], v3
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-if:
- %load_then = load %pair, ptr %ptr
- br i1 %cond, label %then, label %else
-
-then:
- %a_then = extractvalue %pair %load_then, 0
- br label %merge
-
-else:
- %a_else = extractvalue %pair %load_then, 0
- %sum_else = add i32 %a_else, 1
- br label %merge
-
-merge:
- %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
- store i32 %phi, ptr %ptr
- ret void
-}
-
-define void @test_extractvalue_else_then(ptr %ptr, i1 %cond) {
-; GFX900-LABEL: test_extractvalue_else_then:
-; GFX900: ; %bb.0: ; %if
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: flat_load_dword v3, v[0:1]
-; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
-; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
-; GFX900-NEXT: s_cbranch_execz .LBB3_2
-; GFX900-NEXT: ; %bb.1: ; %else
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
-; GFX900-NEXT: .LBB3_2: ; %merge
-; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: flat_store_dword v[0:1], v3
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-if:
- %load_then = load %pair, ptr %ptr
- br i1 %cond, label %else, label %then
-
-else:
- %a_else = extractvalue %pair %load_then, 0
- %sum_else = add i32 %a_else, 1
- br label %merge
-
-then:
- %a_then = extractvalue %pair %load_then, 0
- br label %merge
-
-merge:
- %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
- store i32 %phi, ptr %ptr
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
index 9cc42ac448067..be020457ce87d 100644
--- a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
+++ b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
@@ -9851,8 +9851,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
; CHECK-NEXT: s_andn2_saveexec_b32 s6, s6
; CHECK-NEXT: s_cbranch_execz .LBB8_6
; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
-; CHECK-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
+; CHECK-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
; CHECK-NEXT: s_movk_i32 s4, 0xf800
; CHECK-NEXT: s_mov_b32 s5, -1
; CHECK-NEXT: .LBB8_5: ; %memmove_bwd_loop
@@ -11167,8 +11167,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
; ALIGNED-NEXT: s_andn2_saveexec_b32 s6, s6
; ALIGNED-NEXT: s_cbranch_execz .LBB8_6
; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
-; ALIGNED-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
+; ALIGNED-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
; ALIGNED-NEXT: s_movk_i32 s4, 0xf800
; ALIGNED-NEXT: s_mov_b32 s5, -1
; ALIGNED-NEXT: .LBB8_5: ; %memmove_bwd_loop
@@ -12381,8 +12381,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
; UNROLL3-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:2024
; UNROLL3-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:2020
; UNROLL3-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:2016
-; UNROLL3-NEXT: v_add_nc_u32_e32 v2, 0x7b0, v0
; UNROLL3-NEXT: v_add_nc_u32_e32 v1, 0x7b0, v1
+; UNROLL3-NEXT: v_add_nc_u32_e32 v2, 0x7b0, v0
; UNROLL3-NEXT: s_waitcnt vmcnt(3)
; UNROLL3-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:2028
; UNROLL3-NEXT: s_waitcnt vmcnt(2)
diff --git a/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll b/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
new file mode 100644
index 0000000000000..47dfd83b79edd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX900 %s
+
+
+%pair = type { i32, i32 }
+
+define void @test_extractvalue_then_else(ptr %ptr, i1 %cond) {
+; GFX900-LABEL: test_extractvalue_then_else:
+; GFX900: ; %bb.0: ; %if
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: flat_load_dword v3, v[0:1]
+; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
+; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GFX900-NEXT: s_cbranch_execz .LBB0_2
+; GFX900-NEXT: ; %bb.1: ; %else
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
+; GFX900-NEXT: .LBB0_2: ; %Flow
+; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: flat_store_dword v[0:1], v3
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+if:
+ %load_then = load %pair, ptr %ptr
+ br i1 %cond, label %then, label %else
+
+then:
+ %a_then = extractvalue %pair %load_then, 0
+ br label %merge
+
+else:
+ %a_else = extractvalue %pair %load_then, 0
+ %sum_else = add i32 %a_else, 1
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
+
+define void @test_extractvalue_else_then(ptr %ptr, i1 %cond) {
+; GFX900-LABEL: test_extractvalue_else_then:
+; GFX900: ; %bb.0: ; %if
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: flat_load_dword v3, v[0:1]
+; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
+; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX900-NEXT: s_cbranch_execz .LBB1_2
+; GFX900-NEXT: ; %bb.1: ; %else
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
+; GFX900-NEXT: .LBB1_2: ; %merge
+; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: flat_store_dword v[0:1], v3
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+if:
+ %load_then = load %pair, ptr %ptr
+ br i1 %cond, label %else, label %then
+
+else:
+ %a_else = extractvalue %pair %load_then, 0
+ %sum_else = add i32 %a_else, 1
+ br label %merge
+
+then:
+ %a_then = extractvalue %pair %load_then, 0
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
+
+define amdgpu_kernel void @test_loop_with_if( ptr %ptr, i1 %cond) #0 {
+; GFX900-LABEL: test_loop_with_if:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: s_mov_b64 s[4:5], 0
+; GFX900-NEXT: s_movk_i32 s10, 0xfe
+; GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-NEXT: s_bitcmp1_b32 s2, 0
+; GFX900-NEXT: s_cselect_b64 s[2:3], -1, 0
+; GFX900-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v2, s1
+; GFX900-NEXT: s_xor_b64 s[2:3], s[2:3], -1
+; GFX900-NEXT: v_mov_b32_e32 v1, s0
+; GFX900-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v3
+; GFX900-NEXT: s_branch .LBB2_2
+; GFX900-NEXT: .LBB2_1: ; %latch
+; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
+; GFX900-NEXT: s_or_b64 exec, exec, s[8:9]
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_u32_e32 v5, 20, v3
+; GFX900-NEXT: v_cmp_lt_i32_e32 vcc, s10, v5
+; GFX900-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GFX900-NEXT: flat_store_dword v[1:2], v3
+; GFX900-NEXT: s_andn2_b64 exec, exec, s[4:5]
+; GFX900-NEXT: s_cbranch_execz .LBB2_8
+; GFX900-NEXT: .LBB2_2: ; %loop
+; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX900-NEXT: flat_load_dwordx2 v[3:4], v[1:2]
+; GFX900-NEXT: s_and_b64 vcc, exec, s[0:1]
+; GFX900-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX900-NEXT: s_mov_b64 s[6:7], 0
+; GFX900-NEXT: s_cbranch_vccnz .LBB2_4
+; GFX900-NEXT: ; %bb.3: ; %if
+; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
+; GFX900-NEXT: v_cmp_gt_i32_e32 vcc, 11, v5
+; GFX900-NEXT: s_andn2_b64 s[8:9], s[2:3], exec
+; GFX900-NEXT: s_and_b64 s[12:13], vcc, exec
+; GFX900-NEXT: s_mov_b64 s[6:7], -1
+; GFX900-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
+; GFX900-NEXT: .LBB2_4: ; %Flow
+; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
+; GFX900-NEXT: s_and_saveexec_b64 s[12:13], s[8:9]
+; GFX900-NEXT: s_xor_b64 s[8:9], exec, s[12:13]
+; GFX900-NEXT: s_cbranch_execz .LBB2_6
+; GFX900-NEXT: ; %bb.5: ; %else
+; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_u32_e32 v3, v3, v4
+; GFX900-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
+; GFX900-NEXT: .LBB2_6: ; %Flow1
+; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
+; GFX900-NEXT: s_or_b64 exec, exec, s[8:9]
+; GFX900-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
+; GFX900-NEXT: s_cbranch_execz .LBB2_1
+; GFX900-NEXT: ; %bb.7: ; %then
+; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
+; GFX900-NEXT: flat_store_dword v[1:2], v0
+; GFX900-NEXT: s_branch .LBB2_1
+; GFX900-NEXT: .LBB2_8: ; %end
+; GFX900-NEXT: s_endpgm
+entry:
+ %a = tail call i32 @llvm.amdgcn.workitem.id.x()
+ br label %loop
+
+loop:
+ %entry_phi = phi i32 [ 0, %entry ], [ %a15, %latch ]
+ %load = load %pair, ptr %ptr
+ br i1 %cond, label %if, label %else
+
+if:
+ %cmp = icmp sgt i32 %entry_phi, 10
+ br i1 %cmp, label %then, label %else
+
+then:
+ %a_then = extractvalue %pair %load, 0
+ store i32 %a, ptr %ptr, align 4
+ br label %latch
+
+else:
+ %a2 = extractvalue %pair %load, 1
+ %y = extractvalue %pair %load, 0
+ %a_else = add i32 %y, %a2
+ br label %latch
+
+latch:
+ %a_test = phi i32 [ %a_then, %then ], [ %a_else, %else ]
+ store i32 %a_test, ptr %ptr
+ %a15 = add nsw i32 %a_test, 20
+ %a16 = icmp slt i32 %a15, 255
+ br i1 %a16, label %loop, label %end
+
+end:
+ ret void
+}
diff --git a/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll b/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll
new file mode 100644
index 0000000000000..b2711af5bb828
--- /dev/null
+++ b/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
+; RUN: opt -S -passes=structurizecfg %s -o - | FileCheck %s
+
+
+
+%pair = type { i32, i32 }
+define void @test_if_then_else(ptr %ptr, i1 %cond) {
+; CHECK-LABEL: define void @test_if_then_else(
+; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
+; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
+; CHECK-NEXT: br i1 [[COND_INV]], label %[[ELSE:.*]], label %[[FLOW:.*]]
+; CHECK: [[FLOW]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[SUM_ELSE:%.*]], %[[ELSE]] ], [ [[A_THEN]], %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ true, %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[MERGE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: br label %[[MERGE]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
+; CHECK-NEXT: [[SUM_ELSE]] = add i32 [[A_ELSE]], 1
+; CHECK-NEXT: br label %[[FLOW]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: store i32 [[TMP0]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %load_then = load %pair, ptr %ptr
+ br i1 %cond, label %then, label %else
+
+then:
+ %a_then = extractvalue %pair %load_then, 0
+ br label %merge
+
+else:
+ %a_else = extractvalue %pair %load_then, 0
+ %sum_else = add i32 %a_else, 1
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
+
+define void @test_if_else_then(ptr %ptr, i1 %cond) {
+; CHECK-LABEL: define void @test_if_else_then(
+; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
+; CHECK-NEXT: br i1 [[COND_INV]], label %[[THEN:.*]], label %[[FLOW:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
+; CHECK-NEXT: br label %[[FLOW]]
+; CHECK: [[FLOW]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[A_THEN]], %[[THEN]] ], [ poison, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[THEN]] ], [ true, %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[ELSE:.*]], label %[[MERGE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
+; CHECK-NEXT: [[SUM_ELSE:%.*]] = add i32 [[A_ELSE]], 1
+; CHECK-NEXT: br label %[[MERGE]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], %[[FLOW]] ], [ [[SUM_ELSE]], %[[ELSE]] ]
+; CHECK-NEXT: store i32 [[PHI]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %load_then = load %pair, ptr %ptr
+ br i1 %cond, label %else, label %then
+
+then:
+ %a_then = extractvalue %pair %load_then, 0
+ br label %merge
+
+else:
+ %a_else = extractvalue %pair %load_then, 0
+ %sum_else = add i32 %a_else, 1
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
+
+define amdgpu_kernel void @test_loop_with_if( ptr %ptr, i1 %cond) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @test_loop_with_if(
+; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I15:%.*]], %[[LATCH:.*]] ]
+; CHECK-NEXT: [[LOAD:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
+; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD]], 0
+; CHECK-NEXT: br i1 [[COND]], label %[[IF:.*]], label %[[FLOW:.*]]
+; CHECK: [[IF]]:
+; CHECK-NEXT: [[I9:%.*]] = icmp sle i32 [[I3]], 10
+; CHECK-NEXT: br label %[[FLOW]]
+; CHECK: [[FLOW1:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[Y:%.*]], %[[ELSE:.*]] ], [ [[A_THEN]], %[[FLOW]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ [[TMP2:%.*]], %[[FLOW]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[LATCH]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: store i32 [[I]], ptr [[PTR]], align 4
+; CHECK-NEXT: br label %[[LATCH]]
+; CHECK: [[FLOW]]:
+; CHECK-NEXT: [[TMP2]] = phi i1 [ true, %[[IF]] ], [ false, %[[LOOP]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[I9]], %[[IF]] ], [ [[COND_INV]], %[[LOOP]] ]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[ELSE]], label %[[FLOW1]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[I2:%.*]] = extractvalue [[PAIR]] [[LOAD]], 1
+; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD]], 0
+; CHECK-NEXT: [[Y]] = add i32 [[A_ELSE]], [[I2]]
+; CHECK-NEXT: br label %[[FLOW1]]
+; CHECK: [[LATCH]]:
+; CHECK-NEXT: store i32 [[TMP0]], ptr [[PTR]], align 4
+; CHECK-NEXT: [[I15]] = add nsw i32 [[TMP0]], 20
+; CHECK-NEXT: [[I16:%.*]] = icmp sge i32 [[I15]], 255
+; CHECK-NEXT: br i1 [[I16]], label %[[END:.*]], label %[[LOOP]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %a = tail call i32 @llvm.amdgcn.workitem.id.x()
+ br label %loop
+
+loop:
+ %entry_phi = phi i32 [ 0, %entry ], [ %a15, %latch ]
+ %load = load %pair, ptr %ptr
+ br i1 %cond, label %if, label %else
+
+if:
+ %cmp = icmp sgt i32 %entry_phi, 10
+ br i1 %cmp, label %then, label %else
+
+then:
+ %a_then = extractvalue %pair %load, 0
+ store i32 %a, ptr %ptr, align 4
+ br label %latch
+
+else:
+ %a2 = extractvalue %pair %load, 1
+ %y = extractvalue %pair %load, 0
+ %a_else = add i32 %y, %a2
+ br label %latch
+
+latch:
+ %a_test = phi i32 [ %a_then, %then ], [ %a_else, %else ]
+ store i32 %a_test, ptr %ptr
+ %a15 = add nsw i32 %a_test, 20
+ %a16 = icmp slt i32 %a15, 255
+ br i1 %a16, label %loop, label %end
+
+end:
+ ret void
+}
diff --git a/llvm/test/Transforms/StructurizeCFG/order-if-else.ll b/llvm/test/Transforms/StructurizeCFG/order-if-else.ll
deleted file mode 100644
index cfcf8e2e24e37..0000000000000
--- a/llvm/test/Transforms/StructurizeCFG/order-if-else.ll
+++ /dev/null
@@ -1,129 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
-; RUN: opt -S -passes=structurizecfg %s -o - | FileCheck %s
-
-define void @test_extractelement_1(<4 x i32> %vec, i1 %cond, ptr %ptr) {
-; CHECK-LABEL: define void @test_extractelement_1(
-; CHECK-SAME: <4 x i32> [[VEC:%.*]], i1 [[COND:%.*]], ptr [[PTR:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
-; CHECK-NEXT: br i1 [[COND_INV]], label %[[ELSE:.*]], label %[[FLOW:.*]]
-; CHECK: [[FLOW]]:
-; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[A:%.*]], %[[ELSE]] ], [ poison, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ true, %[[ENTRY]] ]
-; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[MERGE:.*]]
-; CHECK: [[THEN]]:
-; CHECK-NEXT: [[X:%.*]] = extractelement <4 x i32> [[VEC]], i32 0
-; CHECK-NEXT: [[Z:%.*]] = add i32 [[X]], 1
-; CHECK-NEXT: br label %[[MERGE]]
-; CHECK: [[ELSE]]:
-; CHECK-NEXT: [[A]] = extractelement <4 x i32> [[VEC]], i32 1
-; CHECK-NEXT: br label %[[FLOW]]
-; CHECK: [[MERGE]]:
-; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], %[[FLOW]] ], [ [[Z]], %[[THEN]] ]
-; CHECK-NEXT: store i32 [[PHI]], ptr [[PTR]], align 4
-; CHECK-NEXT: ret void
-;
-entry:
- br i1 %cond, label %then, label %else
-
-then:
- %x = extractelement <4 x i32> %vec, i32 0
- %z = add i32 %x, 1
- br label %merge
-
-else:
- %a = extractelement <4 x i32> %vec, i32 1
- br label %merge
-
-merge:
- %phi = phi i32 [ %z, %then ], [ %a, %else ]
- store i32 %phi, ptr %ptr
- ret void
-}
-
-define void @test_extractelement_2(<4 x i32> %vec, i1 %cond, ptr %ptr) {
-; CHECK-LABEL: define void @test_extractelement_2(
-; CHECK-SAME: <4 x i32> [[VEC:%.*]], i1 [[COND:%.*]], ptr [[PTR:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
-; CHECK-NEXT: br i1 [[COND_INV]], label %[[ELSE:.*]], label %[[FLOW:.*]]
-; CHECK: [[FLOW]]:
-; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[A:%.*]], %[[ELSE]] ], [ poison, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ true, %[[ENTRY]] ]
-; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[MERGE:.*]]
-; CHECK: [[THEN]]:
-; CHECK-NEXT: [[X:%.*]] = extractelement <4 x i32> [[VEC]], i32 1
-; CHECK-NEXT: [[Y:%.*]] = add i32 [[X]], 1
-; CHECK-NEXT: [[VEC1:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
-; CHECK-NEXT: [[Z:%.*]] = extractelement <4 x i32> [[VEC1]], i32 0
-; CHECK-NEXT: br label %[[MERGE]]
-; CHECK: [[ELSE]]:
-; CHECK-NEXT: [[A]] = extractelement <4 x i32> [[VEC]], i32 1
-; CHECK-NEXT: br label %[[FLOW]]
-; CHECK: [[MERGE]]:
-; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], %[[FLOW]] ], [ [[Z]], %[[THEN]] ]
-; CHECK-NEXT: store i32 [[PHI]], ptr [[PTR]], align 4
-; CHECK-NEXT: ret void
-;
-entry:
- br i1 %cond, label %then, label %else
-
-then:
- %x = extractelement <4 x i32> %vec, i32 1
- %y = add i32 %x, 1
- %vec1 = insertelement <4 x i32> poison, i32 %y, i32 0
- %z = extractelement <4 x i32> %vec1, i32 0
- br label %merge
-
-else:
- %a = extractelement <4 x i32> %vec, i32 1
- br label %merge
-
-merge:
- %phi = phi i32 [ %z, %then ], [ %a, %else ]
- store i32 %phi, ptr %ptr
- ret void
-}
-
-%pair = type { i32, i32 }
-define void @test_extractvalue(ptr %ptr, i1 %cond) {
-; CHECK-LABEL: define void @test_extractvalue(
-; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
-; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[FLOW:.*]]
-; CHECK: [[THEN]]:
-; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
-; CHECK-NEXT: br label %[[FLOW]]
-; CHECK: [[FLOW]]:
-; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[A_THEN]], %[[THEN]] ], [ poison, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[THEN]] ], [ true, %[[ENTRY]] ]
-; CHECK-NEXT: br i1 [[TMP1]], label %[[ELSE:.*]], label %[[MERGE:.*]]
-; CHECK: [[ELSE]]:
-; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
-; CHECK-NEXT: [[SUM_ELSE:%.*]] = add i32 [[A_ELSE]], 1
-; CHECK-NEXT: br label %[[MERGE]]
-; CHECK: [[MERGE]]:
-; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], %[[FLOW]] ], [ [[SUM_ELSE]], %[[ELSE]] ]
-; CHECK-NEXT: store i32 [[PHI]], ptr [[PTR]], align 4
-; CHECK-NEXT: ret void
-;
-entry:
- %load_then = load %pair, ptr %ptr
- br i1 %cond, label %then, label %else
-
-then:
- %a_then = extractvalue %pair %load_then, 0
- br label %merge
-
-else:
- %a_else = extractvalue %pair %load_then, 0
- %sum_else = add i32 %a_else, 1
- br label %merge
-
-merge:
- %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
- store i32 %phi, ptr %ptr
- ret void
-}
>From cf34c41716ddfbfcc25ad8b756f7c18078fa23e4 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Mon, 23 Jun 2025 23:13:06 -0500
Subject: [PATCH 7/8] minor review
---
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 205da5d78d533..a5d5d9ef02c53 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -307,9 +307,7 @@ class StructurizeCFG {
RegionNode *PrevNode;
- void reorderIfElseBlock(BasicBlock *BB, unsigned Idx);
-
- void HoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
+ void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
void orderNodes();
@@ -340,7 +338,7 @@ class StructurizeCFG {
void simplifyAffectedPhis();
- void SimplifyHoistedPhis();
+ void simplifyHoistedPhis();
DebugLoc killTerminator(BasicBlock *BB);
@@ -472,7 +470,7 @@ INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
/// blocks. A follow-up function after setting PhiNodes assigns the hoisted
/// value to poison phi nodes along the if→flow edge, aiding register coalescing
/// and minimizing unnecessary live ranges.
-void StructurizeCFG::HoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
+void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
BasicBlock *ThenBB) {
BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
@@ -615,7 +613,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
BasicBlock *Other = Term->getSuccessor(!i);
if (Visited.count(Other) && !Loops.count(Other) &&
!Pred.count(Other) && !Pred.count(P)) {
- HoistZeroCostElseBlockPhiValues(Succ, Other);
+ hoistZeroCostElseBlockPhiValues(Succ, Other);
Pred[Other] = {BoolFalse, std::nullopt};
Pred[P] = {BoolTrue, std::nullopt};
continue;
@@ -973,7 +971,7 @@ void StructurizeCFG::setPhiValues() {
/// Updates PHI nodes after hoisted zero cost instructions by replacing poison
/// entries on Flow nodes with the appropriate hoisted values
-void StructurizeCFG::SimplifyHoistedPhis() {
+void StructurizeCFG::simplifyHoistedPhis() {
for (WeakVH VH : AffectedPhis) {
if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
if (Phi->getNumIncomingValues() != 2)
@@ -1414,7 +1412,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT,
insertConditions(false);
insertConditions(true);
setPhiValues();
- SimplifyHoistedPhis();
+ simplifyHoistedPhis();
simplifyConditions();
simplifyAffectedPhis();
rebuildSSA();
>From 65b72ae610607920c625aa98abb2971fad20001a Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Tue, 24 Jun 2025 00:10:48 -0500
Subject: [PATCH 8/8] review comments
---
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 60 ++++++++++---------
1 file changed, 31 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index a5d5d9ef02c53..e5bdd1402c006 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -481,13 +481,12 @@ void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
Instruction *Term = CommonDominator->getTerminator();
for (PHINode &Phi : ElseSucc->phis()) {
Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
- if (auto *Inst = dyn_cast<Instruction>(ElseVal)) {
- if (isHoistableInstruction(Inst, ElseBB, TTI)) {
- Inst->removeFromParent();
- Inst->insertInto(CommonDominator, Term->getIterator());
- HoistedValues[Inst] = CommonDominator;
- }
- }
+ auto *Inst = dyn_cast<Instruction>(ElseVal);
+ if (!Inst || !isHoistableInstruction(Inst, ElseBB, TTI))
+ continue;
+ Inst->removeFromParent();
+ Inst->insertInto(CommonDominator, Term->getIterator());
+ HoistedValues[Inst] = CommonDominator;
}
}
@@ -973,31 +972,34 @@ void StructurizeCFG::setPhiValues() {
/// entries on Flow nodes with the appropriate hoisted values
void StructurizeCFG::simplifyHoistedPhis() {
for (WeakVH VH : AffectedPhis) {
- if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
- if (Phi->getNumIncomingValues() != 2)
+ PHINode *Phi = dyn_cast_or_null<PHINode>(VH);
+ if (!Phi || Phi->getNumIncomingValues() != 2)
+ continue;
+
+ for (int i = 0; i < 2; i++) {
+ Value *V = Phi->getIncomingValue(i);
+ if (!HoistedValues.count(V))
continue;
- for (int i = 0; i < 2; i++) {
- Value *V = Phi->getIncomingValue(i);
- if (HoistedValues.count(V)) {
- Value *OtherV = Phi->getIncomingValue(!i);
- if (PHINode *OtherPhi = dyn_cast<PHINode>(OtherV)) {
- int PoisonValBBIdx = -1;
- for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
- if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i)))
- continue;
- PoisonValBBIdx = i;
- break;
- }
- if (PoisonValBBIdx == -1 ||
- !DT->dominates(HoistedValues[V],
- OtherPhi->getIncomingBlock(PoisonValBBIdx)))
- continue;
- OtherPhi->setIncomingValue(PoisonValBBIdx, V);
- Phi->setIncomingValue(i, OtherV);
- }
- }
+ Value *OtherV = Phi->getIncomingValue(!i);
+ PHINode *OtherPhi = dyn_cast<PHINode>(OtherV);
+ if (!OtherPhi)
+ continue;
+
+ int PoisonValBBIdx = -1;
+ for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
+ if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i)))
+ continue;
+ PoisonValBBIdx = i;
+ break;
}
+ if (PoisonValBBIdx == -1 ||
+ !DT->dominates(HoistedValues[V],
+ OtherPhi->getIncomingBlock(PoisonValBBIdx)))
+ continue;
+
+ OtherPhi->setIncomingValue(PoisonValBBIdx, V);
+ Phi->setIncomingValue(i, OtherV);
}
}
}
More information about the llvm-commits
mailing list