[llvm] [AMDGPU] Do not use original PHIs in coercion chains (PR #98063)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 8 13:53:49 PDT 2024
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/98063
>From e2a0067a432a3f0505eec39d0bb6599c4d261487 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 8 Jul 2024 10:45:20 -0700
Subject: [PATCH 1/3] [AMDGPU] Do not use original PHIs in coercion chains
Change-Id: Ib15b2716d69c796eefe6683bd5f0a6ba0b94f6a2
---
.../AMDGPU/AMDGPULateCodeGenPrepare.cpp | 2 +-
.../test/CodeGen/AMDGPU/vni8-across-blocks.ll | 83 +++++++++++++++++++
2 files changed, 84 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 2cc95f81d2f94..88811a24e2fcd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -369,7 +369,7 @@ bool LiveRegOptimizer::optimizeLiveType(
if (MissingIncVal) {
DeadInst = cast<Instruction>(ValMap[Phi]);
// Do not use the dead phi
- ValMap[Phi] = Phi;
+ ValMap.erase(Phi);
}
DeadInsts.emplace_back(DeadInst);
}
diff --git a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
index 441f00faf329e..3202926e94074 100644
--- a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
@@ -866,5 +866,88 @@ bb.3:
ret void
}
+; This should not trigger the assertion `getType() == V->getType() && "All operands to PHI node must be the same type as the PHI node"`.
+; Note: whether or not the assertion fires depends on the iteration order of PhiNodes in AMDGPULateCodeGenPrepare, which
+; is non-deterministic because it iterates over a set of pointers.
+
+define amdgpu_kernel void @MissingInc_PhiChain(i1 %cmp1.i.i.i.i.i.not, <16 x i8> %promotealloca31.i.i.i.i) {
+; GFX906-LABEL: MissingInc_PhiChain:
+; GFX906: ; %bb.0: ; %entry
+; GFX906-NEXT: s_load_dword s2, s[0:1], 0x24
+; GFX906-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX906-NEXT: s_mov_b32 s10, 1
+; GFX906-NEXT: v_mov_b32_e32 v4, 1
+; GFX906-NEXT: s_mov_b32 s11, 1
+; GFX906-NEXT: s_waitcnt lgkmcnt(0)
+; GFX906-NEXT: s_bitcmp1_b32 s2, 0
+; GFX906-NEXT: s_cselect_b64 s[2:3], -1, 0
+; GFX906-NEXT: s_xor_b64 s[0:1], s[2:3], -1
+; GFX906-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; GFX906-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0
+; GFX906-NEXT: s_branch .LBB14_2
+; GFX906-NEXT: .LBB14_1: ; %if.end.1.i.i.i.i.i
+; GFX906-NEXT: ; in Loop: Header=BB14_2 Depth=1
+; GFX906-NEXT: v_lshrrev_b32_e32 v4, 8, v0
+; GFX906-NEXT: s_mov_b32 s10, 0
+; GFX906-NEXT: s_mov_b32 s11, 0
+; GFX906-NEXT: .LBB14_2: ; %for.body10.i.i.i.i
+; GFX906-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX906-NEXT: s_and_b64 vcc, exec, s[0:1]
+; GFX906-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX906-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX906-NEXT: s_cbranch_vccnz .LBB14_4
+; GFX906-NEXT: ; %bb.3: ; %if.then.i.i.i.i.i
+; GFX906-NEXT: ; in Loop: Header=BB14_2 Depth=1
+; GFX906-NEXT: v_lshlrev_b16_e64 v0, 8, s11
+; GFX906-NEXT: v_or_b32_sdwa v0, s10, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX906-NEXT: v_lshlrev_b16_e32 v1, 8, v4
+; GFX906-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX906-NEXT: s_mov_b64 s[8:9], -1
+; GFX906-NEXT: .LBB14_4: ; %Flow
+; GFX906-NEXT: ; in Loop: Header=BB14_2 Depth=1
+; GFX906-NEXT: s_andn2_b64 vcc, exec, s[8:9]
+; GFX906-NEXT: s_cbranch_vccnz .LBB14_7
+; GFX906-NEXT: ; %bb.5: ; %if.end.i.i.i.i.i
+; GFX906-NEXT: ; in Loop: Header=BB14_2 Depth=1
+; GFX906-NEXT: s_and_b64 vcc, exec, s[0:1]
+; GFX906-NEXT: s_cbranch_vccnz .LBB14_1
+; GFX906-NEXT: ; %bb.6: ; %if.then.1.i.i.i.i.i
+; GFX906-NEXT: ; in Loop: Header=BB14_2 Depth=1
+; GFX906-NEXT: v_mov_b32_e32 v0, s4
+; GFX906-NEXT: v_mov_b32_e32 v1, s5
+; GFX906-NEXT: v_mov_b32_e32 v2, s6
+; GFX906-NEXT: v_mov_b32_e32 v3, s7
+; GFX906-NEXT: s_branch .LBB14_1
+; GFX906-NEXT: .LBB14_7: ; in Loop: Header=BB14_2 Depth=1
+; GFX906-NEXT: ; implicit-def: $vgpr4
+; GFX906-NEXT: ; implicit-def: $sgpr10
+; GFX906-NEXT: ; implicit-def: $sgpr11
+; GFX906-NEXT: s_cbranch_execz .LBB14_2
+; GFX906-NEXT: ; %bb.8: ; %DummyReturnBlock
+; GFX906-NEXT: s_endpgm
+entry:
+ br label %for.body10.i.i.i.i
+
+for.body10.i.i.i.i: ; preds = %if.end.1.i.i.i.i.i, %entry
+ %promotealloca3237.i.i.i.i = phi <16 x i8> [ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %entry ], [ %1, %if.end.1.i.i.i.i.i ]
+ br i1 %cmp1.i.i.i.i.i.not, label %if.end.i.i.i.i.i, label %if.then.i.i.i.i.i
+
+if.then.i.i.i.i.i: ; preds = %for.body10.i.i.i.i
+ %0 = insertelement <16 x i8> %promotealloca3237.i.i.i.i, i8 0, i64 0
+ br label %if.end.i.i.i.i.i
+
+if.end.i.i.i.i.i: ; preds = %if.then.i.i.i.i.i, %for.body10.i.i.i.i
+ %promotealloca31.i.i.i.i3 = phi <16 x i8> [ %0, %if.then.i.i.i.i.i ], [ %promotealloca3237.i.i.i.i, %for.body10.i.i.i.i ]
+ br i1 %cmp1.i.i.i.i.i.not, label %if.end.1.i.i.i.i.i, label %if.then.1.i.i.i.i.i
+
+if.then.1.i.i.i.i.i: ; preds = %if.end.i.i.i.i.i
+ br label %if.end.1.i.i.i.i.i
+
+if.end.1.i.i.i.i.i: ; preds = %if.then.1.i.i.i.i.i, %if.end.i.i.i.i.i
+ %promotealloca30.i.i.i.i = phi <16 x i8> [ %promotealloca31.i.i.i.i, %if.then.1.i.i.i.i.i ], [ %promotealloca31.i.i.i.i3, %if.end.i.i.i.i.i ]
+ %1 = shufflevector <16 x i8> %promotealloca30.i.i.i.i, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ br label %for.body10.i.i.i.i
+}
+
declare i32 @llvm.amdgcn.workitem.id.x()
>From 5a75f4d1e4f26c603bf496c178724028d41c414a Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 8 Jul 2024 13:23:04 -0700
Subject: [PATCH 2/3] Propagate missing inc val to PHINode users of PHINodes
Change-Id: I29c485cb30bbc51324d6701fc77697936f324a96
---
.../AMDGPU/AMDGPULateCodeGenPrepare.cpp | 22 ++++++++++++++-----
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 88811a24e2fcd..13bd128688b5c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -365,13 +365,23 @@ bool LiveRegOptimizer::optimizeLiveType(
else
MissingIncVal = true;
}
- Instruction *DeadInst = Phi;
if (MissingIncVal) {
- DeadInst = cast<Instruction>(ValMap[Phi]);
- // Do not use the dead phi
- ValMap.erase(Phi);
- }
- DeadInsts.emplace_back(DeadInst);
+ Value *DeadVal = ValMap[Phi];
+ // The coercion chain of the PHI is broken. Delete the Phi
+ // from the ValMap and any connected / user Phis.
+ SmallVector<Value *, 4> PHIWorklist;
+ PHIWorklist.push_back(DeadVal);
+ while (!PHIWorklist.empty()) {
+ Value *NextDeadValue = PHIWorklist.pop_back_val();
+ for (User *U : cast<Instruction>(NextDeadValue)->users()) {
+ assert(isa<PHINode>(U));
+ PHIWorklist.push_back(U);
+ }
+ ValMap.erase(NextDeadValue);
+ DeadInsts.emplace_back(cast<Instruction>(NextDeadValue));
+ }
+ } else
+ DeadInsts.emplace_back(cast<Instruction>(Phi));
}
// Coerce back to the original type and replace the uses.
for (Instruction *U : Uses) {
>From 8870669dd1397b8c204d0486ea0b44767c2f5961 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 8 Jul 2024 13:50:59 -0700
Subject: [PATCH 3/3] Prevent infinite loops
Change-Id: Ie1f906462f094d8ffa3ac8d949dd01fadbdeab7b
---
llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 13bd128688b5c..6c9264cea89a7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -365,6 +365,7 @@ bool LiveRegOptimizer::optimizeLiveType(
else
MissingIncVal = true;
}
+
if (MissingIncVal) {
Value *DeadVal = ValMap[Phi];
// The coercion chain of the PHI is broken. Delete the Phi
@@ -373,12 +374,14 @@ bool LiveRegOptimizer::optimizeLiveType(
PHIWorklist.push_back(DeadVal);
while (!PHIWorklist.empty()) {
Value *NextDeadValue = PHIWorklist.pop_back_val();
+ ValMap.erase(NextDeadValue);
+ DeadInsts.emplace_back(cast<Instruction>(NextDeadValue));
+
for (User *U : cast<Instruction>(NextDeadValue)->users()) {
assert(isa<PHINode>(U));
- PHIWorklist.push_back(U);
+ if (ValMap.contains(cast<Instruction>(U)))
+ PHIWorklist.push_back(U);
}
- ValMap.erase(NextDeadValue);
- DeadInsts.emplace_back(cast<Instruction>(NextDeadValue));
}
} else
DeadInsts.emplace_back(cast<Instruction>(Phi));
More information about the llvm-commits
mailing list