[llvm] [AMDGPU] Reland: Do not use original PHIs in coercion chains (PR #98985)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 15 20:12:22 PDT 2024
https://github.com/jrbyrnes created https://github.com/llvm/llvm-project/pull/98985
Reland https://github.com/llvm/llvm-project/commit/1612e4a3510982692f22e3f8190fc7c977185cbe
The original version didn't correctly remove the broken PHI chain from the ValMap, so it would still be used in some cases.
During this, I spotted another issue where users try to use the type coerced version of the value even though the original value was defined in the same block. I've also addressed that in this reland.
>From 57eb97caa3dabc824e630296beb8fd8b6c6de2dd Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 15 Jul 2024 19:46:07 -0700
Subject: [PATCH] [AMDGPU] Reland: Do not use original PHIs in coercion chains
Change-Id: I579b5c69a85997f168ed35354b326524b6f84ef7
---
.../AMDGPU/AMDGPULateCodeGenPrepare.cpp | 46 ++++++++---
llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll | 76 +++++++++++++++++++
2 files changed, 113 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 2cc95f81d2f94..eade79dd3bfeb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -365,13 +365,33 @@ bool LiveRegOptimizer::optimizeLiveType(
else
MissingIncVal = true;
}
- Instruction *DeadInst = Phi;
if (MissingIncVal) {
- DeadInst = cast<Instruction>(ValMap[Phi]);
- // Do not use the dead phi
- ValMap[Phi] = Phi;
+ Value *DeadVal = ValMap[Phi];
+ // The coercion chain of the PHI is broken. Delete the Phi
+ // from the ValMap and any connected / user Phis.
+ SmallVector<Value *, 4> PHIWorklist;
+ SmallPtrSet<Value *, 4> Visited;
+ PHIWorklist.push_back(DeadVal);
+ while (!PHIWorklist.empty()) {
+ Value *NextDeadValue = PHIWorklist.pop_back_val();
+ Visited.insert(NextDeadValue);
+ auto OriginalPhi =
+ std::find_if(PhiNodes.begin(), PhiNodes.end(),
+ [this, &NextDeadValue](PHINode *CandPhi) {
+ return ValMap[CandPhi] == NextDeadValue;
+ });
+ assert(OriginalPhi != PhiNodes.end());
+ ValMap.erase(*OriginalPhi);
+ DeadInsts.emplace_back(cast<Instruction>(NextDeadValue));
+
+ for (User *U : NextDeadValue->users()) {
+ if (!Visited.contains(cast<PHINode>(U)))
+ PHIWorklist.push_back(U);
+ }
+ }
+ } else {
+ DeadInsts.emplace_back(cast<Instruction>(Phi));
}
- DeadInsts.emplace_back(DeadInst);
}
// Coerce back to the original type and replace the uses.
for (Instruction *U : Uses) {
@@ -384,10 +404,18 @@ bool LiveRegOptimizer::optimizeLiveType(
NewVal = BBUseValMap[U->getParent()][ValMap[Op]];
else {
BasicBlock::iterator InsertPt = U->getParent()->getFirstNonPHIIt();
- NewVal =
- convertFromOptType(Op->getType(), cast<Instruction>(ValMap[Op]),
- InsertPt, U->getParent());
- BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;
+ // We may pick up ops that were previously converted for users in
+ // other blocks. If there is a originally typed definition of the Op
+ // already in this block, simply reuse it.
+ if (isa<Instruction>(Op) && !isa<PHINode>(Op) &&
+ U->getParent() == cast<Instruction>(Op)->getParent()) {
+ NewVal = Op;
+ } else {
+ NewVal =
+ convertFromOptType(Op->getType(), cast<Instruction>(ValMap[Op]),
+ InsertPt, U->getParent());
+ BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;
+ }
}
assert(NewVal);
U->setOperand(OpIdx, NewVal);
diff --git a/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll b/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll
index 5d2e299aa854a..8d27e09562651 100644
--- a/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll
@@ -349,4 +349,80 @@ bb.2:
ret void
}
+; Should not produce a broken phi
+
+define void @broken_phi() {
+; GFX906-LABEL: define void @broken_phi(
+; GFX906-SAME: ) #[[ATTR0]] {
+; GFX906-NEXT: bb:
+; GFX906-NEXT: br label [[BB1:%.*]]
+; GFX906: bb1:
+; GFX906-NEXT: [[I:%.*]] = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ]
+; GFX906-NEXT: br i1 false, label [[BB3:%.*]], label [[BB2:%.*]]
+; GFX906: bb2:
+; GFX906-NEXT: br label [[BB3]]
+; GFX906: bb3:
+; GFX906-NEXT: [[I4:%.*]] = phi <4 x i8> [ zeroinitializer, [[BB2]] ], [ [[I]], [[BB1]] ]
+; GFX906-NEXT: br i1 false, label [[BB7]], label [[BB5:%.*]]
+; GFX906: bb5:
+; GFX906-NEXT: [[I6:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[I4]], <4 x i8> zeroinitializer)
+; GFX906-NEXT: br label [[BB7]]
+; GFX906: bb7:
+; GFX906-NEXT: [[I8]] = phi <4 x i8> [ zeroinitializer, [[BB5]] ], [ zeroinitializer, [[BB3]] ]
+; GFX906-NEXT: br label [[BB1]]
+;
+bb:
+ br label %bb1
+bb1:
+ %i = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, %bb ], [ %i8, %bb7 ]
+ br i1 false, label %bb3, label %bb2
+bb2:
+ br label %bb3
+bb3:
+ %i4 = phi <4 x i8> [ zeroinitializer, %bb2 ], [ %i, %bb1 ]
+ br i1 false, label %bb7, label %bb5
+bb5:
+ %i6 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %i4, <4 x i8> zeroinitializer)
+ br label %bb7
+bb7:
+ %i8 = phi <4 x i8> [ zeroinitializer, %bb5 ], [ zeroinitializer, %bb3 ]
+ br label %bb1
+}
+
+; %sel1 should just use %sel0 instead of trying to convert back the
+; converted version of %sel0
+
+define amdgpu_kernel void @reuseOp() {
+; GFX906-LABEL: define amdgpu_kernel void @reuseOp(
+; GFX906-SAME: ) #[[ATTR0]] {
+; GFX906-NEXT: entry:
+; GFX906-NEXT: [[VEC1:%.*]] = insertelement <16 x i8> zeroinitializer, i8 0, i64 0
+; GFX906-NEXT: [[VEC1_BC:%.*]] = bitcast <16 x i8> [[VEC1]] to <4 x i32>
+; GFX906-NEXT: br label [[BB_1:%.*]]
+; GFX906: bb.1:
+; GFX906-NEXT: [[VEC1_BC_BC:%.*]] = bitcast <4 x i32> [[VEC1_BC]] to <16 x i8>
+; GFX906-NEXT: [[SEL0:%.*]] = select i1 false, <16 x i8> zeroinitializer, <16 x i8> zeroinitializer
+; GFX906-NEXT: [[SEL0_BC:%.*]] = bitcast <16 x i8> [[SEL0]] to <4 x i32>
+; GFX906-NEXT: [[SEL1:%.*]] = select i1 false, <16 x i8> [[VEC1_BC_BC]], <16 x i8> [[SEL0]]
+; GFX906-NEXT: br label [[BB_2:%.*]]
+; GFX906: bb.2:
+; GFX906-NEXT: [[SEL0_BC_BC:%.*]] = bitcast <4 x i32> [[SEL0_BC]] to <16 x i8>
+; GFX906-NEXT: [[VAL:%.*]] = extractelement <16 x i8> [[SEL0_BC_BC]], i64 0
+; GFX906-NEXT: ret void
+;
+entry:
+ %vec1 = insertelement <16 x i8> zeroinitializer, i8 0, i64 0
+ br label %bb.1
+
+bb.1:
+ %sel0 = select i1 false, <16 x i8> zeroinitializer, <16 x i8> zeroinitializer
+ %sel1 = select i1 false, <16 x i8> %vec1, <16 x i8> %sel0
+ br label %bb.2
+
+bb.2:
+ %val = extractelement <16 x i8> %sel0, i64 0
+ ret void
+}
+
+
declare i32 @llvm.amdgcn.workitem.id.x()
More information about the llvm-commits
mailing list