[llvm] 6e68b75 - [AMDGPU] Reland: Do not use original PHIs in coercion chains

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 19 09:03:07 PDT 2024


Author: Jeffrey Byrnes
Date: 2024-07-19T09:02:28-07:00
New Revision: 6e68b75e6634e9809e2af5d68c87a4eb4282217e

URL: https://github.com/llvm/llvm-project/commit/6e68b75e6634e9809e2af5d68c87a4eb4282217e
DIFF: https://github.com/llvm/llvm-project/commit/6e68b75e6634e9809e2af5d68c87a4eb4282217e.diff

LOG: [AMDGPU] Reland: Do not use original PHIs in coercion chains

Change-Id: I579b5c69a85997f168ed35354b326524b6f84ef7

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
    llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 2cc95f81d2f94..fe7731b9550bf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -365,29 +365,60 @@ bool LiveRegOptimizer::optimizeLiveType(
       else
         MissingIncVal = true;
     }
-    Instruction *DeadInst = Phi;
     if (MissingIncVal) {
-      DeadInst = cast<Instruction>(ValMap[Phi]);
-      // Do not use the dead phi
-      ValMap[Phi] = Phi;
+      Value *DeadVal = ValMap[Phi];
+      // The coercion chain of the PHI is broken. Delete the Phi
+      // from the ValMap and any connected / user Phis.
+      SmallVector<Value *, 4> PHIWorklist;
+      SmallPtrSet<Value *, 4> Visited;
+      PHIWorklist.push_back(DeadVal);
+      while (!PHIWorklist.empty()) {
+        Value *NextDeadValue = PHIWorklist.pop_back_val();
+        Visited.insert(NextDeadValue);
+        auto OriginalPhi =
+            std::find_if(PhiNodes.begin(), PhiNodes.end(),
+                         [this, &NextDeadValue](PHINode *CandPhi) {
+                           return ValMap[CandPhi] == NextDeadValue;
+                         });
+        // This PHI may have already been removed from maps when
+        // unwinding a previous Phi
+        if (OriginalPhi != PhiNodes.end())
+          ValMap.erase(*OriginalPhi);
+
+        DeadInsts.emplace_back(cast<Instruction>(NextDeadValue));
+
+        for (User *U : NextDeadValue->users()) {
+          if (!Visited.contains(cast<PHINode>(U)))
+            PHIWorklist.push_back(U);
+        }
+      }
+    } else {
+      DeadInsts.emplace_back(cast<Instruction>(Phi));
     }
-    DeadInsts.emplace_back(DeadInst);
   }
   // Coerce back to the original type and replace the uses.
   for (Instruction *U : Uses) {
     // Replace all converted operands for a use.
     for (auto [OpIdx, Op] : enumerate(U->operands())) {
-      if (ValMap.contains(Op)) {
+      if (ValMap.contains(Op) && ValMap[Op]) {
         Value *NewVal = nullptr;
         if (BBUseValMap.contains(U->getParent()) &&
             BBUseValMap[U->getParent()].contains(ValMap[Op]))
           NewVal = BBUseValMap[U->getParent()][ValMap[Op]];
         else {
           BasicBlock::iterator InsertPt = U->getParent()->getFirstNonPHIIt();
-          NewVal =
-              convertFromOptType(Op->getType(), cast<Instruction>(ValMap[Op]),
-                                 InsertPt, U->getParent());
-          BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;
+          // We may pick up ops that were previously converted for users in
+          // other blocks. If there is an originally typed definition of the Op
+          // already in this block, simply reuse it.
+          if (isa<Instruction>(Op) && !isa<PHINode>(Op) &&
+              U->getParent() == cast<Instruction>(Op)->getParent()) {
+            NewVal = Op;
+          } else {
+            NewVal =
+                convertFromOptType(Op->getType(), cast<Instruction>(ValMap[Op]),
+                                   InsertPt, U->getParent());
+            BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;
+          }
         }
         assert(NewVal);
         U->setOperand(OpIdx, NewVal);

diff  --git a/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll b/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll
index 5d2e299aa854a..1e60261df1304 100644
--- a/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll
@@ -349,4 +349,223 @@ bb.2:
   ret void
 }
 
+; Should not produce a broken phi
+
+define void @broken_phi() {
+; GFX906-LABEL: define void @broken_phi(
+; GFX906-SAME: ) #[[ATTR0]] {
+; GFX906-NEXT:  bb:
+; GFX906-NEXT:    br label [[BB1:%.*]]
+; GFX906:       bb1:
+; GFX906-NEXT:    [[I:%.*]] = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ]
+; GFX906-NEXT:    br i1 false, label [[BB3:%.*]], label [[BB2:%.*]]
+; GFX906:       bb2:
+; GFX906-NEXT:    br label [[BB3]]
+; GFX906:       bb3:
+; GFX906-NEXT:    [[I4:%.*]] = phi <4 x i8> [ zeroinitializer, [[BB2]] ], [ [[I]], [[BB1]] ]
+; GFX906-NEXT:    br i1 false, label [[BB7]], label [[BB5:%.*]]
+; GFX906:       bb5:
+; GFX906-NEXT:    [[I6:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[I4]], <4 x i8> zeroinitializer)
+; GFX906-NEXT:    br label [[BB7]]
+; GFX906:       bb7:
+; GFX906-NEXT:    [[I8]] = phi <4 x i8> [ zeroinitializer, [[BB5]] ], [ zeroinitializer, [[BB3]] ]
+; GFX906-NEXT:    br label [[BB1]]
+;
+bb:
+  br label %bb1
+bb1:
+  %i = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, %bb ], [ %i8, %bb7 ]
+  br i1 false, label %bb3, label %bb2
+bb2:
+  br label %bb3
+bb3:
+  %i4 = phi <4 x i8> [ zeroinitializer, %bb2 ], [ %i, %bb1 ]
+  br i1 false, label %bb7, label %bb5
+bb5:
+  %i6 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %i4, <4 x i8> zeroinitializer)
+  br label %bb7
+bb7:
+  %i8 = phi <4 x i8> [ zeroinitializer, %bb5 ], [ zeroinitializer, %bb3 ]
+  br label %bb1
+}
+
+; %sel1 should just use %sel0 instead of trying to convert back the
+; converted version of %sel0
+
+define amdgpu_kernel void @reuseOp() {
+; GFX906-LABEL: define amdgpu_kernel void @reuseOp(
+; GFX906-SAME: ) #[[ATTR0]] {
+; GFX906-NEXT:  entry:
+; GFX906-NEXT:    [[VEC1:%.*]] = insertelement <16 x i8> zeroinitializer, i8 0, i64 0
+; GFX906-NEXT:    [[VEC1_BC:%.*]] = bitcast <16 x i8> [[VEC1]] to <4 x i32>
+; GFX906-NEXT:    br label [[BB_1:%.*]]
+; GFX906:       bb.1:
+; GFX906-NEXT:    [[VEC1_BC_BC:%.*]] = bitcast <4 x i32> [[VEC1_BC]] to <16 x i8>
+; GFX906-NEXT:    [[SEL0:%.*]] = select i1 false, <16 x i8> zeroinitializer, <16 x i8> zeroinitializer
+; GFX906-NEXT:    [[SEL0_BC:%.*]] = bitcast <16 x i8> [[SEL0]] to <4 x i32>
+; GFX906-NEXT:    [[SEL1:%.*]] = select i1 false, <16 x i8> [[VEC1_BC_BC]], <16 x i8> [[SEL0]]
+; GFX906-NEXT:    br label [[BB_2:%.*]]
+; GFX906:       bb.2:
+; GFX906-NEXT:    [[SEL0_BC_BC:%.*]] = bitcast <4 x i32> [[SEL0_BC]] to <16 x i8>
+; GFX906-NEXT:    [[VAL:%.*]] = extractelement <16 x i8> [[SEL0_BC_BC]], i64 0
+; GFX906-NEXT:    ret void
+;
+entry:
+  %vec1 = insertelement <16 x i8> zeroinitializer, i8 0, i64 0
+  br label %bb.1
+
+bb.1:
+  %sel0 = select i1 false, <16 x i8> zeroinitializer, <16 x i8> zeroinitializer
+  %sel1 = select i1 false, <16 x i8> %vec1, <16 x i8> %sel0
+  br label %bb.2
+
+bb.2:
+  %val = extractelement <16 x i8> %sel0, i64 0
+  ret void
+}
+
+
+define amdgpu_kernel void @deletedPHI(i32 %in0, i1 %cmp, <10 x i8> %invec0) {
+; GFX906-LABEL: define amdgpu_kernel void @deletedPHI(
+; GFX906-SAME: i32 [[IN0:%.*]], i1 [[CMP:%.*]], <10 x i8> [[INVEC0:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT:  entry:
+; GFX906-NEXT:    br label [[BB_1:%.*]]
+; GFX906:       bb.1:
+; GFX906-NEXT:    [[PHI0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB_11:%.*]] ]
+; GFX906-NEXT:    [[PHI1:%.*]] = phi <10 x i8> [ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[ENTRY]] ], [ [[VEC1:%.*]], [[BB_11]] ]
+; GFX906-NEXT:    br i1 [[CMP]], label [[BB_3:%.*]], label [[BB_2:%.*]]
+; GFX906:       bb.2:
+; GFX906-NEXT:    br label [[BB_3]]
+; GFX906:       bb.3:
+; GFX906-NEXT:    [[PHI2:%.*]] = phi <10 x i8> [ zeroinitializer, [[BB_2]] ], [ [[PHI1]], [[BB_1]] ]
+; GFX906-NEXT:    br i1 [[CMP]], label [[BB_5:%.*]], label [[BB_4:%.*]]
+; GFX906:       bb.4:
+; GFX906-NEXT:    [[VEC0:%.*]] = insertelement <10 x i8> [[PHI2]], i8 0, i64 0
+; GFX906-NEXT:    br label [[BB_5]]
+; GFX906:       bb.5:
+; GFX906-NEXT:    [[PHI3:%.*]] = phi <10 x i8> [ [[VEC0]], [[BB_4]] ], [ [[PHI2]], [[BB_3]] ]
+; GFX906-NEXT:    br i1 [[CMP]], label [[BB_7:%.*]], label [[BB_6:%.*]]
+; GFX906:       bb.6:
+; GFX906-NEXT:    br label [[BB_7]]
+; GFX906:       bb.7:
+; GFX906-NEXT:    [[PHI4:%.*]] = phi <10 x i8> [ [[INVEC0]], [[BB_6]] ], [ [[PHI3]], [[BB_5]] ]
+; GFX906-NEXT:    br i1 [[CMP]], label [[BB_9:%.*]], label [[BB_8:%.*]]
+; GFX906:       bb.8:
+; GFX906-NEXT:    br label [[BB_9]]
+; GFX906:       bb.9:
+; GFX906-NEXT:    [[PHI5:%.*]] = phi <10 x i8> [ [[INVEC0]], [[BB_8]] ], [ [[PHI4]], [[BB_7]] ]
+; GFX906-NEXT:    br i1 [[CMP]], label [[BB_11]], label [[BB_10:%.*]]
+; GFX906:       bb.10:
+; GFX906-NEXT:    br label [[BB_11]]
+; GFX906:       bb.11:
+; GFX906-NEXT:    [[PHI6:%.*]] = phi <10 x i8> [ zeroinitializer, [[BB_10]] ], [ [[PHI5]], [[BB_9]] ]
+; GFX906-NEXT:    [[VEC1]] = shufflevector <10 x i8> [[PHI6]], <10 x i8> zeroinitializer, <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 15, i32 16, i32 17, i32 18, i32 19>
+; GFX906-NEXT:    br label [[BB_1]]
+;
+entry:
+  br label %bb.1
+
+bb.1:
+  %phi0 = phi i32 [ 0, %entry ], [ 1, %bb.11 ]
+  %phi1 = phi <10 x i8> [ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %entry ], [ %vec1, %bb.11 ]
+  br i1 %cmp, label %bb.3, label %bb.2
+
+bb.2:
+  br label %bb.3
+
+bb.3:
+  %phi2 = phi <10 x i8> [ zeroinitializer, %bb.2 ], [ %phi1, %bb.1 ]
+  br i1 %cmp, label %bb.5, label %bb.4
+
+bb.4:
+  %vec0 = insertelement <10 x i8> %phi2, i8 0, i64 0
+  br label %bb.5
+
+bb.5:                               ; preds = %bb.4, %bb.3
+  %phi3 = phi <10 x i8> [ %vec0, %bb.4 ], [ %phi2, %bb.3 ]
+  br i1 %cmp, label %bb.7, label %bb.6
+
+bb.6:
+  br label %bb.7
+
+bb.7:                               ; preds = %bb.6, %bb.5
+  %phi4 = phi <10 x i8> [ %invec0, %bb.6 ], [ %phi3, %bb.5 ]
+  br i1 %cmp, label %bb.9, label %bb.8
+
+bb.8:
+  br label %bb.9
+
+bb.9:
+  %phi5 = phi <10 x i8> [ %invec0, %bb.8 ], [ %phi4, %bb.7 ]
+  br i1 %cmp, label %bb.11, label %bb.10
+
+bb.10:
+  br label %bb.11
+
+bb.11:
+  %phi6 = phi <10 x i8> [ zeroinitializer, %bb.10 ], [ %phi5, %bb.9 ]
+  %vec1 = shufflevector <10 x i8> %phi6, <10 x i8> zeroinitializer, <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 15, i32 16, i32 17, i32 18, i32 19>
+  br label %bb.1
+}
+
+define amdgpu_kernel void @multiple_unwind(i1 %cmp, <10 x i8> %invec) {
+; GFX906-LABEL: define amdgpu_kernel void @multiple_unwind(
+; GFX906-SAME: i1 [[CMP:%.*]], <10 x i8> [[INVEC:%.*]]) #[[ATTR0]] {
+; GFX906-NEXT:  entry:
+; GFX906-NEXT:    br label [[BB_1:%.*]]
+; GFX906:       bb.1:
+; GFX906-NEXT:    [[PHI0:%.*]] = phi <10 x i8> [ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[ENTRY:%.*]] ], [ [[PHI3:%.*]], [[BB_8:%.*]] ]
+; GFX906-NEXT:    br i1 [[CMP]], label [[BB_3:%.*]], label [[BB_2:%.*]]
+; GFX906:       bb.2:
+; GFX906-NEXT:    br label [[BB_3]]
+; GFX906:       bb.3:
+; GFX906-NEXT:    [[PHI1:%.*]] = phi <10 x i8> [ zeroinitializer, [[BB_2]] ], [ [[PHI0]], [[BB_1]] ]
+; GFX906-NEXT:    br i1 [[CMP]], label [[BB_5:%.*]], label [[BB_4:%.*]]
+; GFX906:       bb.4:
+; GFX906-NEXT:    br label [[BB_5]]
+; GFX906:       bb.5:
+; GFX906-NEXT:    [[PHI2:%.*]] = phi <10 x i8> [ [[PHI0]], [[BB_4]] ], [ [[PHI1]], [[BB_3]] ]
+; GFX906-NEXT:    br i1 [[CMP]], label [[BB_7:%.*]], label [[BB_6:%.*]]
+; GFX906:       bb.6:
+; GFX906-NEXT:    br label [[BB_7]]
+; GFX906:       bb.7:
+; GFX906-NEXT:    [[PHI3]] = phi <10 x i8> [ [[INVEC]], [[BB_6]] ], [ [[PHI2]], [[BB_5]] ]
+; GFX906-NEXT:    br label [[BB_8]]
+; GFX906:       bb.8:
+; GFX906-NEXT:    br label [[BB_1]]
+;
+entry:
+  br label %bb.1
+
+bb.1:
+  %phi0 = phi <10 x i8> [ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %entry ], [ %phi3, %bb.8 ]
+  br i1 %cmp, label %bb.3, label %bb.2
+
+bb.2:
+  br label %bb.3
+
+bb.3:
+  %phi1 = phi <10 x i8> [ zeroinitializer, %bb.2 ], [ %phi0, %bb.1 ]
+  br i1 %cmp, label %bb.5, label %bb.4
+
+bb.4:
+  br label %bb.5
+
+bb.5:
+  %phi2 = phi <10 x i8> [ %phi0, %bb.4 ], [ %phi1, %bb.3 ]
+  br i1 %cmp, label %bb.7, label %bb.6
+
+bb.6:                              ; preds = %bb.5
+  br label %bb.7
+
+bb.7:
+  %phi3 = phi <10 x i8> [ %invec, %bb.6 ], [ %phi2, %bb.5 ]
+  br label %bb.8
+
+bb.8:
+  br label %bb.1
+}
+
+
+
 declare i32 @llvm.amdgcn.workitem.id.x()


        


More information about the llvm-commits mailing list