[llvm] a12403c - [LV] Do not consider instrs dead if used by phi that's not in plan.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 9 08:05:01 PST 2022


Author: Florian Hahn
Date: 2022-03-09T16:04:44Z
New Revision: a12403cfea15fc86fadfc28b5c76c056b36609cc

URL: https://github.com/llvm/llvm-project/commit/a12403cfea15fc86fadfc28b5c76c056b36609cc
DIFF: https://github.com/llvm/llvm-project/commit/a12403cfea15fc86fadfc28b5c76c056b36609cc.diff

LOG: [LV] Do not consider instrs dead if used by phi that's not in plan.

Single value phis won't be modeled in VPlan. If the phi only gets used
outside the loop, the current code misses the fact that the incoming
value is not dead. Update the code to also look through such phis to
check for outside users.

Fixes #54266

Added: 
    llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll

Modified: 
    llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b27016f31cd9d..9ba9dec664c6e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -359,6 +359,27 @@ void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) {
   }
 }
 
+// Check for live-out users currently not modeled in VPlan.
+// Note that exit values of inductions are generated independent of
+// the recipe. This means  VPWidenIntOrFpInductionRecipe &
+// VPScalarIVStepsRecipe can be removed, independent of uses outside
+// the loop.
+// TODO: Remove once live-outs are modeled in VPlan.
+static bool hasOutsideUser(Instruction &I, Loop &OrigLoop) {
+  return any_of(I.users(), [&OrigLoop](User *U) {
+    if (!OrigLoop.contains(cast<Instruction>(U)))
+      return true;
+
+    // Look through single-value phis in the loop, as they won't be modeled in
+    // VPlan and may be used outside the loop.
+    if (auto *PN = dyn_cast<PHINode>(U))
+      if (PN->getNumIncomingValues() == 1)
+        return hasOutsideUser(*PN, OrigLoop);
+
+    return false;
+  });
+}
+
 void VPlanTransforms::removeDeadRecipes(VPlan &Plan, Loop &OrigLoop) {
   VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
   // Remove dead recipes in header block. The recipes in the block are processed
@@ -370,15 +391,7 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan, Loop &OrigLoop) {
                [](VPValue *V) { return V->getNumUsers() > 0; }) ||
         (!isa<VPWidenIntOrFpInductionRecipe>(&R) &&
          !isa<VPScalarIVStepsRecipe>(&R) && R.getUnderlyingInstr() &&
-         any_of(R.getUnderlyingInstr()->users(), [&OrigLoop](User *U) {
-           // Check for live-out users currently not modeled in VPlan.
-           // Note that exit values of inductions are generated independent of
-           // the recipe. This means  VPWidenIntOrFpInductionRecipe &
-           // VPScalarIVStepsRecipe can be removed, independent of uses outside
-           // the loop.
-           // TODO: Remove once live-outs are modeled in VPlan.
-           return !OrigLoop.contains(cast<Instruction>(U));
-         })))
+         hasOutsideUser(*R.getUnderlyingInstr(), OrigLoop)))
       continue;
     R.eraseFromParent();
   }

diff  --git a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll
new file mode 100644
index 0000000000000..05b4f0e30d5b6
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll
@@ -0,0 +1,207 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
+
+; Tests for PR54266.
+define i32 @one_direct_branch(i32* %src) {
+; CHECK-LABEL: @one_direct_branch(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = xor <4 x i32> <i32 25500, i32 25500, i32 25500, i32 25500>, [[WIDE_LOAD]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 0, 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[SRC_GEP:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i32 [[IV]]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[SRC_GEP]], align 4
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 25500, [[LV]]
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[PHI_XOR:%.*]] = phi i32 [ [[XOR]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[XOR_LCSSA:%.*]] = phi i32 [ [[PHI_XOR]], [[LOOP_LATCH]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret i32 [[XOR_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %src.gep = getelementptr inbounds i32, i32* %src, i32 %iv
+  %lv = load i32, i32* %src.gep
+  %xor = xor i32 25500, %lv
+  br label %loop.latch
+
+loop.latch:
+  %phi.xor = phi i32 [ %xor, %loop ]
+  %iv.next = add nsw i32 %iv, 1
+  %tobool.not = icmp eq i32 %iv.next, 0
+  br i1 %tobool.not, label %exit, label %loop
+
+exit:
+  %xor.lcssa = phi i32 [ %phi.xor, %loop.latch ]
+  ret i32 %xor.lcssa
+}
+
+define i32 @two_direct_branch(i32* %src) {
+; CHECK-LABEL: @two_direct_branch(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = xor <4 x i32> <i32 25500, i32 25500, i32 25500, i32 25500>, [[WIDE_LOAD]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 0, 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[SRC_GEP:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i32 [[IV]]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[SRC_GEP]], align 4
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 25500, [[LV]]
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[PHI_XOR_1:%.*]] = phi i32 [ [[XOR]], [[LOOP]] ]
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[PHI_XOR:%.*]] = phi i32 [ [[PHI_XOR_1]], [[BB]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[XOR_LCSSA:%.*]] = phi i32 [ [[PHI_XOR]], [[LOOP_LATCH]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret i32 [[XOR_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:                              ; preds = %for.inc3, %entry
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %src.gep = getelementptr inbounds i32, i32* %src, i32 %iv
+  %lv = load i32, i32* %src.gep
+  %xor = xor i32 25500, %lv
+  br label %bb
+
+bb:
+  %phi.xor.1 = phi i32 [ %xor, %loop ]
+  br label %loop.latch
+
+loop.latch:
+  %phi.xor = phi i32 [ %phi.xor.1, %bb ]
+  %iv.next = add nsw i32 %iv, 1
+  %tobool.not = icmp eq i32 %iv.next, 0
+  br i1 %tobool.not, label %exit, label %loop
+
+exit:
+  %xor.lcssa = phi i32 [ %phi.xor, %loop.latch ]
+  ret i32 %xor.lcssa
+}
+
+define i32 @cond_branch(i32 %a, i32* %src) {
+; CHECK-LABEL: @cond_branch(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i32> <i32 25500, i32 25500, i32 25500, i32 25500>, [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> <i32 10, i32 10, i32 10, i32 10>, <4 x i32> [[TMP7]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
+; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 0, 0
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[SRC_GEP:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i32 [[IV]]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[SRC_GEP]], align 4
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 25500, [[LV]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[IV]], [[A]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_LATCH]], label [[THEN:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[PHI_XOR:%.*]] = phi i32 [ [[XOR]], [[LOOP]] ], [ 10, [[THEN]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[XOR_LCSSA:%.*]] = phi i32 [ [[PHI_XOR]], [[LOOP_LATCH]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret i32 [[XOR_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %src.gep = getelementptr inbounds i32, i32* %src, i32 %iv
+  %lv = load i32, i32* %src.gep
+  %xor = xor i32 25500, %lv
+  %cmp = icmp ne i32 %iv, %a
+  br i1 %cmp, label %loop.latch, label %then
+
+then:
+  br label %loop.latch
+
+loop.latch:
+  %phi.xor = phi i32 [ %xor, %loop ], [ 10, %then ]
+  %iv.next = add nsw i32 %iv, 1
+  %tobool.not = icmp eq i32 %iv.next, 0
+  br i1 %tobool.not, label %exit, label %loop
+
+exit:
+  %xor.lcssa = phi i32 [ %phi.xor, %loop.latch ]
+  ret i32 %xor.lcssa
+}


        


More information about the llvm-commits mailing list