[llvm] dae91f5 - [VPlan] Avoid VPTransformState::reset in fixReduction (NFCI).

Sat Oct 7 15:25:59 PDT 2023

Author: Florian Hahn
Date: 2023-10-07T23:24:24+01:00
New Revision: dae91f5dbc5bee579eac7f4cbb71e86f2934817f

URL: https://github.com/llvm/llvm-project/commit/dae91f5dbc5bee579eac7f4cbb71e86f2934817f
DIFF: https://github.com/llvm/llvm-project/commit/dae91f5dbc5bee579eac7f4cbb71e86f2934817f.diff

LOG: [VPlan] Avoid VPTransformState::reset in fixReduction (NFCI).

There's no need to repeatedly query and reset the state for
LoopExitInstDef. This removes one of the last uses of
VPTransformState::reset, by using a vector to store and update the
results. No other code should try to retrieve the result from State
outside the fixReduction call.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 53ad37bf3599b5c..c84427d1e4ca73f 100644

--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3830,6 +3830,10 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
       LoopExitInstDef = Def;
   }
 
+  VectorParts RdxParts(UF);
+  for (unsigned Part = 0; Part < UF; ++Part)
+    RdxParts[Part] = State.get(LoopExitInstDef, Part);
+
   // If the vector reduction can be performed in a smaller type, we truncate
   // then extend the loop exit value to enable InstCombine to evaluate the
   // entire expression in the smaller type.
@@ -3837,9 +3841,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
     assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
     Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
     Builder.SetInsertPoint(VectorLoopLatch->getTerminator());
-    VectorParts RdxParts(UF);
     for (unsigned Part = 0; Part < UF; ++Part) {
-      RdxParts[Part] = State.get(LoopExitInstDef, Part);
       Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
       Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
                                         : Builder.CreateZExt(Trunc, VecTy);
@@ -3851,14 +3853,12 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
     }
     Builder.SetInsertPoint(LoopMiddleBlock,
                            LoopMiddleBlock->getFirstInsertionPt());
-    for (unsigned Part = 0; Part < UF; ++Part) {
+    for (unsigned Part = 0; Part < UF; ++Part)
       RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
-      State.reset(LoopExitInstDef, RdxParts[Part], Part);
-    }
   }
 
   // Reduce all of the unrolled parts into a single vector.
-  Value *ReducedPartRdx = State.get(LoopExitInstDef, 0);
+  Value *ReducedPartRdx = RdxParts[0];
   unsigned Op = RecurrenceDescriptor::getOpcode(RK);
 
   // The middle block terminator has already been assigned a DebugLoc here (the
@@ -3870,13 +3870,13 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
   // accidentally cause an extra step back into the loop while debugging.
   State.setDebugLocFrom(LoopMiddleBlock->getTerminator()->getDebugLoc());
   if (PhiR->isOrdered())
-    ReducedPartRdx = State.get(LoopExitInstDef, UF - 1);
+    ReducedPartRdx = RdxParts[UF - 1];
   else {
     // Floating-point operations should have some FMF to enable the reduction.
     IRBuilderBase::FastMathFlagGuard FMFG(Builder);
     Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
     for (unsigned Part = 1; Part < UF; ++Part) {
-      Value *RdxPart = State.get(LoopExitInstDef, Part);
+      Value *RdxPart = RdxParts[Part];
       if (Op != Instruction::ICmp && Op != Instruction::FCmp)
         ReducedPartRdx = Builder.CreateBinOp(
             (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");