[llvm] r280390 - [LV] Move VectorParts allocation and mapping into PHI widening (NFC)

Tue Sep 6 12:18:31 PDT 2016

Thanks, Adam!

-- Matt

-----Original Message-----
From: anemet at apple.com [mailto:anemet at apple.com] 
Sent: Tuesday, September 06, 2016 3:04 PM
To: Matthew Simpson <mssimpso at codeaurora.org>
Cc: Adam Nemet via llvm-commits <llvm-commits at lists.llvm.org>
Subject: Re: [llvm] r280390 - [LV] Move VectorParts allocation and mapping
into PHI widening (NFC)

Good stuff!

> On Sep 1, 2016, at 11:14 AM, Matthew Simpson via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> 
> Author: mssimpso
> Date: Thu Sep  1 13:14:27 2016
> New Revision: 280390
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=280390&view=rev
> Log:
> [LV] Move VectorParts allocation and mapping into PHI widening (NFC)
> 
> This patch moves the allocation of VectorParts for PHI nodes into the 
> actual PHI widening code. Previously, we allocated these VectorParts 
> in vectorizeBlockInLoop, and passed them by reference to 
> widenPHIInstruction. Upon returning, we would then map the VectorParts 
> in VectorLoopValueMap. This behavior is problematic for the cases 
> where we only want to generate a scalar version of a PHI node. For 
> example, if in the future we only generate a scalar version of an 
> induction variable, we would end up inserting an empty vector entry 
> into the map once we return to vectorizeBlockInLoop. We now no longer 
> need to pass VectorParts to the various PHI widening functions, and we 
> can keep VectorParts allocation as close as possible to the point at which
they are actually mapped in VectorLoopValueMap.
> 
> Modified:
>    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> 
> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> URL: 
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectoriz
> e/LoopVectorize.cpp?rev=280390&r1=280389&r2=280390&view=diff
> ======================================================================
> ========
> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Thu Sep  1 
> +++ 13:14:27 2016
> @@ -415,8 +415,8 @@ protected:
>   /// Vectorize a single PHINode in a block. This method handles the
induction
>   /// variable canonicalization. It supports both VF = 1 for unrolled
loops and
>   /// arbitrary length vectors.
> -  void widenPHIInstruction(Instruction *PN, VectorParts &Entry, unsigned
UF,
> -                           unsigned VF, PhiVector *PV);
> +  void widenPHIInstruction(Instruction *PN, unsigned UF, unsigned VF,
> +                           PhiVector *PV);
> 
>   /// Insert the new loop to the loop hierarchy and pass manager
>   /// and update the analysis passes.
> @@ -455,16 +455,16 @@ protected:
> 
>   /// Create a vector induction phi node based on an existing scalar one.
This
>   /// currently only works for integer induction variables with a 
> constant
> -  /// step. If \p TruncType is non-null, instead of widening the 
> original IV,
> -  /// we widen a version of the IV truncated to \p TruncType.
> +  /// step. \p EntryVal is the value from the original loop that maps 
> + to the  /// vector phi node. If \p EntryVal is a truncate 
> + instruction, instead of  /// widening the original IV, we widen a 
> + version of the IV truncated to \p  /// EntryVal's type.
>   void createVectorIntInductionPHI(const InductionDescriptor &II,
> -                                   VectorParts &Entry, IntegerType
*TruncType);
> +                                   Instruction *EntryVal);
> 
>   /// Widen an integer induction variable \p IV. If \p Trunc is 
> provided, the
> -  /// induction variable will first be truncated to the corresponding 
> type. The
> -  /// widened values are placed in \p Entry.
> -  void widenIntInduction(PHINode *IV, VectorParts &Entry,
> -                         TruncInst *Trunc = nullptr);
> +  /// induction variable will first be truncated to the corresponding
type.
> +  void widenIntInduction(PHINode *IV, TruncInst *Trunc = nullptr);
> 
>   /// Returns true if we should generate a scalar version of \p IV.
>   bool needsScalarInduction(Instruction *IV) const; @@ -2035,7 +2035,7 
> @@ Value *InnerLoopVectorizer::getBroadcast
> }
> 
> void InnerLoopVectorizer::createVectorIntInductionPHI(
> -    const InductionDescriptor &II, VectorParts &Entry, IntegerType
*TruncType) {
> +    const InductionDescriptor &II, Instruction *EntryVal) {
>   Value *Start = II.getStartValue();
>   ConstantInt *Step = II.getConstIntStepValue();
>   assert(Step && "Can not widen an IV with a non-constant step"); @@ 
> -2043,7 +2043,8 @@ void InnerLoopVectorizer::createVectorIn
>   // Construct the initial value of the vector IV in the vector loop
preheader
>   auto CurrIP = Builder.saveIP();
>   Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
> -  if (TruncType) {
> +  if (isa<TruncInst>(EntryVal)) {
> +    auto *TruncType = cast<IntegerType>(EntryVal->getType());
>     Step = ConstantInt::getSigned(TruncType, Step->getSExtValue());
>     Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
>   }
> @@ -2059,11 +2060,15 @@ void InnerLoopVectorizer::createVectorIn
>   PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind",
>
&*LoopVectorBody->getFirstInsertionPt());
>   Instruction *LastInduction = VecInd;
> +  VectorParts Entry(UF);
>   for (unsigned Part = 0; Part < UF; ++Part) {
>     Entry[Part] = LastInduction;
>     LastInduction = cast<Instruction>(
>         Builder.CreateAdd(LastInduction, SplatVF, "step.add"));
>   }
> +  VectorLoopValueMap.initVector(EntryVal, Entry);  if 
> + (isa<TruncInst>(EntryVal))
> +    addMetadata(Entry, EntryVal);
> 
>   // Move the last step to the end of the latch block. This ensures
consistent
>   // placement of all induction updates.
> @@ -2087,8 +2092,7 @@ bool InnerLoopVectorizer::needsScalarInd
>   return any_of(IV->users(), isScalarInst); }
> 
> -void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts
&Entry,
> -                                            TruncInst *Trunc) {
> +void InnerLoopVectorizer::widenIntInduction(PHINode *IV, TruncInst 
> +*Trunc) {
> 
>   auto II = Legal->getInductionVars()->find(IV);
>   assert(II != Legal->getInductionVars()->end() && "IV is not an 
> induction"); @@ -2096,9 +2100,6 @@ void
InnerLoopVectorizer::widenIntInduct
>   auto ID = II->second;
>   assert(IV->getType() == ID.getStartValue()->getType() && "Types must 
> match");
> 
> -  // If a truncate instruction was provided, get the smaller type.
> -  auto *TruncType = Trunc ? cast<IntegerType>(Trunc->getType()) : 
> nullptr;
> -
>   // The scalar value to broadcast. This will be derived from the
canonical
>   // induction variable.
>   Value *ScalarIV = nullptr;
> @@ -2128,7 +2129,7 @@ void InnerLoopVectorizer::widenIntInduct
>   // loop iteration.
>   if (VF > 1 && IV->getType() == Induction->getType() && Step &&
>       !Legal->isScalarAfterVectorization(EntryVal)) {
> -    createVectorIntInductionPHI(ID, Entry, TruncType);
> +    createVectorIntInductionPHI(ID, EntryVal);
>     VectorizedIV = true;
>   }
> 
> @@ -2138,7 +2139,8 @@ void InnerLoopVectorizer::widenIntInduct
>   // induction variable and constant step. Otherwise, derive these values
from
>   // the induction descriptor.
>   if (!VectorizedIV || NeedsScalarIV) {
> -    if (TruncType) {
> +    if (Trunc) {
> +      auto *TruncType = cast<IntegerType>(Trunc->getType());
>       assert(Step && "Truncation requires constant integer step");
>       auto StepInt = cast<ConstantInt>(Step)->getSExtValue();
>       ScalarIV = Builder.CreateCast(Instruction::Trunc, Induction, 
> TruncType); @@ -2163,8 +2165,12 @@ void
InnerLoopVectorizer::widenIntInduct
>   // induction variable, and build the necessary step vectors.
>   if (!VectorizedIV) {
>     Value *Broadcasted = getBroadcastInstrs(ScalarIV);
> +    VectorParts Entry(UF);
>     for (unsigned Part = 0; Part < UF; ++Part)
>       Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
> +    VectorLoopValueMap.initVector(EntryVal, Entry);
> +    if (Trunc)
> +      addMetadata(Entry, Trunc);
>   }
> 
>   // If an induction variable is only used for counting loop 
> iterations or @@ -4371,12 +4377,12 @@
InnerLoopVectorizer::createBlockInMask(B
>   return BlockMask;
> }
> 
> -void InnerLoopVectorizer::widenPHIInstruction(
> -    Instruction *PN, InnerLoopVectorizer::VectorParts &Entry, unsigned
UF,
> -    unsigned VF, PhiVector *PV) {
> +void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned
UF,
> +                                              unsigned VF, PhiVector 
> +*PV) {
>   PHINode *P = cast<PHINode>(PN);
>   // Handle recurrences.
>   if (Legal->isReductionVariable(P) || 
> Legal->isFirstOrderRecurrence(P)) {
> +    VectorParts Entry(UF);
>     for (unsigned part = 0; part < UF; ++part) {
>       // This is phase one of vectorizing PHIs.
>       Type *VecTy =
> @@ -4384,6 +4390,7 @@ void InnerLoopVectorizer::widenPHIInstru
>       Entry[part] = PHINode::Create(
>           VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt());
>     }
> +    VectorLoopValueMap.initVector(P, Entry);
>     PV->push_back(P);
>     return;
>   }
> @@ -4404,6 +4411,7 @@ void InnerLoopVectorizer::widenPHIInstru
>     // SELECT(Mask3, In3,
>     //      SELECT(Mask2, In2,
>     //                   ( ...)))
> +    VectorParts Entry(UF);
>     for (unsigned In = 0; In < NumIncoming; In++) {
>       VectorParts Cond =
>           createEdgeMask(P->getIncomingBlock(In), P->getParent()); @@ 
> -4421,6 +4429,7 @@ void InnerLoopVectorizer::widenPHIInstru
>                                              "predphi");
>       }
>     }
> +    VectorLoopValueMap.initVector(P, Entry);
>     return;
>   }
> 
> @@ -4437,7 +4446,7 @@ void InnerLoopVectorizer::widenPHIInstru
>   case InductionDescriptor::IK_NoInduction:
>     llvm_unreachable("Unknown induction");
>   case InductionDescriptor::IK_IntInduction:
> -    return widenIntInduction(P, Entry);
> +    return widenIntInduction(P);
>   case InductionDescriptor::IK_PtrInduction: {
>     // Handle the pointer induction variable case.
>     assert(P->getType()->isPointerTy() && "Unexpected type."); @@ 
> -4446,6 +4455,7 @@ void InnerLoopVectorizer::widenPHIInstru
>     PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStep()->getType());
>     // This is the vector of results. Notice that we don't generate
>     // vector geps because scalar geps result in better code.
> +    VectorParts Entry(UF);
>     for (unsigned part = 0; part < UF; ++part) {
>       if (VF == 1) {
>         int EltIndex = part;
> @@ -4469,6 +4479,7 @@ void InnerLoopVectorizer::widenPHIInstru
>       }
>       Entry[part] = VecVal;
>     }
> +    VectorLoopValueMap.initVector(P, Entry);
>     return;
>   }
>   case InductionDescriptor::IK_FpInduction: { @@ -4488,9 +4499,11 @@ 
> void InnerLoopVectorizer::widenPHIInstru
>     // After broadcasting the induction variable we need to make the
vector
>     // consecutive by adding StepVal*0, StepVal*1, StepVal*2, etc.
>     Value *StepVal = cast<SCEVUnknown>(II.getStep())->getValue();
> +    VectorParts Entry(UF);
>     for (unsigned part = 0; part < UF; ++part)
>       Entry[part] = getStepVector(Broadcasted, VF * part, StepVal,
>                                   II.getInductionOpcode());
> +    VectorLoopValueMap.initVector(P, Entry);
>     return;
>   }
>   }
> @@ -4524,9 +4537,7 @@ void InnerLoopVectorizer::vectorizeBlock
>       continue;
>     case Instruction::PHI: {
>       // Vectorize PHINodes.
> -      VectorParts Entry(UF);
> -      widenPHIInstruction(&I, Entry, UF, VF, PV);
> -      VectorLoopValueMap.initVector(&I, Entry);
> +      widenPHIInstruction(&I, UF, VF, PV);
>       continue;
>     } // End of PHI.
> 
> @@ -4648,7 +4659,6 @@ void InnerLoopVectorizer::vectorizeBlock
>     case Instruction::BitCast: {
>       auto *CI = dyn_cast<CastInst>(&I);
>       setDebugLocFromInst(Builder, CI);
> -      VectorParts Entry(UF);
> 
>       // Optimize the special case where the source is a constant integer
>       // induction variable. Notice that we can only optimize the 
> 'trunc' case @@ -4657,9 +4667,7 @@ void
InnerLoopVectorizer::vectorizeBlock
>       auto ID = Legal->getInductionVars()->lookup(OldInduction);
>       if (isa<TruncInst>(CI) && CI->getOperand(0) == OldInduction &&
>           ID.getConstIntStepValue()) {
> -        widenIntInduction(OldInduction, Entry, cast<TruncInst>(CI));
> -        VectorLoopValueMap.initVector(&I, Entry);
> -        addMetadata(Entry, &I);
> +        widenIntInduction(OldInduction, cast<TruncInst>(CI));
>         break;
>       }
> 
> @@ -4668,6 +4676,7 @@ void InnerLoopVectorizer::vectorizeBlock
>           (VF == 1) ? CI->getType() : VectorType::get(CI->getType(), 
> VF);
> 
>       const VectorParts &A = getVectorValue(CI->getOperand(0));
> +      VectorParts Entry(UF);
>       for (unsigned Part = 0; Part < UF; ++Part)
>         Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part],
DestTy);
>       VectorLoopValueMap.initVector(&I, Entry);
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits