[llvm] r246631 - [LV] Never widen an induction variable.
James Molloy via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 2 03:15:05 PDT 2015
Author: jamesm
Date: Wed Sep 2 05:15:05 2015
New Revision: 246631
URL: http://llvm.org/viewvc/llvm-project?rev=246631&view=rev
Log:
[LV] Never widen an induction variable.
There's no need to widen canonical induction variables. It's just as efficient to create a *new*, wide, induction variable.
Consider, if we widen an indvar, then we'll have to truncate it before its uses anyway (1 trunc). If we create a new indvar instead, we'll have to truncate that instead (1 trunc) [besides which IndVars should go and clean up our mess after us anyway on principle].
This lets us remove a ton of special-casing code.
Added:
llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll
Modified:
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=246631&r1=246630&r2=246631&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed Sep 2 05:15:05 2015
@@ -468,8 +468,6 @@ protected:
PHINode *Induction;
/// The induction variable of the old basic block.
PHINode *OldInduction;
- /// Holds the extended (to the widest induction type) start index.
- Value *ExtendedIdx;
/// Maps scalars to widened vectors.
ValueMap WidenMap;
EdgeMaskCache MaskCache;
@@ -2605,9 +2603,16 @@ void InnerLoopVectorizer::createEmptyLoo
// don't. One example is c++ iterators that often have multiple pointer
// induction variables. In the code below we also support a case where we
// don't have a single induction variable.
+ //
+ // We try to obtain an induction variable from the original loop as hard
+ // as possible. However if we don't find one that:
+ // - is an integer
+ // - counts from zero, stepping by one
+ // - is the size of the widest induction variable type
+ // then we create a new one.
OldInduction = Legal->getInduction();
Type *IdxTy = Legal->getWidestInductionType();
-
+
// Find the loop boundaries.
const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop);
assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
@@ -2653,11 +2658,7 @@ void InnerLoopVectorizer::createEmptyLoo
"min.iters.check", VectorPH->getTerminator());
Builder.SetInsertPoint(VectorPH->getTerminator());
- Value *StartIdx = ExtendedIdx = ConstantInt::get(IdxTy, 0);
-
- // Count holds the overall loop count (N).
- Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
- VectorPH->getTerminator());
+ Value *StartIdx = ConstantInt::get(IdxTy, 0);
LoopBypassBlocks.push_back(VectorPH);
@@ -2711,24 +2712,13 @@ void InnerLoopVectorizer::createEmptyLoo
setDebugLocFromInst(BypassBuilder,
getDebugLocFromInstOrOperands(OldInduction));
- // We may need to extend the index in case there is a type mismatch.
- // We know that the count starts at zero and does not overflow.
- if (Count->getType() != IdxTy) {
- // The exit count can be of pointer type. Convert it to the correct
- // integer type.
- if (ExitCount->getType()->isPointerTy())
- Count = BypassBuilder.CreatePointerCast(Count, IdxTy, "ptrcnt.to.int");
- else
- Count = BypassBuilder.CreateZExtOrTrunc(Count, IdxTy, "cnt.cast");
- }
-
// Add the start index to the loop count to get the new end index.
- Value *IdxEnd = BypassBuilder.CreateAdd(Count, StartIdx, "end.idx");
+ Value *IdxEnd = BypassBuilder.CreateAdd(ExitCountValue, StartIdx, "end.idx");
// Now we need to generate the expression for N - (N % VF), which is
// the part that the vectorized body will execute.
- Value *R = BypassBuilder.CreateURem(Count, Step, "n.mod.vf");
- Value *CountRoundDown = BypassBuilder.CreateSub(Count, R, "n.vec");
+ Value *R = BypassBuilder.CreateURem(ExitCountValue, Step, "n.mod.vf");
+ Value *CountRoundDown = BypassBuilder.CreateSub(ExitCountValue, R, "n.vec");
Value *IdxEndRoundDown = BypassBuilder.CreateAdd(CountRoundDown, StartIdx,
"end.idx.rnd.down");
@@ -2804,7 +2794,9 @@ void InnerLoopVectorizer::createEmptyLoo
// If we come from a bypass edge then we need to start from the original
// start value.
- // This variable saves the new starting index for the scalar loop.
+ // This variable saves the new starting index for the scalar loop. It is used
+ // to test if there are any tail iterations left once the vector loop has
+ // completed.
PHINode *ResumeIndex = nullptr;
LoopVectorizationLegality::InductionList::iterator I, E;
LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
@@ -2814,84 +2806,32 @@ void InnerLoopVectorizer::createEmptyLoo
PHINode *OrigPhi = I->first;
InductionDescriptor II = I->second;
- Type *ResumeValTy = (OrigPhi == OldInduction) ? IdxTy : OrigPhi->getType();
- PHINode *ResumeVal = PHINode::Create(ResumeValTy, 2, "resume.val",
+ PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val",
MiddleBlock->getTerminator());
- // We might have extended the type of the induction variable but we need a
- // truncated version for the scalar loop.
- PHINode *TruncResumeVal = (OrigPhi == OldInduction) ?
- PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val",
- MiddleBlock->getTerminator()) : nullptr;
-
// Create phi nodes to merge from the backedge-taken check block.
- PHINode *BCResumeVal = PHINode::Create(ResumeValTy, 3, "bc.resume.val",
+ PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3,
+ "bc.resume.val",
ScalarPH->getTerminator());
BCResumeVal->addIncoming(ResumeVal, MiddleBlock);
- PHINode *BCTruncResumeVal = nullptr;
+ Value *EndValue;
if (OrigPhi == OldInduction) {
- BCTruncResumeVal =
- PHINode::Create(OrigPhi->getType(), 2, "bc.trunc.resume.val",
- ScalarPH->getTerminator());
- BCTruncResumeVal->addIncoming(TruncResumeVal, MiddleBlock);
- }
-
- Value *EndValue = nullptr;
- switch (II.getKind()) {
- case InductionDescriptor::IK_NoInduction:
- llvm_unreachable("Unknown induction");
- case InductionDescriptor::IK_IntInduction: {
- // Handle the integer induction counter.
- assert(OrigPhi->getType()->isIntegerTy() && "Invalid type");
-
- // We have the canonical induction variable.
- if (OrigPhi == OldInduction) {
- // Create a truncated version of the resume value for the scalar loop,
- // we might have promoted the type to a larger width.
- EndValue =
- BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType());
- // The new PHI merges the original incoming value, in case of a bypass,
- // or the value at the end of the vectorized loop.
- for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
- TruncResumeVal->addIncoming(II.getStartValue(), LoopBypassBlocks[I]);
- TruncResumeVal->addIncoming(EndValue, VecBody);
-
- BCTruncResumeVal->addIncoming(II.getStartValue(), LoopBypassBlocks[0]);
-
- // We know what the end value is.
- EndValue = IdxEndRoundDown;
- // We also know which PHI node holds it.
- ResumeIndex = ResumeVal;
- break;
- }
-
- // Not the canonical induction variable - add the vector loop count to the
- // start value.
- Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
- II.getStartValue()->getType(),
- "cast.crd");
- EndValue = II.transform(BypassBuilder, CRD);
- EndValue->setName("ind.end");
- break;
- }
- case InductionDescriptor::IK_PtrInduction: {
+ // We know what the end value is.
+ EndValue = IdxEndRoundDown;
+ // We also know which PHI node holds it.
+ ResumeIndex = ResumeVal;
+ } else {
Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
II.getStepValue()->getType(),
"cast.crd");
EndValue = II.transform(BypassBuilder, CRD);
- EndValue->setName("ptr.ind.end");
- break;
+ EndValue->setName("ind.end");
}
- }// end of case
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
- for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) {
- if (OrigPhi == OldInduction)
- ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]);
- else
- ResumeVal->addIncoming(II.getStartValue(), LoopBypassBlocks[I]);
- }
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
+ ResumeVal->addIncoming(II.getStartValue(), LoopBypassBlocks[I]);
ResumeVal->addIncoming(EndValue, VecBody);
// Fix the scalar body counter (PHI node).
@@ -2899,13 +2839,8 @@ void InnerLoopVectorizer::createEmptyLoo
// The old induction's phi node in the scalar body needs the truncated
// value.
- if (OrigPhi == OldInduction) {
- BCResumeVal->addIncoming(StartIdx, LoopBypassBlocks[0]);
- OrigPhi->setIncomingValue(BlockIdx, BCTruncResumeVal);
- } else {
- BCResumeVal->addIncoming(II.getStartValue(), LoopBypassBlocks[0]);
- OrigPhi->setIncomingValue(BlockIdx, BCResumeVal);
- }
+ BCResumeVal->addIncoming(II.getStartValue(), LoopBypassBlocks[0]);
+ OrigPhi->setIncomingValue(BlockIdx, BCResumeVal);
}
// If we are generating a new induction variable then we also need to
@@ -3526,20 +3461,15 @@ void InnerLoopVectorizer::widenPHIInstru
llvm_unreachable("Unknown induction");
case InductionDescriptor::IK_IntInduction: {
assert(P->getType() == II.getStartValue()->getType() && "Types must match");
- Type *PhiTy = P->getType();
- Value *Broadcasted;
- if (P == OldInduction) {
- // Handle the canonical induction variable. We might have had to
- // extend the type.
- Broadcasted = Builder.CreateTrunc(Induction, PhiTy);
- } else {
- // Handle other induction variables that are now based on the
- // canonical one.
- auto *V = Builder.CreateSExtOrTrunc(Induction, PhiTy);
- Broadcasted = II.transform(Builder, V);
- Broadcasted->setName("offset.idx");
+ // Handle other induction variables that are now based on the
+ // canonical one.
+ Value *V = Induction;
+ if (P != OldInduction) {
+ V = Builder.CreateSExtOrTrunc(Induction, P->getType());
+ V = II.transform(Builder, V);
+ V->setName("offset.idx");
}
- Broadcasted = getBroadcastInstrs(Broadcasted);
+ Value *Broadcasted = getBroadcastInstrs(V);
// After broadcasting the induction variable we need to make the vector
// consecutive by adding 0, 1, 2, etc.
for (unsigned part = 0; part < UF; ++part)
@@ -3550,17 +3480,15 @@ void InnerLoopVectorizer::widenPHIInstru
// Handle the pointer induction variable case.
assert(P->getType()->isPointerTy() && "Unexpected type.");
// This is the normalized GEP that starts counting at zero.
- Value *NormalizedIdx =
- Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx");
- NormalizedIdx =
- Builder.CreateSExtOrTrunc(NormalizedIdx, II.getStepValue()->getType());
+ Value *PtrInd = Induction;
+ PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStepValue()->getType());
// This is the vector of results. Notice that we don't generate
// vector geps because scalar geps result in better code.
for (unsigned part = 0; part < UF; ++part) {
if (VF == 1) {
int EltIndex = part;
- Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex);
- Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
+ Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
+ Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx);
SclrGep->setName("next.gep");
Entry[part] = SclrGep;
@@ -3570,8 +3498,8 @@ void InnerLoopVectorizer::widenPHIInstru
Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
for (unsigned int i = 0; i < VF; ++i) {
int EltIndex = i + part * VF;
- Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex);
- Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
+ Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
+ Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx);
SclrGep->setName("next.gep");
VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
@@ -4239,6 +4167,12 @@ bool LoopVectorizationLegality::canVecto
}
}
+ // Now we know the widest induction type, check if our found induction
+ // is the same size. If it's not, unset it here and InnerLoopVectorizer
+ // will create another.
+ if (Induction && WidestIndTy != Induction->getType())
+ Induction = nullptr;
+
return true;
}
Added: llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll?rev=246631&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll Wed Sep 2 05:15:05 2015
@@ -0,0 +1,34 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; This testcase causes SCEV to return a pointer-typed exit value.
+
+; CHECK: @f
+; Expect that the pointer indvar has been converted into an integer indvar.
+; CHECK: %index.next = add i64 %index, 4
+define i32 @f(i32* readonly %a, i32* readnone %b) #0 {
+entry:
+ %cmp.6 = icmp ult i32* %a, %b
+ br i1 %cmp.6, label %while.body.preheader, label %while.end
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+ %a.pn = phi i32* [ %incdec.ptr8, %while.body ], [ %a, %while.body.preheader ]
+ %acc.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
+ %incdec.ptr8 = getelementptr inbounds i32, i32* %a.pn, i64 1
+ %0 = load i32, i32* %incdec.ptr8, align 1
+ %add = add nuw nsw i32 %0, %acc.07
+ %exitcond = icmp eq i32* %incdec.ptr8, %b
+ br i1 %exitcond, label %while.cond.while.end_crit_edge, label %while.body
+
+while.cond.while.end_crit_edge: ; preds = %while.body
+ %add.lcssa = phi i32 [ %add, %while.body ]
+ br label %while.end
+
+while.end: ; preds = %while.cond.while.end_crit_edge, %entry
+ %acc.0.lcssa = phi i32 [ %add.lcssa, %while.cond.while.end_crit_edge ], [ 0, %entry ]
+ ret i32 %acc.0.lcssa
+}
\ No newline at end of file
More information about the llvm-commits
mailing list