[llvm] r181667 - LoopVectorize: Use the widest induction variable type

Arnold Schwaighofer aschwaighofer at apple.com
Sat May 11 16:04:28 PDT 2013


Author: arnolds
Date: Sat May 11 18:04:28 2013
New Revision: 181667

URL: http://llvm.org/viewvc/llvm-project?rev=181667&view=rev
Log:
LoopVectorize: Use the widest induction variable type

Use the widest induction type encountered for the canonical induction variable.

We used to turn the following loop into an empty loop because we used i8 as
the induction variable type and truncated the trip count of 1024 to 0.

int a[1024];
void fail() {
  int reverse_induction = 1023;
  unsigned char forward_induction = 0;
  while ((reverse_induction) >= 0) {
    forward_induction++;
    a[reverse_induction] = forward_induction;
    --reverse_induction;
  }
}

radar://13862901

Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/trunk/test/Transforms/LoopVectorize/reverse_induction.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=181667&r1=181666&r2=181667&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Sat May 11 18:04:28 2013
@@ -312,6 +312,8 @@ private:
   PHINode *Induction;
   /// The induction variable of the old basic block.
   PHINode *OldInduction;
+  /// Holds the extended (to the widest induction type) start index.
+  Value *ExtendedIdx;
   /// Maps scalars to widened vectors.
   ValueMap WidenMap;
 };
@@ -335,7 +337,7 @@ public:
                             DominatorTree *DT, TargetTransformInfo* TTI,
                             AliasAnalysis *AA, TargetLibraryInfo *TLI)
       : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
-        Induction(0), HasFunNoNaNAttr(false) {}
+        Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false) {}
 
   /// This enum represents the kinds of reductions that we support.
   enum ReductionKind {
@@ -473,6 +475,9 @@ public:
   /// Returns the induction variables found in the loop.
   InductionList *getInductionVars() { return &Inductions; }
 
+  /// Returns the widest induction type.
+  Type *getWidestInductionType() { return WidestIndTy; }
+
   /// Returns True if V is an induction variable in this loop.
   bool isInductionVariable(const Value *V);
 
@@ -579,6 +584,8 @@ private:
   /// Notice that inductions don't need to start at zero and that induction
   /// variables can be pointers.
   InductionList Inductions;
+  /// Holds the widest induction type encountered.
+  Type *WidestIndTy;
 
   /// Allowed outside users. This holds the reduction
   /// vars which can be accessed from outside the loop.
@@ -1243,8 +1250,7 @@ InnerLoopVectorizer::createEmptyLoop(Loo
   // induction variables. In the code below we also support a case where we
   // don't have a single induction variable.
   OldInduction = Legal->getInduction();
-  Type *IdxTy = OldInduction ? OldInduction->getType() :
-  DL->getIntPtrType(SE->getContext());
+  Type *IdxTy = Legal->getWidestInductionType();
 
   // Find the loop boundaries.
   const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch());
@@ -1265,9 +1271,11 @@ InnerLoopVectorizer::createEmptyLoop(Loo
   // The loop index does not have to start at Zero. Find the original start
   // value from the induction PHI node. If we don't have an induction variable
   // then we know that it starts at zero.
-  Value *StartIdx = OldInduction ?
-  OldInduction->getIncomingValueForBlock(BypassBlock):
-  ConstantInt::get(IdxTy, 0);
+  Builder.SetInsertPoint(BypassBlock->getTerminator());
+  Value *StartIdx = ExtendedIdx = OldInduction ?
+    Builder.CreateZExt(OldInduction->getIncomingValueForBlock(BypassBlock),
+                       IdxTy):
+    ConstantInt::get(IdxTy, 0);
 
   assert(BypassBlock && "Invalid loop structure");
   LoopBypassBlocks.push_back(BypassBlock);
@@ -1366,8 +1374,16 @@ InnerLoopVectorizer::createEmptyLoop(Loo
   for (I = List->begin(), E = List->end(); I != E; ++I) {
     PHINode *OrigPhi = I->first;
     LoopVectorizationLegality::InductionInfo II = I->second;
-    PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val",
+
+    Type *ResumeValTy = (OrigPhi == OldInduction) ? IdxTy : OrigPhi->getType();
+    PHINode *ResumeVal = PHINode::Create(ResumeValTy, 2, "resume.val",
                                          MiddleBlock->getTerminator());
+    // We might have extended the type of the induction variable but we need a
+    // truncated version for the scalar loop.
+    PHINode *TruncResumeVal = (OrigPhi == OldInduction) ?
+      PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val",
+                      MiddleBlock->getTerminator()) : 0;
+
     Value *EndValue = 0;
     switch (II.IK) {
     case LoopVectorizationLegality::IK_NoInduction:
@@ -1376,6 +1392,17 @@ InnerLoopVectorizer::createEmptyLoop(Loo
       // Handle the integer induction counter:
       assert(OrigPhi->getType()->isIntegerTy() && "Invalid type");
       assert(OrigPhi == OldInduction && "Unknown integer PHI");
+      if (OrigPhi == OldInduction) {
+        // Create a truncated version of the resume value for the scalar loop,
+        // we might have promoted the type to a larger width.
+        EndValue =
+          BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType());
+        // The new PHI merges the original incoming value, in case of a bypass,
+        // or the value at the end of the vectorized loop.
+        for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+          TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+        TruncResumeVal->addIncoming(EndValue, VecBody);
+      }
       // We know what the end value is.
       EndValue = IdxEndRoundDown;
       // We also know which PHI node holds it.
@@ -1412,13 +1439,21 @@ InnerLoopVectorizer::createEmptyLoop(Loo
 
     // The new PHI merges the original incoming value, in case of a bypass,
     // or the value at the end of the vectorized loop.
-    for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
-      ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+    for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) {
+      if (OrigPhi == OldInduction)
+        ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]);
+      else
+        ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+    }
     ResumeVal->addIncoming(EndValue, VecBody);
 
     // Fix the scalar body counter (PHI node).
     unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
-    OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
+    // The old inductions phi node in the scalar body needs the truncated value.
+    if (OrigPhi == OldInduction)
+      OrigPhi->setIncomingValue(BlockIdx, TruncResumeVal);
+    else
+      OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
   }
 
   // If we are generating a new induction variable then we also need to
@@ -2022,7 +2057,9 @@ InnerLoopVectorizer::vectorizeBlockInLoo
         llvm_unreachable("Unknown induction");
       case LoopVectorizationLegality::IK_IntInduction: {
         assert(P == OldInduction && "Unexpected PHI");
-        Value *Broadcasted = getBroadcastInstrs(Induction);
+        // We might have had to extend the type.
+        Value *Trunc = Builder.CreateTrunc(Induction, P->getType());
+        Value *Broadcasted = getBroadcastInstrs(Trunc);
         // After broadcasting the induction variable we need to make the
         // vector consecutive by adding 0, 1, 2 ...
         for (unsigned part = 0; part < UF; ++part)
@@ -2033,16 +2070,7 @@ InnerLoopVectorizer::vectorizeBlockInLoo
       case LoopVectorizationLegality::IK_PtrInduction:
       case LoopVectorizationLegality::IK_ReversePtrInduction:
         // Handle reverse integer and pointer inductions.
-        Value *StartIdx = 0;
-        // If we have a single integer induction variable then use it.
-        // Otherwise, start counting at zero.
-        if (OldInduction) {
-          LoopVectorizationLegality::InductionInfo OldII =
-            Legal->getInductionVars()->lookup(OldInduction);
-          StartIdx = OldII.StartValue;
-        } else {
-          StartIdx = ConstantInt::get(Induction->getType(), 0);
-        }
+        Value *StartIdx = ExtendedIdx;
         // This is the normalized GEP that starts counting at zero.
         Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
                                                  "normalized.idx");
@@ -2362,6 +2390,20 @@ bool LoopVectorizationLegality::canVecto
   return true;
 }
 
+static Type *convertPointerToIntegerType(DataLayout &DL, Type *Ty) {
+  if (Ty->isPointerTy())
+    return DL.getIntPtrType(Ty->getContext());
+  return Ty;
+}
+
+static Type* getWiderType(DataLayout &DL, Type *Ty0, Type *Ty1) {
+  Ty0 = convertPointerToIntegerType(DL, Ty0);
+  Ty1 = convertPointerToIntegerType(DL, Ty1);
+  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
+    return Ty0;
+  return Ty1;
+}
+
 bool LoopVectorizationLegality::canVectorizeInstrs() {
   BasicBlock *PreHeader = TheLoop->getLoopPreheader();
   BasicBlock *Header = TheLoop->getHeader();
@@ -2416,6 +2458,12 @@ bool LoopVectorizationLegality::canVecto
         InductionKind IK = isInductionVariable(Phi);
 
         if (IK_NoInduction != IK) {
+          // Get the widest type.
+          if (!WidestIndTy)
+            WidestIndTy = convertPointerToIntegerType(*DL, PhiTy);
+          else
+            WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy);
+
           // Int inductions are special because we only allow one IV.
           if (IK == IK_IntInduction) {
             if (Induction) {

Modified: llvm/trunk/test/Transforms/LoopVectorize/reverse_induction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/reverse_induction.ll?rev=181667&r1=181666&r2=181667&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/reverse_induction.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/reverse_induction.ll Sat May 11 18:04:28 2013
@@ -77,3 +77,72 @@ loopend:
 }
 
 
+@a = common global [1024 x i32] zeroinitializer, align 16
+
+; We incorrectly transformed this loop into an empty one because we left the
+; induction variable in i8 type and truncated the exit value 1024 to 0.
+; int a[1024];
+;
+; void fail() {
+;   int reverse_induction = 1023;
+;   unsigned char forward_induction = 0;
+;   while ((reverse_induction) >= 0) {
+;     forward_induction++;
+;     a[reverse_induction] = forward_induction;
+;     --reverse_induction;
+;   }
+; }
+
+; CHECK: reverse_forward_induction_i64_i8
+; CHECK: vector.body
+; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %normalized.idx = sub i64 %index, 0
+; CHECK: %reverse.idx = sub i64 1023, %normalized.idx
+; CHECK: trunc i64 %index to i8
+
+define void @reverse_forward_induction_i64_i8() {
+entry:
+  br label %while.body
+
+while.body:
+  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %while.body ]
+  %forward_induction.05 = phi i8 [ 0, %entry ], [ %inc, %while.body ]
+  %inc = add i8 %forward_induction.05, 1
+  %conv = zext i8 %inc to i32
+  %arrayidx = getelementptr inbounds [1024 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, -1
+  %0 = trunc i64 %indvars.iv to i32
+  %cmp = icmp sgt i32 %0, 0
+  br i1 %cmp, label %while.body, label %while.end
+
+while.end:
+  ret void
+}
+
+; CHECK: reverse_forward_induction_i64_i8_signed
+; CHECK: vector.body:
+; CHECK:  %index = phi i64 [ 129, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK:  %normalized.idx = sub i64 %index, 129
+; CHECK:  %reverse.idx = sub i64 1023, %normalized.idx
+; CHECK:  trunc i64 %index to i8
+
+define void @reverse_forward_induction_i64_i8_signed() {
+entry:
+  br label %while.body
+
+while.body:
+  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %while.body ]
+  %forward_induction.05 = phi i8 [ -127, %entry ], [ %inc, %while.body ]
+  %inc = add i8 %forward_induction.05, 1
+  %conv = sext i8 %inc to i32
+  %arrayidx = getelementptr inbounds [1024 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, -1
+  %0 = trunc i64 %indvars.iv to i32
+  %cmp = icmp sgt i32 %0, 0
+  br i1 %cmp, label %while.body, label %while.end
+
+while.end:
+  ret void
+}





More information about the llvm-commits mailing list