[llvm] r268147 - Reroll loops with multiple IV and negative step part 3

Lawrence Hu via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 29 17:51:22 PDT 2016


Author: lawrence
Date: Fri Apr 29 19:51:22 2016
New Revision: 268147

URL: http://llvm.org/viewvc/llvm-project?rev=268147&view=rev
Log:
    Reroll loops with multiple IV and negative step part 3
            support multiple induction variables

    This patch enables loop reroll for the following case:
        for(int i=0;  i<N; i += 2) {
           S += *a++;
           S += *a++;
        };

Differential Revision: http://reviews.llvm.org/D16550


Added:
    llvm/trunk/test/Transforms/LoopReroll/complex_reroll.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp?rev=268147&r1=268146&r2=268147&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp Fri Apr 29 19:51:22 2016
@@ -163,6 +163,9 @@ namespace {
 
     // Map between induction variable and its increment
     DenseMap<Instruction *, int64_t> IVToIncMap;
+    // For loop with multiple induction variable, remember the one used only to
+    // control the loop.
+    Instruction *LoopControlIV;
 
     // A chain of isomorphic instructions, identified by a single-use PHI
     // representing a reduction. Only the last value may be used outside the
@@ -350,9 +353,11 @@ namespace {
                      ScalarEvolution *SE, AliasAnalysis *AA,
                      TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI,
                      bool PreserveLCSSA,
-                     DenseMap<Instruction *, int64_t> &IncrMap)
+                     DenseMap<Instruction *, int64_t> &IncrMap,
+                     Instruction *LoopCtrlIV)
           : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
-            PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap) {}
+            PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
+            LoopControlIV(LoopCtrlIV) {}
 
       /// Stage 1: Find all the DAG roots for the induction variable.
       bool findRoots();
@@ -391,6 +396,7 @@ namespace {
                           UsesTy::iterator Start,
                           UsesTy::iterator End);
       void replaceIV(Instruction *Inst, Instruction *IV, const SCEV *IterCount);
+      void updateNonLoopCtrlIncr();
 
       LoopReroll *Parent;
 
@@ -421,8 +427,18 @@ namespace {
       UsesTy Uses;
       // Map between induction variable and its increment
       DenseMap<Instruction *, int64_t> &IVToIncMap;
+      Instruction *LoopControlIV;
     };
 
+    // Check if it is a compare-like instruction whose user is a branch
+    bool isCompareUsedByBranch(Instruction *I) {
+      auto *TI = I->getParent()->getTerminator();
+      if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
+        return false;
+      return I->hasOneUse() && TI->getOperand(0) == I;
+    };
+
+    bool isLoopControlIV(Loop *L, Instruction *IV);
     void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
     void collectPossibleReductions(Loop *L,
            ReductionTracker &Reductions);
@@ -494,6 +510,60 @@ static const SCEVConstant *getIncrmentFa
   return CIncSCEV;
 }
 
+// Check if an IV is only used to control the loop. There are two cases:
+// 1. It only has one use which is loop increment, and the increment is only
+// used by comparison and the PHI, and the comparison is only used by branch.
+// 2. It is used by loop increment and the comparison, the loop increment is
+// only used by the PHI, and the comparison is used only by the branch.
+bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
+
+  unsigned IVUses = IV->getNumUses();
+  if (IVUses != 2 && IVUses != 1)
+    return false;
+
+  for (auto *User : IV->users()) {
+    int32_t IncOrCmpUses = User->getNumUses();
+    bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));
+
+    // User can only have one or two uses.
+    if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
+      return false;
+
+    // Case 1
+    if (IVUses == 1) {
+      // The only user must be the loop increment.
+      // The loop increment must have two uses.
+      if (IsCompInst || IncOrCmpUses != 2)
+        return false;
+    }
+
+    // Case 2
+    if (IVUses == 2 && IncOrCmpUses != 1)
+      return false;
+
+    // The users of the IV must be a binary operation or a comparison
+    if (auto *BO = dyn_cast<BinaryOperator>(User)) {
+      if (BO->getOpcode() == Instruction::Add) {
+        // Loop Increment
+        // User of Loop Increment should be either PHI or CMP
+        for (auto *UU : User->users()) {
+          if (PHINode *PN = dyn_cast<PHINode>(UU)) {
+            if (PN != IV)
+              return false;
+          }
+          // Must be a CMP
+          else if (!isCompareUsedByBranch(dyn_cast<Instruction>(UU)))
+            return false;
+        }
+      } else
+        return false;
+      // Compare : can only have one use, and must be branch
+    } else if (!IsCompInst)
+      return false;
+  }
+  return true;
+}
+
 // Collect the list of loop induction variables with respect to which it might
 // be possible to reroll the loop.
 void LoopReroll::collectPossibleIVs(Loop *L,
@@ -525,7 +595,14 @@ void LoopReroll::collectPossibleIVs(Loop
         IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
         DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
                      << "\n");
-        PossibleIVs.push_back(&*I);
+
+        if (isLoopControlIV(L, &*I)) {
+          assert(!LoopControlIV && "Found two loop control only IV");
+          LoopControlIV = &(*I);
+          DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I << " = "
+                       << *PHISCEV << "\n");
+        } else
+          PossibleIVs.push_back(&*I);
       }
     }
   }
@@ -1072,6 +1149,28 @@ bool LoopReroll::DAGRootTracker::validat
     Uses[I].set(IL_All);
   }
 
+  // Make sure we mark loop-control-only PHIs as used in all iterations. See
+  // comment above LoopReroll::isLoopControlIV for more information.
+  BasicBlock *Header = L->getHeader();
+  if (LoopControlIV && LoopControlIV != IV) {
+    for (auto *U : LoopControlIV->users()) {
+      Instruction *IVUser = dyn_cast<Instruction>(U);
+      // IVUser could be loop increment or compare
+      Uses[IVUser].set(IL_All);
+      for (auto *UU : IVUser->users()) {
+        Instruction *UUser = dyn_cast<Instruction>(UU);
+        // UUser could be compare, PHI or branch
+        Uses[UUser].set(IL_All);
+        // Is UUser a compare instruction?
+        if (UU->hasOneUse()) {
+          Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
+          if (BI == cast<BranchInst>(Header->getTerminator()))
+            Uses[BI].set(IL_All);
+        }
+      }
+    }
+  }
+
   // Make sure all instructions in the loop are in one and only one
   // set.
   for (auto &KV : Uses) {
@@ -1314,25 +1413,65 @@ void LoopReroll::DAGRootTracker::replace
     ++J;
   }
 
-  // We need to create a new induction variable for each different BaseInst.
-  for (auto &DRS : RootSets)
-    // Insert the new induction variable.
-    replaceIV(DRS.BaseInst, IV, IterCount);
+  bool HasTwoIVs = LoopControlIV && LoopControlIV != IV;
+
+  if (HasTwoIVs) {
+    updateNonLoopCtrlIncr();
+    replaceIV(LoopControlIV, LoopControlIV, IterCount);
+  } else
+    // We need to create a new induction variable for each different BaseInst.
+    for (auto &DRS : RootSets)
+      // Insert the new induction variable.
+      replaceIV(DRS.BaseInst, IV, IterCount);
 
   SimplifyInstructionsInBlock(Header, TLI);
   DeleteDeadPHIs(Header, TLI);
 }
 
+// For non-loop-control IVs, we only need to update the last increment
+// with right amount, then we are done.
+void LoopReroll::DAGRootTracker::updateNonLoopCtrlIncr() {
+  const SCEV *NewInc = nullptr;
+  for (auto *LoopInc : LoopIncs) {
+    GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LoopInc);
+    const SCEVConstant *COp = nullptr;
+    if (GEP && LoopInc->getOperand(0)->getType()->isPointerTy()) {
+      COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
+    } else {
+      COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(0)));
+      if (!COp)
+        COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
+    }
+
+    assert(COp && "Didn't find constant operand of LoopInc!\n");
+
+    const APInt &AInt = COp->getValue()->getValue();
+    const SCEV *ScaleSCEV = SE->getConstant(COp->getType(), Scale);
+    if (AInt.isNegative()) {
+      NewInc = SE->getNegativeSCEV(COp);
+      NewInc = SE->getUDivExpr(NewInc, ScaleSCEV);
+      NewInc = SE->getNegativeSCEV(NewInc);
+    } else
+      NewInc = SE->getUDivExpr(COp, ScaleSCEV);
+
+    LoopInc->setOperand(1, dyn_cast<SCEVConstant>(NewInc)->getValue());
+  }
+}
+
 void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
                                            Instruction *InstIV,
                                            const SCEV *IterCount) {
   BasicBlock *Header = L->getHeader();
   int64_t Inc = IVToIncMap[InstIV];
-  bool Negative = Inc < 0;
+  bool NeedNewIV = InstIV == LoopControlIV;
+  bool Negative = !NeedNewIV && Inc < 0;
 
   const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(Inst));
   const SCEV *Start = RealIVSCEV->getStart();
 
+  if (NeedNewIV)
+    Start = SE->getConstant(Start->getType(), 0);
+
   const SCEV *SizeOfExpr = nullptr;
   const SCEV *IncrExpr =
       SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1);
@@ -1360,6 +1499,12 @@ void LoopReroll::DAGRootTracker::replace
       if (Uses[BI].find_first() == IL_All) {
         const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
 
+        if (NeedNewIV)
+          ICSCEV = SE->getMulExpr(IterCount,
+                                  SE->getConstant(IterCount->getType(), Scale));
+        else
+          ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
+
         // Iteration count SCEV minus or plus 1
         const SCEV *MinusPlus1SCEV =
             SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1);
@@ -1514,7 +1659,7 @@ bool LoopReroll::reroll(Instruction *IV,
                         const SCEV *IterCount,
                         ReductionTracker &Reductions) {
   DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
-                          IVToIncMap);
+                          IVToIncMap, LoopControlIV);
 
   if (!DAGRoots.findRoots())
     return false;
@@ -1566,6 +1711,7 @@ bool LoopReroll::runOnLoop(Loop *L, LPPa
   // reroll (there may be several possible options).
   SmallInstructionVector PossibleIVs;
   IVToIncMap.clear();
+  LoopControlIV = nullptr;
   collectPossibleIVs(L, PossibleIVs);
 
   if (PossibleIVs.empty()) {

Added: llvm/trunk/test/Transforms/LoopReroll/complex_reroll.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopReroll/complex_reroll.ll?rev=268147&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopReroll/complex_reroll.ll (added)
+++ llvm/trunk/test/Transforms/LoopReroll/complex_reroll.ll Fri Apr 29 19:51:22 2016
@@ -0,0 +1,134 @@
+; RUN: opt -S  -loop-reroll   %s | FileCheck %s
+declare i32 @goo(i32, i32)
+
+@buf = external global i8*
+@aaa = global [16 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10", align 1
+
+define i32 @test1(i32 %len) {
+entry:
+  br label %while.body
+
+while.body:
+;CHECK-LABEL: while.body:
+;CHECK-NEXT:    %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %entry ]
+;CHECK-NEXT:    %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
+;CHECK-NEXT:    %sum44.020 = phi i64 [ 0, %entry ], [ %add, %while.body ]
+;CHECK-NEXT:    [[T2:%[0-9]+]] = load i8, i8* %buf.021, align 1
+;CHECK-NEXT:    %conv = zext i8 [[T2]] to i64
+;CHECK-NEXT:    %add = add i64 %conv, %sum44.020
+;CHECK-NEXT:    %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 1
+;CHECK-NEXT:    %indvar.next = add i32 %indvar, 1
+;CHECK-NEXT:    %exitcond = icmp eq i32 %indvar, 1
+;CHECK-NEXT:    br i1 %exitcond, label %while.end, label %while.body
+
+  %dec22 = phi i32 [ 4, %entry ], [ %dec, %while.body ]
+  %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
+  %sum44.020 = phi i64 [ 0, %entry ], [ %add9, %while.body ]
+  %0 = load i8, i8* %buf.021, align 1
+  %conv = zext i8 %0 to i64
+  %add = add i64 %conv, %sum44.020
+  %arrayidx1 = getelementptr inbounds i8, i8* %buf.021, i64 1
+  %1 = load i8, i8* %arrayidx1, align 1
+  %conv2 = zext i8 %1 to i64
+  %add3 = add i64 %add, %conv2
+  %arrayidx4 = getelementptr inbounds i8, i8* %buf.021, i64 2
+  %2 = load i8, i8* %arrayidx4, align 1
+  %conv5 = zext i8 %2 to i64
+  %add6 = add i64 %add3, %conv5
+  %arrayidx7 = getelementptr inbounds i8, i8* %buf.021, i64 3
+  %3 = load i8, i8* %arrayidx7, align 1
+  %conv8 = zext i8 %3 to i64
+  %add9 = add i64 %add6, %conv8
+  %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 4
+  %dec = add nsw i32 %dec22, -1
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body
+  %conv11 = trunc i64 %add9 to i32
+  %call = tail call i32 @goo(i32 0, i32 %conv11)
+  unreachable
+}
+
+define i32 @test2(i32 %N, i32* nocapture readonly %a, i32 %S) {
+entry:
+  %cmp.9 = icmp sgt i32 %N, 0
+  br i1 %cmp.9, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph:
+  br label %for.body
+
+for.cond.for.cond.cleanup_crit_edge:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %S.addr.0.lcssa = phi i32 [ %add2, %for.cond.for.cond.cleanup_crit_edge ], [ %S, %entry ]
+  ret i32 %S.addr.0.lcssa
+
+for.body:
+;CHECK-LABEL: for.body:
+;CHECK-NEXT:    %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ]
+;CHECK-NEXT:    %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add, %for.body ]
+;CHECK-NEXT:    %a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ]
+;CHECK-NEXT:    %4 = load i32, i32* %a.addr.010, align 4
+;CHECK-NEXT:    %add = add nsw i32 %4, %S.addr.011
+;CHECK-NEXT:    %incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 1
+;CHECK-NEXT:    %indvar.next = add i32 %indvar, 1
+;CHECK-NEXT:    %exitcond = icmp eq i32 %indvar, %3
+;CHECK-NEXT:    br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
+
+  %i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ]
+  %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add2, %for.body ]
+  %a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ]
+  %incdec.ptr = getelementptr inbounds i32, i32* %a.addr.010, i64 1
+  %0 = load i32, i32* %a.addr.010, align 4
+  %add = add nsw i32 %0, %S.addr.011
+  %incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 2
+  %1 = load i32, i32* %incdec.ptr, align 4
+  %add2 = add nsw i32 %add, %1
+  %add3 = add nsw i32 %i.012, 2
+  %cmp = icmp slt i32 %add3, %N
+  br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge
+}
+
+define i32 @test3(i32* nocapture readonly %buf, i32 %len) #0 {
+entry:
+  %cmp10 = icmp sgt i32 %len, 1
+  br i1 %cmp10, label %while.body.preheader, label %while.end
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+;CHECK-LABEL: while.body:
+;CHECK-NEXT:  %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
+;CHECK-NEXT:  %S.012 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
+;CHECK-NEXT:  %buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
+;CHECK-NEXT:  %4 = load i32, i32* %buf.addr.011, align 4
+;CHECK-NEXT:  %add = add nsw i32 %4, %S.012
+;CHECK-NEXT:  %add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1
+;CHECK-NEXT:  %indvar.next = add i32 %indvar, 1
+;CHECK-NEXT:  %exitcond = icmp eq i32 %indvar, %3
+;CHECK-NEXT:  br i1 %exitcond, label %while.end.loopexit, label %while.body
+
+  %i.013 = phi i32 [ %sub, %while.body ], [ %len, %while.body.preheader ]
+  %S.012 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ]
+  %buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
+  %0 = load i32, i32* %buf.addr.011, align 4
+  %add = add nsw i32 %0, %S.012
+  %arrayidx1 = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1
+  %1 = load i32, i32* %arrayidx1, align 4
+  %add2 = add nsw i32 %add, %1
+  %add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -2
+  %sub = add nsw i32 %i.013, -2
+  %cmp = icmp sgt i32 %sub, 1
+  br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  %S.0.lcssa = phi i32 [ undef, %entry ], [ %add2, %while.end.loopexit ]
+  ret i32 %S.0.lcssa
+}
+




More information about the llvm-commits mailing list