[llvm] r201465 - SCEVExpander: Try hard not to create derived induction variables in other loops

Arnold Schwaighofer aschwaighofer at apple.com
Sat Feb 15 09:11:58 PST 2014


Author: arnolds
Date: Sat Feb 15 11:11:56 2014
New Revision: 201465

URL: http://llvm.org/viewvc/llvm-project?rev=201465&view=rev
Log:
SCEVExpander: Try hard not to create derived induction variables in other loops

During LSR of one loop we can run into a situation where we have to expand the
start of a recurrence of that loop's induction variable. This start value is
derived from the induction variable of a preceding loop. SCEV has canonicalized
this value to a recurrence different from the preceding loop's induction
variable (the type and/or step direction has changed). When we come to
instantiate this SCEV we used to create a second induction variable in that
preceding loop. This patch tries to base such derived induction variables on
the preceding loop's existing induction variable.
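
As a rough illustration (a hypothetical C++ sketch, not the original
reproducer; the function and names are made up), the problematic shape is a
second loop whose start value is a truncated/derived copy of a first loop's
induction variable:

  // Hypothetical example: the second loop starts at a truncated copy of the
  // first loop's 64-bit induction variable, so SCEV canonicalizes its index
  // to a recurrence of a different type. Expanding that start value during
  // LSR of the second loop used to materialize an extra IV in the first loop.
  extern void use(char);

  void walk(const char *rows, int n) {
    long i = 0;
    while (rows[i] != 0)              // first loop: 64-bit IV i
      ++i;
    for (int j = (int)i; j < n; ++j)  // second loop: starts at (int)i
      use(rows[j]);
  }

With the patch, the expander tries to reuse the first loop's IV (truncating it
and/or inverting its step) instead of inserting a fresh phi into that loop.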

This helps twolf on ARM and seems to help scimark2 on x86.

radar://15970709

Added:
    llvm/trunk/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll
Modified:
    llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h
    llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp

Modified: llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h?rev=201465&r1=201464&r2=201465&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h (original)
+++ llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h Sat Feb 15 11:11:56 2014
@@ -260,7 +260,9 @@ namespace llvm {
     PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                        const Loop *L,
                                        Type *ExpandTy,
-                                       Type *IntTy);
+                                       Type *IntTy,
+                                       Type *&TruncTy,
+                                       bool &InvertStep);
     Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
                        Type *ExpandTy, Type *IntTy, bool useSubtract);
   };

Modified: llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp?rev=201465&r1=201464&r2=201465&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp (original)
+++ llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp Sat Feb 15 11:11:56 2014
@@ -1017,6 +1017,54 @@ Value *SCEVExpander::expandIVInc(PHINode
   return IncV;
 }
 
+/// \brief Hoist the addrec instruction chain rooted in the loop phi above the
+/// position. This routine assumes that this is possible (has been checked).
+static void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
+                           Instruction *Pos, PHINode *LoopPhi) {
+  do {
+    if (DT->dominates(InstToHoist, Pos))
+      break;
+    // Make sure the increment is where we want it. But don't move it
+    // down past a potential existing post-inc user.
+    InstToHoist->moveBefore(Pos);
+    Pos = InstToHoist;
+    InstToHoist = cast<Instruction>(InstToHoist->getOperand(0));
+  } while (InstToHoist != LoopPhi);
+}
+
+/// \brief Check whether we can cheaply express the requested SCEV in terms of
+/// the available PHI SCEV by truncation and/or inversion of the step.
+static bool canBeCheaplyTransformed(ScalarEvolution &SE,
+                                    const SCEVAddRecExpr *Phi,
+                                    const SCEVAddRecExpr *Requested,
+                                    bool &InvertStep) {
+  Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
+  Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());
+
+  if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
+    return false;
+
+  // Try to truncate it if necessary.
+  Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
+  if (!Phi)
+    return false;
+
+  // Check whether truncation will help.
+  if (Phi == Requested) {
+    InvertStep = false;
+    return true;
+  }
+
+  // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
+  if (SE.getAddExpr(Requested->getStart(),
+                    SE.getNegativeSCEV(Requested)) == Phi) {
+    InvertStep = true;
+    return true;
+  }
+
+  return false;
+}
+
 /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
 /// the base addrec, which is the addrec without any non-loop-dominating
 /// values, and return the PHI.
@@ -1024,49 +1072,84 @@ PHINode *
 SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                         const Loop *L,
                                         Type *ExpandTy,
-                                        Type *IntTy) {
+                                        Type *IntTy,
+                                        Type *&TruncTy,
+                                        bool &InvertStep) {
   assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
 
   // Reuse a previously-inserted PHI, if present.
   BasicBlock *LatchBlock = L->getLoopLatch();
   if (LatchBlock) {
+    PHINode *AddRecPhiMatch = 0;
+    Instruction *IncV = 0;
+    TruncTy = 0;
+    InvertStep = false;
     for (BasicBlock::iterator I = L->getHeader()->begin();
          PHINode *PN = dyn_cast<PHINode>(I); ++I) {
-      if (!SE.isSCEVable(PN->getType()) ||
-          (SE.getEffectiveSCEVType(PN->getType()) !=
-           SE.getEffectiveSCEVType(Normalized->getType())) ||
-          SE.getSCEV(PN) != Normalized)
+      if (!SE.isSCEVable(PN->getType()))
+        continue;
+
+      const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN));
+      if (!PhiSCEV)
         continue;
 
-      Instruction *IncV =
-        cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+      bool IsMatchingSCEV = PhiSCEV == Normalized;
+      Instruction *TempIncV =
+          cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+
+      // We only handle truncation and inversion of phi recurrences for the
+      // expanded expression if the expanded expression's loop dominates the
+      // loop we insert to. Check now, so we can bail out early.
+      if (!IsMatchingSCEV)
+        if (!IVIncInsertLoop ||
+            !SE.DT->properlyDominates(L->getHeader(),
+                                      IVIncInsertLoop->getHeader()))
+          continue;
 
+      // Check whether we can reuse this PHI node.
       if (LSRMode) {
-        if (!isExpandedAddRecExprPHI(PN, IncV, L))
+        if (!isExpandedAddRecExprPHI(PN, TempIncV, L))
           continue;
-        if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos))
+        if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
           continue;
-      }
-      else {
-        if (!isNormalAddRecExprPHI(PN, IncV, L))
+      } else {
+        if (!isNormalAddRecExprPHI(PN, TempIncV, L))
           continue;
-        if (L == IVIncInsertLoop)
-          do {
-            if (SE.DT->dominates(IncV, IVIncInsertPos))
-              break;
-            // Make sure the increment is where we want it. But don't move it
-            // down past a potential existing post-inc user.
-            IncV->moveBefore(IVIncInsertPos);
-            IVIncInsertPos = IncV;
-            IncV = cast<Instruction>(IncV->getOperand(0));
-          } while (IncV != PN);
       }
+
+      // Stop if we have found an exact match SCEV.
+      if (IsMatchingSCEV) {
+        IncV = TempIncV;
+        TruncTy = 0;
+        InvertStep = false;
+        AddRecPhiMatch = PN;
+        break;
+      }
+
+      // Try whether the phi can be translated into the requested form
+      // (truncated and/or offset by a constant).
+      if ((!TruncTy || InvertStep) &&
+          canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
+        // Record the phi node, but don't stop; we might find an exact match
+        // later.
+        AddRecPhiMatch = PN;
+        IncV = TempIncV;
+        TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
+      }
+    }
+
+    if (AddRecPhiMatch) {
+      // Potentially, move the increment. We have made sure in
+      // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
+      if (L == IVIncInsertLoop)
+        hoistBeforePos(SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
+
       // Ok, the add recurrence looks usable.
       // Remember this PHI, even in post-inc mode.
-      InsertedValues.insert(PN);
+      InsertedValues.insert(AddRecPhiMatch);
       // Remember the increment.
       rememberInstruction(IncV);
-      return PN;
+      return AddRecPhiMatch;
     }
   }
 
@@ -1191,7 +1274,12 @@ Value *SCEVExpander::expandAddRecExprLit
   // Expand the core addrec. If we need post-loop scaling, force it to
   // expand to an integer type to avoid the need for additional casting.
   Type *ExpandTy = PostLoopScale ? IntTy : STy;
-  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);
+  // In some cases, we decide to reuse an existing phi node but need to truncate
+  // it and/or invert the step.
+  Type *TruncTy = 0;
+  bool InvertStep = false;
+  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy,
+                                          TruncTy, InvertStep);
 
   // Accommodate post-inc mode, if necessary.
   Value *Result;
@@ -1232,6 +1320,20 @@ Value *SCEVExpander::expandAddRecExprLit
     }
   }
 
+  // We have decided to reuse an induction variable of a dominating loop. Apply
+  // truncation and/or inversion of the step.
+  if (TruncTy) {
+    if (Result->getType() != TruncTy) {
+      Result = Builder.CreateTrunc(Result, TruncTy);
+      rememberInstruction(Result);
+    }
+    if (InvertStep) {
+      Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy),
+                                 Result);
+      rememberInstruction(Result);
+    }
+  }
+
   // Re-apply any non-loop-dominating scale.
   if (PostLoopScale) {
     assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll?rev=201465&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll Sat Feb 15 11:11:56 2014
@@ -0,0 +1,50 @@
+; RUN: opt -S -loop-reduce -mcpu=corei7-avx -mtriple=x86_64-apple-macosx < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @indvar_expansion(i8* nocapture readonly %rowsptr) {
+entry:
+  br label %for.cond
+
+; SCEVExpander used to create induction variables in the loop %for.cond while
+; expanding the recurrence start value of loop strength reduced values from
+; %vector.body.
+
+; CHECK-LABEL: indvar_expansion
+; CHECK: for.cond:
+; CHECK-NOT: phi i3
+; CHECK: br i1 {{.+}}, label %for.cond
+
+for.cond:
+  %indvars.iv44 = phi i64 [ %indvars.iv.next45, %for.cond ], [ 0, %entry ]
+  %cmp = icmp eq i8 undef, 0
+  %indvars.iv.next45 = add nuw nsw i64 %indvars.iv44, 1
+  br i1 %cmp, label %for.cond, label %for.cond2
+
+for.cond2:
+  br i1 undef, label %for.cond2, label %for.body14.lr.ph
+
+for.body14.lr.ph:
+  %sext = shl i64 %indvars.iv44, 32
+  %0 = ashr exact i64 %sext, 32
+  %1 = sub i64 undef, %indvars.iv44
+  %2 = and i64 %1, 4294967295
+  %3 = add i64 %2, 1
+  %fold = add i64 %1, 1
+  %n.mod.vf = and i64 %fold, 7
+  %n.vec = sub i64 %3, %n.mod.vf
+  %end.idx.rnd.down = add i64 %n.vec, %0
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body14.lr.ph ]
+  %4 = getelementptr inbounds i8* %rowsptr, i64 %index
+  %5 = bitcast i8* %4 to <4 x i8>*
+  %wide.load = load <4 x i8>* %5, align 1
+  %index.next = add i64 %index, 8
+  %6 = icmp eq i64 %index.next, %end.idx.rnd.down
+  br i1 %6, label %for.end24, label %vector.body
+
+for.end24:
+  ret void
+}




