[llvm-commits] [llvm] r145906 - in /llvm/trunk: lib/Transforms/Scalar/LoopStrengthReduce.cpp test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll

Andrew Trick atrick at apple.com
Mon Dec 5 19:13:32 PST 2011


Author: atrick
Date: Mon Dec  5 21:13:31 2011
New Revision: 145906

URL: http://llvm.org/viewvc/llvm-project?rev=145906&view=rev
Log:
LSR: prune undesirable formulae early.

It's always good to prune early, but formulae that are unsatisfactory
in their own right need to be removed before running any other pruning
heuristics. We easily avoid generating such formulae, but we need them
as an intermediate basis for forming other good formulae.

Added:
    llvm/trunk/test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=145906&r1=145905&r2=145906&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Dec  5 21:13:31 2011
@@ -634,6 +634,19 @@
   return AccessTy;
 }
 
+/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
+static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+  for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+    if (SE.isSCEVable(PN->getType()) &&
+        (SE.getEffectiveSCEVType(PN->getType()) ==
+         SE.getEffectiveSCEVType(AR->getType())) &&
+        SE.getSCEV(PN) == AR)
+      return true;
+  }
+  return false;
+}
+
 /// DeleteTriviallyDeadInstructions - If any of the instructions is the
 /// specified set are trivially dead, delete them and see if this makes any of
 /// their operands subsequently dead.
@@ -703,7 +716,8 @@
                    const DenseSet<const SCEV *> &VisitedRegs,
                    const Loop *L,
                    const SmallVectorImpl<int64_t> &Offsets,
-                   ScalarEvolution &SE, DominatorTree &DT);
+                   ScalarEvolution &SE, DominatorTree &DT,
+                   SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
 
   void print(raw_ostream &OS) const;
   void dump() const;
@@ -716,7 +730,8 @@
   void RatePrimaryRegister(const SCEV *Reg,
                            SmallPtrSet<const SCEV *, 16> &Regs,
                            const Loop *L,
-                           ScalarEvolution &SE, DominatorTree &DT);
+                           ScalarEvolution &SE, DominatorTree &DT,
+                           SmallPtrSet<const SCEV *, 16> *LoserRegs);
 };
 
 }
@@ -736,18 +751,13 @@
     // on other loops, and cannot be expected to change sibling loops. If the
     // AddRec exists, consider it's register free and leave it alone. Otherwise,
     // do not consider this formula at all.
-    // FIXME: why do we need to generate such fomulae?
     else if (!EnableNested || L->contains(AR->getLoop()) ||
              (!AR->getLoop()->contains(L) &&
               DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
-      for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
-           PHINode *PN = dyn_cast<PHINode>(I); ++I) {
-        if (SE.isSCEVable(PN->getType()) &&
-            (SE.getEffectiveSCEVType(PN->getType()) ==
-             SE.getEffectiveSCEVType(AR->getType())) &&
-            SE.getSCEV(PN) == AR)
-          return;
-      }
+      if (isExistingPhi(AR, SE))
+        return;
+
+      // For !EnableNested, never rewrite IVs in other loops.
       if (!EnableNested) {
         Loose();
         return;
@@ -789,13 +799,22 @@
 }
 
 /// RatePrimaryRegister - Record this register in the set. If we haven't seen it
-/// before, rate it.
+/// before, rate it. Optional LoserRegs provides a way to declare any formula
+/// that refers to one of those regs an instant loser.
 void Cost::RatePrimaryRegister(const SCEV *Reg,
                                SmallPtrSet<const SCEV *, 16> &Regs,
                                const Loop *L,
-                               ScalarEvolution &SE, DominatorTree &DT) {
-  if (Regs.insert(Reg))
+                               ScalarEvolution &SE, DominatorTree &DT,
+                               SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+  if (LoserRegs && LoserRegs->count(Reg)) {
+    Loose();
+    return;
+  }
+  if (Regs.insert(Reg)) {
     RateRegister(Reg, Regs, L, SE, DT);
+    if (isLoser())
+      LoserRegs->insert(Reg);
+  }
 }
 
 void Cost::RateFormula(const Formula &F,
@@ -803,14 +822,15 @@
                        const DenseSet<const SCEV *> &VisitedRegs,
                        const Loop *L,
                        const SmallVectorImpl<int64_t> &Offsets,
-                       ScalarEvolution &SE, DominatorTree &DT) {
+                       ScalarEvolution &SE, DominatorTree &DT,
+                       SmallPtrSet<const SCEV *, 16> *LoserRegs) {
   // Tally up the registers.
   if (const SCEV *ScaledReg = F.ScaledReg) {
     if (VisitedRegs.count(ScaledReg)) {
       Loose();
       return;
     }
-    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
     if (isLoser())
       return;
   }
@@ -821,7 +841,7 @@
       Loose();
       return;
     }
-    RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
     if (isLoser())
       return;
   }
@@ -1103,7 +1123,6 @@
   Formulae.push_back(F);
 
   // Record registers now being used by this use.
-  if (F.ScaledReg) Regs.insert(F.ScaledReg);
   Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
 
   return true;
@@ -1114,7 +1133,6 @@
   if (&F != &Formulae.back())
     std::swap(F, Formulae.back());
   Formulae.pop_back();
-  assert(!Formulae.empty() && "LSRUse has no formulae left!");
 }
 
 /// RecomputeRegs - Recompute the Regs field, and update RegUses.
@@ -2912,6 +2930,7 @@
 void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
   DenseSet<const SCEV *> VisitedRegs;
   SmallPtrSet<const SCEV *, 16> Regs;
+  SmallPtrSet<const SCEV *, 16> LoserRegs;
 #ifndef NDEBUG
   bool ChangedFormulae = false;
 #endif
@@ -2931,46 +2950,66 @@
          FIdx != NumForms; ++FIdx) {
       Formula &F = LU.Formulae[FIdx];
 
-      SmallVector<const SCEV *, 2> Key;
-      for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
-           JE = F.BaseRegs.end(); J != JE; ++J) {
-        const SCEV *Reg = *J;
-        if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
-          Key.push_back(Reg);
-      }
-      if (F.ScaledReg &&
-          RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
-        Key.push_back(F.ScaledReg);
-      // Unstable sort by host order ok, because this is only used for
-      // uniquifying.
-      std::sort(Key.begin(), Key.end());
-
-      std::pair<BestFormulaeTy::const_iterator, bool> P =
-        BestFormulae.insert(std::make_pair(Key, FIdx));
-      if (!P.second) {
+      // Some formulas are instant losers. For example, they may depend on
+      // nonexistent AddRecs from other loops. These need to be filtered
+      // immediately, otherwise heuristics could choose them over others leading
+      // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
+      // avoids the need to recompute this information across formulae using the
+      // same bad AddRec. Passing LoserRegs is also essential unless we remove
+      // the corresponding bad register from the Regs set.
+      Cost CostF;
+      Regs.clear();
+      CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT,
+                        &LoserRegs);
+      if (CostF.isLoser()) {
+        // During initial formula generation, undesirable formulae are generated
+        // by uses within other loops that have some non-trivial address mode or
+        // use the postinc form of the IV. LSR needs to provide these formulae
+        // as the basis of rediscovering the desired formula that uses an AddRec
+        // corresponding to the existing phi. Once all formulae have been
+        // generated, these initial losers may be pruned.
+        DEBUG(dbgs() << "  Filtering loser "; F.print(dbgs());
+              dbgs() << "\n");
+      }
+      else {
+        SmallVector<const SCEV *, 2> Key;
+        for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+               JE = F.BaseRegs.end(); J != JE; ++J) {
+          const SCEV *Reg = *J;
+          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+            Key.push_back(Reg);
+        }
+        if (F.ScaledReg &&
+            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+          Key.push_back(F.ScaledReg);
+        // Unstable sort by host order ok, because this is only used for
+        // uniquifying.
+        std::sort(Key.begin(), Key.end());
+
+        std::pair<BestFormulaeTy::const_iterator, bool> P =
+          BestFormulae.insert(std::make_pair(Key, FIdx));
+        if (P.second)
+          continue;
+
         Formula &Best = LU.Formulae[P.first->second];
 
-        Cost CostF;
-        CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
-        Regs.clear();
         Cost CostBest;
-        CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
         Regs.clear();
+        CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
         if (CostF < CostBest)
           std::swap(F, Best);
         DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
               dbgs() << "\n"
                         "    in favor of formula "; Best.print(dbgs());
               dbgs() << '\n');
+      }
 #ifndef NDEBUG
-        ChangedFormulae = true;
+      ChangedFormulae = true;
 #endif
-        LU.DeleteFormula(F);
-        --FIdx;
-        --NumForms;
-        Any = true;
-        continue;
-      }
+      LU.DeleteFormula(F);
+      --FIdx;
+      --NumForms;
+      Any = true;
     }
 
     // Now that we've filtered out some formulae, recompute the Regs set.

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll?rev=145906&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll Mon Dec  5 21:13:31 2011
@@ -0,0 +1,96 @@
+; RUN: llc < %s | FileCheck %s
+;
+; Test LSR's ability to prune formulae that refer to nonexistant
+; AddRecs in other loops.
+;
+; Unable to reduce this case further because it requires LSR to exceed
+; ComplexityLimit.
+;
+; We really just want to ensure that LSR can process this loop without
+; finding an unsatisfactory solution and bailing out. I've added
+; dummyout, an obvious candidate for postinc replacement so we can
+; verify that LSR removes it.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+; CHECK: @test
+; CHECK: # %for.body{{$}}
+; dummyiv copy should be removed
+; CHECK-NOT: movq
+; CHECK: # %for.cond19.preheader
+; dummycnt should be removed
+; CHECK-NOT: incq
+; CHECK: # %for.body23{{$}}
+define i64 @test(i64 %count, float* nocapture %srcrow, i32* nocapture %destrow) nounwind uwtable ssp {
+entry:
+  %cmp34 = icmp eq i64 %count, 0
+  br i1 %cmp34, label %for.end29, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %dummyiv = phi i64 [ %dummycnt, %for.body ], [ 0, %entry ]
+  %indvars.iv39 = phi i64 [ %indvars.iv.next40, %for.body ], [ 0, %entry ]
+  %dp.036 = phi i32* [ %add.ptr, %for.body ], [ %destrow, %entry ]
+  %p.035 = phi float* [ %incdec.ptr4, %for.body ], [ %srcrow, %entry ]
+  %incdec.ptr = getelementptr inbounds float* %p.035, i64 1
+  %0 = load float* %incdec.ptr, align 4
+  %incdec.ptr2 = getelementptr inbounds float* %p.035, i64 2
+  %1 = load float* %incdec.ptr2, align 4
+  %incdec.ptr3 = getelementptr inbounds float* %p.035, i64 3
+  %2 = load float* %incdec.ptr3, align 4
+  %incdec.ptr4 = getelementptr inbounds float* %p.035, i64 4
+  %3 = load float* %incdec.ptr4, align 4
+  %4 = load i32* %dp.036, align 4
+  %conv5 = fptoui float %0 to i32
+  %or = or i32 %4, %conv5
+  %arrayidx6 = getelementptr inbounds i32* %dp.036, i64 1
+  %5 = load i32* %arrayidx6, align 4
+  %conv7 = fptoui float %1 to i32
+  %or8 = or i32 %5, %conv7
+  %arrayidx9 = getelementptr inbounds i32* %dp.036, i64 2
+  %6 = load i32* %arrayidx9, align 4
+  %conv10 = fptoui float %2 to i32
+  %or11 = or i32 %6, %conv10
+  %arrayidx12 = getelementptr inbounds i32* %dp.036, i64 3
+  %7 = load i32* %arrayidx12, align 4
+  %conv13 = fptoui float %3 to i32
+  %or14 = or i32 %7, %conv13
+  store i32 %or, i32* %dp.036, align 4
+  store i32 %or8, i32* %arrayidx6, align 4
+  store i32 %or11, i32* %arrayidx9, align 4
+  store i32 %or14, i32* %arrayidx12, align 4
+  %add.ptr = getelementptr inbounds i32* %dp.036, i64 4
+  %indvars.iv.next40 = add i64 %indvars.iv39, 4
+  %dummycnt = add i64 %dummyiv, 1
+  %cmp = icmp ult i64 %indvars.iv.next40, %count
+  br i1 %cmp, label %for.body, label %for.cond19.preheader
+
+for.cond19.preheader:                             ; preds = %for.body
+  %dummyout = add i64 %dummyiv, 1
+  %rem = and i64 %count, 3
+  %cmp2130 = icmp eq i64 %rem, 0
+  br i1 %cmp2130, label %for.end29, label %for.body23.lr.ph
+
+for.body23.lr.ph:                                 ; preds = %for.cond19.preheader
+  %8 = and i64 %count, 3
+  br label %for.body23
+
+for.body23:                                       ; preds = %for.body23, %for.body23.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body23.lr.ph ], [ %indvars.iv.next, %for.body23 ]
+  %dp.132 = phi i32* [ %add.ptr, %for.body23.lr.ph ], [ %incdec.ptr28, %for.body23 ]
+  %p.131 = phi float* [ %incdec.ptr4, %for.body23.lr.ph ], [ %incdec.ptr24, %for.body23 ]
+  %incdec.ptr24 = getelementptr inbounds float* %p.131, i64 1
+  %9 = load float* %incdec.ptr24, align 4
+  %10 = load i32* %dp.132, align 4
+  %conv25 = fptoui float %9 to i32
+  %or26 = or i32 %10, %conv25
+  store i32 %or26, i32* %dp.132, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %incdec.ptr28 = getelementptr inbounds i32* %dp.132, i64 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %8
+  br i1 %exitcond, label %for.end29, label %for.body23
+
+for.end29:                                        ; preds = %entry, %for.body23, %for.cond19.preheader
+  %result = phi i64 [ 0, %entry ], [ %dummyout, %for.body23 ], [ %dummyout, %for.cond19.preheader ]
+  ret i64 %result
+}





More information about the llvm-commits mailing list