[llvm] r228931 - [LoopRerolling] Be more forgiving with instruction order.

Thu Feb 12 07:54:15 PST 2015

Author: jamesm
Date: Thu Feb 12 09:54:14 2015
New Revision: 228931

URL: http://llvm.org/viewvc/llvm-project?rev=228931&view=rev
Log:
[LoopRerolling] Be more forgiving with instruction order.

We can't solve the full subgraph isomorphism problem. But we can
allow obvious cases, where for example two instructions of different
types are out of order. Due to them having different types/opcodes,
there is no ambiguity.

Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp
    llvm/trunk/test/Transforms/LoopReroll/basic.ll

Modified: llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp?rev=228931&r1=228930&r2=228931&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp Thu Feb 12 09:54:14 2015
@@ -45,6 +45,12 @@ static cl::opt<unsigned>
 MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
   cl::desc("The maximum increment for loop rerolling"));
 
+static cl::opt<unsigned>
+NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
+                          cl::Hidden,
+                          cl::desc("The maximum number of failures to tolerate"
+                                   " during fuzzy matching. (default: 400)"));
+
 // This loop re-rolling transformation aims to transform loops like this:
 //
 // int foo(int a);
@@ -394,9 +400,14 @@ namespace {
                                 const SmallInstructionSet &Final,
                                 DenseSet<Instruction *> &Users);
 
-      UsesTy::iterator nextInstr(int Val, UsesTy &In, UsesTy::iterator I);
+      UsesTy::iterator nextInstr(int Val, UsesTy &In,
+                                 const SmallInstructionSet &Exclude,
+                                 UsesTy::iterator *StartI=nullptr);
       bool isBaseInst(Instruction *I);
       bool isRootInst(Instruction *I);
+      bool instrDependsOn(Instruction *I,
+                          UsesTy::iterator Start,
+                          UsesTy::iterator End);
 
       LoopReroll *Parent;
 
@@ -941,10 +952,16 @@ bool LoopReroll::DAGRootTracker::collect
 
 }
 
+/// Get the next instruction in "In" that is a member of set Val.
+/// Start searching from StartI, and do not return anything in Exclude.
+/// If StartI is not given, start from In.begin().
 LoopReroll::DAGRootTracker::UsesTy::iterator
 LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
-                                      UsesTy::iterator I) {
-  while (I != In.end() && I->second.test(Val) == 0)
+                                      const SmallInstructionSet &Exclude,
+                                      UsesTy::iterator *StartI) {
+  UsesTy::iterator I = StartI ? *StartI : In.begin();
+  while (I != In.end() && (I->second.test(Val) == 0 ||
+                           Exclude.count(I->first) != 0))
     ++I;
   return I;
 }
@@ -965,6 +982,19 @@ bool LoopReroll::DAGRootTracker::isRootI
   return false;
 }
 
+/// Return true if instruction I depends on any instruction between
+/// Start and End.
+bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
+                                                UsesTy::iterator Start,
+                                                UsesTy::iterator End) {
+  for (auto *U : I->users()) {
+    for (auto It = Start; It != End; ++It)
+      if (U == It->first)
+        return true;
+  }
+  return false;
+}
+
 bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
   // We now need to check for equivalence of the use graph of each root with
   // that of the primary induction variable (excluding the roots). Our goal
@@ -1022,8 +1052,9 @@ bool LoopReroll::DAGRootTracker::validat
     DenseMap<Value *, Value *> BaseMap;
 
     // Compare iteration Iter to the base.
-    auto BaseIt = nextInstr(0, Uses, Uses.begin());
-    auto RootIt = nextInstr(Iter, Uses, Uses.begin());
+    SmallInstructionSet Visited;
+    auto BaseIt = nextInstr(0, Uses, Visited);
+    auto RootIt = nextInstr(Iter, Uses, Visited);
     auto LastRootIt = Uses.begin();
 
     while (BaseIt != Uses.end() && RootIt != Uses.end()) {
@@ -1033,20 +1064,58 @@ bool LoopReroll::DAGRootTracker::validat
       // Skip over the IV or root instructions; only match their users.
       bool Continue = false;
       if (isBaseInst(BaseInst)) {
-        BaseIt = nextInstr(0, Uses, ++BaseIt);
+        Visited.insert(BaseInst);
+        BaseIt = nextInstr(0, Uses, Visited);
         Continue = true;
       }
       if (isRootInst(RootInst)) {
         LastRootIt = RootIt;
-        RootIt = nextInstr(Iter, Uses, ++RootIt);
+        Visited.insert(RootInst);
+        RootIt = nextInstr(Iter, Uses, Visited);
         Continue = true;
       }
       if (Continue) continue;
 
+      if (!BaseInst->isSameOperationAs(RootInst)) {
+        // Last chance saloon. We don't try and solve the full isomorphism
+        // problem, but try and at least catch the case where two instructions
+        // *of different types* are round the wrong way. We won't be able to
+        // efficiently tell, given two ADD instructions, which way around we
+        // should match them, but given an ADD and a SUB, we can at least infer
+        // which one is which.
+        //
+        // This should allow us to deal with a greater subset of the isomorphism
+        // problem. It does however change a linear algorithm into a quadratic
+        // one, so limit the number of probes we do.
+        auto TryIt = RootIt;
+        unsigned N = NumToleratedFailedMatches;
+        while (TryIt != Uses.end() &&
+               !BaseInst->isSameOperationAs(TryIt->first) &&
+               N--) {
+          ++TryIt;
+          TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
+        }
+
+        if (TryIt == Uses.end() || TryIt == RootIt ||
+            instrDependsOn(TryIt->first, RootIt, TryIt)) {
+          DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
+                " vs. " << *RootInst << "\n");
+          return false;
+        }
+        
+        RootIt = TryIt;
+        RootInst = TryIt->first;
+      }
+
       // All instructions between the last root and this root
-      // belong to some other iteration. If they belong to a 
+      // may belong to some other iteration. If they belong to a 
       // future iteration, then they're dangerous to alias with.
-      for (; LastRootIt != RootIt; ++LastRootIt) {
+      // 
+      // Note that because we allow a limited amount of flexibility in the order
+      // that we visit nodes, LastRootIt might be *before* RootIt, in which
+      // case we've already checked this set of instructions so we shouldn't
+      // do anything.
+      for (; LastRootIt < RootIt; ++LastRootIt) {
         Instruction *I = LastRootIt->first;
         if (LastRootIt->second.find_first() < (int)Iter)
           continue;
@@ -1062,12 +1131,6 @@ bool LoopReroll::DAGRootTracker::validat
           FutureSideEffects = true;
       }
 
-      if (!BaseInst->isSameOperationAs(RootInst)) {
-        DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
-              " vs. " << *RootInst << "\n");
-        return false;
-      }
-
       // Make sure that this instruction, which is in the use set of this
       // root instruction, does not also belong to the base set or the set of
       // some other root instruction.
@@ -1174,8 +1237,10 @@ bool LoopReroll::DAGRootTracker::validat
       BaseMap.insert(std::make_pair(RootInst, BaseInst));
 
       LastRootIt = RootIt;
-      BaseIt = nextInstr(0, Uses, ++BaseIt);
-      RootIt = nextInstr(Iter, Uses, ++RootIt);
+      Visited.insert(BaseInst);
+      Visited.insert(RootInst);
+      BaseIt = nextInstr(0, Uses, Visited);
+      RootIt = nextInstr(Iter, Uses, Visited);
     }
     assert (BaseIt == Uses.end() && RootIt == Uses.end() &&
             "Mismatched set sizes!");

Modified: llvm/trunk/test/Transforms/LoopReroll/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopReroll/basic.ll?rev=228931&r1=228930&r2=228931&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopReroll/basic.ll (original)
+++ llvm/trunk/test/Transforms/LoopReroll/basic.ll Thu Feb 12 09:54:14 2015
@@ -488,6 +488,63 @@ for.end:
   ret void
 }
 
+; int foo(int a);
+; void bar2(int *x, int y, int z) {
+;   for (int i = 0; i < 500; i += 3) {
+;     foo(i+y+i*z); // Slightly reordered instruction order
+;     foo(i+1+y+(i+1)*z);
+;     foo(i+2+y+(i+2)*z);
+;   }
+; }
+
+; Function Attrs: nounwind uwtable
+define void @bar2(i32* nocapture readnone %x, i32 %y, i32 %z) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+
+  %tmp1 = add i32 %i.08, %y
+  %tmp2 = mul i32 %i.08, %z
+  %tmp3 = add i32 %tmp2, %tmp1
+  %call = tail call i32 @foo(i32 %tmp3) #1
+
+  %add = add nsw i32 %i.08, 1
+  %tmp2a = mul i32 %add, %z
+  %tmp1a = add i32 %add, %y
+  %tmp3a = add i32 %tmp2a, %tmp1a
+  %calla = tail call i32 @foo(i32 %tmp3a) #1
+  
+  %add2 = add nsw i32 %i.08, 2
+  %tmp2b = mul i32 %add2, %z
+  %tmp1b = add i32 %add2, %y
+  %tmp3b = add i32 %tmp2b, %tmp1b
+  %callb = tail call i32 @foo(i32 %tmp3b) #1
+
+  %add3 = add nsw i32 %i.08, 3
+
+  %exitcond = icmp eq i32 %add3, 500
+  br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK-LABEL: @bar2
+
+; CHECK: for.body:
+; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %tmp1 = add i32 %indvar, %y
+; CHECK: %tmp2 = mul i32 %indvar, %z
+; CHECK: %tmp3 = add i32 %tmp2, %tmp1
+; CHECK: %call = tail call i32 @foo(i32 %tmp3) #1
+; CHECK: %indvar.next = add i32 %indvar, 1
+; CHECK: %exitcond1 = icmp eq i32 %indvar, 497
+; CHECK: br i1 %exitcond1, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
 
 attributes #0 = { nounwind uwtable }
 attributes #1 = { nounwind }