[llvm-commits] [llvm] r162912 - in /llvm/trunk: lib/Transforms/Scalar/LoopRotation.cpp test/Transforms/LoopRotate/multiple-exits.ll

Benjamin Kramer benny.kra at gmail.com
Sat Sep 1 03:08:03 PDT 2012


On 01.09.2012, at 10:42, Duncan Sands <baldrick at free.fr> wrote:

> Hi Benjamin, probably this caused this expensive checks buildbot failure:
> 
> http://lab.llvm.org:8011/builders/dragonegg-x86_64-linux-gcc-4.6-self-host-checks/builds/1535
> 
> The failure is:
> 
> DominatorTree is not up to date!
> Computed:
> =============================--------------------------------
> Inorder Dominator Tree: DFSNumbers invalid: 6 slow queries.
> ...
> 
> I'm working on a testcase.  Unfortunately I haven't yet been able to reproduce
> with "opt" on a dump of the bitcode from the failing example.

Yikes, these are notoriously hard to reproduce, it's highly dependent on phase ordering :(

Maybe taking the list of passes that dragonegg runs and passing it to opt together with "-verify-dom-info" helps?

- Ben
> 
> Ciao, Duncan.
> 
> On 30/08/12 17:39, Benjamin Kramer wrote:
>> Author: d0k
>> Date: Thu Aug 30 10:39:42 2012
>> New Revision: 162912
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=162912&view=rev
>> Log:
>> LoopRotate: Also rotate loops with multiple exits.
>> 
>> The old PHI updating code in loop-rotate was replaced with SSAUpdater a while
>> ago, it has no problems with complex PHIs. What had to be fixed is detecting
>> whether a loop was already rotated and updating dominators when multiple exits
>> were present.
>> 
>> This change increases overall code size a bit, mostly due to additional loop
>> unrolling opportunities. Passes test-suite and selfhost with -verify-dom-info.
>> Fixes PR7447.
>> 
>> Thanks to Andy for the input on the domtree updating code.
>> 
>> Added:
>>     llvm/trunk/test/Transforms/LoopRotate/multiple-exits.ll
>> Modified:
>>     llvm/trunk/lib/Transforms/Scalar/LoopRotation.cpp
>> 
>> Modified: llvm/trunk/lib/Transforms/Scalar/LoopRotation.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopRotation.cpp?rev=162912&r1=162911&r2=162912&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Transforms/Scalar/LoopRotation.cpp (original)
>> +++ llvm/trunk/lib/Transforms/Scalar/LoopRotation.cpp Thu Aug 30 10:39:42 2012
>> @@ -24,6 +24,7 @@
>>  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
>>  #include "llvm/Transforms/Utils/SSAUpdater.h"
>>  #include "llvm/Transforms/Utils/ValueMapper.h"
>> +#include "llvm/Support/CFG.h"
>>  #include "llvm/Support/Debug.h"
>>  #include "llvm/ADT/Statistic.h"
>>  using namespace llvm;
>> @@ -256,6 +257,7 @@
>>      return false;
>> 
>>    BasicBlock *OrigHeader = L->getHeader();
>> +  BasicBlock *OrigLatch = L->getLoopLatch();
>> 
>>    BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
>>    if (BI == 0 || BI->isUnconditional())
>> @@ -267,13 +269,9 @@
>>    if (!L->isLoopExiting(OrigHeader))
>>      return false;
>> 
>> -  // Updating PHInodes in loops with multiple exits adds complexity.
>> -  // Keep it simple, and restrict loop rotation to loops with one exit only.
>> -  // In future, lift this restriction and support for multiple exits if
>> -  // required.
>> -  SmallVector<BasicBlock*, 8> ExitBlocks;
>> -  L->getExitBlocks(ExitBlocks);
>> -  if (ExitBlocks.size() > 1)
>> +  // If the loop latch already contains a branch that leaves the loop then the
>> +  // loop is already rotated.
>> +  if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
>>      return false;
>> 
>>    // Check size of original header and reject loop if it is very big.
>> @@ -286,11 +284,10 @@
>> 
>>    // Now, this loop is suitable for rotation.
>>    BasicBlock *OrigPreheader = L->getLoopPreheader();
>> -  BasicBlock *OrigLatch = L->getLoopLatch();
>> 
>>    // If the loop could not be converted to canonical form, it must have an
>>    // indirectbr in it, just give up.
>> -  if (OrigPreheader == 0 || OrigLatch == 0)
>> +  if (OrigPreheader == 0)
>>      return false;
>> 
>>    // Anything ScalarEvolution may know about this loop or the PHI nodes
>> @@ -298,6 +295,8 @@
>>    if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
>>      SE->forgetLoop(L);
>> 
>> +  DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
>> +
>>    // Find new Loop header. NewHeader is a Header's one and only successor
>>    // that is inside loop.  Header's other successor is outside the
>>    // loop.  Otherwise loop is not suitable for rotation.
>> @@ -408,10 +407,16 @@
>>      // Update DominatorTree to reflect the CFG change we just made.  Then split
>>      // edges as necessary to preserve LoopSimplify form.
>>      if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
>> -      // Since OrigPreheader now has the conditional branch to Exit block, it is
>> -      // the dominator of Exit.
>> -      DT->changeImmediateDominator(Exit, OrigPreheader);
>> -      DT->changeImmediateDominator(NewHeader, OrigPreheader);
>> +      // Everything that was dominated by the old loop header is now dominated
>> +      // by the original loop preheader. Conceptually the header was merged
>> +      // into the preheader, even though we reuse the actual block as a new
>> +      // loop latch.
>> +      DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
>> +      SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
>> +                                                   OrigHeaderNode->end());
>> +      DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader);
>> +      for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I)
>> +        DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
>> 
>>        // Update OrigHeader to be dominated by the new header block.
>>        DT->changeImmediateDominator(OrigHeader, OrigLatch);
>> @@ -440,6 +445,46 @@
>>        // Update OrigHeader to be dominated by the new header block.
>>        DT->changeImmediateDominator(NewHeader, OrigPreheader);
>>        DT->changeImmediateDominator(OrigHeader, OrigLatch);
>> +
>> +      // Brute force incremental dominator tree update. Call
>> +      // findNearestCommonDominator on all CFG predecessors of each child of the
>> +      // original header.
>> +      DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
>> +      SmallVector<DomTreeNode *, 8> WorkList(OrigHeaderNode->begin(),
>> +                                             OrigHeaderNode->end());
>> +      while (!WorkList.empty()) {
>> +        DomTreeNode *Node = WorkList.pop_back_val();
>> +        BasicBlock *BB = Node->getBlock();
>> +        BasicBlock *NearestDom = 0;
>> +        for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;
>> +             ++PI) {
>> +          BasicBlock *Pred = *PI;
>> +
>> +          // We have to process predecessors of a node before we touch the
>> +          // actual node. If one of the predecessors is in our worklist, put it
>> +          // and the currently processed node on the worklist and go processing
>> +          // the predecessor.
>> +          SmallVectorImpl<DomTreeNode *>::iterator I =
>> +            std::find(WorkList.begin(), WorkList.end(), DT->getNode(Pred));
>> +          if (I != WorkList.end()) {
>> +            WorkList.push_back(Node);
>> +            std::swap(*I, WorkList.back());
>> +            // The predecessor is now at the end of the worklist.
>> +            NearestDom = 0;
>> +            break;
>> +          }
>> +
>> +          // On the first iteration start with Pred, on the other iterations we
>> +          // narrow it down to the nearest common dominator.
>> +          if (!NearestDom)
>> +            NearestDom = Pred;
>> +          else
>> +            NearestDom = DT->findNearestCommonDominator(NearestDom, Pred);
>> +        }
>> +
>> +        if (NearestDom)
>> +          DT->changeImmediateDominator(BB, NearestDom);
>> +      }
>>      }
>>    }
>> 
>> @@ -452,6 +497,8 @@
>>    // emitted code isn't too gross in this common case.
>>    MergeBlockIntoPredecessor(OrigHeader, this);
>> 
>> +  DEBUG(dbgs() << "LoopRotation: into "; L->dump());
>> +
>>    ++NumRotated;
>>    return true;
>>  }
>> 
>> Added: llvm/trunk/test/Transforms/LoopRotate/multiple-exits.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopRotate/multiple-exits.ll?rev=162912&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/LoopRotate/multiple-exits.ll (added)
>> +++ llvm/trunk/test/Transforms/LoopRotate/multiple-exits.ll Thu Aug 30 10:39:42 2012
>> @@ -0,0 +1,200 @@
>> +; RUN: opt -S -loop-rotate < %s -verify-loop-info -verify-dom-info | FileCheck %s
>> +
>> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>> +target triple = "x86_64-apple-macosx10.8.0"
>> +
>> +; PR7447
>> +define i32 @test1([100 x i32]* nocapture %a) nounwind readonly {
>> +entry:
>> +  br label %for.cond
>> +
>> +for.cond:                                         ; preds = %for.cond1, %entry
>> +  %sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.cond1 ]
>> +  %i.0 = phi i1 [ true, %entry ], [ false, %for.cond1 ]
>> +  br i1 %i.0, label %for.cond1, label %return
>> +
>> +for.cond1:                                        ; preds = %for.cond, %land.rhs
>> +  %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.0, %for.cond ]
>> +  %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond ]
>> +  %cmp2 = icmp ult i32 %i.1, 100
>> +  br i1 %cmp2, label %land.rhs, label %for.cond
>> +
>> +land.rhs:                                         ; preds = %for.cond1
>> +  %conv = zext i32 %i.1 to i64
>> +  %arrayidx = getelementptr inbounds [100 x i32]* %a, i64 0, i64 %conv
>> +  %0 = load i32* %arrayidx, align 4
>> +  %add = add i32 %0, %sum.1
>> +  %cmp4 = icmp ugt i32 %add, 1000
>> +  %inc = add i32 %i.1, 1
>> +  br i1 %cmp4, label %return, label %for.cond1
>> +
>> +return:                                           ; preds = %for.cond, %land.rhs
>> +  %retval.0 = phi i32 [ 1000, %land.rhs ], [ %sum.0, %for.cond ]
>> +  ret i32 %retval.0
>> +
>> +; CHECK: @test1
>> +; CHECK: for.cond1.preheader:
>> +; CHECK: %sum.04 = phi i32 [ 0, %entry ], [ %sum.1.lcssa, %for.cond.loopexit ]
>> +; CHECK: br label %for.cond1
>> +
>> +; CHECK: for.cond1:
>> +; CHECK: %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.04, %for.cond1.preheader ]
>> +; CHECK: %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond1.preheader ]
>> +; CHECK: %cmp2 = icmp ult i32 %i.1, 100
>> +; CHECK: br i1 %cmp2, label %land.rhs, label %for.cond.loopexit
>> +}
>> +
>> +define void @test2(i32 %x) nounwind {
>> +entry:
>> +  br label %for.cond
>> +
>> +for.cond:                                         ; preds = %if.end, %entry
>> +  %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ]
>> +  %cmp = icmp eq i32 %i.0, %x
>> +  br i1 %cmp, label %return.loopexit, label %for.body
>> +
>> +for.body:                                         ; preds = %for.cond
>> +  %call = tail call i32 @foo(i32 %i.0) nounwind
>> +  %tobool = icmp eq i32 %call, 0
>> +  br i1 %tobool, label %if.end, label %a
>> +
>> +if.end:                                           ; preds = %for.body
>> +  %call1 = tail call i32 @foo(i32 42) nounwind
>> +  %inc = add i32 %i.0, 1
>> +  br label %for.cond
>> +
>> +a:                                                ; preds = %for.body
>> +  %call2 = tail call i32 @bar(i32 1) nounwind
>> +  br label %return
>> +
>> +return.loopexit:                                  ; preds = %for.cond
>> +  br label %return
>> +
>> +return:                                           ; preds = %return.loopexit, %a
>> +  ret void
>> +
>> +; CHECK: @test2
>> +; CHECK: if.end:
>> +; CHECK: %inc = add i32 %i.02, 1
>> +; CHECK: %cmp = icmp eq i32 %inc, %x
>> +; CHECK: br i1 %cmp, label %for.cond.return.loopexit_crit_edge, label %for.body
>> +}
>> +
>> +declare i32 @foo(i32)
>> +
>> +declare i32 @bar(i32)
>> +
>> +@_ZTIi = external constant i8*
>> +
>> +; Verify dominators.
>> +define void @test3(i32 %x) {
>> +entry:
>> +  %cmp2 = icmp eq i32 0, %x
>> +  br i1 %cmp2, label %try.cont.loopexit, label %for.body.lr.ph
>> +
>> +for.body.lr.ph:                                   ; preds = %entry
>> +  br label %for.body
>> +
>> +for.body:                                         ; preds = %for.body.lr.ph, %for.inc
>> +  %i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
>> +  invoke void @_Z3fooi(i32 %i.03)
>> +          to label %for.inc unwind label %lpad
>> +
>> +for.inc:                                          ; preds = %for.body
>> +  %inc = add i32 %i.03, 1
>> +  %cmp = icmp eq i32 %inc, %x
>> +  br i1 %cmp, label %for.cond.try.cont.loopexit_crit_edge, label %for.body
>> +
>> +lpad:                                             ; preds = %for.body
>> +  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
>> +          catch i8* bitcast (i8** @_ZTIi to i8*)
>> +  %1 = extractvalue { i8*, i32 } %0, 0
>> +  %2 = extractvalue { i8*, i32 } %0, 1
>> +  %3 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind
>> +  %matches = icmp eq i32 %2, %3
>> +  br i1 %matches, label %catch, label %eh.resume
>> +
>> +catch:                                            ; preds = %lpad
>> +  %4 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind
>> +  br i1 true, label %invoke.cont2.loopexit, label %for.body.i.lr.ph
>> +
>> +for.body.i.lr.ph:                                 ; preds = %catch
>> +  br label %for.body.i
>> +
>> +for.body.i:                                       ; preds = %for.body.i.lr.ph, %for.inc.i
>> +  %i.0.i1 = phi i32 [ 0, %for.body.i.lr.ph ], [ %inc.i, %for.inc.i ]
>> +  invoke void @_Z3fooi(i32 %i.0.i1)
>> +          to label %for.inc.i unwind label %lpad.i
>> +
>> +for.inc.i:                                        ; preds = %for.body.i
>> +  %inc.i = add i32 %i.0.i1, 1
>> +  %cmp.i = icmp eq i32 %inc.i, 0
>> +  br i1 %cmp.i, label %for.cond.i.invoke.cont2.loopexit_crit_edge, label %for.body.i
>> +
>> +lpad.i:                                           ; preds = %for.body.i
>> +  %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
>> +          catch i8* bitcast (i8** @_ZTIi to i8*)
>> +  %6 = extractvalue { i8*, i32 } %5, 0
>> +  %7 = extractvalue { i8*, i32 } %5, 1
>> +  %matches.i = icmp eq i32 %7, %3
>> +  br i1 %matches.i, label %catch.i, label %lpad1.body
>> +
>> +catch.i:                                          ; preds = %lpad.i
>> +  %8 = tail call i8* @__cxa_begin_catch(i8* %6) nounwind
>> +  invoke void @_Z3barj(i32 0)
>> +          to label %invoke.cont2.i unwind label %lpad1.i
>> +
>> +invoke.cont2.i:                                   ; preds = %catch.i
>> +  tail call void @__cxa_end_catch() nounwind
>> +  br label %invoke.cont2
>> +
>> +lpad1.i:                                          ; preds = %catch.i
>> +  %9 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
>> +          cleanup
>> +  %10 = extractvalue { i8*, i32 } %9, 0
>> +  %11 = extractvalue { i8*, i32 } %9, 1
>> +  tail call void @__cxa_end_catch() nounwind
>> +  br label %lpad1.body
>> +
>> +for.cond.i.invoke.cont2.loopexit_crit_edge:       ; preds = %for.inc.i
>> +  br label %invoke.cont2.loopexit
>> +
>> +invoke.cont2.loopexit:                            ; preds = %for.cond.i.invoke.cont2.loopexit_crit_edge, %catch
>> +  br label %invoke.cont2
>> +
>> +invoke.cont2:                                     ; preds = %invoke.cont2.loopexit, %invoke.cont2.i
>> +  tail call void @__cxa_end_catch() nounwind
>> +  br label %try.cont
>> +
>> +for.cond.try.cont.loopexit_crit_edge:             ; preds = %for.inc
>> +  br label %try.cont.loopexit
>> +
>> +try.cont.loopexit:                                ; preds = %for.cond.try.cont.loopexit_crit_edge, %entry
>> +  br label %try.cont
>> +
>> +try.cont:                                         ; preds = %try.cont.loopexit, %invoke.cont2
>> +  ret void
>> +
>> +lpad1.body:                                       ; preds = %lpad1.i, %lpad.i
>> +  %exn.slot.0.i = phi i8* [ %10, %lpad1.i ], [ %6, %lpad.i ]
>> +  %ehselector.slot.0.i = phi i32 [ %11, %lpad1.i ], [ %7, %lpad.i ]
>> +  tail call void @__cxa_end_catch() nounwind
>> +  br label %eh.resume
>> +
>> +eh.resume:                                        ; preds = %lpad1.body, %lpad
>> +  %exn.slot.0 = phi i8* [ %exn.slot.0.i, %lpad1.body ], [ %1, %lpad ]
>> +  %ehselector.slot.0 = phi i32 [ %ehselector.slot.0.i, %lpad1.body ], [ %2, %lpad ]
>> +  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0
>> +  %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1
>> +  resume { i8*, i32 } %lpad.val5
>> +}
>> +
>> +declare void @_Z3fooi(i32)
>> +
>> +declare i32 @__gxx_personality_v0(...)
>> +
>> +declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
>> +
>> +declare i8* @__cxa_begin_catch(i8*)
>> +
>> +declare void @__cxa_end_catch()
>> 
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits





More information about the llvm-commits mailing list