[llvm] r228186 - StructurizeCFG: Use a reverse post-order traversal

Tom Stellard thomas.stellard at amd.com
Wed Feb 4 12:49:44 PST 2015


Author: tstellar
Date: Wed Feb  4 14:49:44 2015
New Revision: 228186

URL: http://llvm.org/viewvc/llvm-project?rev=228186&view=rev
Log:
StructurizeCFG: Use a reverse post-order traversal

We were previously doing a post-order traversal and operating on the
list in reverse, however this would occasionaly cause backedges for
loops to be visited before some of the other blocks in the loop.

We know use a reverse post-order traversal, which avoids this issue.

The reverse post-order traversal is not completely ideal, so we need
to manually fixup the list to ensure that inner loop backedges are
visited before outer loop backedges.

Added:
    llvm/trunk/test/Transforms/StructurizeCFG/nested-loop-order.ll
    llvm/trunk/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/StructurizeCFG.cpp
    llvm/trunk/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll

Modified: llvm/trunk/lib/Transforms/Scalar/StructurizeCFG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/StructurizeCFG.cpp?rev=228186&r1=228185&r2=228186&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/StructurizeCFG.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/StructurizeCFG.cpp Wed Feb  4 14:49:44 2015
@@ -10,12 +10,14 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/Analysis/RegionIterator.h"
 #include "llvm/Analysis/RegionPass.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 
 using namespace llvm;
@@ -281,11 +283,65 @@ bool StructurizeCFG::doInitialization(Re
 
 /// \brief Build up the general order of nodes
 void StructurizeCFG::orderNodes() {
-  scc_iterator<Region *> I = scc_begin(ParentRegion);
-  for (Order.clear(); !I.isAtEnd(); ++I) {
-    const std::vector<RegionNode *> &Nodes = *I;
-    Order.append(Nodes.begin(), Nodes.end());
+  RNVector TempOrder;
+  ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
+  TempOrder.append(RPOT.begin(), RPOT.end());
+
+  std::map<Loop*, unsigned> LoopBlocks;
+
+
+  // The reverse post-order traversal of the list gives us an ordering close
+  // to what we want.  The only problem with it is that sometimes backedges
+  // for outer loops will be visited before backedges for inner loops.
+  for (RegionNode *RN : TempOrder) {
+    BasicBlock *BB = RN->getEntry();
+    Loop *Loop = LI->getLoopFor(BB);
+    if (!LoopBlocks.count(Loop)) {
+      LoopBlocks[Loop] = 1;
+      continue;
+    }
+    LoopBlocks[Loop]++;
   }
+
+  unsigned CurrentLoopDepth = 0;
+  Loop *CurrentLoop = nullptr;
+  BBSet TempVisited;
+  for (RNVector::iterator I = TempOrder.begin(), E = TempOrder.end(); I != E; ++I) {
+    BasicBlock *BB = (*I)->getEntry();
+    unsigned LoopDepth = LI->getLoopDepth(BB);
+
+    if (std::find(Order.begin(), Order.end(), *I) != Order.end())
+      continue;
+
+    if (LoopDepth < CurrentLoopDepth) {
+      // Make sure we have visited all blocks in this loop before moving back to
+      // the outer loop.
+
+      RNVector::iterator LoopI = I;
+      while(LoopBlocks[CurrentLoop]) {
+        LoopI++;
+        BasicBlock *LoopBB = (*LoopI)->getEntry();
+        if (LI->getLoopFor(LoopBB) == CurrentLoop) {
+          LoopBlocks[CurrentLoop]--;
+          Order.push_back(*LoopI);
+        }
+      }
+    }
+
+    CurrentLoop = LI->getLoopFor(BB);
+    if (CurrentLoop) {
+      LoopBlocks[CurrentLoop]--;
+    }
+
+    CurrentLoopDepth = LoopDepth;
+    Order.push_back(*I);
+  }
+
+  // This pass originally used a post-order traversal and then operated on
+  // the list in reverse. Now that we are using a reverse post-order traversal
+  // rather than re-working the whole pass to operate on the list in order,
+  // we just reverse the list and continue to operate on it in reverse.
+  std::reverse(Order.begin(), Order.end());
 }
 
 /// \brief Determine the end of the loops
@@ -441,6 +497,10 @@ void StructurizeCFG::collectInfos() {
   for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
        OI != OE; ++OI) {
 
+    DEBUG(dbgs() << "Visiting: " <<
+                    ((*OI)->isSubRegion() ? "SubRegion with entry: " : "") <<
+                    (*OI)->getEntry()->getName() << " Loop Depth: " << LI->getLoopDepth((*OI)->getEntry()) << "\n");
+
     // Analyze all the conditions leading to a node
     gatherPredicates(*OI);
 

Added: llvm/trunk/test/Transforms/StructurizeCFG/nested-loop-order.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/StructurizeCFG/nested-loop-order.ll?rev=228186&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/StructurizeCFG/nested-loop-order.ll (added)
+++ llvm/trunk/test/Transforms/StructurizeCFG/nested-loop-order.ll Wed Feb  4 14:49:44 2015
@@ -0,0 +1,79 @@
+; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
+
+define void @main(float addrspace(1)* %out) {
+
+; CHECK: main_body:
+; CHECK: br label %LOOP.outer
+main_body:
+  br label %LOOP.outer
+
+; CHECK: LOOP.outer:
+; CHECK: br label %LOOP
+LOOP.outer:                                       ; preds = %ENDIF28, %main_body
+  %temp8.0.ph = phi float [ 0.000000e+00, %main_body ], [ %tmp35, %ENDIF28 ]
+  %temp4.0.ph = phi i32 [ 0, %main_body ], [ %tmp20, %ENDIF28 ]
+  br label %LOOP
+
+; CHECK: LOOP:
+; br i1 %{{[0-9]+}}, label %ENDIF, label %Flow
+LOOP:                                             ; preds = %IF29, %LOOP.outer
+  %temp4.0 = phi i32 [ %temp4.0.ph, %LOOP.outer ], [ %tmp20, %IF29 ]
+  %tmp20 = add i32 %temp4.0, 1
+  %tmp22 = icmp sgt i32 %tmp20, 3
+  br i1 %tmp22, label %ENDLOOP, label %ENDIF
+
+; CHECK: Flow3
+; CHECK: br i1 %{{[0-9]+}}, label %ENDLOOP, label %LOOP.outer
+
+; CHECK: ENDLOOP:
+; CHECK: ret void
+ENDLOOP:                                          ; preds = %ENDIF28, %IF29, %LOOP
+  %temp8.1 = phi float [ %temp8.0.ph, %LOOP ], [ %temp8.0.ph, %IF29 ], [ %tmp35, %ENDIF28 ]
+  %tmp23 = icmp eq i32 %tmp20, 3
+  %.45 = select i1 %tmp23, float 0.000000e+00, float 1.000000e+00
+  store float %.45, float addrspace(1)* %out
+  ret void
+
+; CHECK: ENDIF:
+; CHECK: br i1 %tmp31, label %IF29, label %Flow1
+ENDIF:                                            ; preds = %LOOP
+  %tmp31 = icmp sgt i32 %tmp20, 1
+  br i1 %tmp31, label %IF29, label %ENDIF28
+
+; CHECK: Flow:
+; CHECK br i1 %{{[0-9]+}}, label %Flow, label %LOOP
+
+; CHECK: IF29:
+; CHECK: br label %Flow1
+IF29:                                             ; preds = %ENDIF
+  %tmp32 = icmp sgt i32 %tmp20, 2
+  br i1 %tmp32, label %ENDLOOP, label %LOOP
+
+; CHECK: Flow1:
+; CHECK: br label %Flow
+
+; CHECK: Flow2:
+; CHECK: br i1 %{{[0-9]+}}, label %ENDIF28, label %Flow3
+
+; CHECK: ENDIF28:
+; CHECK: br label %Flow3
+ENDIF28:                                          ; preds = %ENDIF
+  %tmp35 = fadd float %temp8.0.ph, 1.0
+  %tmp36 = icmp sgt i32 %tmp20, 2
+  br i1 %tmp36, label %ENDLOOP, label %LOOP.outer
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
+
+; Function Attrs: readnone
+declare float @llvm.AMDIL.clamp.(float, float, float) #2
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { readnone }
+
+!0 = !{!1, !1, i64 0, i32 1}
+!1 = !{!"const", null}

Modified: llvm/trunk/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll?rev=228186&r1=228185&r2=228186&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll (original)
+++ llvm/trunk/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll Wed Feb  4 14:49:44 2015
@@ -11,28 +11,29 @@ bb:
 bb3:                                              ; preds = %bb7, %bb
   %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb7 ]
   %tmp4 = fcmp ult float %arg1, 3.500000e+00
-; CHECK: br i1 %tmp4, label %bb7, label %Flow
+; CHECK: %0 = xor i1 %tmp4, true
+; CHECK: br i1 %0, label %bb5, label %Flow
   br i1 %tmp4, label %bb7, label %bb5
 
-; CHECK: Flow:
-; CHECK: br i1 %2, label %Flow1, label %bb3
-
-; CHECK: Flow1:
-; CHECK: br i1 %3, label %bb5, label %bb10
-
 ; CHECK: bb5:
 bb5:                                              ; preds = %bb3
   %tmp6 = fcmp olt float 0.000000e+00, %arg2
-; CHECK: br label %bb10
+; CHECK: br label %Flow
   br i1 %tmp6, label %bb10, label %bb7
 
+; CHECK: Flow:
+; CHECK: br i1 %3, label %bb7, label %Flow1
+
 ; CHECK: bb7
 bb7:                                              ; preds = %bb5, %bb3
   %tmp8 = add nuw nsw i64 %tmp, 1
   %tmp9 = icmp slt i64 %tmp8, 5
-; CHECK: br label %Flow
+; CHECK: br label %Flow1
   br i1 %tmp9, label %bb3, label %bb10
 
+; CHECK: Flow1:
+; CHECK: br i1 %7, label %bb10, label %bb3
+
 ; CHECK: bb10
 bb10:                                             ; preds = %bb7, %bb5
   %tmp11 = phi i32 [ 15, %bb5 ], [ 255, %bb7 ]

Added: llvm/trunk/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll?rev=228186&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll (added)
+++ llvm/trunk/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll Wed Feb  4 14:49:44 2015
@@ -0,0 +1,100 @@
+; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
+
+; The structurize cfg pass used to do a post-order traversal to generate a list
+; of ; basic blocks and then operate on the list in reverse.  This led to bugs,
+; because sometimes successors would be visited before their predecessors.
+; The fix for this was to do a reverse post-order traversal which is what the
+; algorithm requires.
+
+; Function Attrs: nounwind
+define void @test(float* nocapture %out, i32 %K1, float* nocapture readonly %nr) {
+
+; CHECK: entry:
+; CHECK: br label %for.body
+entry:
+  br label %for.body
+
+; CHECK: for.body:
+; CHECK: br i1 %{{[0-9]+}}, label %lor.lhs.false, label %Flow
+for.body:                                         ; preds = %for.body.backedge, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.be, %for.body.backedge ], [ 1, %entry ]
+  %best_val.027 = phi float [ %best_val.027.be, %for.body.backedge ], [ 5.000000e+01, %entry ]
+  %prev_start.026 = phi i32 [ %tmp26, %for.body.backedge ], [ 0, %entry ]
+  %best_count.025 = phi i32 [ %best_count.025.be, %for.body.backedge ], [ 0, %entry ]
+  %tmp0 = trunc i64 %indvars.iv to i32
+  %cmp1 = icmp eq i32 %tmp0, %K1
+  br i1 %cmp1, label %if.then, label %lor.lhs.false
+
+; CHECK: lor.lhs.false:
+; CHECK: br label %Flow
+lor.lhs.false:                                    ; preds = %for.body
+  %arrayidx = getelementptr inbounds float* %nr, i64 %indvars.iv
+  %tmp1 = load float* %arrayidx, align 4
+  %tmp2 = add nsw i64 %indvars.iv, -1
+  %arrayidx2 = getelementptr inbounds float* %nr, i64 %tmp2
+  %tmp3 = load float* %arrayidx2, align 4
+  %cmp3 = fcmp une float %tmp1, %tmp3
+  br i1 %cmp3, label %if.then, label %for.body.1
+
+; CHECK: Flow:
+; CHECK: br i1 %{{[0-9]+}}, label %if.then, label %Flow1
+
+; CHECK: if.then:
+; CHECK: br label %Flow1
+if.then:                                          ; preds = %lor.lhs.false, %for.body
+  %sub4 = sub nsw i32 %tmp0, %prev_start.026
+  %tmp4 = add nsw i64 %indvars.iv, -1
+  %arrayidx8 = getelementptr inbounds float* %nr, i64 %tmp4
+  %tmp5 = load float* %arrayidx8, align 4
+  br i1 %cmp1, label %for.end, label %for.body.1
+
+; CHECK: for.end:
+; CHECK: ret void
+for.end:                                          ; preds = %for.body.1, %if.then
+  %best_val.0.lcssa = phi float [ %best_val.233, %for.body.1 ], [ %tmp5, %if.then ]
+  store float %best_val.0.lcssa, float* %out, align 4
+  ret void
+
+; CHECK: Flow1
+; CHECK: br i1 %{{[0-9]}}, label %for.body.1, label %Flow2
+
+; CHECK: for.body.1:
+; CHECK: br i1 %{{[0-9]+}}, label %for.body.6, label %Flow3
+for.body.1:                                       ; preds = %if.then, %lor.lhs.false
+  %best_val.233 = phi float [ %tmp5, %if.then ], [ %best_val.027, %lor.lhs.false ]
+  %best_count.231 = phi i32 [ %sub4, %if.then ], [ %best_count.025, %lor.lhs.false ]
+  %indvars.iv.next.454 = add nsw i64 %indvars.iv, 5
+  %tmp22 = trunc i64 %indvars.iv.next.454 to i32
+  %cmp1.5 = icmp eq i32 %tmp22, %K1
+  br i1 %cmp1.5, label %for.end, label %for.body.6
+
+; CHECK: Flow2:
+; CHECK: br i1 %{{[0-9]+}}, label %for.end, label %for.body
+
+; CHECK: for.body.6:
+; CHECK: br i1 %cmp5.6, label %if.then6.6, label %for.body.backedge
+for.body.6:                                       ; preds = %for.body.1
+  %indvars.iv.next.559 = add nsw i64 %indvars.iv, 6
+  %tmp26 = trunc i64 %indvars.iv.next.559 to i32
+  %sub4.6 = sub nsw i32 %tmp26, %tmp22
+  %cmp5.6 = icmp slt i32 %best_count.231, %sub4.6
+  br i1 %cmp5.6, label %if.then6.6, label %for.body.backedge
+
+; CHECK: if.then6.6
+; CHECK: br label %for.body.backedge
+if.then6.6:                                       ; preds = %for.body.6
+  %arrayidx8.6 = getelementptr inbounds float* %nr, i64 %indvars.iv.next.454
+  %tmp29 = load float* %arrayidx8.6, align 4
+  br label %for.body.backedge
+
+; CHECK: Flow3:
+; CHECK: br label %Flow2
+
+; CHECK: for.body.backedge:
+; CHECK: br label %Flow3
+for.body.backedge:                                ; preds = %if.then6.6, %for.body.6
+  %best_val.027.be = phi float [ %tmp29, %if.then6.6 ], [ %best_val.233, %for.body.6 ]
+  %best_count.025.be = phi i32 [ %sub4.6, %if.then6.6 ], [ %best_count.231, %for.body.6 ]
+  %indvars.iv.be = add nsw i64 %indvars.iv, 7
+  br label %for.body
+}





More information about the llvm-commits mailing list