PATCHES: StructurizeCFG bug-fix
Tom Stellard
tom at stellard.net
Tue Feb 3 19:15:12 PST 2015
Ping.
On Thu, Jan 22, 2015 at 12:36:04PM -0800, Tom Stellard wrote:
> Yes, here it is.
>
> On Thu, Jan 22, 2015 at 09:33:48PM +0100, Christian König wrote:
> > Forgot the attachment?
> >
> > On 22.01.2015 at 21:24, Tom Stellard wrote:
> > >Hi,
> > >
> > >Attached are two patches for the StructurizeCFG pass. The first changes
> > >the ordering of the blocks to reverse post-order, which fixes invalid
> > >code being generated for certain kinds of loops.
> > >
> > >The second patch removes an earlier fix to backedge detection, which is no
> > >longer needed after the first patch.
> > >
> > >-Tom
> >
> From c9cb5a4b2e7406d32428def7c79f9d7de5715afb Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Tue, 20 Jan 2015 16:43:00 -0500
> Subject: [PATCH 1/2] StructurizeCFG: Use a reverse post-order traversal
>
> We were previously doing a post-order traversal and operating on the
> list in reverse; however, this would occasionally cause backedges for
> loops to be visited before some of the other blocks in the loop.
>
> We now use a reverse post-order traversal, which avoids this issue.
>
> The reverse post-order traversal is not completely ideal, so we need
> to manually fix up the list to ensure that inner loop backedges are
> visited before outer loop backedges.
> ---
> lib/Transforms/Scalar/StructurizeCFG.cpp | 68 +++++++++++-
> .../Transforms/StructurizeCFG/nested-loop-order.ll | 79 ++++++++++++++
> .../StructurizeCFG/one-loop-multiple-backedges.ll | 19 ++--
> .../StructurizeCFG/post-order-traversal-bug.ll | 117 +++++++++++++++++++++
> 4 files changed, 270 insertions(+), 13 deletions(-)
> create mode 100644 test/Transforms/StructurizeCFG/nested-loop-order.ll
> create mode 100644 test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
>
> diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
> index 7fe87f9..f0c4095 100644
> --- a/lib/Transforms/Scalar/StructurizeCFG.cpp
> +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
> @@ -10,12 +10,14 @@
> #include "llvm/Transforms/Scalar.h"
> #include "llvm/ADT/MapVector.h"
> #include "llvm/ADT/SCCIterator.h"
> +#include "llvm/ADT/PostOrderIterator.h"
> #include "llvm/Analysis/LoopInfo.h"
> #include "llvm/Analysis/RegionInfo.h"
> #include "llvm/Analysis/RegionIterator.h"
> #include "llvm/Analysis/RegionPass.h"
> #include "llvm/IR/Module.h"
> #include "llvm/IR/PatternMatch.h"
> +#include "llvm/Support/Debug.h"
> #include "llvm/Transforms/Utils/SSAUpdater.h"
>
> using namespace llvm;
> @@ -281,11 +283,65 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
>
> /// \brief Build up the general order of nodes
> void StructurizeCFG::orderNodes() {
> - scc_iterator<Region *> I = scc_begin(ParentRegion);
> - for (Order.clear(); !I.isAtEnd(); ++I) {
> - const std::vector<RegionNode *> &Nodes = *I;
> - Order.append(Nodes.begin(), Nodes.end());
> +  RNVector TempOrder;
> +  ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
> +  TempOrder.append(RPOT.begin(), RPOT.end());
> +
> +  std::map<Loop*, unsigned> LoopBlocks;
> +
> +
> +  // The reverse post-order traversal gives us an ordering close to what we
> +  // want.  The only problem with it is that sometimes backedges for outer
> +  // loops will be visited before backedges for inner loops.
> +  for (RegionNode *RN : TempOrder) {
> +    BasicBlock *BB = RN->getEntry();
> +    Loop *Loop = LI->getLoopFor(BB);
> +    if (!LoopBlocks.count(Loop)) {
> +      LoopBlocks[Loop] = 1;
> +      continue;
> +    }
> +    LoopBlocks[Loop]++;
> +  }
> +
> +  unsigned CurrentLoopDepth = 0;
> +  Loop *CurrentLoop = nullptr;
> +  BBSet TempVisited;
> +  for (RNVector::iterator I = TempOrder.begin(), E = TempOrder.end(); I != E; ++I) {
> +    BasicBlock *BB = (*I)->getEntry();
> +    unsigned LoopDepth = LI->getLoopDepth(BB);
> +
> +    if (std::find(Order.begin(), Order.end(), *I) != Order.end())
> +      continue;
> +
> +    if (LoopDepth < CurrentLoopDepth) {
> +      // Make sure we have visited all blocks in this loop before moving back
> +      // to the outer loop.
> +
> +      RNVector::iterator LoopI = I;
> +      while (LoopBlocks[CurrentLoop]) {
> +        LoopI++;
> +        BasicBlock *LoopBB = (*LoopI)->getEntry();
> +        if (LI->getLoopFor(LoopBB) == CurrentLoop) {
> +          LoopBlocks[CurrentLoop]--;
> +          Order.push_back(*LoopI);
> +        }
> +      }
> +    }
> +
> +    CurrentLoop = LI->getLoopFor(BB);
> +    if (CurrentLoop) {
> +      LoopBlocks[CurrentLoop]--;
> +    }
> +
> +    CurrentLoopDepth = LoopDepth;
> +    Order.push_back(*I);
>    }
> +
> +  // This pass originally used a post-order traversal and then operated on the
> +  // list in reverse.  Now that we are using a reverse post-order traversal,
> +  // rather than reworking the whole pass to operate on the list in order, we
> +  // just reverse the list and continue to operate on it in reverse.
> +  std::reverse(Order.begin(), Order.end());
> }
>
> /// \brief Determine the end of the loops
> @@ -441,6 +497,10 @@ void StructurizeCFG::collectInfos() {
> for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
> OI != OE; ++OI) {
>
> +    DEBUG(dbgs() << "Visiting: " <<
> +          ((*OI)->isSubRegion() ? "SubRegion with entry: " : "") <<
> +          (*OI)->getEntry()->getName() << " Loop Depth: " << LI->getLoopDepth((*OI)->getEntry()) << "\n");
> +
> // Analyze all the conditions leading to a node
> gatherPredicates(*OI);
>
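The core of the change above is the switch to LLVM's ReversePostOrderTraversal.
Here is a minimal standalone sketch (not part of the patch, and shown over a
Function rather than a Region) of how that iterator is typically driven:

#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Print the blocks of a function in reverse post-order: apart from loop
// backedges (and irreducible control flow), every block is printed before
// its successors.
static void printRPO(Function &F) {
  ReversePostOrderTraversal<Function *> RPOT(&F);
  for (BasicBlock *BB : RPOT)
    errs() << BB->getName() << "\n";
}

The patch instantiates the same traversal over Region* and then fixes up the
resulting list so that all blocks of an inner loop are emitted before the
ordering returns to the enclosing loop.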
> diff --git a/test/Transforms/StructurizeCFG/nested-loop-order.ll b/test/Transforms/StructurizeCFG/nested-loop-order.ll
> new file mode 100644
> index 0000000..fee1ff0
> --- /dev/null
> +++ b/test/Transforms/StructurizeCFG/nested-loop-order.ll
> @@ -0,0 +1,79 @@
> +; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
> +
> +define void @main(float addrspace(1)* %out) {
> +
> +; CHECK: main_body:
> +; CHECK: br label %LOOP.outer
> +main_body:
> + br label %LOOP.outer
> +
> +; CHECK: LOOP.outer:
> +; CHECK: br label %LOOP
> +LOOP.outer: ; preds = %ENDIF28, %main_body
> + %temp8.0.ph = phi float [ 0.000000e+00, %main_body ], [ %tmp35, %ENDIF28 ]
> + %temp4.0.ph = phi i32 [ 0, %main_body ], [ %tmp20, %ENDIF28 ]
> + br label %LOOP
> +
> +; CHECK: LOOP:
> +; CHECK: br i1 %{{[0-9]+}}, label %ENDIF, label %Flow
> +LOOP: ; preds = %IF29, %LOOP.outer
> + %temp4.0 = phi i32 [ %temp4.0.ph, %LOOP.outer ], [ %tmp20, %IF29 ]
> + %tmp20 = add i32 %temp4.0, 1
> + %tmp22 = icmp sgt i32 %tmp20, 3
> + br i1 %tmp22, label %ENDLOOP, label %ENDIF
> +
> +; CHECK: Flow3
> +; CHECK: br i1 %{{[0-9]+}}, label %ENDLOOP, label %LOOP.outer
> +
> +; CHECK: ENDLOOP:
> +; CHECK: ret void
> +ENDLOOP: ; preds = %ENDIF28, %IF29, %LOOP
> + %temp8.1 = phi float [ %temp8.0.ph, %LOOP ], [ %temp8.0.ph, %IF29 ], [ %tmp35, %ENDIF28 ]
> + %tmp23 = icmp eq i32 %tmp20, 3
> + %.45 = select i1 %tmp23, float 0.000000e+00, float 1.000000e+00
> + store float %.45, float addrspace(1)* %out
> + ret void
> +
> +; CHECK: ENDIF:
> +; CHECK: br i1 %tmp31, label %IF29, label %Flow1
> +ENDIF: ; preds = %LOOP
> + %tmp31 = icmp sgt i32 %tmp20, 1
> + br i1 %tmp31, label %IF29, label %ENDIF28
> +
> +; CHECK: Flow:
> +; CHECK: br i1 %{{[0-9]+}}, label %Flow, label %LOOP
> +
> +; CHECK: IF29:
> +; CHECK: br label %Flow1
> +IF29: ; preds = %ENDIF
> + %tmp32 = icmp sgt i32 %tmp20, 2
> + br i1 %tmp32, label %ENDLOOP, label %LOOP
> +
> +; CHECK: Flow1:
> +; CHECK: br label %Flow
> +
> +; CHECK: Flow2:
> +; CHECK: br i1 %{{[0-9]+}}, label %ENDIF28, label %Flow3
> +
> +; CHECK: ENDIF28:
> +; CHECK: br label %Flow3
> +ENDIF28: ; preds = %ENDIF
> + %tmp35 = fadd float %temp8.0.ph, 1.0
> + %tmp36 = icmp sgt i32 %tmp20, 2
> + br i1 %tmp36, label %ENDLOOP, label %LOOP.outer
> +}
> +
> +; Function Attrs: nounwind readnone
> +declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
> +
> +; Function Attrs: readnone
> +declare float @llvm.AMDIL.clamp.(float, float, float) #2
> +
> +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
> +
> +attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
> +attributes #1 = { nounwind readnone }
> +attributes #2 = { readnone }
> +
> +!0 = !{!1, !1, i64 0, i32 1}
> +!1 = !{!"const", null}
> diff --git a/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll b/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
> index 8ebd47a..668a1e9 100644
> --- a/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
> +++ b/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
> @@ -11,28 +11,29 @@ bb:
> bb3: ; preds = %bb7, %bb
> %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb7 ]
> %tmp4 = fcmp ult float %arg1, 3.500000e+00
> -; CHECK: br i1 %tmp4, label %bb7, label %Flow
> +; CHECK: %0 = xor i1 %tmp4, true
> +; CHECK: br i1 %0, label %bb5, label %Flow
> br i1 %tmp4, label %bb7, label %bb5
>
> -; CHECK: Flow:
> -; CHECK: br i1 %2, label %Flow1, label %bb3
> -
> -; CHECK: Flow1:
> -; CHECK: br i1 %3, label %bb5, label %bb10
> -
> ; CHECK: bb5:
> bb5: ; preds = %bb3
> %tmp6 = fcmp olt float 0.000000e+00, %arg2
> -; CHECK: br label %bb10
> +; CHECK: br label %Flow
> br i1 %tmp6, label %bb10, label %bb7
>
> +; CHECK: Flow:
> +; CHECK: br i1 %3, label %bb7, label %Flow1
> +
> ; CHECK: bb7
> bb7: ; preds = %bb5, %bb3
> %tmp8 = add nuw nsw i64 %tmp, 1
> %tmp9 = icmp slt i64 %tmp8, 5
> -; CHECK: br label %Flow
> +; CHECK: br label %Flow1
> br i1 %tmp9, label %bb3, label %bb10
>
> +; CHECK: Flow1:
> +; CHECK: br i1 %7, label %bb10, label %bb3
> +
> ; CHECK: bb10
> bb10: ; preds = %bb7, %bb5
> %tmp11 = phi i32 [ 15, %bb5 ], [ 255, %bb7 ]
> diff --git a/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll b/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
> new file mode 100644
> index 0000000..c48110e
> --- /dev/null
> +++ b/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
> @@ -0,0 +1,117 @@
> +; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
> +
> +; The StructurizeCFG pass used to do a post-order traversal to generate a list
> +; of basic blocks and then operate on the list in reverse.  This led to bugs,
> +; because sometimes successors would be visited before their predecessors.
> +; The fix for this was to do a reverse post-order traversal which is what the
> +; algorithm requires.
> +
> +; Function Attrs: nounwind
> +define void @test(float* nocapture %out, i32 %K1, float* nocapture readonly %nr) #0 {
> +
> +; CHECK: entry:
> +; CHECK: br label %for.body
> +entry:
> + br label %for.body
> +
> +; CHECK: for.body:
> +; CHECK: br i1 %{{[0-9]+}}, label %lor.lhs.false, label %Flow
> +for.body: ; preds = %for.body.backedge, %entry
> + %indvars.iv = phi i64 [ %indvars.iv.be, %for.body.backedge ], [ 1, %entry ]
> + %best_val.027 = phi float [ %best_val.027.be, %for.body.backedge ], [ 5.000000e+01, %entry ]
> + %prev_start.026 = phi i32 [ %tmp26, %for.body.backedge ], [ 0, %entry ]
> + %best_count.025 = phi i32 [ %best_count.025.be, %for.body.backedge ], [ 0, %entry ]
> + %tmp0 = trunc i64 %indvars.iv to i32
> + %cmp1 = icmp eq i32 %tmp0, %K1
> + br i1 %cmp1, label %if.then, label %lor.lhs.false
> +
> +; CHECK: lor.lhs.false:
> +; CHECK: br label %Flow
> +lor.lhs.false: ; preds = %for.body
> + %arrayidx = getelementptr inbounds float* %nr, i64 %indvars.iv
> + %tmp1 = load float* %arrayidx, align 4, !tbaa !7
> + %tmp2 = add nsw i64 %indvars.iv, -1
> + %arrayidx2 = getelementptr inbounds float* %nr, i64 %tmp2
> + %tmp3 = load float* %arrayidx2, align 4, !tbaa !7
> + %cmp3 = fcmp une float %tmp1, %tmp3
> + br i1 %cmp3, label %if.then, label %for.body.1
> +
> +; CHECK: Flow:
> +; CHECK: br i1 %{{[0-9]+}}, label %if.then, label %Flow1
> +
> +; CHECK: if.then:
> +; CHECK: br label %Flow1
> +if.then: ; preds = %lor.lhs.false, %for.body
> + %sub4 = sub nsw i32 %tmp0, %prev_start.026
> + %tmp4 = add nsw i64 %indvars.iv, -1
> + %arrayidx8 = getelementptr inbounds float* %nr, i64 %tmp4
> + %tmp5 = load float* %arrayidx8, align 4, !tbaa !7
> + br i1 %cmp1, label %for.end, label %for.body.1
> +
> +; CHECK: for.end:
> +; CHECK: ret void
> +for.end: ; preds = %for.body.1, %if.then
> + %best_val.0.lcssa = phi float [ %best_val.233, %for.body.1 ], [ %tmp5, %if.then ]
> + store float %best_val.0.lcssa, float* %out, align 4, !tbaa !7
> + ret void
> +
> +; CHECK: Flow1
> +; CHECK: br i1 %{{[0-9]}}, label %for.body.1, label %Flow2
> +
> +; CHECK: for.body.1:
> +; CHECK: br i1 %{{[0-9]+}}, label %for.body.6, label %Flow3
> +for.body.1: ; preds = %if.then, %lor.lhs.false
> + %best_val.233 = phi float [ %tmp5, %if.then ], [ %best_val.027, %lor.lhs.false ]
> + %best_count.231 = phi i32 [ %sub4, %if.then ], [ %best_count.025, %lor.lhs.false ]
> + %indvars.iv.next.454 = add nsw i64 %indvars.iv, 5
> + %tmp22 = trunc i64 %indvars.iv.next.454 to i32
> + %cmp1.5 = icmp eq i32 %tmp22, %K1
> + br i1 %cmp1.5, label %for.end, label %for.body.6
> +
> +; CHECK: Flow2:
> +; CHECK: br i1 %{{[0-9]+}}, label %for.end, label %for.body
> +
> +; CHECK: for.body.6:
> +; CHECK: br i1 %cmp5.6, label %if.then6.6, label %for.body.backedge
> +for.body.6: ; preds = %for.body.1
> + %indvars.iv.next.559 = add nsw i64 %indvars.iv, 6
> + %tmp26 = trunc i64 %indvars.iv.next.559 to i32
> + %sub4.6 = sub nsw i32 %tmp26, %tmp22
> + %cmp5.6 = icmp slt i32 %best_count.231, %sub4.6
> + br i1 %cmp5.6, label %if.then6.6, label %for.body.backedge
> +
> +; CHECK: if.then6.6
> +; CHECK: br label %for.body.backedge
> +if.then6.6: ; preds = %for.body.6
> + %arrayidx8.6 = getelementptr inbounds float* %nr, i64 %indvars.iv.next.454
> + %tmp29 = load float* %arrayidx8.6, align 4, !tbaa !7
> + br label %for.body.backedge
> +
> +; CHECK: Flow3:
> +; CHECK: br label %Flow2
> +
> +; CHECK: for.body.backedge:
> +; CHECK: br label %Flow3
> +for.body.backedge: ; preds = %if.then6.6, %for.body.6
> + %best_val.027.be = phi float [ %tmp29, %if.then6.6 ], [ %best_val.233, %for.body.6 ]
> + %best_count.025.be = phi i32 [ %sub4.6, %if.then6.6 ], [ %best_count.231, %for.body.6 ]
> + %indvars.iv.be = add nsw i64 %indvars.iv, 7
> + br label %for.body
> +}
> +
> +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
> +
> +!opencl.kernels = !{!0}
> +!llvm.ident = !{!6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6}
> +
> +!0 = !{void (float*, i32, float*)* @test, !1, !2, !3, !4, !5}
> +!1 = !{!"kernel_arg_addr_space", i32 1, i32 0, i32 1}
> +!2 = !{!"kernel_arg_access_qual", !"none", !"none", !"none"}
> +!3 = !{!"kernel_arg_type", !"float*", !"int", !"float*"}
> +!4 = !{!"kernel_arg_base_type", !"float*", !"int", !"float*"}
> +!5 = !{!"kernel_arg_type_qual", !"", !"", !""}
> +!6 = !{!"clang version 3.7.0 (http://llvm.org/git/clang.git 00f6327cc326763eb3efc6068f6e4fc71ff49fb3) (llvm/trunk 226164)"}
> +!7 = !{!8, !8, i64 0}
> +!8 = !{!"float", !9, i64 0}
> +!9 = !{!"omnipotent char", !10, i64 0}
> +!10 = !{!"Simple C/C++ TBAA"}
> --
> 1.8.5.5
>
> From d1b88127785b20dfaefdc8418598716186f8095f Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Tue, 20 Jan 2015 13:58:49 -0500
> Subject: [PATCH 2/2] StructurizeCFG: Remove obsolete fix for loop backedge
> detection
>
> This is no longer needed now that we are using a reverse post-order
> traversal.
> ---
> lib/Transforms/Scalar/StructurizeCFG.cpp | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
> index f0c4095..98abfb9 100644
> --- a/lib/Transforms/Scalar/StructurizeCFG.cpp
> +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
> @@ -360,7 +360,7 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
> for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
> BasicBlock *Succ = Term->getSuccessor(i);
>
> - if (Visited.count(Succ) && LI->isLoopHeader(Succ) ) {
> + if (Visited.count(Succ)) {
> Loops[Succ] = BB;
> }
> }
> --
> 1.8.5.5
>
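To make the reasoning behind the second patch concrete: once the nodes are
visited in reverse post-order, a successor that has already been visited can
only be reached through a backedge (assuming reducible control flow), so it is
necessarily a loop header and the extra LI->isLoopHeader(Succ) test adds no
information. A small standalone sketch of that invariant, independent of the
pass and not part of either patch:

#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include <utility>

using namespace llvm;

// Collect the backedges of a function by walking it in reverse post-order:
// an edge whose target has already been visited jumps backwards in the
// order, which for reducible CFGs means it closes a loop.
static void collectBackedges(
    Function &F,
    SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &Backedges) {
  SmallPtrSet<BasicBlock *, 32> Visited;
  ReversePostOrderTraversal<Function *> RPOT(&F);
  for (BasicBlock *BB : RPOT) {
    Visited.insert(BB);
    for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
      if (Visited.count(*SI))
        Backedges.push_back(std::make_pair(BB, *SI));
  }
}

StructurizeCFG relies on the same property of its Visited set now that its
processing order matches a reverse post-order.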
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits