PATCHES: StructurizeCFG bug-fix
Christian König
deathsimple at vodafone.de
Wed Feb 4 02:15:27 PST 2015
LGTM. I unfortunately just didn't found time to test it.
Christian.
Am 04.02.2015 um 04:15 schrieb Tom Stellard:
> Ping.
>
> On Thu, Jan 22, 2015 at 12:36:04PM -0800, Tom Stellard wrote:
>> Yes, here it is.
>>
>> On Thu, Jan 22, 2015 at 09:33:48PM +0100, Christian König wrote:
>>> Forgot the attachment?
>>>
>>> Am 22.01.2015 um 21:24 schrieb Tom Stellard:
>>>> Hi,
>>>>
>>>> Attached are two patches for the StrucuturizeCFG pass. The first changes
>>>> the ordering of the blocks to reverse post-order, which fixes invalid
>>>> code being generated for certain kinds of loops.
>>>>
>>>> The second patch removes an early fix to backedge detection which is no
>>>> longer needed after the first patch.
>>>>
>>>> -Tom
>> From c9cb5a4b2e7406d32428def7c79f9d7de5715afb Mon Sep 17 00:00:00 2001
>> From: Tom Stellard <thomas.stellard at amd.com>
>> Date: Tue, 20 Jan 2015 16:43:00 -0500
>> Subject: [PATCH 1/2] StructurizeCFG: Use a reverse post-order traversal
>>
>> We were previously doing a post-order traversal and operating on the
>> list in reverse, however this would occasionaly cause backedges for
>> loops to be visited before some of the other blocks in the loop.
>>
>> We know use a reverse post-order traversal, which avoids this issue.
>>
>> The reverse post-order traversal is not completely ideal, so we need
>> to manually fixup the list to ensure that inner loop backedges are
>> visited before outer loop backedges.
>> ---
>> lib/Transforms/Scalar/StructurizeCFG.cpp | 68 +++++++++++-
>> .../Transforms/StructurizeCFG/nested-loop-order.ll | 79 ++++++++++++++
>> .../StructurizeCFG/one-loop-multiple-backedges.ll | 19 ++--
>> .../StructurizeCFG/post-order-traversal-bug.ll | 117 +++++++++++++++++++++
>> 4 files changed, 270 insertions(+), 13 deletions(-)
>> create mode 100644 test/Transforms/StructurizeCFG/nested-loop-order.ll
>> create mode 100644 test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
>>
>> diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
>> index 7fe87f9..f0c4095 100644
>> --- a/lib/Transforms/Scalar/StructurizeCFG.cpp
>> +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
>> @@ -10,12 +10,14 @@
>> #include "llvm/Transforms/Scalar.h"
>> #include "llvm/ADT/MapVector.h"
>> #include "llvm/ADT/SCCIterator.h"
>> +#include "llvm/ADT/PostOrderIterator.h"
>> #include "llvm/Analysis/LoopInfo.h"
>> #include "llvm/Analysis/RegionInfo.h"
>> #include "llvm/Analysis/RegionIterator.h"
>> #include "llvm/Analysis/RegionPass.h"
>> #include "llvm/IR/Module.h"
>> #include "llvm/IR/PatternMatch.h"
>> +#include "llvm/Support/Debug.h"
>> #include "llvm/Transforms/Utils/SSAUpdater.h"
>>
>> using namespace llvm;
>> @@ -281,11 +283,65 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
>>
>> /// \brief Build up the general order of nodes
>> void StructurizeCFG::orderNodes() {
>> - scc_iterator<Region *> I = scc_begin(ParentRegion);
>> - for (Order.clear(); !I.isAtEnd(); ++I) {
>> - const std::vector<RegionNode *> &Nodes = *I;
>> - Order.append(Nodes.begin(), Nodes.end());
>> + RNVector TempOrder;
>> + ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
>> + TempOrder.append(RPOT.begin(), RPOT.end());
>> +
>> + std::map<Loop*, unsigned> LoopBlocks;
>> +
>> +
>> + // The reverse post-order traversal of the list gives us an ordering close
>> + // to what we want. The only problem with it is that sometimes backedges
>> + // for outer loops will be visited before backedges for inner loops.
>> + for (RegionNode *RN : TempOrder) {
>> + BasicBlock *BB = RN->getEntry();
>> + Loop *Loop = LI->getLoopFor(BB);
>> + if (!LoopBlocks.count(Loop)) {
>> + LoopBlocks[Loop] = 1;
>> + continue;
>> + }
>> + LoopBlocks[Loop]++;
>> + }
>> +
>> + unsigned CurrentLoopDepth = 0;
>> + Loop *CurrentLoop = nullptr;
>> + BBSet TempVisited;
>> + for (RNVector::iterator I = TempOrder.begin(), E = TempOrder.end(); I != E; ++I) {
>> + BasicBlock *BB = (*I)->getEntry();
>> + unsigned LoopDepth = LI->getLoopDepth(BB);
>> +
>> + if (std::find(Order.begin(), Order.end(), *I) != Order.end())
>> + continue;
>> +
>> + if (LoopDepth < CurrentLoopDepth) {
>> + // Make sure we have visited all blocks in this loop before moving back to
>> + // the outer loop.
>> +
>> + RNVector::iterator LoopI = I;
>> + while(LoopBlocks[CurrentLoop]) {
>> + LoopI++;
>> + BasicBlock *LoopBB = (*LoopI)->getEntry();
>> + if (LI->getLoopFor(LoopBB) == CurrentLoop) {
>> + LoopBlocks[CurrentLoop]--;
>> + Order.push_back(*LoopI);
>> + }
>> + }
>> + }
>> +
>> + CurrentLoop = LI->getLoopFor(BB);
>> + if (CurrentLoop) {
>> + LoopBlocks[CurrentLoop]--;
>> + }
>> +
>> + CurrentLoopDepth = LoopDepth;
>> + Order.push_back(*I);
>> }
>> +
>> + // This pass originally used a post-order traversal and then operated on
>> + // the list in reverse. Now that we are using a reverse post-order traversal
>> + // rather than re-working the whole pass to operate on the list in order,
>> + // we just reverse the list and continue to operate on it in reverse.
>> + std::reverse(Order.begin(), Order.end());
>> }
>>
>> /// \brief Determine the end of the loops
>> @@ -441,6 +497,10 @@ void StructurizeCFG::collectInfos() {
>> for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
>> OI != OE; ++OI) {
>>
>> + DEBUG(dbgs() << "Visiting: " <<
>> + ((*OI)->isSubRegion() ? "SubRegion with entry: " : "") <<
>> + (*OI)->getEntry()->getName() << " Loop Depth: " << LI->getLoopDepth((*OI)->getEntry()) << "\n");
>> +
>> // Analyze all the conditions leading to a node
>> gatherPredicates(*OI);
>>
>> diff --git a/test/Transforms/StructurizeCFG/nested-loop-order.ll b/test/Transforms/StructurizeCFG/nested-loop-order.ll
>> new file mode 100644
>> index 0000000..fee1ff0
>> --- /dev/null
>> +++ b/test/Transforms/StructurizeCFG/nested-loop-order.ll
>> @@ -0,0 +1,79 @@
>> +; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
>> +
>> +define void @main(float addrspace(1)* %out) {
>> +
>> +; CHECK: main_body:
>> +; CHECK: br label %LOOP.outer
>> +main_body:
>> + br label %LOOP.outer
>> +
>> +; CHECK: LOOP.outer:
>> +; CHECK: br label %LOOP
>> +LOOP.outer: ; preds = %ENDIF28, %main_body
>> + %temp8.0.ph = phi float [ 0.000000e+00, %main_body ], [ %tmp35, %ENDIF28 ]
>> + %temp4.0.ph = phi i32 [ 0, %main_body ], [ %tmp20, %ENDIF28 ]
>> + br label %LOOP
>> +
>> +; CHECK: LOOP:
>> +; br i1 %{{[0-9]+}}, label %ENDIF, label %Flow
>> +LOOP: ; preds = %IF29, %LOOP.outer
>> + %temp4.0 = phi i32 [ %temp4.0.ph, %LOOP.outer ], [ %tmp20, %IF29 ]
>> + %tmp20 = add i32 %temp4.0, 1
>> + %tmp22 = icmp sgt i32 %tmp20, 3
>> + br i1 %tmp22, label %ENDLOOP, label %ENDIF
>> +
>> +; CHECK: Flow3
>> +; CHECK: br i1 %{{[0-9]+}}, label %ENDLOOP, label %LOOP.outer
>> +
>> +; CHECK: ENDLOOP:
>> +; CHECK: ret void
>> +ENDLOOP: ; preds = %ENDIF28, %IF29, %LOOP
>> + %temp8.1 = phi float [ %temp8.0.ph, %LOOP ], [ %temp8.0.ph, %IF29 ], [ %tmp35, %ENDIF28 ]
>> + %tmp23 = icmp eq i32 %tmp20, 3
>> + %.45 = select i1 %tmp23, float 0.000000e+00, float 1.000000e+00
>> + store float %.45, float addrspace(1)* %out
>> + ret void
>> +
>> +; CHECK: ENDIF:
>> +; CHECK: br i1 %tmp31, label %IF29, label %Flow1
>> +ENDIF: ; preds = %LOOP
>> + %tmp31 = icmp sgt i32 %tmp20, 1
>> + br i1 %tmp31, label %IF29, label %ENDIF28
>> +
>> +; CHECK: Flow:
>> +; CHECK br i1 %{{[0-9]+}}, label %Flow, label %LOOP
>> +
>> +; CHECK: IF29:
>> +; CHECK: br label %Flow1
>> +IF29: ; preds = %ENDIF
>> + %tmp32 = icmp sgt i32 %tmp20, 2
>> + br i1 %tmp32, label %ENDLOOP, label %LOOP
>> +
>> +; CHECK: Flow1:
>> +; CHECK: br label %Flow
>> +
>> +; CHECK: Flow2:
>> +; CHECK: br i1 %{{[0-9]+}}, label %ENDIF28, label %Flow3
>> +
>> +; CHECK: ENDIF28:
>> +; CHECK: br label %Flow3
>> +ENDIF28: ; preds = %ENDIF
>> + %tmp35 = fadd float %temp8.0.ph, 1.0
>> + %tmp36 = icmp sgt i32 %tmp20, 2
>> + br i1 %tmp36, label %ENDLOOP, label %LOOP.outer
>> +}
>> +
>> +; Function Attrs: nounwind readnone
>> +declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
>> +
>> +; Function Attrs: readnone
>> +declare float @llvm.AMDIL.clamp.(float, float, float) #2
>> +
>> +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
>> +
>> +attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
>> +attributes #1 = { nounwind readnone }
>> +attributes #2 = { readnone }
>> +
>> +!0 = !{!1, !1, i64 0, i32 1}
>> +!1 = !{!"const", null}
>> diff --git a/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll b/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
>> index 8ebd47a..668a1e9 100644
>> --- a/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
>> +++ b/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
>> @@ -11,28 +11,29 @@ bb:
>> bb3: ; preds = %bb7, %bb
>> %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb7 ]
>> %tmp4 = fcmp ult float %arg1, 3.500000e+00
>> -; CHECK: br i1 %tmp4, label %bb7, label %Flow
>> +; CHECK: %0 = xor i1 %tmp4, true
>> +; CHECK: br i1 %0, label %bb5, label %Flow
>> br i1 %tmp4, label %bb7, label %bb5
>>
>> -; CHECK: Flow:
>> -; CHECK: br i1 %2, label %Flow1, label %bb3
>> -
>> -; CHECK: Flow1:
>> -; CHECK: br i1 %3, label %bb5, label %bb10
>> -
>> ; CHECK: bb5:
>> bb5: ; preds = %bb3
>> %tmp6 = fcmp olt float 0.000000e+00, %arg2
>> -; CHECK: br label %bb10
>> +; CHECK: br label %Flow
>> br i1 %tmp6, label %bb10, label %bb7
>>
>> +; CHECK: Flow:
>> +; CHECK: br i1 %3, label %bb7, label %Flow1
>> +
>> ; CHECK: bb7
>> bb7: ; preds = %bb5, %bb3
>> %tmp8 = add nuw nsw i64 %tmp, 1
>> %tmp9 = icmp slt i64 %tmp8, 5
>> -; CHECK: br label %Flow
>> +; CHECK: br label %Flow1
>> br i1 %tmp9, label %bb3, label %bb10
>>
>> +; CHECK: Flow1:
>> +; CHECK: br i1 %7, label %bb10, label %bb3
>> +
>> ; CHECK: bb10
>> bb10: ; preds = %bb7, %bb5
>> %tmp11 = phi i32 [ 15, %bb5 ], [ 255, %bb7 ]
>> diff --git a/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll b/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
>> new file mode 100644
>> index 0000000..c48110e
>> --- /dev/null
>> +++ b/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
>> @@ -0,0 +1,117 @@
>> +; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
>> +
>> +; The structurize cfg pass used to do a post-order traversal to generate a list
>> +; of ; basic blocks and then operate on the list in reverse. This led to bugs,
>> +; because sometimes successors would be visited before their predecessors.
>> +; The fix for this was to do a reverse post-order traversal which is what the
>> +; algorithm requires.
>> +
>> +; Function Attrs: nounwind
>> +define void @test(float* nocapture %out, i32 %K1, float* nocapture readonly %nr) #0 {
>> +
>> +; CHECK: entry:
>> +; CHECK: br label %for.body
>> +entry:
>> + br label %for.body
>> +
>> +; CHECK: for.body:
>> +; CHECK: br i1 %{{[0-9]+}}, label %lor.lhs.false, label %Flow
>> +for.body: ; preds = %for.body.backedge, %entry
>> + %indvars.iv = phi i64 [ %indvars.iv.be, %for.body.backedge ], [ 1, %entry ]
>> + %best_val.027 = phi float [ %best_val.027.be, %for.body.backedge ], [ 5.000000e+01, %entry ]
>> + %prev_start.026 = phi i32 [ %tmp26, %for.body.backedge ], [ 0, %entry ]
>> + %best_count.025 = phi i32 [ %best_count.025.be, %for.body.backedge ], [ 0, %entry ]
>> + %tmp0 = trunc i64 %indvars.iv to i32
>> + %cmp1 = icmp eq i32 %tmp0, %K1
>> + br i1 %cmp1, label %if.then, label %lor.lhs.false
>> +
>> +; CHECK: lor.lhs.false:
>> +; CHECK: br label %Flow
>> +lor.lhs.false: ; preds = %for.body
>> + %arrayidx = getelementptr inbounds float* %nr, i64 %indvars.iv
>> + %tmp1 = load float* %arrayidx, align 4, !tbaa !7
>> + %tmp2 = add nsw i64 %indvars.iv, -1
>> + %arrayidx2 = getelementptr inbounds float* %nr, i64 %tmp2
>> + %tmp3 = load float* %arrayidx2, align 4, !tbaa !7
>> + %cmp3 = fcmp une float %tmp1, %tmp3
>> + br i1 %cmp3, label %if.then, label %for.body.1
>> +
>> +; CHECK: Flow:
>> +; CHECK: br i1 %{{[0-9]+}}, label %if.then, label %Flow1
>> +
>> +; CHECK: if.then:
>> +; CHECK: br label %Flow1
>> +if.then: ; preds = %lor.lhs.false, %for.body
>> + %sub4 = sub nsw i32 %tmp0, %prev_start.026
>> + %tmp4 = add nsw i64 %indvars.iv, -1
>> + %arrayidx8 = getelementptr inbounds float* %nr, i64 %tmp4
>> + %tmp5 = load float* %arrayidx8, align 4, !tbaa !7
>> + br i1 %cmp1, label %for.end, label %for.body.1
>> +
>> +; CHECK: for.end:
>> +; CHECK: ret void
>> +for.end: ; preds = %for.body.1, %if.then
>> + %best_val.0.lcssa = phi float [ %best_val.233, %for.body.1 ], [ %tmp5, %if.then ]
>> + store float %best_val.0.lcssa, float* %out, align 4, !tbaa !7
>> + ret void
>> +
>> +; CHECK: Flow1
>> +; CHECK: br i1 %{{[0-9]}}, label %for.body.1, label %Flow2
>> +
>> +; CHECK: for.body.1:
>> +; CHECK: br i1 %{{[0-9]+}}, label %for.body.6, label %Flow3
>> +for.body.1: ; preds = %if.then, %lor.lhs.false
>> + %best_val.233 = phi float [ %tmp5, %if.then ], [ %best_val.027, %lor.lhs.false ]
>> + %best_count.231 = phi i32 [ %sub4, %if.then ], [ %best_count.025, %lor.lhs.false ]
>> + %indvars.iv.next.454 = add nsw i64 %indvars.iv, 5
>> + %tmp22 = trunc i64 %indvars.iv.next.454 to i32
>> + %cmp1.5 = icmp eq i32 %tmp22, %K1
>> + br i1 %cmp1.5, label %for.end, label %for.body.6
>> +
>> +; CHECK: Flow2:
>> +; CHECK: br i1 %{{[0-9]+}}, label %for.end, label %for.body
>> +
>> +; CHECK: for.body.6:
>> +; CHECK: br i1 %cmp5.6, label %if.then6.6, label %for.body.backedge
>> +for.body.6: ; preds = %for.body.1
>> + %indvars.iv.next.559 = add nsw i64 %indvars.iv, 6
>> + %tmp26 = trunc i64 %indvars.iv.next.559 to i32
>> + %sub4.6 = sub nsw i32 %tmp26, %tmp22
>> + %cmp5.6 = icmp slt i32 %best_count.231, %sub4.6
>> + br i1 %cmp5.6, label %if.then6.6, label %for.body.backedge
>> +
>> +; CHECK: if.then6.6
>> +; CHECK: br label %for.body.backedge
>> +if.then6.6: ; preds = %for.body.6
>> + %arrayidx8.6 = getelementptr inbounds float* %nr, i64 %indvars.iv.next.454
>> + %tmp29 = load float* %arrayidx8.6, align 4, !tbaa !7
>> + br label %for.body.backedge
>> +
>> +; CHECK: Flow3:
>> +; CHECK: br label %Flow2
>> +
>> +; CHECK: for.body.backedge:
>> +; CHECK: br label %Flow3
>> +for.body.backedge: ; preds = %if.then6.6, %for.body.6
>> + %best_val.027.be = phi float [ %tmp29, %if.then6.6 ], [ %best_val.233, %for.body.6 ]
>> + %best_count.025.be = phi i32 [ %sub4.6, %if.then6.6 ], [ %best_count.231, %for.body.6 ]
>> + %indvars.iv.be = add nsw i64 %indvars.iv, 7
>> + br label %for.body
>> +}
>> +
>> +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
>> +
>> +!opencl.kernels = !{!0}
>> +!llvm.ident = !{!6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6, !6}
>> +
>> +!0 = !{void (float*, i32, float*)* @test, !1, !2, !3, !4, !5}
>> +!1 = !{!"kernel_arg_addr_space", i32 1, i32 0, i32 1}
>> +!2 = !{!"kernel_arg_access_qual", !"none", !"none", !"none"}
>> +!3 = !{!"kernel_arg_type", !"float*", !"int", !"float*"}
>> +!4 = !{!"kernel_arg_base_type", !"float*", !"int", !"float*"}
>> +!5 = !{!"kernel_arg_type_qual", !"", !"", !""}
>> +!6 = !{!"clang version 3.7.0 (http://llvm.org/git/clang.git 00f6327cc326763eb3efc6068f6e4fc71ff49fb3) (llvm/trunk 226164)"}
>> +!7 = !{!8, !8, i64 0}
>> +!8 = !{!"float", !9, i64 0}
>> +!9 = !{!"omnipotent char", !10, i64 0}
>> +!10 = !{!"Simple C/C++ TBAA"}
>> --
>> 1.8.5.5
>>
>> From d1b88127785b20dfaefdc8418598716186f8095f Mon Sep 17 00:00:00 2001
>> From: Tom Stellard <thomas.stellard at amd.com>
>> Date: Tue, 20 Jan 2015 13:58:49 -0500
>> Subject: [PATCH 2/2] StructurizeCFG: Remove obsolete fix for loop backedge
>> detection
>>
>> This is no longer needed now that we are using a reverse post-order
>> traversal.
>> ---
>> lib/Transforms/Scalar/StructurizeCFG.cpp | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
>> index f0c4095..98abfb9 100644
>> --- a/lib/Transforms/Scalar/StructurizeCFG.cpp
>> +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
>> @@ -360,7 +360,7 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
>> for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
>> BasicBlock *Succ = Term->getSuccessor(i);
>>
>> - if (Visited.count(Succ) && LI->isLoopHeader(Succ) ) {
>> + if (Visited.count(Succ)) {
>> Loops[Succ] = BB;
>> }
>> }
>> --
>> 1.8.5.5
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list