[PATCH] D43237: [LoopInterchange] Allow some loops with PHI nodes in the exit block.
Florian Hahn via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 11 10:29:51 PDT 2018
fhahn added a comment.
Thank you very much for having a look, Eli. PHI nodes in the outer loop exit block are used as a proxy for uses of values produced in the loop nest. Given the way we currently do the interchanging, I think it is only safe to interchange if the values used outside of the loop nest are produced in the inner loop. Before and after interchanging, the inner loop block will be executed if both inner and outer loop conditions are true.
I think the same might not be true for values produced in the outer loop, for example if the inner loop body is never executed. Consider the example below, where the outer loop induction variable is returned. If the condition of `br i1 undef, label %for.body4, label %for.cond.cleanup3` is always false, we would execute the outer loop latch 1024 times and return 1024. In the interchanged version, we would never execute the outer loop latch, and I think it is not clear which value the PHI node should use.
Please let me know if that makes sense :)
; Original (pre-interchange) loop nest. The outer loop has header %for.body and
; latch %for.cond.cleanup3; the inner loop is the single block %for.body4.
; The function returns %indvars.iv.next20, a value computed in the outer loop latch.
define i64 @no_deps_interchange([1024 x i32]* nocapture %Arr, i32 %k, i64 %B) {
entry:
br label %for.body
; Outer loop header: %indvars.iv19 is the outer induction variable, starting at 0.
for.body: ; preds = %entry, %for.cond.cleanup3
%indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.cond.cleanup3 ]
; Guard on an undef condition: the inner loop may be skipped, jumping straight to the outer latch.
br i1 undef, label %for.body4, label %for.cond.cleanup3
; Inner loop: %indvars.iv is the inner induction variable, starting at 0.
for.body4: ; preds = %for.body, %for.body4
%indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body4 ]
; Store 0 to the element of %Arr indexed by [%indvars.iv][%indvars.iv19].
%arrayidx6 = getelementptr inbounds [1024 x i32], [1024 x i32]* %Arr, i64 %indvars.iv, i64 %indvars.iv19
store i32 0, i32* %arrayidx6, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; Keep iterating the inner loop until %indvars.iv.next equals %B.
%exitcond = icmp ne i64 %indvars.iv.next, %B
br i1 %exitcond, label %for.body4, label %for.cond.cleanup3
; Outer loop latch: increments the outer induction variable.
for.cond.cleanup3: ; preds = %for.body4
%indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
; Keep iterating the outer loop until %indvars.iv.next20 equals 1024.
%exitcond21 = icmp ne i64 %indvars.iv.next20, 1024
br i1 %exitcond21, label %for.body, label %for.cond.cleanup
; Exit block: the returned value is produced in the outer loop latch, not in the inner loop.
for.cond.cleanup: ; preds = %for.cond.cleanup3
ret i64 %indvars.iv.next20
}
If we allow PHI nodes with values produced in the outer loop latch, interchanging produces the following code:
; Interchanged version of the loop nest. The former inner loop header %for.body4
; is now entered first from %entry, and the former outer loop
; (%for.body ... %for.cond.cleanup3) is nested inside it.
define i64 @no_deps_interchange([1024 x i32]* nocapture %Arr, i32 %k, i64 %B) {
entry:
br label %for.body4.preheader
for.body.preheader: ; preds = %for.body4
br label %for.body
; Header of the loop over %indvars.iv19 (now the nested loop).
for.body: ; preds = %for.body.preheader, %for.cond.cleanup3
%indvars.iv19 = phi i64 [ %indvars.iv.next20, %for.cond.cleanup3 ], [ 0, %for.body.preheader ]
; The undef guard now branches directly to the exit block %for.cond.cleanup,
; bypassing %for.cond.cleanup3 where %indvars.iv.next20 is computed.
br i1 undef, label %for.body4.split1, label %for.cond.cleanup
for.body4.preheader: ; preds = %entry
br label %for.body4
; Header of the loop over %indvars.iv (now entered first).
for.body4: ; preds = %for.body4.preheader, %for.body4.split
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body4.split ], [ 0, %for.body4.preheader ]
br label %for.body.preheader
; First part of the split %for.body4: holds the store from the original inner loop body.
for.body4.split1: ; preds = %for.body
%arrayidx6 = getelementptr inbounds [1024 x i32], [1024 x i32]* %Arr, i64 %indvars.iv, i64 %indvars.iv19
store i32 0, i32* %arrayidx6, align 4
br label %for.cond.cleanup3.loopexit
; Second part of the split %for.body4: increment and exit test for %indvars.iv.
for.body4.split: ; preds = %for.cond.cleanup3
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, %B
br i1 %exitcond, label %for.body4, label %for.cond.cleanup
for.cond.cleanup3.loopexit: ; preds = %for.body4.split1
br label %for.cond.cleanup3
; Increment and exit test for %indvars.iv19; reached only via %for.body4.split1.
for.cond.cleanup3: ; preds = %for.cond.cleanup3.loopexit
%indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
%exitcond21 = icmp ne i64 %indvars.iv.next20, 1024
br i1 %exitcond21, label %for.body, label %for.body4.split
; Exit block with two predecessors (%for.body4.split and %for.body), but the PHI
; below has an incoming value only for %for.body4.split; no value is available
; for the edge from %for.body.
for.cond.cleanup: ; preds = %for.body4.split, %for.body
%indvars.iv.next20.lcssa = phi i64 [ %indvars.iv.next20, %for.body4.split ] ; invalid PHI node
ret i64 %indvars.iv.next20.lcssa
}
https://reviews.llvm.org/D43237
More information about the llvm-commits
mailing list