[PATCH] D134930: [LoopInterchange] Do not interchange when a reduction phi in all subloops of the outer loop is not recognizable
Congzhe Cao via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 31 19:10:16 PDT 2022
congzhe added a subscriber: uabelho.
congzhe added a comment.
In D134930#3867354 <https://reviews.llvm.org/D134930#3867354>, @Meinersbur wrote:
> LGTM. Thank you.
>
> The additional check make sense.
Thank you Michael @Meinersbur for the review! I've updated the patch slightly to address your comments.
Hi Bardia @bmahjour, I remember you wanted to take a look at the problematic IR that results if we interchange the loop nest in `multilevel-partial-reduction.ll` (the test file in this patch). Here it is: if we interchange the outermost and the middle loop, the IR after interchange becomes the one shown below. As @uabelho said in https://reviews.llvm.org/D132055#3814831, before interchange we read `%arrayidx14.promoted.i = load i32, ptr %arrayidx14.i, align 1`, then do 512 rounds of the inner loop and add the read elements, and then we store what we have so far in `store i32 %18, ptr %arrayidx14.i, align 1`. After interchange, however, we load, then add, but then we don't do the store; instead we do the load and execute the inner loop again, so we throw away the calculated sum and just read the same values again. I'd appreciate it if you have any further comments on it.
; @test7: IR of the loop nest AFTER the (incorrect) interchange.
;
; Loop structure as it appears below:
;   j-loop  (header %for.cond4.preheader.i, latch %middle.block, j = 0..3)  -- now outermost
;     i-loop  (header %for.cond1.preheader.i, latch %for.inc19.i, i = 0..1)
;       vector.body  (self-loop, %index = 0, 4, ..., 508)
;
; The accumulator is seeded from c[i][j] inside the i-loop, but the only store
; back to memory is in %middle.block, which is reached only when the i-loop
; exits. The sum computed for every i-iteration except the last is therefore
; never written back.
;
; NOTE(review): @b, @c and @llvm.vector.reduce.add.v4i32 are declared outside
; this excerpt.
define i32 @test7() {
entry:
br label %for.cond4.preheader.i.preheader
; Preheader of the i-loop; entered once per j-iteration.
for.cond1.preheader.i.preheader: ; preds = %for.cond4.preheader.i
br label %for.cond1.preheader.i
; i-loop header: %i.011.i starts at 0 and is advanced in %for.inc19.i.
for.cond1.preheader.i: ; preds = %for.cond1.preheader.i.preheader, %for.inc19.i
%i.011.i = phi i16 [ %inc20.i, %for.inc19.i ], [ 0, %for.cond1.preheader.i.preheader ]
br label %for.cond4.preheader.i.split
; Preheader of the j-loop; entered once from %entry.
for.cond4.preheader.i.preheader: ; preds = %entry
br label %for.cond4.preheader.i
; j-loop header: %j.010.i starts at 0 and is advanced in %middle.block.
for.cond4.preheader.i: ; preds = %for.cond4.preheader.i.preheader, %middle.block
%j.010.i = phi i16 [ %inc17.i, %middle.block ], [ 0, %for.cond4.preheader.i.preheader ]
br label %for.cond1.preheader.i.preheader
; Runs once per (j, i) pair: loads c[i][j] and places it in lane 0 of the
; initial accumulator vector (lanes 1-3 are 0).
for.cond4.preheader.i.split: ; preds = %for.cond1.preheader.i
%arrayidx14.i = getelementptr inbounds [2 x [4 x i32]], ptr @c, i16 0, i16 %i.011.i, i16 %j.010.i
%arrayidx14.promoted.i = load i32, ptr %arrayidx14.i, align 1
%0 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %arrayidx14.promoted.i, i64 0
br label %vector.body
; Innermost loop: each iteration gathers b[index..index+3][j] into a <4 x i32>
; and adds it to the running accumulator %vec.phi.
vector.body: ; preds = %vector.body, %for.cond4.preheader.i.split
%index = phi i16 [ 0, %for.cond4.preheader.i.split ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ %0, %for.cond4.preheader.i.split ], [ %16, %vector.body ]
; %index starts at 0 and steps by 4, so its low two bits are clear and these
; `or`s yield index+1, index+2, index+3.
%1 = or i16 %index, 1
%2 = or i16 %index, 2
%3 = or i16 %index, 3
%4 = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 %index, i16 %j.010.i
%5 = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 %1, i16 %j.010.i
%6 = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 %2, i16 %j.010.i
%7 = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 %3, i16 %j.010.i
%8 = load i32, ptr %4, align 1
%9 = load i32, ptr %5, align 1
%10 = load i32, ptr %6, align 1
%11 = load i32, ptr %7, align 1
%12 = insertelement <4 x i32> poison, i32 %8, i64 0
%13 = insertelement <4 x i32> %12, i32 %9, i64 1
%14 = insertelement <4 x i32> %13, i32 %10, i64 2
%15 = insertelement <4 x i32> %14, i32 %11, i64 3
%16 = add <4 x i32> %15, %vec.phi
%index.next = add nuw i16 %index, 4
%17 = icmp eq i16 %index.next, 512
br i1 %17, label %for.inc19.i, label %vector.body
; j-loop latch, reached only when the i-loop exits. This is the sole store of
; the reduction result: it writes the horizontally reduced accumulator through
; the pointer carried out of the i-loop, i.e. the values from the final
; i-iteration only.
middle.block: ; preds = %for.inc19.i
%.lcssa.lcssa = phi <4 x i32> [ %.lcssa, %for.inc19.i ]
%arrayidx14.i.lcssa = phi ptr [ %arrayidx14.i, %for.inc19.i ]
%18 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %.lcssa.lcssa)
store i32 %18, ptr %arrayidx14.i.lcssa, align 1
%inc17.i = add nuw nsw i16 %j.010.i, 1
%exitcond12.not.i = icmp eq i16 %inc17.i, 4
br i1 %exitcond12.not.i, label %test.exit, label %for.cond4.preheader.i
; i-loop latch. When it branches back to %for.cond1.preheader.i nothing has
; been stored: the next iteration re-seeds the accumulator with a fresh load
; from c[i][j], and %.lcssa from this iteration is discarded.
for.inc19.i: ; preds = %vector.body
%.lcssa = phi <4 x i32> [ %16, %vector.body ]
%inc20.i = add nuw nsw i16 %i.011.i, 1
%exitcond13.not.i = icmp eq i16 %inc20.i, 2
br i1 %exitcond13.not.i, label %middle.block, label %for.cond1.preheader.i
; Returns the first i32 stored at @c.
test.exit: ; preds = %middle.block
%19 = load i32, ptr @c, align 1
ret i32 %19
}
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D134930/new/
https://reviews.llvm.org/D134930
More information about the llvm-commits
mailing list