[llvm-dev] persuading licm to do the right thing
Mehdi Amini via llvm-dev
llvm-dev at lists.llvm.org
Tue Dec 8 23:21:18 PST 2015
Hi Preston,
> On Dec 8, 2015, at 10:56 PM, Preston Briggs via llvm-dev <llvm-dev at lists.llvm.org> wrote:
>
> When I compile two different modules using
>
> clang -O -S -emit-llvm
>
> I get different .ll files, no surprise.
>
> The first looks like
>
> double *v;
>
> double zap(long n) {
> double sum = 0;
> for (long i = 0; i < n; i++)
> sum += v[i];
> return sum;
> }
>
> yielding
>
> @v = common global double* null, align 8
>
> ; Function Attrs: nounwind readonly uwtable
> define double @zap(i64 %n) #0 {
> entry:
> %cmp4 = icmp sgt i64 %n, 0
> br i1 %cmp4, label %for.body.lr.ph, label %for.end
>
> for.body.lr.ph: ; preds = %entry
> %0 = load double** @v, align 8, !tbaa !1
> br label %for.body
>
> for.body: ; preds = %for.body, %for.body.lr.ph
> %i.06 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
> %sum.05 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add, %for.body ]
> %arrayidx = getelementptr inbounds double* %0, i64 %i.06
> %1 = load double* %arrayidx, align 8, !tbaa !5
> %add = fadd double %sum.05, %1
> %inc = add nsw i64 %i.06, 1
>
> %exitcond = icmp eq i64 %inc, %n
> br i1 %exitcond, label %for.end, label %for.body
>
> for.end: ; preds = %for.body, %entry
> %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ]
> ret double %sum.0.lcssa
> }
>
> and the second looks like
>
> double v[10000];
>
> double zap(long n) {
> double sum = 0;
> for (long i = 0; i < n; i++)
> sum += v[i];
> return sum;
> }
>
> yielding
>
> ; ModuleID = 'z.c'
> target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64-S128"
> target triple = "x86_64-unknown-linux-gnu"
>
> @v = common global [10000 x double] zeroinitializer, align 16
>
> ; Function Attrs: nounwind readonly uwtable
> define double @zap(i64 %n) #0 {
> entry:
> %cmp4 = icmp sgt i64 %n, 0
> br i1 %cmp4, label %for.body, label %for.end
>
> for.body: ; preds = %entry, %for.body
> %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
> %sum.05 = phi double [ %add, %for.body ], [ 0.000000e+00, %entry ]
> %arrayidx = getelementptr inbounds [10000 x double]* @v, i64 0, i64 %i.06
> %0 = load double* %arrayidx, align 8, !tbaa !1
> %add = fadd double %sum.05, %0
> %inc = add nsw i64 %i.06, 1
> %exitcond = icmp eq i64 %inc, %n
> br i1 %exitcond, label %for.end, label %for.body
>
> for.end: ; preds = %for.body, %entry
> %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ]
> ret double %sum.0.lcssa
> }
>
> attributes #0 = { nounwind readonly uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
>
> !llvm.ident = !{!0}
>
> !0 = metadata !{metadata !"Clang Front-End version 3.4.1 (tags/RELEASE_34/final)"}
> !1 = metadata !{metadata !2, metadata !2, i64 0}
> !2 = metadata !{metadata !"double", metadata !3, i64 0}
> !3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
> !4 = metadata !{metadata !"Simple C/C++ TBAA"}
>
> (I included all the metadata and such for the 2nd case, on the off chance it matters.)
>
> Is there any way I can convince licm (or something) to rip open the GEP and hoist the reference to @v outside the loop, similar to the first example?
I believe that in the second case there is no need to load the address of v, as it is a constant. However, what you have is the constant address of an array, which is represented by [10000 x double]* @v in the IR, and indexing into it requires a two-index GEP (the leading 0 steps through the pointer, the second index selects the element).
You “could” manage to represent it this way:
define double @zap(i64 %n) #0 {
entry:
%cmp6 = icmp sgt i64 %n, 0
%hoisted = bitcast [10000 x double]* @v to double*
br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry
br label %for.body
for.cond.cleanup.loopexit: ; preds = %for.body
%add.lcssa = phi double [ %add, %for.body ]
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
%sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %for.cond.cleanup.loopexit ]
ret double %sum.0.lcssa
for.body: ; preds = %for.body.preheader, %for.body
%i.08 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%sum.07 = phi double [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
%arrayidx = getelementptr double, double* %hoisted, i64 %i.08
%0 = load double, double* %arrayidx, align 8, !tbaa !2
%add = fadd double %sum.07, %0
%inc = add nuw nsw i64 %i.08, 1
%exitcond = icmp eq i64 %inc, %n
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}
However, instcombine will recanonicalize it back to the way it was originally.
Since it is a GEP that operates on a constant address, this shouldn’t matter. Why would you want to split it?
Best,
—
Mehdi
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20151208/4541186a/attachment.html>
More information about the llvm-dev
mailing list