[LLVMdev] loop vectorizer and storing to uniform addresses
Frank Winter
fwinter at jlab.org
Fri Nov 8 05:41:51 PST 2013
I changed the input C code to use a 64-bit type for the loop index (this
eliminates the 'sext' instructions in the IR).
Here is the IR produced with clang -O0:
define float @foo(i64 %start, i64 %end, float* %A) #0 {
entry:
%start.addr = alloca i64, align 8
%end.addr = alloca i64, align 8
%A.addr = alloca float*, align 8
%sum = alloca [4 x float], align 16
%i = alloca i64, align 8
%q = alloca i64, align 8
store i64 %start, i64* %start.addr, align 8
store i64 %end, i64* %end.addr, align 8
store float* %A, float** %A.addr, align 8
%0 = bitcast [4 x float]* %sum to i8*
call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 16, i32 16, i1 false)
%1 = load i64* %start.addr, align 8
store i64 %1, i64* %i, align 8
br label %for.cond
for.cond: ; preds = %for.inc6, %entry
%2 = load i64* %i, align 8
%3 = load i64* %end.addr, align 8
%cmp = icmp slt i64 %2, %3
br i1 %cmp, label %for.body, label %for.end8
for.body: ; preds = %for.cond
store i64 0, i64* %q, align 8
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%4 = load i64* %q, align 8
%cmp2 = icmp slt i64 %4, 4
br i1 %cmp2, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%5 = load i64* %i, align 8
%mul = mul nsw i64 %5, 4
%6 = load i64* %q, align 8
%add = add nsw i64 %mul, %6
%7 = load float** %A.addr, align 8
%arrayidx = getelementptr inbounds float* %7, i64 %add
%8 = load float* %arrayidx, align 4
%9 = load i64* %q, align 8
%arrayidx4 = getelementptr inbounds [4 x float]* %sum, i32 0, i64 %9
%10 = load float* %arrayidx4, align 4
%add5 = fadd float %10, %8
store float %add5, float* %arrayidx4, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%11 = load i64* %q, align 8
%inc = add nsw i64 %11, 1
store i64 %inc, i64* %q, align 8
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc6
for.inc6: ; preds = %for.end
%12 = load i64* %i, align 8
%inc7 = add nsw i64 %12, 1
store i64 %inc7, i64* %i, align 8
br label %for.cond
for.end8: ; preds = %for.cond
%arrayidx9 = getelementptr inbounds [4 x float]* %sum, i32 0, i64 0
%13 = load float* %arrayidx9, align 4
%arrayidx10 = getelementptr inbounds [4 x float]* %sum, i32 0, i64 1
%14 = load float* %arrayidx10, align 4
%add11 = fadd float %13, %14
%arrayidx12 = getelementptr inbounds [4 x float]* %sum, i32 0, i64 2
%15 = load float* %arrayidx12, align 4
%add13 = fadd float %add11, %15
%arrayidx14 = getelementptr inbounds [4 x float]* %sum, i32 0, i64 3
%16 = load float* %arrayidx14, align 4
%add15 = fadd float %add13, %16
ret float %add15
}
Thus, the inner loop is not unrolled.
opt -basicaa -loop-vectorize -debug-only=loop-vectorize -vectorizer-min-trip-count=4 -S sum.ll
LV: Checking a loop in "foo"
LV: Found a loop: for.cond1
LV: SCEV could not compute the loop exit count.
LV: Not vectorizing.
opt -basicaa -gvn -loop-vectorize -debug-only=loop-vectorize -vectorizer-min-trip-count=4 -S sum.ll
LV: Checking a loop in "foo"
LV: Found a loop: for.cond1
LV: Found an induction variable.
LV: We don't allow storing to uniform addresses
LV: Can't vectorize due to memory conflicts
LV: Not vectorizing.
Frank
On 08/11/13 02:49, Renato Golin wrote:
> On 7 November 2013 17:18, Frank Winter <fwinter at jlab.org
> <mailto:fwinter at jlab.org>> wrote:
>
> LV: We don't allow storing to uniform addresses
>
>
> This is triggering because it didn't recognize sum[q] as a reduction
> variable during canVectorizeInstrs() but did recognize that sum[q] is
> loop invariant in canVectorizeMemory().
>
> I'm guessing the nested loop was unrolled because of the low
> trip-count, and removed, so it ended up as:
>
> float foo( int start , int end , float * A )
> {
> float sum[4] = {0.,0.,0.,0.};
> for (int i = start ; i < end ; ++i ) {
> sum[0] += A[i*4+0];
> sum[1] += A[i*4+1];
> sum[2] += A[i*4+2];
> sum[3] += A[i*4+3];
> }
> return sum[0]+sum[1]+sum[2]+sum[3];
> }
>
> but, for some reason, sum[q] wasn't recognized as a reduction
> variable, maybe because it was an array of reduction variables?
>
> Having the IR would certainly help...
>
> cheers,
> --renato
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20131108/4aea7fa7/attachment.html>
More information about the llvm-dev
mailing list