[PATCH] D69897: Add #pragma clang loop vectorize_assume_alignment(n)

Wed Nov 20 08:20:58 PST 2019

Hello Michael,
Very sorry for the late reply. We had exams and assignments this week, and I
had to read about __builtin_assume_aligned, as I had not come across it before.

#pragma clang loop vectorize_assume_alignment(32)
> for(int i = 0;i < n; i++){
> a[i] = b[i] + i*i;
> }
>
For this, all accesses inside the loop will be aligned to 32 bytes.
Example IR:

> for.cond:                                         ; preds = %for.inc,
> %entry
>   %5 = load i32, i32* %i, align 32, !llvm.access.group !2
>   %6 = load i32, i32* %n, align 32, !llvm.access.group !2
>   %cmp = icmp slt i32 %5, %6
>   br i1 %cmp, label %for.body, label %for.end
>
> for.body:                                         ; preds = %for.cond
>   %7 = load i32, i32* %i, align 32, !llvm.access.group !2
>   %8 = load i32, i32* %i, align 32, !llvm.access.group !2
>   %idxprom = sext i32 %8 to i64
>   %arrayidx = getelementptr inbounds i32, i32* %vla1, i64 %idxprom
>   store i32 %7, i32* %arrayidx, align 32, !llvm.access.group !2
>   br label %for.inc
>
> for.inc:                                          ; preds = %for.body
>   %9 = load i32, i32* %i, align 32, !llvm.access.group !2
>   %inc = add nsw i32 %9, 1
>   store i32 %inc, i32* %i, align 32, !llvm.access.group !2
>   br label %for.cond, !llvm.loop !3
>
With the pragma, you will not need to create a pointer for every array (or
operand) you want to perform the operation on.

> void mult(float* x, int size, float factor){
>   float* ax = (float*)__builtin_assume_aligned(x, 64);
>   for (int i = 0; i < size; ++i)
>      ax[i] *= factor;
> }
>
The IR generated for this:

> define void @mult(i32*, i32, float) #0 {
>   %4 = alloca i32*, align 8
>   %5 = alloca i32, align 4
>   %6 = alloca float, align 4
>   %7 = alloca i32*, align 8
>   %8 = alloca i32, align 4
>   store i32* %0, i32** %4, align 8
>   store i32 %1, i32* %5, align 4
>   store float %2, float* %6, align 4
>   %9 = load i32*, i32** %4, align 8
>   %10 = bitcast i32* %9 to i8*
>   %11 = ptrtoint i8* %10 to i64
>   %12 = and i64 %11, 63
>   %13 = icmp eq i64 %12, 0
>   call void @llvm.assume(i1 %13)
>   %14 = bitcast i8* %10 to i32*
>   store i32* %14, i32** %7, align 8
>   store i32 0, i32* %8, align 4
>   br label %15
>
> ; <label>:15:                                     ; preds = %29, %3
>   %16 = load i32, i32* %8, align 4
>   %17 = load i32, i32* %5, align 4
>   %18 = icmp slt i32 %16, %17
>   br i1 %18, label %19, label %32
>
> ; <label>:19:                                     ; preds = %15
>   %20 = load float, float* %6, align 4
>   %21 = load i32*, i32** %7, align 8
>   %22 = load i32, i32* %8, align 4
>   %23 = sext i32 %22 to i64
>   %24 = getelementptr inbounds i32, i32* %21, i64 %23
>   %25 = load i32, i32* %24, align 4
>   %26 = sitofp i32 %25 to float
>   %27 = fmul float %26, %20
>   %28 = fptosi float %27 to i32
>   store i32 %28, i32* %24, align 4
>   br label %29
>
> ; <label>:29:                                     ; preds = %19
>   %30 = load i32, i32* %8, align 4
>   %31 = add nsw i32 %30, 1
>   store i32 %31, i32* %8, align 4
>   br label %15
>
> ; <label>:32:                                     ; preds = %15
>   ret void
> }
>
Here the alignment is only assumed (through the call to llvm.assume), whereas
with the #pragma it is set on each access to the number specified.
The pragma is easier to use, and having it will help because equivalent
directives are already provided by OpenMP and the Intel compilers.
Thank you. If I made any mistake, please tell me.

Happy Mahto
CSE Undergrad, IIT Hyderabad

On Thu, Nov 14, 2019 at 10:32 PM Michael Kruse via Phabricator <
reviews at reviews.llvm.org> wrote:

> Meinersbur added a comment.
>
> Could you elaborate why this is better than `__builtin_assume_aligned`?
>
>
> Repository:
>   rG LLVM Github Monorepo
>
> CHANGES SINCE LAST ACTION
>   https://reviews.llvm.org/D69897/new/
>
> https://reviews.llvm.org/D69897
>
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20191120/39d018cf/attachment-0001.html>