[llvm-dev] Is the correct behavior of getelementptr i192* for opt + llc -march=aarch64?

Fri Nov 11 18:44:13 PST 2016

> On Nov 11, 2016, at 6:05 PM, MITSUNARI Shigeo via llvm-dev <llvm-dev at lists.llvm.org> wrote:
> 
> Hi Mehdi,
> 
>>> No, my target is x86-64, x86, arm, aarch64, ..., so I'll avoid using i192* and datalayout.
>> 
>> There is nothing specific with i192. You will likely run into issues by not specifying the right datalayout.
>> 
>> The optimizations will always run with a datalayout: if you don’t specify one there will be a default one, which can cause problems on some targets (like you saw on arm).
>> For instance, the optimizer will assume a pointer size and optimize based on this.
> 
> I wrote the code without i192* as follows, and then I got what I wanted.

And what we’re trying to tell you is that this may “fix” *this* particular case, but that does not make it a correct solution.

— 
Mehdi

> I'll rewrite the other code like this.
> 
> // load 192-bit data from %r2
> define i192 @load192(i64* %r2)
> {
> %r3 = load i64, i64* %r2
> %r4 = zext i64 %r3 to i128
> %r6 = getelementptr i64, i64* %r2, i32 1
> %r7 = load i64, i64* %r6
> %r8 = zext i64 %r7 to i128
> %r9 = shl i128 %r8, 64
> %r10 = or i128 %r4, %r9
> %r11 = zext i128 %r10 to i192
> %r13 = getelementptr i64, i64* %r2, i32 2
> %r14 = load i64, i64* %r13
> %r15 = zext i64 %r14 to i192
> %r16 = shl i192 %r15, 128
> %r17 = or i192 %r11, %r16
> ret i192 %r17
> }
> 
> /*
>  struct i192_t {
>    uint64_t v[3];
>  };
>  void add(i192_t *y, const i192_t* x)
>  {
>    *y = x[0] + x[1]; // pseudo code
>  }
> */
> define void @add(i64* noalias  %r1, i64* noalias  %r2)
> {
> %r3 = call i192 @load192(i64* %r2)
> %r5 = getelementptr i64, i64* %r2, i32 3
> %r6 = call i192 @load192(i64* %r5)
> %r7 = add i192 %r3, %r6
> %r9 = getelementptr i64, i64* %r1, i32 0
> %r10 = trunc i192 %r7 to i64
> store i64 %r10, i64* %r9
> %r11 = lshr i192 %r7, 64
> %r13 = getelementptr i64, i64* %r1, i32 1
> %r14 = trunc i192 %r11 to i64
> store i64 %r14, i64* %r13
> %r15 = lshr i192 %r11, 64
> %r17 = getelementptr i64, i64* %r1, i32 2
> %r18 = trunc i192 %r15 to i64
> store i64 %r18, i64* %r17
> ret void
> }
> 
> % opt-3.8 -O3 a.ll -o - | llc-3.8 -O3 -o - -march=x86-64
> add:
>        movq    16(%rsi), %rax
>        movq    24(%rsi), %rcx
>        movq    32(%rsi), %rdx
>        addq    (%rsi), %rcx
>        adcq    8(%rsi), %rdx
>        adcq    40(%rsi), %rax
>        movq    %rcx, (%rdi)
>        movq    %rdx, 8(%rdi)
>        movq    %rax, 16(%rdi)
>        retq
> 
> % opt-3.8 -O3 a.ll -o - | llc-3.8 -O3 -o - -march=aarch64
> add:
>        ldp             x8, x9, [x1]
>        ldp     x10, x11, [x1, #24]
>        ldr     x12, [x1, #16]
>        ldr     x13, [x1, #40]
>        adds            x8, x10, x8
>        adcs    x9, x11, x9
>        stp             x8, x9, [x0]
>        adcs    x8, x13, x12
>        str     x8, [x0, #16]
>        ret
> 
> Yours,
> Shigeo
> _______________________________________________
> LLVM Developers mailing list
> llvm-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev