[llvm-dev] Is the correct behavior of getelementptr i192* for opt + llc -march=aarch64?
Mehdi Amini via llvm-dev
llvm-dev at lists.llvm.org
Fri Nov 11 18:44:13 PST 2016
> On Nov 11, 2016, at 6:05 PM, MITSUNARI Shigeo via llvm-dev <llvm-dev at lists.llvm.org> wrote:
>
> Hi Mehdi,
>
>>> No, My target is x86-64, x86, arm, aarch64, ..., then I'll avoid using i192* and datalayout.
>>
>> There is nothing specific with i192. You will likely run into issues by not specifying the right datalayout.
>>
>> The optimizations will always run with a datalayout: if you don’t specify one there will be a default one, which can cause problems on some targets (like you saw on arm).
>> For instance, the optimizer will assume a pointer size and optimize based on this.
>
> I wrote the code without i192* as follows, and then I got what I wanted.
And what we’re trying to tell you is that this may “fix” *this* particular case, but that does not make it a correct solution.
—
Mehdi
> I'll rewrite the other code like this.
>
> // load 192-bit data from %r2
> define i192 @load192(i64* %r2)
> {
> %r3 = load i64, i64* %r2
> %r4 = zext i64 %r3 to i128
> %r6 = getelementptr i64, i64* %r2, i32 1
> %r7 = load i64, i64* %r6
> %r8 = zext i64 %r7 to i128
> %r9 = shl i128 %r8, 64
> %r10 = or i128 %r4, %r9
> %r11 = zext i128 %r10 to i192
> %r13 = getelementptr i64, i64* %r2, i32 2
> %r14 = load i64, i64* %r13
> %r15 = zext i64 %r14 to i192
> %r16 = shl i192 %r15, 128
> %r17 = or i192 %r11, %r16
> ret i192 %r17
> }
>
> /*
> struct i192_t {
> uint64_t v[3];
> };
> void add(i192_t *y, const i192_t* x)
> {
> *y = x[0] + x[1]; // pseudo code
> }
> */
> define void @add(i64* noalias %r1, i64* noalias %r2)
> {
> %r3 = call i192 @load192(i64* %r2)
> %r5 = getelementptr i64, i64* %r2, i32 3
> %r6 = call i192 @load192(i64* %r5)
> %r7 = add i192 %r3, %r6
> %r9 = getelementptr i64, i64* %r1, i32 0
> %r10 = trunc i192 %r7 to i64
> store i64 %r10, i64* %r9
> %r11 = lshr i192 %r7, 64
> %r13 = getelementptr i64, i64* %r1, i32 1
> %r14 = trunc i192 %r11 to i64
> store i64 %r14, i64* %r13
> %r15 = lshr i192 %r11, 64
> %r17 = getelementptr i64, i64* %r1, i32 2
> %r18 = trunc i192 %r15 to i64
> store i64 %r18, i64* %r17
> ret void
> }
>
> % opt-3.8 -O3 a.ll -o - | llc-3.8 -O3 -o - -march=x86-64
> add:
> movq 16(%rsi), %rax
> movq 24(%rsi), %rcx
> movq 32(%rsi), %rdx
> addq (%rsi), %rcx
> adcq 8(%rsi), %rdx
> adcq 40(%rsi), %rax
> movq %rcx, (%rdi)
> movq %rdx, 8(%rdi)
> movq %rax, 16(%rdi)
> retq
>
> % opt-3.8 -O3 a.ll -o - | llc-3.8 -O3 -o - -march=aarch64
> add:
> ldp x8, x9, [x1]
> ldp x10, x11, [x1, #24]
> ldr x12, [x1, #16]
> ldr x13, [x1, #40]
> adds x8, x10, x8
> adcs x9, x11, x9
> stp x8, x9, [x0]
> adcs x8, x13, x12
> str x8, [x0, #16]
> ret
>
> Yours,
> Shigeo
> _______________________________________________
> LLVM Developers mailing list
> llvm-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
More information about the llvm-dev mailing list