[llvm-dev] Is the correct behavior of getelementptr i192* for opt + llc -march=aarch64?
MITSUNARI Shigeo via llvm-dev
llvm-dev at lists.llvm.org
Fri Nov 11 18:05:24 PST 2016
Hi Mehdi,
>> No, My target is x86-64, x86, arm, aarch64, ..., then I'll avoid using i192* and datalayout.
>
>There is nothing specific with i192. You will likely run into issues by not specifying the right datalayout.
>
>The optimizations will always run with a datalayout: if you don’t specify one there will be a default one, which can cause problems on some target (like you saw on arm).
>For instance, the optimizer will assume a pointer size and optimize based on this.
I rewrote the code without i192*, as shown below, and now I get the output I wanted.
I'll rewrite the other code in the same way.
; load192 - load 192-bit data from %r2.
;
; The value is read as three consecutive i64 limbs starting at %r2.  The limb
; at index k (k = 0, 1, 2) supplies bits [64*k, 64*k + 63] of the result, i.e.
; the limb at the lowest address is the least-significant one.  Only i64 loads
; are performed; no i192* pointer is ever formed.
;
; Note: LLVM IR comments start with ';' ('//' and '/* */' are not accepted by
; the IR parser).
define i192 @load192(i64* %r2)
{
  ; limb 0 -> bits 0..63
  %r3 = load i64, i64* %r2
  %r4 = zext i64 %r3 to i128

  ; limb 1 -> bits 64..127
  %r6 = getelementptr i64, i64* %r2, i32 1
  %r7 = load i64, i64* %r6
  %r8 = zext i64 %r7 to i128
  %r9 = shl i128 %r8, 64
  %r10 = or i128 %r4, %r9

  ; widen the 128-bit partial value before merging the top limb
  %r11 = zext i128 %r10 to i192

  ; limb 2 -> bits 128..191
  %r13 = getelementptr i64, i64* %r2, i32 2
  %r14 = load i64, i64* %r13
  %r15 = zext i64 %r14 to i192
  %r16 = shl i192 %r15, 128
  %r17 = or i192 %r11, %r16
  ret i192 %r17
}
; C-like pseudo code for @add below:
;
;   struct i192_t {
;     uint64_t v[3];
;   };
;   void add(i192_t *y, const i192_t* x)
;   {
;     *y = x[0] + x[1]; // pseudo code
;   }
;
; add - 192-bit addition expressed with i64 memory accesses only.
;
;   %r1  destination: three i64 limbs, least-significant limb first
;   %r2  source: six i64 limbs, i.e. two 192-bit operands stored back to back
;
; Both operands are assembled with @load192, added as i192 (the add carries no
; nsw/nuw flags, so the result wraps modulo 2^192), and the sum is written
; back one 64-bit limb at a time.
;
; Note: LLVM IR comments start with ';' ('/* */' is not accepted by the IR
; parser).
define void @add(i64* noalias %r1, i64* noalias %r2)
{
  ; first operand: limbs 0..2 of %r2
  %r3 = call i192 @load192(i64* %r2)

  ; second operand: limbs 3..5 of %r2
  %r5 = getelementptr i64, i64* %r2, i32 3
  %r6 = call i192 @load192(i64* %r5)

  %r7 = add i192 %r3, %r6

  ; store bits 0..63 of the sum to limb 0 of %r1
  %r9 = getelementptr i64, i64* %r1, i32 0
  %r10 = trunc i192 %r7 to i64
  store i64 %r10, i64* %r9

  ; store bits 64..127 of the sum to limb 1 of %r1
  %r11 = lshr i192 %r7, 64
  %r13 = getelementptr i64, i64* %r1, i32 1
  %r14 = trunc i192 %r11 to i64
  store i64 %r14, i64* %r13

  ; store bits 128..191 of the sum to limb 2 of %r1
  %r15 = lshr i192 %r11, 64
  %r17 = getelementptr i64, i64* %r1, i32 2
  %r18 = trunc i192 %r15 to i64
  store i64 %r18, i64* %r17
  ret void
}
% opt-3.8 -O3 a.ll -o - | llc-3.8 -O3 -o - -march=x86-64
# add (x86-64, AT&T syntax): llc output for the IR function @add above.
# Six 64-bit limbs are read through %rsi (offsets 0..40) and three result
# limbs are written through %rdi (offsets 0..16); the 192-bit sum is formed
# with one add followed by two add-with-carry instructions.
# NOTE(review): %rdi/%rsi match the first two integer argument registers of
# the SysV AMD64 convention (%r1, %r2 in the IR); the target triple is not
# shown in this message -- confirm.
add:
movq 16(%rsi), %rax   # rax = limb 2 of the first operand
movq 24(%rsi), %rcx   # rcx = limb 0 of the second operand
movq 32(%rsi), %rdx   # rdx = limb 1 of the second operand
addq (%rsi), %rcx     # low limbs: add limb 0 of the first operand, sets CF
adcq 8(%rsi), %rdx    # middle limbs: add limb 1 of the first operand + CF
adcq 40(%rsi), %rax   # high limbs: add limb 2 of the second operand + CF
movq %rcx, (%rdi)     # result limb 0
movq %rdx, 8(%rdi)    # result limb 1
movq %rax, 16(%rdi)   # result limb 2
retq
% opt-3.8 -O3 a.ll -o - | llc-3.8 -O3 -o - -march=aarch64
// add (AArch64): llc output for the IR function @add above.
// Six 64-bit limbs are read through x1 (offsets 0..40) and three result
// limbs are written through x0 (offsets 0..16); the carry is propagated
// through the flags by adds/adcs/adcs.
// NOTE(review): x0/x1 match the first two argument registers of AAPCS64
// (%r1, %r2 in the IR); the target triple is not shown -- confirm.
add:
ldp x8, x9, [x1]          // x8, x9 = limbs 0 and 1 of the first operand
ldp x10, x11, [x1, #24]   // x10, x11 = limbs 0 and 1 of the second operand
ldr x12, [x1, #16]        // x12 = limb 2 of the first operand
ldr x13, [x1, #40]        // x13 = limb 2 of the second operand
adds x8, x10, x8          // low limbs, sets the carry flag
adcs x9, x11, x9          // middle limbs + carry
stp x8, x9, [x0]          // result limbs 0 and 1 (the store leaves the flags intact)
adcs x8, x13, x12         // high limbs + carry; x8 is reused after being stored
str x8, [x0, #16]         // result limb 2
ret
Yours,
Shigeo
More information about the llvm-dev
mailing list