[PATCH] LLVM targeting aarch64 doesn't correctly produce aligned accesses for non-aligned data at -O0/fast-isel (-mno-unaligned-access)
Alexandros Lamprineas
alexandros.lamprineas at arm.com
Wed Jun 10 08:32:01 PDT 2015
Hi t.p.northover, mcrosier,
{F556446}
Clang doesn't correctly generate loads and stores in the presence of the -mno-unaligned-access flag. At -O0 it produces:
$ clang --target=aarch64-arm-none-eabi -mno-unaligned-access -O0 -S test.c -o-
.text
.file "test.c"
.globl foo
.align 2
.type foo,@function
foo: // @foo
// BB#0: // %entry
stp x29, x30, [sp, #-16]!
mov x29, sp
sub sp, sp, #32 // =32
adrp x8, a1
add x8, x8, :lo12:a1
movz x2, #0x5
sub x9, x29, #8 // =8
add x10, sp, #16 // =16
str x0, [sp, #16]
mov x0, x9
mov x1, x10
str x8, [sp, #8] // 8-byte Folded Spill
bl memcpy
ldr x8, [sp, #8] // 8-byte Folded Reload
ldur w11, [x8, #1]
ldur w12, [x29, #-7]
add w0, w11, w12
mov sp, x29
ldp x29, x30, [sp], #16
ret
...
At -O1 and above, it produces correct code, using ldrb's to access non-aligned data:
clang --target=aarch64-arm-none-eabi -mno-unaligned-access -O1 -S test.c -o-
.text
.file "test.c"
.globl foo
.align 2
.type foo,@function
foo: // @foo
// BB#0: // %entry
adrp x8, a1
add x8, x8, :lo12:a1
ldrb w9, [x8, #1]!
ldrb w10, [x8, #3]
ldrb w11, [x8, #2]
ldrb w8, [x8, #1]
lsr x12, x0, #8
bfi w11, w10, #8, #8
bfi w9, w8, #8, #8
bfi w9, w11, #16, #16
add w0, w9, w12
ret
...
The root cause seems to be that fast-isel still emits unaligned accesses even when -mno-unaligned-access is specified:
clang --target=aarch64-arm-none-eabi -mno-unaligned-access -O1 -mllvm -fast-isel -S test.c -o-
.text
.file "test.c"
.globl foo
.align 2
.type foo,@function
foo: // @foo
// BB#0: // %entry
adrp x8, a1
add x8, x8, :lo12:a1
ldur w8, [x8, #1]
lsr x9, x0, #8
add w0, w8, w9
ret
...
REPOSITORY
rL LLVM
http://reviews.llvm.org/D10360
Files:
lib/Target/AArch64/AArch64FastISel.cpp
test/CodeGen/AArch64/arm64-strict-align.ll
Index: test/CodeGen/AArch64/arm64-strict-align.ll
===================================================================
--- test/CodeGen/AArch64/arm64-strict-align.ll
+++ test/CodeGen/AArch64/arm64-strict-align.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-no-strict-align | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align | FileCheck %s --check-prefix=CHECK-STRICT
+; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align -fast-isel | FileCheck %s --check-prefix=CHECK-STRICT
define i32 @f0(i32* nocapture %p) nounwind {
; CHECK-STRICT: ldrh [[HIGH:w[0-9]+]], [x0, #2]
Index: lib/Target/AArch64/AArch64FastISel.cpp
===================================================================
--- lib/Target/AArch64/AArch64FastISel.cpp
+++ lib/Target/AArch64/AArch64FastISel.cpp
@@ -1678,6 +1678,9 @@
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
bool WantZExt, MachineMemOperand *MMO) {
+ if(!TLI.allowsMisalignedMemoryAccesses(VT))
+ return 0;
+
// Simplify this down to something we can handle.
if (!simplifyAddress(Addr, VT))
return 0;
@@ -1962,6 +1965,9 @@
bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
MachineMemOperand *MMO) {
+ if(!TLI.allowsMisalignedMemoryAccesses(VT))
+ return false;
+
// Simplify this down to something we can handle.
if (!simplifyAddress(Addr, VT))
return false;
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D10360.27443.patch
Type: text/x-patch
Size: 1568 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150610/66e036d2/attachment.bin>
More information about the llvm-commits
mailing list