[PATCH] LLVM targeting aarch64 doesn't correctly produce aligned accesses for non-aligned data at -O0/fast-isel (-mno-unaligned-access)

Alexandros Lamprineas alexandros.lamprineas at arm.com
Wed Jun 10 08:32:01 PDT 2015


Hi t.p.northover, mcrosier,

{F556446}

Clang doesn't correctly generate loads in the presence of the -mno-unaligned-access flag. At -O0 it produces:
$ clang --target=aarch64-arm-none-eabi -mno-unaligned-access -O0 -S test.c -o-
        .text
        .file   "test.c"
        .globl  foo
        .align  2
        .type   foo,@function
foo:                                    // @foo
// BB#0:                                // %entry
        stp     x29, x30, [sp, #-16]!
        mov      x29, sp
        sub     sp, sp, #32             // =32
        adrp    x8, a1
        add     x8, x8, :lo12:a1
        movz    x2, #0x5
        sub     x9, x29, #8             // =8
        add     x10, sp, #16            // =16
        str     x0, [sp, #16]
        mov      x0, x9
        mov      x1, x10
        str     x8, [sp, #8]            // 8-byte Folded Spill
        bl      memcpy
        ldr     x8, [sp, #8]            // 8-byte Folded Reload
        ldur    w11, [x8, #1]
        ldur    w12, [x29, #-7]
        add      w0, w11, w12
        mov      sp, x29
        ldp     x29, x30, [sp], #16
        ret
...

At -O1 and above, it produces correct code, using ldrb's to access non-aligned data:
clang --target=aarch64-arm-none-eabi -mno-unaligned-access -O1 -S test.c -o-
        .text
        .file   "test.c"
        .globl  foo
        .align  2
        .type   foo,@function
foo:                                    // @foo
// BB#0:                                // %entry
        adrp    x8, a1
        add     x8, x8, :lo12:a1
        ldrb    w9, [x8, #1]!
        ldrb    w10, [x8, #3]
        ldrb    w11, [x8, #2]
        ldrb    w8, [x8, #1]
        lsr     x12, x0, #8
        bfi     w11, w10, #8, #8
        bfi     w9, w8, #8, #8
        bfi     w9, w11, #16, #16
        add      w0, w9, w12
        ret
...

The root cause seems to be that fast-isel does not honor -mno-unaligned-access and still emits an unaligned load:
clang --target=aarch64-arm-none-eabi -mno-unaligned-access -O1 -mllvm -fast-isel -S test.c -o-
        .text
        .file   "test.c"
        .globl  foo
        .align  2
        .type   foo, at function
foo:                                    // @foo
// BB#0:                                // %entry
        adrp    x8, a1
        add     x8, x8, :lo12:a1
        ldur    w8, [x8, #1]
        lsr     x9, x0, #8
        add      w0, w8, w9
        ret
...

REPOSITORY
  rL LLVM

http://reviews.llvm.org/D10360

Files:
  lib/Target/AArch64/AArch64FastISel.cpp
  test/CodeGen/AArch64/arm64-strict-align.ll

Index: test/CodeGen/AArch64/arm64-strict-align.ll
===================================================================
--- test/CodeGen/AArch64/arm64-strict-align.ll
+++ test/CodeGen/AArch64/arm64-strict-align.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-no-strict-align | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align | FileCheck %s --check-prefix=CHECK-STRICT
+; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align -fast-isel | FileCheck %s --check-prefix=CHECK-STRICT
 
 define i32 @f0(i32* nocapture %p) nounwind {
 ; CHECK-STRICT: ldrh [[HIGH:w[0-9]+]], [x0, #2]
Index: lib/Target/AArch64/AArch64FastISel.cpp
===================================================================
--- lib/Target/AArch64/AArch64FastISel.cpp
+++ lib/Target/AArch64/AArch64FastISel.cpp
@@ -1678,6 +1678,9 @@
 
 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
                                    bool WantZExt, MachineMemOperand *MMO) {
+  if(!TLI.allowsMisalignedMemoryAccesses(VT))
+    return 0;
+
   // Simplify this down to something we can handle.
   if (!simplifyAddress(Addr, VT))
     return 0;
@@ -1962,6 +1965,9 @@
 
 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
                                 MachineMemOperand *MMO) {
+  if(!TLI.allowsMisalignedMemoryAccesses(VT))
+    return false;
+
   // Simplify this down to something we can handle.
   if (!simplifyAddress(Addr, VT))
     return false;

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D10360.27443.patch
Type: text/x-patch
Size: 1568 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150610/66e036d2/attachment.bin>


More information about the llvm-commits mailing list