[llvm-bugs] [Bug 32799] New: ARMv5 load codegen ignores alignment

via llvm-bugs llvm-bugs at lists.llvm.org
Wed Apr 26 02:32:01 PDT 2017


https://bugs.llvm.org/show_bug.cgi?id=32799

            Bug ID: 32799
           Summary: ARMv5 load codegen ignores alignment
           Product: libraries
           Version: trunk
          Hardware: Other
                OS: Linux
            Status: NEW
          Severity: normal
          Priority: P
         Component: Backend: ARM
          Assignee: unassignedbugs at nondot.org
          Reporter: llvm at joakim.fea.st
                CC: llvm-bugs at lists.llvm.org

ldc, the llvm D compiler, uses the following function to read possibly
unaligned integers in its exception-handling code:

uint udata4_read(ref ubyte* addr)
{
    // read udata4 from possibly unaligned `addr`
    import core.stdc.string : memcpy;
    uint udata4;
    memcpy(&udata4, addr, udata4.sizeof);
    addr += udata4.sizeof;
    return udata4;
}
https://github.com/ldc-developers/druntime/blob/24ce3878d8debf6cca9bb4f0a55bad6480f8c5de/src/ldc/eh/common.d#L80

Unoptimized, the function works fine and is compiled into the following IR and
ARM assembly with these llvm options, "-mtriple=armv5tej-none-linux-gnueabi
-float-abi=soft -gcc=arm-linux-gnueabi-gcc":

IR

; Function Attrs: noinline nounwind optnone
define i32 @_D3ldc2eh6common11udata4_readFKPhZk(i8** dereferenceable(4) %addr)
local_unnamed_addr #2 comdat {
  %udata4 = alloca i32, align 4                   ; [#uses = 4, size/byte = 4]
  store i32 0, i32* %udata4
  %1 = bitcast i32* %udata4 to i8*                ; [#uses = 1]
  %2 = load i8*, i8** %addr                       ; [#uses = 1]
  %3 = call i8* @memcpy(i8* %1, i8* %2, i32 4) #0 ; [#uses = 0]
  %4 = load i8*, i8** %addr                       ; [#uses = 1]
  %5 = getelementptr i8, i8* %4, i32 4            ; [#uses = 1, type = i8*]
  store i8* %5, i8** %addr
  %6 = load i32, i32* %udata4                     ; [#uses = 0]
  %7 = load i32, i32* %udata4                     ; [#uses = 1]
  ret i32 %7
}

ASM

    .section    .text._D3ldc2eh6common11udata4_readFKPhZk,"axG",%progbits,_D3ldc2eh6common11udata4_readFKPhZk,comdat
    .globl  _D3ldc2eh6common11udata4_readFKPhZk
    .p2align    2
    .type   _D3ldc2eh6common11udata4_readFKPhZk,%function
_D3ldc2eh6common11udata4_readFKPhZk:
    .fnstart
    .save   {r4, lr}
    push    {r4, lr}
    .pad    #8
    sub sp, sp, #8
    mov r4, r0
    mov r0, #0
    str r0, [sp, #4]
    ldr r1, [r4]
    add r0, sp, #4
    mov r2, #4
    bl  memcpy
    ldr r0, [r4]
    add r0, r0, #4
    str r0, [r4]
    ldr r0, [sp, #4]
    add sp, sp, #8
    pop {r4, lr}
    bx  lr
.Lfunc_end1:
    .size   _D3ldc2eh6common11udata4_readFKPhZk, .Lfunc_end1-_D3ldc2eh6common11udata4_readFKPhZk
    .cantunwind
    .fnend

Once aggressively optimized, it turns into the following IR and ASM, which
cause bad data to be read in and eventually make the exception-handling code
error out:

IR

; Function Attrs: norecurse nounwind
define i32 @_D3ldc2eh6common11udata4_readFKPhZk(i8** nocapture
dereferenceable(4) %addr) local_unnamed_addr #2 comdat {
  %1 = load i8*, i8** %addr, align 4              ; [#uses = 2]
  %2 = bitcast i8* %1 to i32*                     ; [#uses = 1]
  %3 = load i32, i32* %2, align 1                 ; [#uses = 1]
  %4 = getelementptr i8, i8* %1, i32 4            ; [#uses = 1, type = i8*]
  store i8* %4, i8** %addr, align 4
  ret i32 %3
}

ASM

    .section    .text._D3ldc2eh6common11udata4_readFKPhZk,"axG",%progbits,_D3ldc2eh6common11udata4_readFKPhZk,comdat
    .globl  _D3ldc2eh6common11udata4_readFKPhZk
    .p2align    2
    .type   _D3ldc2eh6common11udata4_readFKPhZk,%function
_D3ldc2eh6common11udata4_readFKPhZk:
    .fnstart
    ldr r2, [r0]
    ldr r1, [r2], #4
    str r2, [r0]
    mov r0, r1
    bx  lr
.Lfunc_end1:
    .size   _D3ldc2eh6common11udata4_readFKPhZk, .Lfunc_end1-_D3ldc2eh6common11udata4_readFKPhZk
    .cantunwind
    .fnend

The problem appears to be that the align-1 load in the latter IR is turned
into a word-sized, post-indexed load, "ldr r1, [r2], #4", in the ASM, an
instruction which doesn't allow unaligned accesses on ARMv5:

    The ARMv6 architecture introduced the first hardware support for unaligned
    accesses. ARM11 and Cortex-A/R processors can deal with unaligned accesses
    in hardware, removing the need for software routines. Support for unaligned
    accesses is limited to a sub-set of load/store instructions:

        LDRB/LDRSB/STRB
        LDRH/LDRSH/STRH
        LDR/STR
http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka15414.html
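
Since ARMv5 has none of that hardware support, an align-1 i32 load has to be
synthesized from byte accesses rather than a single word load. As a rough
sketch of what the load would need to be treated as equivalent to (hand-written
IR for illustration only, not compiler output, assuming the little-endian
armv5tej-none-linux-gnueabi target used above):

; Hypothetical byte-by-byte equivalent of "load i32, i32* %p, align 1"
; on a little-endian target; written by hand for illustration.
define i32 @load_u32_bytewise(i8* %p) {
  %p1 = getelementptr i8, i8* %p, i32 1
  %p2 = getelementptr i8, i8* %p, i32 2
  %p3 = getelementptr i8, i8* %p, i32 3
  %b0 = load i8, i8* %p, align 1        ; byte loads are always aligned
  %b1 = load i8, i8* %p1, align 1
  %b2 = load i8, i8* %p2, align 1
  %b3 = load i8, i8* %p3, align 1
  %z0 = zext i8 %b0 to i32
  %z1 = zext i8 %b1 to i32
  %z2 = zext i8 %b2 to i32
  %z3 = zext i8 %b3 to i32
  %s1 = shl i32 %z1, 8
  %s2 = shl i32 %z2, 16
  %s3 = shl i32 %z3, 24
  %o1 = or i32 %z0, %s1
  %o2 = or i32 %o1, %s2
  %o3 = or i32 %o2, %s3
  ret i32 %o3
}

On ARMv5 this would presumably lower to an ldrb/orr sequence, which is safe at
any alignment, rather than the single word ldr shown in the optimized ASM
above.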

This bug was found by an ldc developer; there is more discussion in this
GitHub thread:

https://github.com/ldc-developers/ldc/issues/2058#issuecomment-296656483
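
For reference, a reduced test case along these lines (hypothetical, written for
this report rather than extracted from ldc) should be enough to reproduce the
lowering with llc and the same "-mtriple=armv5tej-none-linux-gnueabi
-float-abi=soft" options, independent of D and the memcpy:

; Hypothetical reduced test case: a plain align-1 i32 load.
define i32 @load_u32_align1(i8* %p) {
  %1 = bitcast i8* %p to i32*
  %2 = load i32, i32* %1, align 1
  ret i32 %2
}

If the backend behaves as in the ldc output above, this presumably also comes
out as a single word ldr rather than the byte-wise expansion that ARMv5 needs.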
