<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - ARMv5 load codegen ignores alignment"
   href="https://bugs.llvm.org/show_bug.cgi?id=32799">32799</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>ARMv5 load codegen ignores alignment
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>Other
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>normal
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Backend: ARM
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>llvm@joakim.fea.st
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr></table>
      <p>
        <div>
        <pre>ldc, the llvm D compiler, uses the following function to read possibly
unaligned integers in its exception-handling code:

uint udata4_read(ref ubyte* addr)
{
    // read udata4 from possibly unaligned `addr`
    import core.stdc.string : memcpy;
    uint udata4;
    memcpy(&amp;udata4, addr, udata4.sizeof);
    addr += udata4.sizeof;
    return udata4;
}
<a href="https://github.com/ldc-developers/druntime/blob/24ce3878d8debf6cca9bb4f0a55bad6480f8c5de/src/ldc/eh/common.d#L80">https://github.com/ldc-developers/druntime/blob/24ce3878d8debf6cca9bb4f0a55bad6480f8c5de/src/ldc/eh/common.d#L80</a>

Unoptimized, it works fine and is turned into the following IR and ARM assembly
with these llvm options, "-mtriple=armv5tej-none-linux-gnueabi -float-abi=soft
-gcc=arm-linux-gnueabi-gcc":

IR

; Function Attrs: noinline nounwind optnone
define i32 @_D3ldc2eh6common11udata4_readFKPhZk(i8** dereferenceable(4) %addr)
local_unnamed_addr #2 comdat {
  %udata4 = alloca i32, align 4                   ; [#uses = 4, size/byte = 4]
  store i32 0, i32* %udata4
  %1 = bitcast i32* %udata4 to i8*                ; [#uses = 1]
  %2 = load i8*, i8** %addr                       ; [#uses = 1]
  %3 = call i8* @memcpy(i8* %1, i8* %2, i32 4) #0 ; [#uses = 0]
  %4 = load i8*, i8** %addr                       ; [#uses = 1]
  %5 = getelementptr i8, i8* %4, i32 4            ; [#uses = 1, type = i8*]
  store i8* %5, i8** %addr
  %6 = load i32, i32* %udata4                     ; [#uses = 0]
  %7 = load i32, i32* %udata4                     ; [#uses = 1]
  ret i32 %7
}

ASM

.section   
.text._D3ldc2eh6common11udata4_readFKPhZk,"axG",%progbits,_D3ldc2eh6common11udata4_readFKPhZk,comdat
    .globl  _D3ldc2eh6common11udata4_readFKPhZk
    .p2align    2
    .type   _D3ldc2eh6common11udata4_readFKPhZk,%function
_D3ldc2eh6common11udata4_readFKPhZk:
    .fnstart
    .save   {r4, lr}
    push    {r4, lr}
    .pad    #8
    sub sp, sp, #8
    mov r4, r0
    mov r0, #0
    str r0, [sp, #4]
    ldr r1, [r4]
    add r0, sp, #4
    mov r2, #4
    bl  memcpy
    ldr r0, [r4]
    add r0, r0, #4
    str r0, [r4]
    ldr r0, [sp, #4]
    add sp, sp, #8
    pop {r4, lr}
    bx  lr
.Lfunc_end1:
    .size   _D3ldc2eh6common11udata4_readFKPhZk,
.Lfunc_end1-_D3ldc2eh6common11udata4_readFKPhZk
    .cantunwind
    .fnend

Once aggressively optimized, it turns into this IR and ASM, which causes bad
data to be read in and eventually errors out:

IR

; Function Attrs: norecurse nounwind
define i32 @_D3ldc2eh6common11udata4_readFKPhZk(i8** nocapture
dereferenceable(4) %addr) local_unnamed_addr #2 comdat {
  %1 = load i8*, i8** %addr, align 4              ; [#uses = 2]
  %2 = bitcast i8* %1 to i32*                     ; [#uses = 1]
  %3 = load i32, i32* %2, align 1                 ; [#uses = 1]
  %4 = getelementptr i8, i8* %1, i32 4            ; [#uses = 1, type = i8*]
  store i8* %4, i8** %addr, align 4
  ret i32 %3
}

ASM

.section   
.text._D3ldc2eh6common11udata4_readFKPhZk,"axG",%progbits,_D3ldc2eh6common11udata4_readFKPhZk,comdat
    .globl  _D3ldc2eh6common11udata4_readFKPhZk
    .p2align    2
    .type   _D3ldc2eh6common11udata4_readFKPhZk,%function
_D3ldc2eh6common11udata4_readFKPhZk:
    .fnstart
    ldr r2, [r0]
    ldr r1, [r2], #4
    str r2, [r0]
    mov r0, r1
    bx  lr
.Lfunc_end1:
    .size   _D3ldc2eh6common11udata4_readFKPhZk,
.Lfunc_end1-_D3ldc2eh6common11udata4_readFKPhZk
    .cantunwind
    .fnend

The problem appears to be that the load with align 1 from the latter IR is
turned into a single word-sized ldr (post-incremented by 4) in the ASM, an
instruction which doesn't allow unaligned accesses on ARMv5. Quoting ARM:

The ARMv6 architecture introduced the first hardware support for unaligned
accesses. ARM11 and Cortex-A/R processors can deal with unaligned accesses in
hardware, removing the need for software routines.
Support for unaligned accesses is limited to a sub-set of load/store
instructions:
LDRB/LDRSB/STRB
LDRH/LDRSH/STRH
LDR/STR
<a href="http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka15414.html">http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka15414.html</a>

This bug was found by an LDC developer; more discussion is in this GitHub thread:

<a href="https://github.com/ldc-developers/ldc/issues/2058#issuecomment-296656483">https://github.com/ldc-developers/ldc/issues/2058#issuecomment-296656483</a></pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>