<html>
    <head>
      <base href="http://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - byte loads aren't fused into unaligned 16-bit load on ARM/AArch64"
   href="http://bugs.llvm.org/show_bug.cgi?id=32366">32366</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>byte loads aren't fused into unaligned 16-bit load on ARM/AArch64
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Backend: ARM
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>simon.hosie@arm.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr></table>
      <p>
        <div>
        <pre>(also affects AArch64, but I can only tag one component)

clang --target=arm-linux-gneabihf -march=armv7-a -O3 -S -xc -o- - &lt;&lt; EOF
#include &lt;stdint.h&gt;

uint16_t ld16(uint8_t const* p) {
  uint16_t r;
  __builtin_memcpy(&amp;r, p, sizeof(r));
  return r;
}

uint16_t ld16_bytes(uint8_t const* p) {
  uint16_t r = p[0] | (p[1] &lt;&lt; 8);
  return r;
}
EOF

gives:

ld16:
        ldrh    r0, [r0]
        bx      lr

ld16_bytes:
        ldrb    r1, [r0]
        ldrb    r0, [r0, #1]
        orr     r0, r1, r0, lsl #8
        bx      lr

For little endian targets I would expect both these functions to turn out the
same (like they do on x86).  32-bit and 64-bit loads seem to be rewritten
already; it's just 16-bit where they diverge.

Note that -march=armv7-a (or similar) is necessary to make the unaligned load
option available.</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>