<html>

    <head>

      <base href="https://llvm.org/bugs/" />

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW --- - Loads from array into small struct are not fused."

   href="https://llvm.org/bugs/show_bug.cgi?id=28886">28886</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>Loads from array into small struct are not fused.

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>new-bugs

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>3.8

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Linux

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>normal

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>new bugs

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>edy.burt@gmail.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr>

        <tr>

          <th>Classification</th>

          <td>Unclassified

          </td>

        </tr></table>

      <p>

        <div>

        <pre>From the included testcase, only the fourth function is optimal - a single

unaligned load of i16, "movzx eax, word ptr [rdi]".

The other functions load the two bytes separately and pack them with shl and

or.

I believe it has to do with the fourth function using llvm.memcpy, which

somehow takes precedence over the individual loads and everything is kept as

one i16. 

On IRC "-mllvm -combine-loads" was suggested, which does turn the first three

functions into the same single i16 load as the fourth, but I've also been told

that it causes regressions in other situations so it's not enabled by default.

---

#include &lt;array&gt;

using namespace std;

typedef unsigned char i8;

struct XY { i8 x, y; };

XY direct(array&lt;i8, 2&gt; &amp;a) {

  return {a[0], a[1]};

}

XY vars(array&lt;i8, 2&gt; &amp;a) {

  i8 x = a[0], y = a[1];

  return {x, y};

}

XY array(array&lt;i8, 2&gt; &amp;a) {

  array&lt;i8, 2&gt; b;

  b[0] = a[0];

  b[1] = a[1];

  return {b[0], b[1]};

}

XY array_llvm_memcpy(array&lt;i8, 2&gt; &amp;a) {

  array&lt;i8, 2&gt; b;

  b = a;

  return {b[0], b[1]};

}</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>