<html>
    <head>
      <!-- Base URL against which any relative URLs in this message are resolved. -->
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - Missing optimization opportunity: non-power-of-two integer loading with fewer movs"
   href="https://bugs.llvm.org/show_bug.cgi?id=41216">41216</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Missing optimization opportunity: non-power-of-two integer loading with fewer movs
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>clang
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Windows NT
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Keywords</th>
          <td>code-quality
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>LLVM Codegen
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedclangbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>nok.raven@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org, neeilans@live.com, richard-llvm@metafoo.co.uk
          </td>
        </tr></table>
      <p>
        <div>
        <pre>Currently non-power-of-two integers loads are done byte-per-byte:

#include <cstdint>

// could be two loads instead of three: (u16 << 8) | u8
std::uint32_t foo_24(unsigned char const* p)
{
    return static_cast<std::uint32_t>(p[0])
        | (static_cast<std::uint32_t>(p[1]) << 8)
        | (static_cast<std::uint32_t>(p[2]) << 16)
        ;
}

// could be two loads instead of five: (u32 << 8) | u8
std::uint64_t foo_40(unsigned char const* p)
{
    return static_cast<std::uint64_t>(p[0])
        | (static_cast<std::uint64_t>(p[1]) << 8)
        | (static_cast<std::uint64_t>(p[2]) << 16)
        | (static_cast<std::uint64_t>(p[3]) << 24)
        | (static_cast<std::uint64_t>(p[4]) << 32)
        ;
}

// could be two loads instead of six: (u32 << 16) | u16
std::uint64_t foo_48(unsigned char const* p)
{
    return static_cast<std::uint64_t>(p[0])
        | (static_cast<std::uint64_t>(p[1]) << 8)
        | (static_cast<std::uint64_t>(p[2]) << 16)
        | (static_cast<std::uint64_t>(p[3]) << 24)
        | (static_cast<std::uint64_t>(p[4]) << 32)
        | (static_cast<std::uint64_t>(p[5]) << 40)
        ;
}

// could be three loads instead of seven: (u32 << 24) | (u16 << 8) | u8
std::uint64_t foo_56(unsigned char const* p)
{
    return static_cast<std::uint64_t>(p[0])
        | (static_cast<std::uint64_t>(p[1]) << 8)
        | (static_cast<std::uint64_t>(p[2]) << 16)
        | (static_cast<std::uint64_t>(p[3]) << 24)
        | (static_cast<std::uint64_t>(p[4]) << 32)
        | (static_cast<std::uint64_t>(p[5]) << 40)
        | (static_cast<std::uint64_t>(p[6]) << 48)
        ;
}

<a href="https://godbolt.org/z/Re7dWL">https://godbolt.org/z/Re7dWL</a>

GCC produces better code (however currently it optimizes only 32bit loads
<a href="https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89809">https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89809</a>)</pre>
        </div>
      </p>


      <!-- Notification footer: separated from the bug content by a rule, it lists
           the reason(s) the recipient was sent this message. -->
      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>