<!doctype html>
<html lang="en">

    <head>

      <meta charset="utf-8">

      <title>[Bug 33325] [X86][SSE] Improve equality memcmp support</title>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link bz_status_NEW"
   title="NEW - [X86][SSE] Improve equality memcmp support"
   href="https://bugs.llvm.org/show_bug.cgi?id=33325">33325</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>[X86][SSE] Improve equality memcmp support

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Windows NT

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Backend: X86

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>llvm-dev@redking.me.uk

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>filcab@gmail.com, llvm-bugs@lists.llvm.org, spatel+llvm@rotateright.com

          </td>

        </tr></table>

        <div>

        <pre>int cmpeq16(const char *a, const char *b) {

        return 0 == __builtin_memcmp(a, b, 16);

}

int cmpeq32(const char *a, const char *b) {

        return 0 == __builtin_memcmp(a, b, 32);

}

On SSE2-AVX1 targets, equality memcmp of 16 bytes lowers using the SIMD unit

but remains scalarized for 32 byte compares:

cmpeq16(char const*, char const*):                       # @cmpeq16(char

const*, char const*)

        vmovdqu (%rdi), %xmm0

        xorl    %eax, %eax

        vpcmpeqb        (%rsi), %xmm0, %xmm0

        vpmovmskb       %xmm0, %ecx

        cmpl    $65535, %ecx            # imm = 0xFFFF

        sete    %al

        retq

cmpeq32(char const*, char const*):                       # @cmpeq32(char

const*, char const*)

        movq    16(%rdi), %rax

        movq    (%rdi), %rcx

        movq    8(%rdi), %rdx

        movq    24(%rdi), %rdi

        xorq    24(%rsi), %rdi

        xorq    8(%rsi), %rdx

        xorq    16(%rsi), %rax

        xorq    (%rsi), %rcx

        orq     %rax, %rcx

        orq     %rdi, %rdx

        xorl    %eax, %eax

        orq     %rdx, %rcx

        sete    %al

        retq

cmpeq32 is even worse on 32-bit targets....

Ideally it'd be something like:

cmpeq32(char const*, char const*):

        vmovdqu (%rdi), %xmm0

        vmovdqu 16(%rdi), %xmm1

        xorl    %eax, %eax

        vpcmpeqb        (%rsi), %xmm0, %xmm0

        vpcmpeqb        16(%rsi), %xmm1, %xmm1

        vpand        %xmm1, %xmm0, %xmm0

        vpmovmskb       %xmm0, %ecx

        cmpl    $65535, %ecx            # imm = 0xFFFF

        sete    %al

        retq

I'm not sure what the upper limit should be but 32-bytes on SSE2-AVX1 and

64-bytes on AVX2 should definitely be fine (no idea what the best solution is

on AVX512).</pre>

        </div>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>