<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - potentially missing peephole for comparing mask vector against all zeroes"
   href="https://bugs.llvm.org/show_bug.cgi?id=51259">51259</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>potentially missing peephole for comparing mask vector against all zeroes
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>All
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Backend: X86
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>richard-llvm@metafoo.co.uk
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>craig.topper@gmail.com, llvm-bugs@lists.llvm.org, llvm-dev@redking.me.uk, pengfei.wang@intel.com, spatel+llvm@rotateright.com
          </td>
        </tr></table>
        <div>
        <pre>Live demo: <a href="https://godbolt.org/z/a6YaahdPP">https://godbolt.org/z/a6YaahdPP</a>

Example:

[[gnu::weak]] void do_this() {}
[[gnu::weak]] void do_that() {}

void f1(unsigned char const p[8]) {
  if (p[0] != 0x00 & p[1] != 0x00 & p[2] != 0x00 & p[3] != 0x00 & p[4] != 0x00 &
      p[5] != 0x00 & p[6] != 0x00 & p[7] != 0x00) {
    do_this();
  } else {
    do_that();
  }
}

void f2(unsigned const char *p) {
  using T [[gnu::vector_size(8), gnu::aligned(1)]] = unsigned char;
  T same = *(T *)p == (T){0, 0, 0, 0, 0, 0, 0, 0};
  if ((unsigned long)same == 0) {
    do_this();
  } else {
    do_that();
  }
}

This results in the following:

f1(unsigned char const*):                               # @f1(unsigned char const*)
        vmovq   xmm0, qword ptr [rdi]           # xmm0 = mem[0],zero
        vpxor   xmm1, xmm1, xmm1
        vpcmpeqb        xmm0, xmm0, xmm1
        vpmovmskb       eax, xmm0
        not     eax
        cmp     al, -1
        jne     ...

f2(unsigned char const*):                               # @f2(unsigned char const*)
        vmovq   xmm0, qword ptr [rdi]           # xmm0 = mem[0],zero
        vpxor   xmm1, xmm1, xmm1
        vpcmpeqb        xmm0, xmm0, xmm1
        vmovq   rax, xmm0
        test    rax, rax
        je      ...

I think these should produce the same assembly, and the result from f2 looks
better to me (though both are the same size). Presumably we'd need to recognize
that after vpcmpeqb, each lane in xmm0 is either all-zeros or all-ones, so the
vpmovmskb is redundant.</pre>
        </div>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>