<!doctype html>
<html lang="en">

    <head>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - Extra bitcasts prevent simplification of bit select"

   href="https://bugs.llvm.org/show_bug.cgi?id=34047">34047</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>Extra bitcasts prevent simplification of bit select

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Windows NT

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Common Code Generator Code

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>llvm-dev@redking.me.uk

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org, spatel+llvm@rotateright.com

          </td>

        </tr></table>

      <p>

        <div>

        <pre>Two versions of the same 'select based on comparison' pattern, one using

generic vector types and the other using SSE intrinsics.

The presence of the bitcasts between SSE types prevents the recognition of the

select.

#include &lt;x86intrin.h&gt;

__v16qi cmp_i8_sel_v16i8(__v16qi a, __v16qi b, __v16qi c, __v16qi d) {

  __v16qi cc = _mm_cmpeq_epi8(a, b);

  return (c &amp; ~cc) | (d &amp; cc);

}

__m128i cmp_i8_sel_m128i(__m128i a, __m128i b, __m128i c, __m128i d) {

  __m128i cc = _mm_cmpeq_epi8(a, b);

  return _mm_or_si128(_mm_andnot_si128(cc, c), _mm_and_si128(cc, d));

}

define &lt;16 x i8&gt; @cmp_i8_sel_v16i8(&lt;16 x i8&gt;, &lt;16 x i8&gt;, &lt;16 x i8&gt;, &lt;16 x i8&gt;)

{

  %5 = icmp eq &lt;16 x i8&gt; %0, %1

  %6 = select &lt;16 x i1&gt; %5, &lt;16 x i8&gt; %3, &lt;16 x i8&gt; %2

  ret &lt;16 x i8&gt; %6

}

define &lt;2 x i64&gt; @cmp_i8_sel_m128i(&lt;2 x i64&gt;, &lt;2 x i64&gt;, &lt;2 x i64&gt;, &lt;2 x i64&gt;)

{

  %5 = bitcast &lt;2 x i64&gt; %0 to &lt;16 x i8&gt;

  %6 = bitcast &lt;2 x i64&gt; %1 to &lt;16 x i8&gt;

  %7 = icmp eq &lt;16 x i8&gt; %5, %6

  %8 = sext &lt;16 x i1&gt; %7 to &lt;16 x i8&gt;

  %9 = bitcast &lt;16 x i8&gt; %8 to &lt;2 x i64&gt;

  %10 = xor &lt;2 x i64&gt; %9, &lt;i64 -1, i64 -1&gt;

  %11 = and &lt;2 x i64&gt; %10, %2

  %12 = and &lt;2 x i64&gt; %9, %3

  %13 = or &lt;2 x i64&gt; %11, %12

  ret &lt;2 x i64&gt; %13

}

Mind you, on X86 at least it doesn't affect final codegen:

cmp_i8_sel_v16i8:

  vpcmpeqb %xmm1, %xmm0, %xmm0

  vpblendvb %xmm0, %xmm3, %xmm2, %xmm0

  retq

cmp_i8_sel_m128i:

  vpcmpeqb %xmm1, %xmm0, %xmm0

  vpblendvb %xmm0, %xmm3, %xmm2, %xmm0

  retq</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>