<html>

    <head>

      <base href="https://llvm.org/bugs/" />

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW --- - &lt;12 x i8&gt; shuffle is lowered to a sequence of extracts and inserts."

   href="https://llvm.org/bugs/show_bug.cgi?id=29025">29025</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>&lt;12 x i8&gt; shuffle is lowered to a sequence of extracts and inserts.

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Linux

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>normal

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Backend: X86

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>mkuper@google.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr>

        <tr>

          <th>Classification</th>

          <td>Unclassified

          </td>

        </tr></table>

      <p>

        <div>

        <pre>Consider three equivalent IR functions:

define void @bad(&lt;4 x i8&gt; %a, &lt;4 x i8&gt; %b, &lt;4 x i8&gt; %c, &lt;12 x i8&gt; *%p) {

  %s1 = shufflevector &lt;4 x i8&gt; %a, &lt;4 x i8&gt; %b, &lt;8 x i32&gt; &lt;i32 0, i32 1, i32 2,

i32 3, i32 4, i32 5, i32 6, i32 7&gt;

  %s2 = shufflevector &lt;4 x i8&gt; %c, &lt;4 x i8&gt; undef, &lt;8 x i32&gt; &lt;i32 0, i32 1, i32

2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef&gt;

  %r = shufflevector &lt;8 x i8&gt; %s1, &lt;8 x i8&gt; %s2, &lt;12 x i32&gt; &lt;i32 0, i32 4, i32

8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11&gt;

  store &lt;12 x i8&gt; %r, &lt;12 x i8&gt;* %p, align 1

  ret void

}

define void @better(&lt;4 x i8&gt; %a, &lt;4 x i8&gt; %b, &lt;4 x i8&gt; %c, &lt;12 x i8&gt; *%p) {

  %s1 = shufflevector &lt;4 x i8&gt; %a, &lt;4 x i8&gt; %b, &lt;8 x i32&gt; &lt;i32 0, i32 1, i32 2,

i32 3, i32 4, i32 5, i32 6, i32 7&gt;

  %s2 = shufflevector &lt;4 x i8&gt; %c, &lt;4 x i8&gt; undef, &lt;8 x i32&gt; &lt;i32 0, i32 1, i32

2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef&gt;

  br label %foo

foo:

  %r = shufflevector &lt;8 x i8&gt; %s1, &lt;8 x i8&gt; %s2, &lt;12 x i32&gt; &lt;i32 0, i32 4, i32

8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11&gt;

  store &lt;12 x i8&gt; %r, &lt;12 x i8&gt;* %p, align 1

  ret void

}

define void @best(&lt;4 x i8&gt; %a, &lt;4 x i8&gt; %b, &lt;4 x i8&gt; %c, &lt;12 x i8&gt; *%p) {

  %s1 = shufflevector &lt;4 x i8&gt; %a, &lt;4 x i8&gt; %b, &lt;8 x i32&gt; &lt;i32 0, i32 1, i32 2,

i32 3, i32 4, i32 5, i32 6, i32 7&gt;

  %s2 = shufflevector &lt;4 x i8&gt; %c, &lt;4 x i8&gt; undef, &lt;8 x i32&gt; &lt;i32 0, i32 1, i32

2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef&gt;

  %r = shufflevector &lt;8 x i8&gt; %s1, &lt;8 x i8&gt; %s2, &lt;16 x i32&gt; &lt;i32 0, i32 4, i32

8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11, i32 undef,

i32 undef, i32 undef, i32 undef&gt;

  br label %foo

foo:

  %r2 = shufflevector &lt;16 x i8&gt; %r, &lt;16 x i8&gt; undef, &lt;12 x i32&gt; &lt;i32 0, i32 1,

i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11&gt;

  store &lt;12 x i8&gt; %r2, &lt;12 x i8&gt;* %p, align 1

  ret void

}

When compiling for AVX, for 'bad', we get:

    vpextrb    $0, %xmm1, %eax

    vpinsrb    $1, %eax, %xmm0, %xmm3

    vpextrb    $0, %xmm2, %eax

    vpinsrb    $2, %eax, %xmm3, %xmm3

    vpextrb    $4, %xmm0, %eax

    vpinsrb    $3, %eax, %xmm3, %xmm3

    vpextrb    $4, %xmm1, %eax

    vpinsrb    $4, %eax, %xmm3, %xmm3

    vpextrb    $4, %xmm2, %eax

    vpinsrb    $5, %eax, %xmm3, %xmm3

    vpextrb    $8, %xmm0, %eax

    vpinsrb    $6, %eax, %xmm3, %xmm3

    vpextrb    $8, %xmm1, %eax

    vpinsrb    $7, %eax, %xmm3, %xmm3

    vpextrb    $8, %xmm2, %eax

    vpinsrb    $8, %eax, %xmm3, %xmm3

    vpextrb    $12, %xmm0, %eax

    vpinsrb    $9, %eax, %xmm3, %xmm0

    vpextrb    $12, %xmm1, %eax

    vpinsrb    $10, %eax, %xmm0, %xmm0

    vpextrb    $12, %xmm2, %eax

    vpinsrb    $11, %eax, %xmm0, %xmm0

    vpextrd    $2, %xmm0, 8(%rdi)

    vmovq    %xmm0, (%rdi)

    retq

For 'better':

    vmovdqa    .LCPI1_0(%rip), %xmm3

    vpshufb    %xmm3, %xmm1, %xmm1

    vpshufb    %xmm3, %xmm0, %xmm0

    vpunpcklqdq    %xmm1, %xmm0, %xmm0

    vpshufb    %xmm3, %xmm2, %xmm1

    vpshufb    .LCPI1_1(%rip), %xmm1, %xmm1

    vpshufb    .LCPI1_2(%rip), %xmm0, %xmm0

    vpor    %xmm1, %xmm0, %xmm0

    vpextrd    $2, %xmm0, 8(%rdi)

    vmovq    %xmm0, (%rdi)

    retq

And for 'best':

    vmovdqa    .LCPI2_0(%rip), %xmm3

    vpshufb    %xmm3, %xmm1, %xmm1

    vpshufb    %xmm3, %xmm0, %xmm0

    vpunpckldq    %xmm1, %xmm0, %xmm0

    vpshufb    %xmm3, %xmm2, %xmm1

    vpunpcklbw    %xmm1, %xmm0, %xmm0

    vpshufb    .LCPI2_1(%rip), %xmm0, %xmm0

    vpextrd    $2, %xmm0, 8(%rdi)

    vmovq    %xmm0, (%rdi)

The first problem is that when constructing a &lt;12 x i8&gt; out of two &lt;8 x i8&gt;-s,

isel produces a series of extracts + build_vector instead of a vector_shuffle,

because the mask length is not a multiple of the input length. This happens for

both 'bad' and 'better'. 'best' avoids this problem, because it never sees a

v12i8 being constructed directly, only extracted from a v16i8. 

The second problem is that for 'bad' we don't manage to clean this up. In

'better', we are saved by a dagcombine, but for 'bad', no such luck. This has

to do with the way we end up legalizing the sources of the build_vector. In

'better' we extract from two v8i16 vectors, and in 'bad' from three v4i32

vectors and an undef.</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>