<html>
    <head>
      <base href="https://llvm.org/bugs/" />
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW --- - Regression in x86 codegen using _mm256 intrinsic"
   href="https://llvm.org/bugs/show_bug.cgi?id=28136">28136</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Regression in x86 codegen using _mm256 intrinsic
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>clang
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>3.7
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>All
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>normal
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>LLVM Codegen
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedclangbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>deadalnix@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr>

        <tr>
          <th>Classification</th>
          <td>Unclassified
          </td>
        </tr></table>
      <p>
        <div>
        <pre>Sample code:

#include &lt;x86intrin.h&gt;

__m256i foo(__m256i a, __m256i b)
{
  a = _mm256_unpacklo_epi8(a, b);
  return _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
}

Assembly output on 3.6 :

foo(long long __vector(4), long long __vector(4)):                          #
@foo(long long __vector(4), long long __vector(4))
        vpunpcklbw      ymm0, ymm0, ymm1 # ymm0 =
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
        vpermq  ymm0, ymm0, -40         # ymm0 = ymm0[0,2,1,3]
        ret

Assembly output on 3.7 onward :

.LCPI0_0:
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .byte   0                       # 0x0
        .zero   1
        .byte   1                       # 0x1
        .zero   1
        .byte   2                       # 0x2
        .zero   1
        .byte   3                       # 0x3
        .zero   1
        .byte   4                       # 0x4
        .zero   1
        .byte   5                       # 0x5
        .zero   1
        .byte   6                       # 0x6
        .zero   1
        .byte   7                       # 0x7
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
.LCPI0_1:
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .byte   0                       # 0x0
        .zero   1
        .byte   1                       # 0x1
        .zero   1
        .byte   2                       # 0x2
        .zero   1
        .byte   3                       # 0x3
        .zero   1
        .byte   4                       # 0x4
        .zero   1
        .byte   5                       # 0x5
        .zero   1
        .byte   6                       # 0x6
        .zero   1
        .byte   7                       # 0x7
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
        .zero   1
.LCPI0_2:
        .byte   255                     # 0xff
        .byte   0                       # 0x0
        .byte   255                     # 0xff
        .byte   0                       # 0x0
        .byte   255                     # 0xff
        .byte   0                       # 0x0
        .byte   255                     # 0xff
        .byte   0                       # 0x0
        .byte   255                     # 0xff
        .byte   0                       # 0x0
        .byte   255                     # 0xff
        .byte   0                       # 0x0
        .byte   255                     # 0xff
        .byte   0                       # 0x0
        .byte   255                     # 0xff
        .byte   0                       # 0x0
        .byte   255                     # 0xff
        .byte   0                       # 0x0
        .byte   255                     # 0xff
        .byte   0                       # 0x0
        .byte   255                     # 0xff
        .byte   0                       # 0x0
        .byte   255                     # 0xff
        .byte   0                       # 0x0
        .byte   255                     # 0xff
        .byte   0                       # 0x0
        .byte   255                     # 0xff
        .byte   0                       # 0x0
        .byte   255                     # 0xff
        .byte   0                       # 0x0
        .byte   255                     # 0xff
        .byte   0                       # 0x0
foo(long long __vector(4), long long __vector(4)):                          #
@foo(long long __vector(4), long long __vector(4))
        vpunpcklbw      ymm2, ymm0, ymm0 # ymm2 =
ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
        vperm2i128      ymm0, ymm0, ymm0, 35 # ymm0 = ymm0[2,3,0,1]
        vpshufb ymm0, ymm0, ymmword ptr [rip + .LCPI0_0] # ymm0 =
ymm0[u,u,u,u,u,u,u,u,0,u,1,u,2,u,3,u,20,u,21,u,22,u,23,u,u,u,u,u,u,u,u,u]
        vpblendd        ymm0, ymm2, ymm0, 60 # ymm0 =
ymm2[0,1],ymm0[2,3,4,5],ymm2[6,7]
        vpunpcklbw      ymm2, ymm0, ymm1 # ymm2 =
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
        vperm2i128      ymm1, ymm0, ymm1, 35 # ymm1 = ymm1[2,3,0,1]
        vpshufb ymm1, ymm1, ymmword ptr [rip + .LCPI0_1] # ymm1 =
ymm1[u,u,u,u,u,u,u,u,u,0,u,1,u,2,u,3,u,20,u,21,u,22,u,23,u,u,u,u,u,u,u,u]
        vpblendd        ymm1, ymm2, ymm1, 60 # ymm1 =
ymm2[0,1],ymm1[2,3,4,5],ymm2[6,7]
        vmovdqa ymm2, ymmword ptr [rip + .LCPI0_2] # ymm2 =
[255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
        vpblendvb       ymm0, ymm1, ymm0, ymm2
        ret</pre>
        </div>
      </p>
      <hr>
      <span>You are receiving this mail because:</span>
      
      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>