<html>
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - Wrong code generated with -fslp-vectorize"
href="https://bugs.llvm.org/show_bug.cgi?id=50323">50323</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>Wrong code generated with -fslp-vectorize
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>Linux
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>normal
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Backend: X86
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>kazu@google.com
</td>
</tr>
<tr>
<th>CC</th>
<td>craig.topper@gmail.com, llvm-bugs@lists.llvm.org, llvm-dev@redking.me.uk, pengfei.wang@intel.com, spatel+llvm@rotateright.com
</td>
</tr></table>
<p>
<div>
<pre>I'm seeing a miscompilation triggered by -fslp-vectorize.
Consider:
#include &lt;stdint.h&gt;
#include &lt;immintrin.h&gt;
#include &lt;stdio.h&gt;
struct Pair {
__m256i lo;
__m256i hi;
};
static inline int64_t Extract(const Pair& v, int index) {
return index < 4 ? v.lo[index] : v.hi[index - 4];
}
// This function gets miscompiled. This is a lot like
// _mm512_permutexvar_epi64.
// It takes a pair of __m256i along with an array of indexes and returns a
// permutation in __m256i.
__attribute__((noinline)) __m256i Permute(Pair a, __m256i map) {
int64_t result[] = {0, 0, 0, 0};
_mm256_storeu_si256(reinterpret_cast&lt;__m256i *&gt;(result), a.lo);
result[0] = Extract(a, map[0] & 0x7);
result[1] = Extract(a, map[1] & 0x7);
result[2] = Extract(a, map[2] & 0x7);
result[3] = Extract(a, map[3] & 0x7);
return _mm256_loadu_si256(reinterpret_cast&lt;const __m256i *&gt;(result));
}
int main() {
Pair v;
v.lo = _mm256_set_epi64x(0xa3, 0xa2, 0xa1, 0xa0);
v.hi = _mm256_set_epi64x(0xa7, 0xa6, 0xa5, 0xa4);
__m256i r = Permute(v, _mm256_set_epi64x(2, 3, 4, 5));
printf("%02x %02x %02x %02x\n",
(int)_mm256_extract_epi64(r, 3),
(int)_mm256_extract_epi64(r, 2),
(int)_mm256_extract_epi64(r, 1),
(int)_mm256_extract_epi64(r, 0));
return 0;
}
With the latest clang (a0ca4c46ca35957a38a6023fa84afda2fc9ba0ec), I see:
$ ./release/bin/clang++ -O3 -mavx -fno-slp-vectorize permute.cc ; ./a.out
a2 a3 a4 a5
$ ./release/bin/clang++ -O3 -mavx -fslp-vectorize permute.cc ; ./a.out
a0 a3 a4 a5
Notice that the top lane is different -- 0xa0 vs. 0xa2.
Here is the assembly output for Permute:
.text
.file "permute.cc"
.globl _Z7Permute4PairDv4_x # -- Begin function _Z7Permute4PairDv4_x
.p2align 4, 0x90
.type _Z7Permute4PairDv4_x,@function
_Z7Permute4PairDv4_x: # @_Z7Permute4PairDv4_x
.cfi_startproc
# %bb.0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
andq $-32, %rsp
subq $96, %rsp
vextractf128 $1, %ymm0, %xmm1
vpextrd $2, %xmm1, %eax
movl %eax, %edx
andl $7, %edx
vmovaps 16(%rbp), %ymm2
vmovaps 48(%rbp), %ymm3
vmovaps %ymm2, (%rsp)
vmovaps %ymm3, 32(%rsp)
vmovd %xmm1, %ecx
subl $4, %edx
jae .LBB0_1
# %bb.2:
andl $3, %eax
movq (%rsp,%rax,8), %rax
jmp .LBB0_3
.LBB0_1:
andl $3, %edx
movq 32(%rsp,%rdx,8), %rax
.LBB0_3:
movl %ecx, %esi
andl $7, %esi
vpextrd $2, %xmm0, %edx
subl $4, %esi
jae .LBB0_4
# %bb.5:
andl $3, %ecx
movq (%rsp,%rcx,8), %rcx
jmp .LBB0_6
.LBB0_4:
andl $3, %esi
movq 32(%rsp,%rsi,8), %rcx
.LBB0_6:
movl %edx, %edi
andl $7, %edi
vmovd %xmm0, %esi
subl $4, %edi
jae .LBB0_7
# %bb.8:
andl $3, %edx
movq (%rsp,%rdx,8), %rdx
movl %esi, %edi
andl $7, %edi
subl $4, %edi
jb .LBB0_11
.LBB0_10:
andl $3, %edi
movq 32(%rsp,%rdi,8), %rsi
jmp .LBB0_12
.LBB0_7:
andl $3, %edi
movq 32(%rsp,%rdi,8), %rdx
movl %esi, %edi
andl $7, %edi
subl $4, %edi
jae .LBB0_10
.LBB0_11:
andl $3, %esi
movq (%rsp,%rsi,8), %rsi
.LBB0_12:
vmovq %rax, %xmm0
vmovq %rcx, %xmm1
vpunpcklqdq %xmm0, %xmm1, %xmm0 # xmm0 = xmm1[0],xmm0[0]
vmovq %rdx, %xmm1
vmovq %rsi, %xmm2
vpunpcklqdq %xmm1, %xmm2, %xmm1 # xmm1 = xmm2[0],xmm1[0]
vinsertf128 $1, %xmm0, %ymm1, %ymm0
movq %rbp, %rsp
popq %rbp
.cfi_def_cfa %rsp, 8
retq</pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>