<html>
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - Wrong code generated with -fslp-vectorize"
href="https://bugs.llvm.org/show_bug.cgi?id=50356">50356</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>Wrong code generated with -fslp-vectorize
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>Linux
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>normal
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Scalar Optimizations
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>kazu@google.com
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org
</td>
</tr></table>
<p>
<div>
<pre>I'm seeing a miscompilation triggered by -fslp-vectorize. This is
similar to, and may be related to, PR50323 and PR50338.
Consider:
#include &lt;immintrin.h&gt;
#include &lt;stdio.h&gt;
#include &lt;stdint.h&gt;
#include &lt;inttypes.h&gt;
struct Int64x8 {
__m256i ymm_lo;
__m256i ymm_hi;
};
static inline int64_t Extract(Int64x8 a, int index) {
return (index < 4) ? a.ymm_lo[index] : a.ymm_hi[index - 4];
}
// This function appears to be miscompiled with -fslp-vectorize.
__attribute__((noinline)) __m256i Permute(Int64x8 a, __m256i b, __m256i c) {
int64_t d[4];
d[0] = Extract(a, b[0] & 0x7);
d[1] = Extract(a, b[1] & 0x7);
d[2] = Extract(a, b[2] & 0x7);
d[3] = Extract(a, b[3] & 0x7);
__m256i e = _mm256_loadu_si256(reinterpret_cast&lt;const __m256i *&gt;(d));
int64_t f[4];
f[0] = e[0] < c[0] ? -1 : 0;
f[1] = e[1] < c[1] ? -1 : 0;
f[2] = e[2] < c[2] ? -1 : 0;
f[3] = e[3] < c[3] ? -1 : 0;
return _mm256_loadu_si256(reinterpret_cast&lt;const __m256i *&gt;(f));
}
int main() {
Int64x8 a;
a.ymm_lo = _mm256_set_epi64x(0xa3, 0xa2, 0xa1, 0xa0);
a.ymm_hi = _mm256_set_epi64x(0xa7, 0xa6, 0xa5, 0xa4);
__m256i m = Permute(a,
_mm256_set_epi64x(0, 0, 1, 0),
_mm256_set_epi64x(0, 0, 0, 0xa1));
printf("%016" PRIx64 " %016" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n",
(int64_t)m[3], (int64_t)m[2], (int64_t)m[1], (int64_t)m[0]);
return 0;
}
With the latest clang (4b91f96a3e291db1ea6360c9a842ecbc6ee89d67), I see:
$ ./release/bin/clang++ -O3 -mavx -fno-slp-vectorize permute3.cc ; ./a.out
0000000000000000 0000000000000000 0000000000000000 ffffffffffffffff
$ ./release/bin/clang++ -O3 -mavx -fslp-vectorize permute3.cc ; ./a.out
0000000000000000 0000000000000000 0000000000000000 0000000000000000
Notice that the bottom lane is different in the output.
Here is the assembly output for Permute:
.text
.file "permute3.cc"
.section .rodata.cst16,"aM",@progbits,16
.p2align 4 # -- Begin function _Z7Permute7Int64x8Dv4_xS0_
.LCPI0_0:
.long 7 # 0x7
.long 7 # 0x7
.long 7 # 0x7
.long 7 # 0x7
.LCPI0_1:
.long 4 # 0x4
.long 4 # 0x4
.long 4 # 0x4
.long 4 # 0x4
.section .rodata.cst32,"aM",@progbits,32
.p2align 5
.LCPI0_2:
.quad 2 # 0x2
.quad 2 # 0x2
.quad 2 # 0x2
.quad 2 # 0x2
.text
.globl _Z7Permute7Int64x8Dv4_xS0_
.p2align 4, 0x90
.type _Z7Permute7Int64x8Dv4_xS0_,@function
_Z7Permute7Int64x8Dv4_xS0_: # @_Z7Permute7Int64x8Dv4_xS0_
.cfi_startproc
# %bb.0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
andq $-32, %rsp
subq $96, %rsp
vmovaps 16(%rbp), %ymm3
vmovaps 48(%rbp), %ymm4
vextractf128 $1, %ymm0, %xmm2
vshufps $136, %xmm2, %xmm0, %xmm2 # xmm2 = xmm0[0,2],xmm2[0,2]
vandps .LCPI0_0(%rip), %xmm2, %xmm2
vmovd %xmm0, %eax
addl $-4, %eax
vmovaps %ymm4, (%rsp)
andl $3, %eax
vpextrd $2, %xmm0, %edx
addl $-4, %edx
andl $3, %edx
vextractps $2, %xmm2, %ecx
vmovaps %ymm3, 32(%rsp)
movl %ecx, %r8d
andl $3, %r8d
addl $-4, %ecx
andl $3, %ecx
vmovdqa .LCPI0_1(%rip), %xmm0 # xmm0 = [4,4,4,4]
vpcmpgtd %xmm2, %xmm0, %xmm3
vpshufd $238, %xmm3, %xmm0 # xmm0 = xmm3[2,3,2,3]
vpmovsxdq %xmm0, %xmm0
vpmovsxdq %xmm3, %xmm3
vpextrd $3, %xmm2, %edi
movl %edi, %esi
andl $3, %esi
addl $-4, %edi
vpaddq %xmm2, %xmm2, %xmm2
vmovapd 16(%rbp), %xmm4
vmovapd 32(%rbp), %xmm5
vpermilpd %xmm2, %xmm5, %xmm5
vpermilpd %xmm2, %xmm4, %xmm4
vpcmpgtq .LCPI0_2(%rip), %xmm2, %xmm2
andl $3, %edi
vblendvpd %xmm2, %xmm5, %xmm4, %xmm2
vmovsd (%rsp,%rdx,8), %xmm4 # xmm4 = mem[0],zero
vmovsd (%rsp,%rax,8), %xmm5 # xmm5 = mem[0],zero
vunpcklpd %xmm4, %xmm5, %xmm4 # xmm4 = xmm5[0],xmm4[0]
vblendvpd %xmm3, %xmm2, %xmm4, %xmm2
vmovsd 32(%rsp,%rsi,8), %xmm3 # xmm3 = mem[0],zero
vmovsd 32(%rsp,%r8,8), %xmm4 # xmm4 = mem[0],zero
vunpcklpd %xmm3, %xmm4, %xmm3 # xmm3 = xmm4[0],xmm3[0]
vmovsd (%rsp,%rdi,8), %xmm4 # xmm4 = mem[0],zero
vmovsd (%rsp,%rcx,8), %xmm5 # xmm5 = mem[0],zero
vunpcklpd %xmm4, %xmm5, %xmm4 # xmm4 = xmm5[0],xmm4[0]
vblendvpd %xmm0, %xmm3, %xmm4, %xmm0
vpcmpgtq %xmm2, %xmm1, %xmm2
vextractf128 $1, %ymm1, %xmm1
vpcmpgtq %xmm0, %xmm1, %xmm0
vinsertf128 $1, %xmm0, %ymm2, %ymm0
movq %rbp, %rsp
popq %rbp
.cfi_def_cfa %rsp, 8
retq</pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>