[llvm-bugs] [Bug 50323] New: Wrong code generated with -fslp-vectorize
via llvm-bugs
llvm-bugs at lists.llvm.org
Wed May 12 21:14:52 PDT 2021
https://bugs.llvm.org/show_bug.cgi?id=50323
Bug ID: 50323
Summary: Wrong code generated with -fslp-vectorize
Product: libraries
Version: trunk
Hardware: PC
OS: Linux
Status: NEW
Severity: normal
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: kazu at google.com
CC: craig.topper at gmail.com, llvm-bugs at lists.llvm.org,
llvm-dev at redking.me.uk, pengfei.wang at intel.com,
spatel+llvm at rotateright.com
I'm seeing a miscompilation triggered by -fslp-vectorize.
Consider:
#include <stdint.h>
#include <immintrin.h>
#include <stdio.h>
// Two 256-bit integer vectors. Extract() below indexes them as eight 64-bit
// lanes: lanes 0-3 are read from `lo`, lanes 4-7 from `hi`.
struct Pair {
__m256i lo;
__m256i hi;
};
// Returns the 64-bit lane of `v` selected by `index`: v.lo[index] when
// index < 4, otherwise v.hi[index - 4]. `index` is not range-checked here;
// Permute() masks it with 0x7 before calling.
// The [] on __m256i relies on the compiler's vector-subscript extension.
static inline int64_t Extract(const Pair& v, int index) {
return index < 4 ? v.lo[index] : v.hi[index - 4];
}
// This function gets miscompiled. This is a lot like
// _mm512_permutexvar_epi64.
// It takes a pair of __m256i along with an array of indexes and returns a
// permutation in __m256i.
// For each i in 0..3, lane i of the return value is Extract(a, map[i] & 0x7).
// NOTE(review): noinline presumably keeps the call from being folded into
// main() so the miscompiled body stays observable -- confirm with reporter.
__attribute__((noinline)) __m256i Permute(Pair a, __m256i map) {
int64_t result[] = {0, 0, 0, 0};
// Unaligned store of a.lo into result. All four elements are reassigned
// below, so this store does not affect the returned value; it is kept
// because it is part of the reported reproducer.
_mm256_storeu_si256(reinterpret_cast<__m256i *>(result), a.lo);
// `& 0x7` keeps only the low three bits of each map lane, so the index
// passed to Extract() is always in 0..7.
result[0] = Extract(a, map[0] & 0x7);
result[1] = Extract(a, map[1] & 0x7);
result[2] = Extract(a, map[2] & 0x7);
result[3] = Extract(a, map[3] & 0x7);
// Unaligned load of the four int64_t results back into a vector.
return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(result));
}
// Driver for the reproducer: permutes a known Pair and prints the four
// result lanes, highest lane first.
int main() {
Pair v;
// _mm256_set_epi64x takes its arguments highest lane first, so
// v.lo lanes 0..3 are 0xa0..0xa3 and v.hi lanes 0..3 are 0xa4..0xa7.
v.lo = _mm256_set_epi64x(0xa3, 0xa2, 0xa1, 0xa0);
v.hi = _mm256_set_epi64x(0xa7, 0xa6, 0xa5, 0xa4);
// Map lanes 3..0 are 2, 3, 4, 5, so the expected result lanes 3..0 are
// 0xa2, 0xa3, 0xa4, 0xa5 (matching the -fno-slp-vectorize output).
__m256i r = Permute(v, _mm256_set_epi64x(2, 3, 4, 5));
// Each 64-bit lane is narrowed to int for the %02x conversions.
printf("%02x %02x %02x %02x\n",
(int)_mm256_extract_epi64(r, 3),
(int)_mm256_extract_epi64(r, 2),
(int)_mm256_extract_epi64(r, 1),
(int)_mm256_extract_epi64(r, 0));
return 0;
}
With the latest clang (a0ca4c46ca35957a38a6023fa84afda2fc9ba0ec), I see:
$ ./release/bin/clang++ -O3 -mavx -fno-slp-vectorize permute.cc ; ./a.out
a2 a3 a4 a5
$ ./release/bin/clang++ -O3 -mavx -fslp-vectorize permute.cc ; ./a.out
a0 a3 a4 a5
Notice that the top lane differs: 0xa0 with -fslp-vectorize vs. the expected 0xa2 with -fno-slp-vectorize.
Here is the assembly output for Permute:
.text
.file "permute.cc"
.globl _Z7Permute4PairDv4_x # -- Begin function _Z7Permute4PairDv4_x
.p2align 4, 0x90
.type _Z7Permute4PairDv4_x, @function
_Z7Permute4PairDv4_x: # @_Z7Permute4PairDv4_x
.cfi_startproc
# %bb.0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
andq $-32, %rsp
subq $96, %rsp
vextractf128 $1, %ymm0, %xmm1
vpextrd $2, %xmm1, %eax
movl %eax, %edx
andl $7, %edx
vmovaps 16(%rbp), %ymm2
vmovaps 48(%rbp), %ymm3
vmovaps %ymm2, (%rsp)
vmovaps %ymm3, 32(%rsp)
vmovd %xmm1, %ecx
subl $4, %edx
jae .LBB0_1
# %bb.2:
andl $3, %eax
movq (%rsp,%rax,8), %rax
jmp .LBB0_3
.LBB0_1:
andl $3, %edx
movq 32(%rsp,%rdx,8), %rax
.LBB0_3:
movl %ecx, %esi
andl $7, %esi
vpextrd $2, %xmm0, %edx
subl $4, %esi
jae .LBB0_4
# %bb.5:
andl $3, %ecx
movq (%rsp,%rcx,8), %rcx
jmp .LBB0_6
.LBB0_4:
andl $3, %esi
movq 32(%rsp,%rsi,8), %rcx
.LBB0_6:
movl %edx, %edi
andl $7, %edi
vmovd %xmm0, %esi
subl $4, %edi
jae .LBB0_7
# %bb.8:
andl $3, %edx
movq (%rsp,%rdx,8), %rdx
movl %esi, %edi
andl $7, %edi
subl $4, %edi
jb .LBB0_11
.LBB0_10:
andl $3, %edi
movq 32(%rsp,%rdi,8), %rsi
jmp .LBB0_12
.LBB0_7:
andl $3, %edi
movq 32(%rsp,%rdi,8), %rdx
movl %esi, %edi
andl $7, %edi
subl $4, %edi
jae .LBB0_10
.LBB0_11:
andl $3, %esi
movq (%rsp,%rsi,8), %rsi
.LBB0_12:
vmovq %rax, %xmm0
vmovq %rcx, %xmm1
vpunpcklqdq %xmm0, %xmm1, %xmm0 # xmm0 = xmm1[0],xmm0[0]
vmovq %rdx, %xmm1
vmovq %rsi, %xmm2
vpunpcklqdq %xmm1, %xmm2, %xmm1 # xmm1 = xmm2[0],xmm1[0]
vinsertf128 $1, %xmm0, %ymm1, %ymm0
movq %rbp, %rsp
popq %rbp
.cfi_def_cfa %rsp, 8
retq
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20210513/0010089e/attachment.html>
More information about the llvm-bugs
mailing list