[llvm-bugs] [Bug 43815] New: Load folding defeated by VPCMP to VTESTNM optimization
via llvm-bugs
llvm-bugs at lists.llvm.org
Sat Oct 26 03:51:18 PDT 2019
https://bugs.llvm.org/show_bug.cgi?id=43815
Bug ID: 43815
Summary: Load folding defeated by VPCMP to VTESTNM optimization
Product: libraries
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: enhancement
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: dave at znu.io
CC: craig.topper at gmail.com, llvm-bugs at lists.llvm.org,
llvm-dev at redking.me.uk, spatel+llvm at rotateright.com
While trying to create an AVX512BW version of strlen() using the X86 clang
intrinsics, I noticed that load folding was being defeated by the VPCMP to
VPTESTNM optimization. For example:
// Repro case: a strlen() written with AVX512BW intrinsics that scans the
// string one 64-byte block at a time and returns the offset of the first
// zero byte relative to _ptr.
unsigned long avx512_strlen(const char *_ptr) {
__m512i zero = { 0 };
// When the empty asm below is enabled, `zero` becomes an in/out operand of the
// asm statement; per the report, the compare is then emitted as vpcmpeqb with
// the load folded into it.
//asm("" : "+v" (zero));
// Round _ptr down to a 64-byte boundary so every block load is aligned.
const char *ptr = reinterpret_cast<const char
*>(reinterpret_cast<uintptr_t>(_ptr) & ~63ul);
// Mask applied to the first block's compare result.
// NOTE(review): ptr <= _ptr here, so (ptr - _ptr) is zero or negative and a
// negative shift count is undefined behaviour; presumably (_ptr - ptr) was
// intended, to drop the bytes that precede _ptr -- confirm with the reporter.
uint64_t mask = ~0ul << (ptr - _ptr);
// Byte-wise compare of the block against zero (predicate 0; the listings show
// it lowered to vpcmpeqb / vptestnmb); the result is a 64-bit mask.
auto result = _mm512_cmp_epi8_mask(zero, *reinterpret_cast<const
__m512i*>(ptr), 0);
result &= mask;
// No match in the first block: keep advancing 64 bytes at a time until a
// block produces a non-zero compare mask.
if (!result) for (;;) {
ptr += 64;
result = _mm512_cmp_epi8_mask(zero, *reinterpret_cast<const
__m512i*>(ptr), 0);
if (result)
break;
}
// The lowest set bit of the mask is the byte index of the first match within
// the current block.
ptr += __builtin_ctzll(result);
return ptr - _ptr;
}
Generates:
_ZL13avx512_strlenPKc: # @_ZL13avx512_strlenPKc
.cfi_startproc
# %bb.0:
movq %rdi, %rcx
andq $-64, %rcx
movl %ecx, %eax
subl %edi, %eax
vmovdqa64 (%rcx), %zmm0
vptestnmb %zmm0, %zmm0, %k0
kmovq %k0, %rdx
shrxq %rax, %rdx, %rdx
shlxq %rax, %rdx, %rax
testq %rax, %rax
je .LBB0_1
# %bb.3:
tzcntq %rax, %rax
addq %rcx, %rax
subq %rdi, %rax
vzeroupper
retq
.p2align 4, 0x90
.LBB0_1: # =>This Inner Loop Header: Depth=1
vmovdqa64 64(%rcx), %zmm0
addq $64, %rcx
vptestnmb %zmm0, %zmm0, %k0
kortestq %k0, %k0
je .LBB0_1
# %bb.2:
kmovq %k0, %rax
tzcntq %rax, %rax
addq %rcx, %rax
subq %rdi, %rax
vzeroupper
retq
But with the inline asm() uncommented, the desired code is emitted:
_ZL13avx512_strlenPKc: # @_ZL13avx512_strlenPKc
.cfi_startproc
# %bb.0:
vxorps %xmm0, %xmm0, %xmm0
#APP
#NO_APP
movq %rdi, %rcx
andq $-64, %rcx
movl %ecx, %eax
subl %edi, %eax
vpcmpeqb (%rcx), %zmm0, %k0
kmovq %k0, %rdx
shrxq %rax, %rdx, %rdx
shlxq %rax, %rdx, %rax
testq %rax, %rax
je .LBB0_1
# %bb.3:
tzcntq %rax, %rax
addq %rcx, %rax
subq %rdi, %rax
vzeroupper
retq
.p2align 4, 0x90
.LBB0_1: # =>This Inner Loop Header: Depth=1
vpcmpeqb 64(%rcx), %zmm0, %k0
addq $64, %rcx
kortestq %k0, %k0
je .LBB0_1
# %bb.2:
kmovq %k0, %rax
tzcntq %rax, %rax
addq %rcx, %rax
subq %rdi, %rax
vzeroupper
retq
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20191026/aa76441b/attachment.html>
More information about the llvm-bugs
mailing list