[PATCH] D109368: [LV] Vectorize cases with larger number of RT checks, execute only if profitable.
Alexander Kornienko via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 7 09:48:18 PDT 2022
alexfh added a comment.
In D109368#3636261 <https://reviews.llvm.org/D109368#3636261>, @asmok-g wrote:
> Heads-up: I think this patch caused a mis-compile that's causing a test in TensorFlow <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/compiler/tests/tensor_array_ops_test.py> to fail. We're still confirming it and working on a reproducer.
I got it down to this sample:
void *memmove(void * destination, const void * source, unsigned long num);
void f(char *s, char *d, int g) {
while (g--) {
memmove(d, s, 4);
d += 4;
}
}
Compiling this with `--target=x86_64--linux-gnu -O2` before and after this commit produces assembly that differs in a way that seems wrong to me:
@@ -1,100 +1,100 @@
.text
.file "input.i"
.globl f # -- Begin function f
.p2align 4, 0x90
.type f,@function
f: # @f
.cfi_startproc
# %bb.0:
# kill: def $edx killed $edx def $rdx
testl %edx, %edx
je .LBB0_16
# %bb.1:
leal -1(%rdx), %r8d
- cmpl $7, %r8d
+ cmpl $15, %r8d
jb .LBB0_2
# %bb.3:
leaq 4(%rdi), %rax
cmpq %rsi, %rax
jbe .LBB0_6
# %bb.4:
leaq (%rsi,%r8,4), %rax
addq $4, %rax
cmpq %rdi, %rax
jbe .LBB0_6
.LBB0_2:
movq %rsi, %rax
.LBB0_9:
leal -1(%rdx), %r8d
testb $7, %dl
je .LBB0_13
# %bb.10:
movl %edx, %r9d
andl $7, %r9d
xorl %esi, %esi
.p2align 4, 0x90
.LBB0_11: # =>This Inner Loop Header: Depth=1
movl (%rdi), %ecx
movl %ecx, (%rax)
addq $4, %rax
incq %rsi
cmpl %esi, %r9d
jne .LBB0_11
# %bb.12:
subl %esi, %edx
.LBB0_13:
cmpl $7, %r8d
jb .LBB0_16
# %bb.14:
movl %edx, %ecx
xorl %edx, %edx
.p2align 4, 0x90
.LBB0_15: # =>This Inner Loop Header: Depth=1
movl (%rdi), %esi
movl %esi, (%rax,%rdx,4)
movl (%rdi), %esi
movl %esi, 4(%rax,%rdx,4)
movl (%rdi), %esi
movl %esi, 8(%rax,%rdx,4)
movl (%rdi), %esi
movl %esi, 12(%rax,%rdx,4)
movl (%rdi), %esi
movl %esi, 16(%rax,%rdx,4)
movl (%rdi), %esi
movl %esi, 20(%rax,%rdx,4)
movl (%rdi), %esi
movl %esi, 24(%rax,%rdx,4)
movl (%rdi), %esi
movl %esi, 28(%rax,%rdx,4)
addq $8, %rdx
cmpl %edx, %ecx
jne .LBB0_15
jmp .LBB0_16
.LBB0_6:
incq %r8
movq %r8, %r9
andq $-8, %r9
subl %r9d, %edx
leaq (%rsi,%r9,4), %rax
movd (%rdi), %xmm0 # xmm0 = mem[0],zero,zero,zero
pshufd $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
xorl %ecx, %ecx
.p2align 4, 0x90
.LBB0_7: # =>This Inner Loop Header: Depth=1
movdqu %xmm0, (%rsi,%rcx,4)
movdqu %xmm0, 16(%rsi,%rcx,4)
addq $8, %rcx
cmpq %rcx, %r9
jne .LBB0_7
# %bb.8:
cmpq %r9, %r8
jne .LBB0_9
.LBB0_16:
retq
.Lfunc_end0:
.size f, .Lfunc_end0-f
.cfi_endproc
# -- End function
- .ident "clang version google3-trunk (aa78c5298ea37f2ca8150dc0a1c880be7ec438f4)"
+ .ident "clang version google3-trunk (644a965c1efef68f22d9495e4cefbb599c214788)"
.section ".note.GNU-stack","",@progbits
.addrsig
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D109368/new/
https://reviews.llvm.org/D109368
More information about the llvm-commits
mailing list