[LLVMbugs] [Bug 17185] match broadcast instruction to data type (int or FP)
bugzilla-daemon at llvm.org
Thu Apr 10 13:38:06 PDT 2014
http://llvm.org/bugs/show_bug.cgi?id=17185
Sanjay Patel <sanjay3.0.0.0 at gmail.com> changed:
           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
                 CC|                            |sanjay3.0.0.0 at gmail.com
         Resolution|---                         |FIXED
--- Comment #2 from Sanjay Patel <sanjay3.0.0.0 at gmail.com> ---
Using:
$ ./clang -v
clang version 3.5.0 (trunk 205798) (llvm/trunk 205792)
Target: x86_64-apple-darwin13.1.0
Thread model: posix
We're no longer generating any floating-point variant of the broadcast instruction here;
we're generating 'vpbroadcastq' now.
Not sure whether this is actually better codegen, though... it's certainly bigger:
_foo: ## @foo
.cfi_startproc
## BB#0: ## %entry
## kill: ESI<def> ESI<kill> RSI<def>
testl %esi, %esi
jle LBB0_23
## BB#1: ## %for.body.lr.ph
movl %edx, %r8d
leal -1(%rsi), %eax
leaq 1(%rax), %r9
xorl %r10d, %r10d
movabsq $8589934560, %rdx ## imm = 0x1FFFFFFE0
andq %r9, %rdx
je LBB0_5
## BB#2: ## %vector.ph
vmovq %r8, %xmm0
vpbroadcastq %xmm0, %ymm0 <---- integer form of broadcast
incq %rax
andq $-32, %rax
xorl %ecx, %ecx
vmovdqa LCPI0_0(%rip), %ymm1
vmovdqa LCPI0_1(%rip), %ymm2
vmovdqa LCPI0_2(%rip), %ymm3
vmovdqa LCPI0_3(%rip), %ymm4
vmovdqa LCPI0_4(%rip), %ymm5
vmovdqa LCPI0_5(%rip), %ymm6
vmovdqa LCPI0_6(%rip), %ymm7
vmovdqa LCPI0_7(%rip), %ymm8
vmovdqa LCPI0_8(%rip), %ymm9
.align 4, 0x90
LBB0_3: ## %vector.body
## =>This Inner Loop Header: Depth=1
vmovq %rcx, %xmm10
vpermq $0, %ymm10, %ymm10 ## ymm10 = ymm10[0,0,0,0]
vpaddq %ymm0, %ymm10, %ymm10
vpaddq %ymm1, %ymm10, %ymm11
vpaddq %ymm2, %ymm10, %ymm12
vpaddq %ymm3, %ymm10, %ymm13
vpaddq %ymm4, %ymm10, %ymm14
vpermd %ymm12, %ymm9, %ymm12
vpermd %ymm11, %ymm9, %ymm11
vinserti128 $1, %xmm11, %ymm12, %ymm11
vpaddq %ymm5, %ymm10, %ymm12
vpermd %ymm14, %ymm9, %ymm14
vpermd %ymm13, %ymm9, %ymm13
vinserti128 $1, %xmm13, %ymm14, %ymm13
vpaddq %ymm6, %ymm10, %ymm14
vpermd %ymm14, %ymm9, %ymm14
vpermd %ymm12, %ymm9, %ymm12
vinserti128 $1, %xmm12, %ymm14, %ymm12
vpaddq %ymm7, %ymm10, %ymm14
vpaddq %ymm8, %ymm10, %ymm10
vpermd %ymm10, %ymm9, %ymm10
vpermd %ymm14, %ymm9, %ymm14
vinserti128 $1, %xmm14, %ymm10, %ymm10
vmovdqu %ymm11, (%rdi,%rcx,4)
vmovdqu %ymm13, 32(%rdi,%rcx,4)
vmovdqu %ymm12, 64(%rdi,%rcx,4)
vmovdqu %ymm10, 96(%rdi,%rcx,4)
addq $32, %rcx
cmpq %rcx, %rax
jne LBB0_3
## BB#4:
movq %rdx, %r10
LBB0_5: ## %middle.block
cmpq %r10, %r9
je LBB0_23
## BB#6: ## %for.body.preheader
leal 1(%rsi), %edx
leal 1(%r10), %eax
subl %eax, %edx
movl %edx, %eax
andl $7, %eax
je LBB0_20
## BB#7: ## %unr.cmp60
cmpl $1, %eax
je LBB0_19
## BB#8: ## %unr.cmp52
cmpl $2, %eax
je LBB0_18
## BB#9: ## %unr.cmp44
cmpl $3, %eax
je LBB0_17
## BB#10: ## %unr.cmp36
cmpl $4, %eax
je LBB0_16
## BB#11: ## %unr.cmp28
cmpl $5, %eax
je LBB0_15
## BB#12: ## %unr.cmp
cmpl $6, %eax
je LBB0_14
## BB#13: ## %for.body.unr
leal (%r10,%r8), %eax
movl %eax, (%rdi,%r10,4)
incq %r10
LBB0_14: ## %for.body.unr17
leal (%r10,%r8), %eax
movl %eax, (%rdi,%r10,4)
incq %r10
LBB0_15: ## %for.body.unr22
leal (%r10,%r8), %eax
movl %eax, (%rdi,%r10,4)
incq %r10
LBB0_16: ## %for.body.unr30
leal (%r10,%r8), %eax
movl %eax, (%rdi,%r10,4)
incq %r10
LBB0_17: ## %for.body.unr38
leal (%r10,%r8), %eax
movl %eax, (%rdi,%r10,4)
incq %r10
LBB0_18: ## %for.body.unr46
leal (%r10,%r8), %eax
movl %eax, (%rdi,%r10,4)
incq %r10
LBB0_19: ## %for.body.unr54
leal (%r10,%r8), %eax
movl %eax, (%rdi,%r10,4)
incq %r10
LBB0_20: ## %for.body.preheader.split
cmpl $8, %edx
jb LBB0_23
## BB#21: ## %for.body.preheader.split.split
leaq 28(%rdi,%r10,4), %rax
leaq 3(%r10,%r8), %rdx
subl %r10d, %esi
xorl %edi, %edi
.align 4, 0x90
LBB0_22: ## %for.body
## =>This Inner Loop Header: Depth=1
leal (%rdx,%rdi), %r8d
leal -3(%rdx,%rdi), %ecx
movl %ecx, -28(%rax,%rdi,4)
leal -2(%rdx,%rdi), %ecx
movl %ecx, -24(%rax,%rdi,4)
leal -1(%rdx,%rdi), %ecx
movl %ecx, -20(%rax,%rdi,4)
movl %r8d, -16(%rax,%rdi,4)
leal 1(%rdx,%rdi), %ecx
movl %ecx, -12(%rax,%rdi,4)
leal 2(%rdx,%rdi), %ecx
movl %ecx, -8(%rax,%rdi,4)
leal 3(%rdx,%rdi), %ecx
movl %ecx, -4(%rax,%rdi,4)
leal 4(%rdx,%rdi), %ecx
movl %ecx, (%rax,%rdi,4)
addq $8, %r10
addq $8, %rdi
cmpl %edi, %esi
jne LBB0_22
LBB0_23: ## %for.end
vzeroupper
retq
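For reference, a loop of roughly the following shape reproduces this kind of codegen.
This is a hypothetical reconstruction inferred from the assembly above, not the original
attachment; the function signature, parameter names, and exact types are assumptions,
and it would be built with something like "clang -O3 -mavx2 -S".

/* Sketch of a test case that should trigger the broadcast: 'k' is
 * loop-invariant, so the vectorizer splats it into a ymm register once
 * before the vector loop (the vpbroadcastq in the listing above). */
void foo(int *a, int n, int k) {
  for (int i = 0; i < n; ++i)
    a[i] = i + k;  /* vector of i values + broadcast of k, stored as 32-bit ints */
}

Since the data is integer, the type-matched AVX2 broadcasts are the vpbroadcastb/w/d/q
forms; the floating-point forms (vbroadcastss/vbroadcastsd) are the ones we no longer
emit for this case, which is what the bug title asks for.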