[llvm] r264512 - [X86][AVX] Enabled MULHS/MULHU v16i16 vectors on AVX1 targets
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 26 08:44:56 PDT 2016
Author: rksimon
Date: Sat Mar 26 10:44:55 2016
New Revision: 264512
URL: http://llvm.org/viewvc/llvm-project?rev=264512&view=rev
Log:
[X86][AVX] Enabled MULHS/MULHU v16i16 vectors on AVX1 targets
Correctly split v16i16 vectors into v8i16 halves to prevent scalarization
Differential Revision: http://reviews.llvm.org/D18307
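
For context, the modified tests exercise constant division of <16 x i16> vectors, which previously scalarized on AVX1 because MULHS/MULHU were not marked Custom for v16i16. The IR bodies are not part of the diff, but a minimal reproducer is presumably along these lines (function name hypothetical, divisor a splat of 7 as in the tests):

define <16 x i16> @div7_16i16_sketch(<16 x i16> %a) nounwind {
  ; sdiv by a constant splat is expanded via a signed multiply-high (ISD::MULHS)
  %res = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <16 x i16> %res
}
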
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=264512&r1=264511&r2=264512&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Mar 26 10:44:55 2016
@@ -1253,6 +1253,8 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::MUL, MVT::v16i16, Custom);
setOperationAction(ISD::MUL, MVT::v32i8, Custom);
+ setOperationAction(ISD::MULHU, MVT::v16i16, Custom);
+ setOperationAction(ISD::MULHS, MVT::v16i16, Custom);
setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
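
With MULHU/MULHS marked Custom for v16i16, the existing 256-bit integer lowering can split the operation into two v8i16 halves (one vpmulhw/vpmulhuw per 128-bit lane) instead of letting legalization scalarize it. High-multiply nodes come, for instance, from the generic divide-by-constant expansion exercised by the tests below; they are also expected from widen/multiply/shift/truncate IR such as this sketch (function name hypothetical):

define <16 x i16> @mulhs_16i16_sketch(<16 x i16> %a, <16 x i16> %b) nounwind {
  ; widen to i32, multiply, keep the high 16 bits of each product, narrow back
  %xa = sext <16 x i16> %a to <16 x i32>
  %xb = sext <16 x i16> %b to <16 x i32>
  %m  = mul <16 x i32> %xa, %xb
  %hi = lshr <16 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %r  = trunc <16 x i32> %hi to <16 x i16>
  ret <16 x i16> %r
}
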
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll?rev=264512&r1=264511&r2=264512&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll Sat Mar 26 10:44:55 2016
@@ -193,150 +193,15 @@ define <16 x i16> @test_div7_16i16(<16 x
; AVX1-LABEL: test_div7_16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm1, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vmovd %xmm1, %ecx
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm1, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm1, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm1, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm1, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm1, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm1, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm0, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vmovd %xmm0, %ecx
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm0, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm0, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm0, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm0, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm0, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm0, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
+; AVX1-NEXT: vpmulhw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm3
+; AVX1-NEXT: vpsraw $1, %xmm1, %xmm1
+; AVX1-NEXT: vpaddw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpmulhw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm2
+; AVX1-NEXT: vpsraw $1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -674,198 +539,20 @@ define <16 x i16> @test_rem7_16i16(<16 x
; AVX1-LABEL: test_rem7_16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm1, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vmovd %xmm1, %ecx
-; AVX1-NEXT: movswl %cx, %edx
-; AVX1-NEXT: imull $18725, %edx, %edx # imm = 0x4925
-; AVX1-NEXT: movl %edx, %esi
-; AVX1-NEXT: shrl $16, %esi
-; AVX1-NEXT: sarw %si
-; AVX1-NEXT: shrl $31, %edx
-; AVX1-NEXT: addl %esi, %edx
-; AVX1-NEXT: leal (,%rdx,8), %esi
-; AVX1-NEXT: subl %edx, %esi
-; AVX1-NEXT: subl %esi, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm1, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm1, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm1, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm1, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm1, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm1, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm0, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vmovd %xmm0, %ecx
-; AVX1-NEXT: movswl %cx, %edx
-; AVX1-NEXT: imull $18725, %edx, %edx # imm = 0x4925
-; AVX1-NEXT: movl %edx, %esi
-; AVX1-NEXT: shrl $16, %esi
-; AVX1-NEXT: sarw %si
-; AVX1-NEXT: shrl $31, %edx
-; AVX1-NEXT: addl %esi, %edx
-; AVX1-NEXT: leal (,%rdx,8), %esi
-; AVX1-NEXT: subl %edx, %esi
-; AVX1-NEXT: subl %esi, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm0, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm0, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm0, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm0, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm0, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm0, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
+; AVX1-NEXT: vpmulhw %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpsrlw $15, %xmm3, %xmm4
+; AVX1-NEXT: vpsraw $1, %xmm3, %xmm3
+; AVX1-NEXT: vpaddw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [7,7,7,7,7,7,7,7]
+; AVX1-NEXT: vpmullw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpsubw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpmulhw %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm3
+; AVX1-NEXT: vpsraw $1, %xmm2, %xmm2
+; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpmullw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpsubw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
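
The vectorized AVX1 sequence above implements the usual signed magic-number division: vpmulhw by 18725 (roughly 2^17/7) gives about 2*x/7 in each lane, vpsraw $1 halves it, and adding the sign bit extracted by vpsrlw $15 rounds the quotient toward zero; test_rem7_16i16 additionally multiplies the quotient by the splat of 7 (vpmullw) and subtracts it from the input. A per-lane scalar sketch of the same computation (function name hypothetical):

define i16 @sdiv7_lane_sketch(i16 %x) nounwind {
  ; 18725 ~ 2^17/7, so the high 16 bits of the 32-bit product are roughly 2*x/7
  %sx  = sext i16 %x to i32
  %p   = mul i32 %sx, 18725
  %ph  = ashr i32 %p, 16
  %hi  = trunc i32 %ph to i16      ; vpmulhw
  %q1  = ashr i16 %hi, 1           ; vpsraw $1
  %sgn = lshr i16 %hi, 15          ; vpsrlw $15: 1 iff the product is negative
  %q   = add i16 %q1, %sgn         ; vpaddw: rounds the quotient toward zero
  ret i16 %q
}
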
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll?rev=264512&r1=264511&r2=264512&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll Sat Mar 26 10:44:55 2016
@@ -184,154 +184,19 @@ define <8 x i32> @test_div7_8i32(<8 x i3
define <16 x i16> @test_div7_16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: test_div7_16i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vmovd %xmm1, %ecx
-; AVX1-NEXT: movzwl %cx, %edx
-; AVX1-NEXT: imull $9363, %edx, %edx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: andl $65534, %ecx # imm = 0xFFFE
-; AVX1-NEXT: shrl %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: shrl $2, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vmovd %xmm0, %ecx
-; AVX1-NEXT: movzwl %cx, %edx
-; AVX1-NEXT: imull $9363, %edx, %edx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: andl $65534, %ecx # imm = 0xFFFE
-; AVX1-NEXT: shrl %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: shrl $2, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9363,9363,9363,9363,9363,9363,9363,9363]
+; AVX1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpsubw %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpsrlw $1, %xmm3, %xmm3
+; AVX1-NEXT: vpaddw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_div7_16i16:
@@ -661,216 +526,22 @@ define <16 x i16> @test_rem7_16i16(<16 x
; AVX1-LABEL: test_rem7_16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vmovd %xmm1, %ecx
-; AVX1-NEXT: movzwl %cx, %edx
-; AVX1-NEXT: imull $9363, %edx, %edx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: movl %ecx, %esi
-; AVX1-NEXT: subl %edx, %esi
-; AVX1-NEXT: andl $65534, %esi # imm = 0xFFFE
-; AVX1-NEXT: shrl %esi
-; AVX1-NEXT: addl %edx, %esi
-; AVX1-NEXT: shrl $2, %esi
-; AVX1-NEXT: leal (,%rsi,8), %edx
-; AVX1-NEXT: subl %esi, %edx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vmovd %xmm0, %ecx
-; AVX1-NEXT: movzwl %cx, %edx
-; AVX1-NEXT: imull $9363, %edx, %edx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: movl %ecx, %esi
-; AVX1-NEXT: subl %edx, %esi
-; AVX1-NEXT: andl $65534, %esi # imm = 0xFFFE
-; AVX1-NEXT: shrl %esi
-; AVX1-NEXT: addl %edx, %esi
-; AVX1-NEXT: shrl $2, %esi
-; AVX1-NEXT: leal (,%rsi,8), %edx
-; AVX1-NEXT: subl %esi, %edx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9363,9363,9363,9363,9363,9363,9363,9363]
+; AVX1-NEXT: vpmulhuw %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpsubw %xmm3, %xmm1, %xmm4
+; AVX1-NEXT: vpsrlw $1, %xmm4, %xmm4
+; AVX1-NEXT: vpaddw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $2, %xmm3, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [7,7,7,7,7,7,7,7]
+; AVX1-NEXT: vpmullw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpsubw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpmulhuw %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpsubw %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpsrlw $1, %xmm3, %xmm3
+; AVX1-NEXT: vpaddw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm2
+; AVX1-NEXT: vpmullw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpsubw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
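
The unsigned case uses the usual fixup for when the unsigned magic multiplier needs more than 16 bits: vpmulhuw by 9363 (roughly 2^16/7) gives a first approximation t, then (x - t) is halved, added back to t, and shifted right by 2 to produce x/7 without overflowing 16 bits; test_rem7_16i16 again forms the remainder with vpmullw by 7 and vpsubw. A per-lane scalar sketch (function name hypothetical):

define i16 @udiv7_lane_sketch(i16 %x) nounwind {
  ; 9363 ~ 2^16/7; the high 16 bits of the product are a first approximation of x/7
  %zx = zext i16 %x to i32
  %p  = mul i32 %zx, 9363
  %ph = lshr i32 %p, 16
  %t  = trunc i32 %ph to i16       ; vpmulhuw
  %d  = sub i16 %x, %t             ; vpsubw
  %d1 = lshr i16 %d, 1             ; vpsrlw $1 (halving first avoids 17-bit overflow)
  %s  = add i16 %t, %d1            ; vpaddw
  %q  = lshr i16 %s, 2             ; vpsrlw $2
  ret i16 %q
}
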