[llvm] r264512 - [X86][AVX] Enabled MULHS/MULHU v16i16 vectors on AVX1 targets

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 26 08:44:56 PDT 2016


Author: rksimon
Date: Sat Mar 26 10:44:55 2016
New Revision: 264512

URL: http://llvm.org/viewvc/llvm-project?rev=264512&view=rev
Log:
[X86][AVX] Enabled MULHS/MULHU v16i16 vectors on AVX1 targets

Correctly split v16i16 vectors into v8i16 vectors to prevent scalarization

Differential Revision: http://reviews.llvm.org/D18307
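
As a reference for the change, here is a minimal scalar model of what the newly
enabled lowering computes (illustrative only -- the helper names below are made
up and are not code from this patch): a v16i16 multiply-high is simply two
independent v8i16 multiply-highs, one per 128-bit half, so splitting the vector
on AVX1 (which has no 256-bit integer PMULHW/PMULHUW) avoids falling back to
sixteen scalar multiplies.

#include <cstdint>
#include <cstdio>

// Reference semantics of one MULHS lane: a signed 16x16->32 multiply keeping
// only the high 16 bits (MULHU is the same with unsigned types).
static int16_t mulhs16(int16_t a, int16_t b) {
  return (int16_t)(((int32_t)a * (int32_t)b) >> 16);
}

// A v16i16 MULHS is just two independent v8i16 MULHS ops, one per 128-bit
// half -- this split is what now happens instead of scalarizing on AVX1.
static void mulhs_v16i16(const int16_t a[16], const int16_t b[16],
                         int16_t out[16]) {
  for (int half = 0; half < 2; ++half)     // lower / upper 128-bit half
    for (int lane = 0; lane < 8; ++lane)   // eight i16 lanes per half
      out[half * 8 + lane] = mulhs16(a[half * 8 + lane], b[half * 8 + lane]);
}

int main() {
  int16_t a[16], b[16], q[16];
  for (int i = 0; i < 16; ++i) {
    a[i] = (int16_t)(i * 1000 - 8000);
    b[i] = 18725;                          // magic constant used by the tests
  }
  mulhs_v16i16(a, b, q);
  for (int i = 0; i < 16; ++i)
    std::printf("%d ", q[i]);
  std::printf("\n");
  return 0;
}

The sdiv/udiv-by-7 tests below exercise exactly this path: the DAG turns the
constant division into MULHS/MULHU plus shift/add fixups, which previously
scalarized on AVX1.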

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll
    llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=264512&r1=264511&r2=264512&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Mar 26 10:44:55 2016
@@ -1253,6 +1253,8 @@ X86TargetLowering::X86TargetLowering(con
       setOperationAction(ISD::MUL,             MVT::v16i16, Custom);
       setOperationAction(ISD::MUL,             MVT::v32i8, Custom);
 
+      setOperationAction(ISD::MULHU,           MVT::v16i16, Custom);
+      setOperationAction(ISD::MULHS,           MVT::v16i16, Custom);
       setOperationAction(ISD::MULHU,           MVT::v32i8, Custom);
       setOperationAction(ISD::MULHS,           MVT::v32i8, Custom);
 

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll?rev=264512&r1=264511&r2=264512&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll Sat Mar 26 10:44:55 2016
@@ -193,150 +193,15 @@ define <16 x i16> @test_div7_16i16(<16 x
 ; AVX1-LABEL: test_div7_16i16:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpextrw $1, %xmm1, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    sarw %cx
-; AVX1-NEXT:    shrl $31, %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    vmovd %xmm1, %ecx
-; AVX1-NEXT:    movswl %cx, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    vmovd %ecx, %xmm2
-; AVX1-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $2, %xmm1, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    sarw %cx
-; AVX1-NEXT:    shrl $31, %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $3, %xmm1, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    sarw %cx
-; AVX1-NEXT:    shrl $31, %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $4, %xmm1, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    sarw %cx
-; AVX1-NEXT:    shrl $31, %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $5, %xmm1, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    sarw %cx
-; AVX1-NEXT:    shrl $31, %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $6, %xmm1, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    sarw %cx
-; AVX1-NEXT:    shrl $31, %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $7, %xmm1, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    sarw %cx
-; AVX1-NEXT:    shrl $31, %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $7, %eax, %xmm2, %xmm1
-; AVX1-NEXT:    vpextrw $1, %xmm0, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    sarw %cx
-; AVX1-NEXT:    shrl $31, %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    vmovd %xmm0, %ecx
-; AVX1-NEXT:    movswl %cx, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    vmovd %ecx, %xmm2
-; AVX1-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $2, %xmm0, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    sarw %cx
-; AVX1-NEXT:    shrl $31, %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $3, %xmm0, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    sarw %cx
-; AVX1-NEXT:    shrl $31, %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $4, %xmm0, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    sarw %cx
-; AVX1-NEXT:    shrl $31, %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $5, %xmm0, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    sarw %cx
-; AVX1-NEXT:    shrl $31, %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $6, %xmm0, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    sarw %cx
-; AVX1-NEXT:    shrl $31, %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $7, %xmm0, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    sarw %cx
-; AVX1-NEXT:    shrl $31, %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
+; AVX1-NEXT:    vpmulhw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsrlw $15, %xmm1, %xmm3
+; AVX1-NEXT:    vpsraw $1, %xmm1, %xmm1
+; AVX1-NEXT:    vpaddw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpmulhw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm2
+; AVX1-NEXT:    vpsraw $1, %xmm0, %xmm0
+; AVX1-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -674,198 +539,20 @@ define <16 x i16> @test_rem7_16i16(<16 x
 ; AVX1-LABEL: test_rem7_16i16:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpextrw $1, %xmm1, %eax
-; AVX1-NEXT:    movswl %ax, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    leal (,%rcx,8), %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    subl %edx, %eax
-; AVX1-NEXT:    vmovd %xmm1, %ecx
-; AVX1-NEXT:    movswl %cx, %edx
-; AVX1-NEXT:    imull $18725, %edx, %edx # imm = 0x4925
-; AVX1-NEXT:    movl %edx, %esi
-; AVX1-NEXT:    shrl $16, %esi
-; AVX1-NEXT:    sarw %si
-; AVX1-NEXT:    shrl $31, %edx
-; AVX1-NEXT:    addl %esi, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %esi
-; AVX1-NEXT:    subl %edx, %esi
-; AVX1-NEXT:    subl %esi, %ecx
-; AVX1-NEXT:    vmovd %ecx, %xmm2
-; AVX1-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $2, %xmm1, %eax
-; AVX1-NEXT:    movswl %ax, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    leal (,%rcx,8), %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    subl %edx, %eax
-; AVX1-NEXT:    vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $3, %xmm1, %eax
-; AVX1-NEXT:    movswl %ax, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    leal (,%rcx,8), %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    subl %edx, %eax
-; AVX1-NEXT:    vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $4, %xmm1, %eax
-; AVX1-NEXT:    movswl %ax, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    leal (,%rcx,8), %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    subl %edx, %eax
-; AVX1-NEXT:    vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $5, %xmm1, %eax
-; AVX1-NEXT:    movswl %ax, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    leal (,%rcx,8), %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    subl %edx, %eax
-; AVX1-NEXT:    vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $6, %xmm1, %eax
-; AVX1-NEXT:    movswl %ax, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    leal (,%rcx,8), %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    subl %edx, %eax
-; AVX1-NEXT:    vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $7, %xmm1, %eax
-; AVX1-NEXT:    movswl %ax, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    leal (,%rcx,8), %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    subl %edx, %eax
-; AVX1-NEXT:    vpinsrw $7, %eax, %xmm2, %xmm1
-; AVX1-NEXT:    vpextrw $1, %xmm0, %eax
-; AVX1-NEXT:    movswl %ax, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    leal (,%rcx,8), %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    subl %edx, %eax
-; AVX1-NEXT:    vmovd %xmm0, %ecx
-; AVX1-NEXT:    movswl %cx, %edx
-; AVX1-NEXT:    imull $18725, %edx, %edx # imm = 0x4925
-; AVX1-NEXT:    movl %edx, %esi
-; AVX1-NEXT:    shrl $16, %esi
-; AVX1-NEXT:    sarw %si
-; AVX1-NEXT:    shrl $31, %edx
-; AVX1-NEXT:    addl %esi, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %esi
-; AVX1-NEXT:    subl %edx, %esi
-; AVX1-NEXT:    subl %esi, %ecx
-; AVX1-NEXT:    vmovd %ecx, %xmm2
-; AVX1-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $2, %xmm0, %eax
-; AVX1-NEXT:    movswl %ax, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    leal (,%rcx,8), %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    subl %edx, %eax
-; AVX1-NEXT:    vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $3, %xmm0, %eax
-; AVX1-NEXT:    movswl %ax, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    leal (,%rcx,8), %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    subl %edx, %eax
-; AVX1-NEXT:    vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $4, %xmm0, %eax
-; AVX1-NEXT:    movswl %ax, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    leal (,%rcx,8), %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    subl %edx, %eax
-; AVX1-NEXT:    vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $5, %xmm0, %eax
-; AVX1-NEXT:    movswl %ax, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    leal (,%rcx,8), %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    subl %edx, %eax
-; AVX1-NEXT:    vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $6, %xmm0, %eax
-; AVX1-NEXT:    movswl %ax, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    leal (,%rcx,8), %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    subl %edx, %eax
-; AVX1-NEXT:    vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $7, %xmm0, %eax
-; AVX1-NEXT:    movswl %ax, %ecx
-; AVX1-NEXT:    imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT:    movl %ecx, %edx
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    sarw %dx
-; AVX1-NEXT:    shrl $31, %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    leal (,%rcx,8), %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    subl %edx, %eax
-; AVX1-NEXT:    vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
+; AVX1-NEXT:    vpmulhw %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vpsrlw $15, %xmm3, %xmm4
+; AVX1-NEXT:    vpsraw $1, %xmm3, %xmm3
+; AVX1-NEXT:    vpaddw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [7,7,7,7,7,7,7,7]
+; AVX1-NEXT:    vpmullw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpsubw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpmulhw %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm3
+; AVX1-NEXT:    vpsraw $1, %xmm2, %xmm2
+; AVX1-NEXT:    vpaddw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpmullw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
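
A side note on the constant used above (my annotation, not part of the commit):
18725 is the standard signed magic number for division by 7 on 16-bit lanes,
$18725 = \lceil 2^{17}/7 \rceil$. Writing $t = \mathrm{mulhs}(x, 18725) =
\lfloor 18725\,x / 2^{16} \rfloor$ (the vpmulhw result), the quotient is
$q = (t \gg_{\mathrm{arith}} 1) + (t \gg_{\mathrm{logic}} 15)$: the arithmetic
shift finishes the division by $2^{17}$, and the logical shift by 15 adds the
sign bit of t so that floor division becomes C-style truncation for negative x.
test_rem7_16i16 then forms the remainder as $x - 7q$ with vpmullw/vpsubw.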

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll?rev=264512&r1=264511&r2=264512&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll Sat Mar 26 10:44:55 2016
@@ -184,154 +184,19 @@ define <8 x i32> @test_div7_8i32(<8 x i3
 define <16 x i16> @test_div7_16i16(<16 x i16> %a) nounwind {
 ; AVX1-LABEL: test_div7_16i16:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpextrw $1, %xmm1, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT:    shrl %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    shrl $2, %eax
-; AVX1-NEXT:    vmovd %xmm1, %ecx
-; AVX1-NEXT:    movzwl %cx, %edx
-; AVX1-NEXT:    imull $9363, %edx, %edx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    andl $65534, %ecx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    shrl $2, %ecx
-; AVX1-NEXT:    vmovd %ecx, %xmm2
-; AVX1-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $2, %xmm1, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT:    shrl %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    shrl $2, %eax
-; AVX1-NEXT:    vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $3, %xmm1, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT:    shrl %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    shrl $2, %eax
-; AVX1-NEXT:    vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $4, %xmm1, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT:    shrl %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    shrl $2, %eax
-; AVX1-NEXT:    vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $5, %xmm1, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT:    shrl %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    shrl $2, %eax
-; AVX1-NEXT:    vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $6, %xmm1, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT:    shrl %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    shrl $2, %eax
-; AVX1-NEXT:    vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $7, %xmm1, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT:    shrl %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    shrl $2, %eax
-; AVX1-NEXT:    vpinsrw $7, %eax, %xmm2, %xmm1
-; AVX1-NEXT:    vpextrw $1, %xmm0, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT:    shrl %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    shrl $2, %eax
-; AVX1-NEXT:    vmovd %xmm0, %ecx
-; AVX1-NEXT:    movzwl %cx, %edx
-; AVX1-NEXT:    imull $9363, %edx, %edx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    andl $65534, %ecx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %ecx
-; AVX1-NEXT:    addl %edx, %ecx
-; AVX1-NEXT:    shrl $2, %ecx
-; AVX1-NEXT:    vmovd %ecx, %xmm2
-; AVX1-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $2, %xmm0, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT:    shrl %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    shrl $2, %eax
-; AVX1-NEXT:    vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $3, %xmm0, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT:    shrl %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    shrl $2, %eax
-; AVX1-NEXT:    vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $4, %xmm0, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT:    shrl %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    shrl $2, %eax
-; AVX1-NEXT:    vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $5, %xmm0, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT:    shrl %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    shrl $2, %eax
-; AVX1-NEXT:    vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $6, %xmm0, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT:    shrl %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    shrl $2, %eax
-; AVX1-NEXT:    vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $7, %xmm0, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT:    shrl %eax
-; AVX1-NEXT:    addl %ecx, %eax
-; AVX1-NEXT:    shrl $2, %eax
-; AVX1-NEXT:    vpinsrw $7, %eax, %xmm2, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [9363,9363,9363,9363,9363,9363,9363,9363]
+; AVX1-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vpsubw %xmm2, %xmm0, %xmm3
+; AVX1-NEXT:    vpsrlw $1, %xmm3, %xmm3
+; AVX1-NEXT:    vpaddw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0
+; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_div7_16i16:
@@ -661,216 +526,22 @@ define <16 x i16> @test_rem7_16i16(<16 x
 ; AVX1-LABEL: test_rem7_16i16:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpextrw $1, %xmm1, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    movl %eax, %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %edx
-; AVX1-NEXT:    addl %ecx, %edx
-; AVX1-NEXT:    shrl $2, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %ecx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    vmovd %xmm1, %ecx
-; AVX1-NEXT:    movzwl %cx, %edx
-; AVX1-NEXT:    imull $9363, %edx, %edx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    movl %ecx, %esi
-; AVX1-NEXT:    subl %edx, %esi
-; AVX1-NEXT:    andl $65534, %esi # imm = 0xFFFE
-; AVX1-NEXT:    shrl %esi
-; AVX1-NEXT:    addl %edx, %esi
-; AVX1-NEXT:    shrl $2, %esi
-; AVX1-NEXT:    leal (,%rsi,8), %edx
-; AVX1-NEXT:    subl %esi, %edx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    vmovd %ecx, %xmm2
-; AVX1-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $2, %xmm1, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    movl %eax, %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %edx
-; AVX1-NEXT:    addl %ecx, %edx
-; AVX1-NEXT:    shrl $2, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %ecx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $3, %xmm1, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    movl %eax, %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %edx
-; AVX1-NEXT:    addl %ecx, %edx
-; AVX1-NEXT:    shrl $2, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %ecx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $4, %xmm1, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    movl %eax, %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %edx
-; AVX1-NEXT:    addl %ecx, %edx
-; AVX1-NEXT:    shrl $2, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %ecx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $5, %xmm1, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    movl %eax, %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %edx
-; AVX1-NEXT:    addl %ecx, %edx
-; AVX1-NEXT:    shrl $2, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %ecx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $6, %xmm1, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    movl %eax, %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %edx
-; AVX1-NEXT:    addl %ecx, %edx
-; AVX1-NEXT:    shrl $2, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %ecx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $7, %xmm1, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    movl %eax, %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %edx
-; AVX1-NEXT:    addl %ecx, %edx
-; AVX1-NEXT:    shrl $2, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %ecx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $7, %eax, %xmm2, %xmm1
-; AVX1-NEXT:    vpextrw $1, %xmm0, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    movl %eax, %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %edx
-; AVX1-NEXT:    addl %ecx, %edx
-; AVX1-NEXT:    shrl $2, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %ecx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    vmovd %xmm0, %ecx
-; AVX1-NEXT:    movzwl %cx, %edx
-; AVX1-NEXT:    imull $9363, %edx, %edx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %edx
-; AVX1-NEXT:    movl %ecx, %esi
-; AVX1-NEXT:    subl %edx, %esi
-; AVX1-NEXT:    andl $65534, %esi # imm = 0xFFFE
-; AVX1-NEXT:    shrl %esi
-; AVX1-NEXT:    addl %edx, %esi
-; AVX1-NEXT:    shrl $2, %esi
-; AVX1-NEXT:    leal (,%rsi,8), %edx
-; AVX1-NEXT:    subl %esi, %edx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    vmovd %ecx, %xmm2
-; AVX1-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $2, %xmm0, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    movl %eax, %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %edx
-; AVX1-NEXT:    addl %ecx, %edx
-; AVX1-NEXT:    shrl $2, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %ecx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $3, %xmm0, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    movl %eax, %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %edx
-; AVX1-NEXT:    addl %ecx, %edx
-; AVX1-NEXT:    shrl $2, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %ecx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $4, %xmm0, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    movl %eax, %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %edx
-; AVX1-NEXT:    addl %ecx, %edx
-; AVX1-NEXT:    shrl $2, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %ecx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $5, %xmm0, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    movl %eax, %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %edx
-; AVX1-NEXT:    addl %ecx, %edx
-; AVX1-NEXT:    shrl $2, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %ecx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $6, %xmm0, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    movl %eax, %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %edx
-; AVX1-NEXT:    addl %ecx, %edx
-; AVX1-NEXT:    shrl $2, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %ecx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrw $7, %xmm0, %eax
-; AVX1-NEXT:    imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    movl %eax, %edx
-; AVX1-NEXT:    subl %ecx, %edx
-; AVX1-NEXT:    andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT:    shrl %edx
-; AVX1-NEXT:    addl %ecx, %edx
-; AVX1-NEXT:    shrl $2, %edx
-; AVX1-NEXT:    leal (,%rdx,8), %ecx
-; AVX1-NEXT:    subl %edx, %ecx
-; AVX1-NEXT:    subl %ecx, %eax
-; AVX1-NEXT:    vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9363,9363,9363,9363,9363,9363,9363,9363]
+; AVX1-NEXT:    vpmulhuw %xmm2, %xmm1, %xmm3
+; AVX1-NEXT:    vpsubw %xmm3, %xmm1, %xmm4
+; AVX1-NEXT:    vpsrlw $1, %xmm4, %xmm4
+; AVX1-NEXT:    vpaddw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpsrlw $2, %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [7,7,7,7,7,7,7,7]
+; AVX1-NEXT:    vpmullw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpsubw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpmulhuw %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpsubw %xmm2, %xmm0, %xmm3
+; AVX1-NEXT:    vpsrlw $1, %xmm3, %xmm3
+; AVX1-NEXT:    vpaddw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm2
+; AVX1-NEXT:    vpmullw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
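
The unsigned case is analogous (again my annotation, not part of the commit):
$9363 = \lceil 2^{19}/7 \rceil - 2^{16}$ is the unsigned magic number for
division by 7. With $t = \mathrm{mulhu}(x, 9363) = \lfloor 9363\,x / 2^{16}
\rfloor$ (vpmulhuw), the quotient is $q = (\lfloor (x - t)/2 \rfloor + t) \gg 2
= \lfloor x/7 \rfloor$. The subtract-then-halve step (vpsubw / vpsrlw $1)
exists because adding x and t directly could overflow 16 bits; test_rem7_16i16
again computes $x - 7q$ via vpmullw/vpsubw.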
