[llvm] r346530 - [DAGCombiner][X86][Mips] Enable combineShuffleOfScalars to run between vector op legalization and DAG legalization. Fix bad one use check in combineShuffleOfScalars

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 9 10:04:34 PST 2018


Author: ctopper
Date: Fri Nov  9 10:04:34 2018
New Revision: 346530

URL: http://llvm.org/viewvc/llvm-project?rev=346530&view=rev
Log:
[DAGCombiner][X86][Mips] Enable combineShuffleOfScalars to run between vector op legalization and DAG legalization. Fix bad one use check in combineShuffleOfScalars

It's possible for vector op legalization to generate a shuffle. If that happens, we should give DAG combine a chance to combine it with a build_vector input.

I also fixed a bug in combineShuffleOfScalars that was considering the number of uses of an undef input to a shuffle. We don't care how many times undef is used.

Differential Revision: https://reviews.llvm.org/D54283

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/Mips/cconv/vector.ll
    llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
    llvm/trunk/test/CodeGen/X86/vec_cast.ll
    llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
    llvm/trunk/test/CodeGen/X86/widen_conv-3.ll
    llvm/trunk/test/CodeGen/X86/widen_conv-4.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=346530&r1=346529&r2=346530&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Nov  9 10:04:34 2018
@@ -16933,12 +16933,15 @@ static SDValue combineShuffleOfScalars(S
   SDValue N0 = SVN->getOperand(0);
   SDValue N1 = SVN->getOperand(1);
 
-  if (!N0->hasOneUse() || !N1->hasOneUse())
+  if (!N0->hasOneUse())
     return SDValue();
 
   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
   // discussed above.
   if (!N1.isUndef()) {
+    if (!N1->hasOneUse())
+      return SDValue();
+
     bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
     bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
@@ -17371,7 +17374,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE
 
   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
-  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
       return Res;
 

Modified: llvm/trunk/test/CodeGen/Mips/cconv/vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/cconv/vector.ll?rev=346530&r1=346529&r2=346530&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/cconv/vector.ll (original)
+++ llvm/trunk/test/CodeGen/Mips/cconv/vector.ll Fri Nov  9 10:04:34 2018
@@ -61,19 +61,15 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x
 ; MIPS32R5EB-NEXT:    sw $5, 36($sp)
 ; MIPS32R5EB-NEXT:    sw $4, 40($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 37($sp)
-; MIPS32R5EB-NEXT:    sw $1, 20($sp)
+; MIPS32R5EB-NEXT:    sw $1, 28($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 36($sp)
-; MIPS32R5EB-NEXT:    sw $1, 16($sp)
+; MIPS32R5EB-NEXT:    sw $1, 20($sp)
+; MIPS32R5EB-NEXT:    lbu $1, 41($sp)
+; MIPS32R5EB-NEXT:    sw $1, 12($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 40($sp)
-; MIPS32R5EB-NEXT:    lbu $2, 41($sp)
-; MIPS32R5EB-NEXT:    sw $2, 4($sp)
-; MIPS32R5EB-NEXT:    sw $1, 0($sp)
-; MIPS32R5EB-NEXT:    ld.w $w0, 16($sp)
-; MIPS32R5EB-NEXT:    ilvr.w $w0, $w0, $w0
-; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
-; MIPS32R5EB-NEXT:    ld.w $w1, 0($sp)
-; MIPS32R5EB-NEXT:    ilvr.w $w1, $w1, $w1
-; MIPS32R5EB-NEXT:    shf.w $w1, $w1, 177
+; MIPS32R5EB-NEXT:    sw $1, 4($sp)
+; MIPS32R5EB-NEXT:    ld.d $w0, 16($sp)
+; MIPS32R5EB-NEXT:    ld.d $w1, 0($sp)
 ; MIPS32R5EB-NEXT:    addv.d $w0, $w1, $w0
 ; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
 ; MIPS32R5EB-NEXT:    copy_s.w $1, $w0[1]
@@ -166,17 +162,15 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x
 ; MIPS32R5EL-NEXT:    sw $5, 36($sp)
 ; MIPS32R5EL-NEXT:    sw $4, 40($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 37($sp)
-; MIPS32R5EL-NEXT:    sw $1, 20($sp)
+; MIPS32R5EL-NEXT:    sw $1, 24($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 36($sp)
 ; MIPS32R5EL-NEXT:    sw $1, 16($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 41($sp)
-; MIPS32R5EL-NEXT:    sw $1, 4($sp)
+; MIPS32R5EL-NEXT:    sw $1, 8($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 40($sp)
 ; MIPS32R5EL-NEXT:    sw $1, 0($sp)
-; MIPS32R5EL-NEXT:    ld.w $w0, 16($sp)
-; MIPS32R5EL-NEXT:    ilvr.w $w0, $w0, $w0
-; MIPS32R5EL-NEXT:    ld.w $w1, 0($sp)
-; MIPS32R5EL-NEXT:    ilvr.w $w1, $w1, $w1
+; MIPS32R5EL-NEXT:    ld.d $w0, 16($sp)
+; MIPS32R5EL-NEXT:    ld.d $w1, 0($sp)
 ; MIPS32R5EL-NEXT:    addv.d $w0, $w1, $w0
 ; MIPS32R5EL-NEXT:    copy_s.w $1, $w0[0]
 ; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[2]
@@ -327,61 +321,47 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2
 ; MIPS32R5EB-NEXT:    sw $5, 132($sp)
 ; MIPS32R5EB-NEXT:    sw $4, 136($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 133($sp)
-; MIPS32R5EB-NEXT:    sw $1, 68($sp)
+; MIPS32R5EB-NEXT:    sw $1, 76($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 132($sp)
-; MIPS32R5EB-NEXT:    sw $1, 64($sp)
+; MIPS32R5EB-NEXT:    sw $1, 68($sp)
+; MIPS32R5EB-NEXT:    lbu $1, 137($sp)
+; MIPS32R5EB-NEXT:    sw $1, 60($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 136($sp)
-; MIPS32R5EB-NEXT:    lbu $2, 137($sp)
-; MIPS32R5EB-NEXT:    sw $2, 52($sp)
-; MIPS32R5EB-NEXT:    sw $1, 48($sp)
-; MIPS32R5EB-NEXT:    ld.w $w0, 64($sp)
-; MIPS32R5EB-NEXT:    ilvr.w $w0, $w0, $w0
-; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
-; MIPS32R5EB-NEXT:    ld.w $w1, 48($sp)
-; MIPS32R5EB-NEXT:    ilvr.w $w1, $w1, $w1
-; MIPS32R5EB-NEXT:    shf.w $w1, $w1, 177
+; MIPS32R5EB-NEXT:    sw $1, 52($sp)
+; MIPS32R5EB-NEXT:    ld.d $w0, 64($sp)
+; MIPS32R5EB-NEXT:    ld.d $w1, 48($sp)
 ; MIPS32R5EB-NEXT:    addv.d $w0, $w1, $w0
 ; MIPS32R5EB-NEXT:    sw $6, 128($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 129($sp)
-; MIPS32R5EB-NEXT:    sw $1, 84($sp)
+; MIPS32R5EB-NEXT:    sw $1, 92($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 128($sp)
-; MIPS32R5EB-NEXT:    sw $1, 80($sp)
-; MIPS32R5EB-NEXT:    ld.w $w1, 80($sp)
-; MIPS32R5EB-NEXT:    ilvr.w $w1, $w1, $w1
-; MIPS32R5EB-NEXT:    shf.w $w1, $w1, 177
+; MIPS32R5EB-NEXT:    sw $1, 84($sp)
+; MIPS32R5EB-NEXT:    ld.d $w1, 80($sp)
 ; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
 ; MIPS32R5EB-NEXT:    sw $7, 124($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 125($sp)
-; MIPS32R5EB-NEXT:    sw $1, 100($sp)
+; MIPS32R5EB-NEXT:    sw $1, 108($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 124($sp)
-; MIPS32R5EB-NEXT:    sw $1, 96($sp)
-; MIPS32R5EB-NEXT:    ld.w $w1, 96($sp)
-; MIPS32R5EB-NEXT:    ilvr.w $w1, $w1, $w1
-; MIPS32R5EB-NEXT:    shf.w $w1, $w1, 177
+; MIPS32R5EB-NEXT:    sw $1, 100($sp)
+; MIPS32R5EB-NEXT:    ld.d $w1, 96($sp)
 ; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
 ; MIPS32R5EB-NEXT:    lbu $1, 161($fp)
-; MIPS32R5EB-NEXT:    sw $1, 4($sp)
+; MIPS32R5EB-NEXT:    sw $1, 12($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 160($fp)
-; MIPS32R5EB-NEXT:    sw $1, 0($sp)
-; MIPS32R5EB-NEXT:    ld.w $w1, 0($sp)
-; MIPS32R5EB-NEXT:    ilvr.w $w1, $w1, $w1
-; MIPS32R5EB-NEXT:    shf.w $w1, $w1, 177
+; MIPS32R5EB-NEXT:    sw $1, 4($sp)
+; MIPS32R5EB-NEXT:    ld.d $w1, 0($sp)
 ; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
 ; MIPS32R5EB-NEXT:    lbu $1, 165($fp)
-; MIPS32R5EB-NEXT:    sw $1, 20($sp)
+; MIPS32R5EB-NEXT:    sw $1, 28($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 164($fp)
-; MIPS32R5EB-NEXT:    sw $1, 16($sp)
-; MIPS32R5EB-NEXT:    ld.w $w1, 16($sp)
-; MIPS32R5EB-NEXT:    ilvr.w $w1, $w1, $w1
-; MIPS32R5EB-NEXT:    shf.w $w1, $w1, 177
+; MIPS32R5EB-NEXT:    sw $1, 20($sp)
+; MIPS32R5EB-NEXT:    ld.d $w1, 16($sp)
 ; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
 ; MIPS32R5EB-NEXT:    lbu $1, 169($fp)
-; MIPS32R5EB-NEXT:    sw $1, 36($sp)
+; MIPS32R5EB-NEXT:    sw $1, 44($sp)
 ; MIPS32R5EB-NEXT:    lbu $1, 168($fp)
-; MIPS32R5EB-NEXT:    sw $1, 32($sp)
-; MIPS32R5EB-NEXT:    ld.w $w1, 32($sp)
-; MIPS32R5EB-NEXT:    ilvr.w $w1, $w1, $w1
-; MIPS32R5EB-NEXT:    shf.w $w1, $w1, 177
+; MIPS32R5EB-NEXT:    sw $1, 36($sp)
+; MIPS32R5EB-NEXT:    ld.d $w1, 32($sp)
 ; MIPS32R5EB-NEXT:    addv.d $w0, $w0, $w1
 ; MIPS32R5EB-NEXT:    shf.w $w0, $w0, 177
 ; MIPS32R5EB-NEXT:    copy_s.w $1, $w0[1]
@@ -579,54 +559,47 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2
 ; MIPS32R5EL-NEXT:    sw $5, 132($sp)
 ; MIPS32R5EL-NEXT:    sw $4, 136($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 133($sp)
-; MIPS32R5EL-NEXT:    sw $1, 68($sp)
+; MIPS32R5EL-NEXT:    sw $1, 72($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 132($sp)
 ; MIPS32R5EL-NEXT:    sw $1, 64($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 137($sp)
-; MIPS32R5EL-NEXT:    sw $1, 52($sp)
+; MIPS32R5EL-NEXT:    sw $1, 56($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 136($sp)
 ; MIPS32R5EL-NEXT:    sw $1, 48($sp)
-; MIPS32R5EL-NEXT:    ld.w $w0, 64($sp)
-; MIPS32R5EL-NEXT:    ilvr.w $w0, $w0, $w0
-; MIPS32R5EL-NEXT:    ld.w $w1, 48($sp)
-; MIPS32R5EL-NEXT:    ilvr.w $w1, $w1, $w1
+; MIPS32R5EL-NEXT:    ld.d $w0, 64($sp)
+; MIPS32R5EL-NEXT:    ld.d $w1, 48($sp)
 ; MIPS32R5EL-NEXT:    addv.d $w0, $w1, $w0
 ; MIPS32R5EL-NEXT:    sw $6, 128($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 129($sp)
-; MIPS32R5EL-NEXT:    sw $1, 84($sp)
+; MIPS32R5EL-NEXT:    sw $1, 88($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 128($sp)
 ; MIPS32R5EL-NEXT:    sw $1, 80($sp)
-; MIPS32R5EL-NEXT:    ld.w $w1, 80($sp)
-; MIPS32R5EL-NEXT:    ilvr.w $w1, $w1, $w1
+; MIPS32R5EL-NEXT:    ld.d $w1, 80($sp)
 ; MIPS32R5EL-NEXT:    addv.d $w0, $w0, $w1
 ; MIPS32R5EL-NEXT:    sw $7, 124($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 125($sp)
-; MIPS32R5EL-NEXT:    sw $1, 100($sp)
+; MIPS32R5EL-NEXT:    sw $1, 104($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 124($sp)
 ; MIPS32R5EL-NEXT:    sw $1, 96($sp)
-; MIPS32R5EL-NEXT:    ld.w $w1, 96($sp)
-; MIPS32R5EL-NEXT:    ilvr.w $w1, $w1, $w1
+; MIPS32R5EL-NEXT:    ld.d $w1, 96($sp)
 ; MIPS32R5EL-NEXT:    addv.d $w0, $w0, $w1
 ; MIPS32R5EL-NEXT:    lbu $1, 161($fp)
-; MIPS32R5EL-NEXT:    sw $1, 4($sp)
+; MIPS32R5EL-NEXT:    sw $1, 8($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 160($fp)
 ; MIPS32R5EL-NEXT:    sw $1, 0($sp)
-; MIPS32R5EL-NEXT:    ld.w $w1, 0($sp)
-; MIPS32R5EL-NEXT:    ilvr.w $w1, $w1, $w1
+; MIPS32R5EL-NEXT:    ld.d $w1, 0($sp)
 ; MIPS32R5EL-NEXT:    addv.d $w0, $w0, $w1
 ; MIPS32R5EL-NEXT:    lbu $1, 165($fp)
-; MIPS32R5EL-NEXT:    sw $1, 20($sp)
+; MIPS32R5EL-NEXT:    sw $1, 24($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 164($fp)
 ; MIPS32R5EL-NEXT:    sw $1, 16($sp)
-; MIPS32R5EL-NEXT:    ld.w $w1, 16($sp)
-; MIPS32R5EL-NEXT:    ilvr.w $w1, $w1, $w1
+; MIPS32R5EL-NEXT:    ld.d $w1, 16($sp)
 ; MIPS32R5EL-NEXT:    addv.d $w0, $w0, $w1
 ; MIPS32R5EL-NEXT:    lbu $1, 169($fp)
-; MIPS32R5EL-NEXT:    sw $1, 36($sp)
+; MIPS32R5EL-NEXT:    sw $1, 40($sp)
 ; MIPS32R5EL-NEXT:    lbu $1, 168($fp)
 ; MIPS32R5EL-NEXT:    sw $1, 32($sp)
-; MIPS32R5EL-NEXT:    ld.w $w1, 32($sp)
-; MIPS32R5EL-NEXT:    ilvr.w $w1, $w1, $w1
+; MIPS32R5EL-NEXT:    ld.d $w1, 32($sp)
 ; MIPS32R5EL-NEXT:    addv.d $w0, $w0, $w1
 ; MIPS32R5EL-NEXT:    copy_s.w $1, $w0[0]
 ; MIPS32R5EL-NEXT:    copy_s.w $2, $w0[2]

Modified: llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll?rev=346530&r1=346529&r2=346530&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll Fri Nov  9 10:04:34 2018
@@ -28,10 +28,9 @@ define <4 x float> @signbits_sext_v4i64_
 ; X32-NEXT:    movswl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    vmovd %ecx, %xmm0
-; X32-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
-; X32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; X32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; X32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
 ; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X32-NEXT:    retl
 ;

Modified: llvm/trunk/test/CodeGen/X86/vec_cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_cast.ll?rev=346530&r1=346529&r2=346530&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_cast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_cast.ll Fri Nov  9 10:04:34 2018
@@ -28,10 +28,10 @@ define <8 x i32> @a(<8 x i16> %a) nounwi
 define <3 x i32> @b(<3 x i16> %a) nounwind {
 ; CHECK-LIN-LABEL: b:
 ; CHECK-LIN:       # %bb.0:
-; CHECK-LIN-NEXT:    movd %edi, %xmm0
-; CHECK-LIN-NEXT:    pinsrw $1, %esi, %xmm0
-; CHECK-LIN-NEXT:    pinsrw $2, %edx, %xmm0
-; CHECK-LIN-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; CHECK-LIN-NEXT:    pxor %xmm0, %xmm0
+; CHECK-LIN-NEXT:    pinsrw $1, %edi, %xmm0
+; CHECK-LIN-NEXT:    pinsrw $3, %esi, %xmm0
+; CHECK-LIN-NEXT:    pinsrw $5, %edx, %xmm0
 ; CHECK-LIN-NEXT:    psrad $16, %xmm0
 ; CHECK-LIN-NEXT:    retq
 ;
@@ -40,10 +40,10 @@ define <3 x i32> @b(<3 x i16> %a) nounwi
 ; CHECK-WIN-NEXT:    # kill: def $r8w killed $r8w def $r8d
 ; CHECK-WIN-NEXT:    # kill: def $dx killed $dx def $edx
 ; CHECK-WIN-NEXT:    # kill: def $cx killed $cx def $ecx
-; CHECK-WIN-NEXT:    movd %ecx, %xmm0
-; CHECK-WIN-NEXT:    pinsrw $1, %edx, %xmm0
-; CHECK-WIN-NEXT:    pinsrw $2, %r8d, %xmm0
-; CHECK-WIN-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; CHECK-WIN-NEXT:    pxor %xmm0, %xmm0
+; CHECK-WIN-NEXT:    pinsrw $1, %ecx, %xmm0
+; CHECK-WIN-NEXT:    pinsrw $3, %edx, %xmm0
+; CHECK-WIN-NEXT:    pinsrw $5, %r8d, %xmm0
 ; CHECK-WIN-NEXT:    psrad $16, %xmm0
 ; CHECK-WIN-NEXT:    retq
   %c = sext <3 x i16> %a to <3 x i32>
@@ -88,11 +88,10 @@ define <8 x i32> @d(<8 x i16> %a) nounwi
 define <3 x i32> @e(<3 x i16> %a) nounwind {
 ; CHECK-LIN-LABEL: e:
 ; CHECK-LIN:       # %bb.0:
-; CHECK-LIN-NEXT:    movd %edi, %xmm0
-; CHECK-LIN-NEXT:    pinsrw $1, %esi, %xmm0
-; CHECK-LIN-NEXT:    pinsrw $2, %edx, %xmm0
-; CHECK-LIN-NEXT:    pxor %xmm1, %xmm1
-; CHECK-LIN-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK-LIN-NEXT:    pxor %xmm0, %xmm0
+; CHECK-LIN-NEXT:    pinsrw $0, %edi, %xmm0
+; CHECK-LIN-NEXT:    pinsrw $2, %esi, %xmm0
+; CHECK-LIN-NEXT:    pinsrw $4, %edx, %xmm0
 ; CHECK-LIN-NEXT:    retq
 ;
 ; CHECK-WIN-LABEL: e:
@@ -100,11 +99,10 @@ define <3 x i32> @e(<3 x i16> %a) nounwi
 ; CHECK-WIN-NEXT:    # kill: def $r8w killed $r8w def $r8d
 ; CHECK-WIN-NEXT:    # kill: def $dx killed $dx def $edx
 ; CHECK-WIN-NEXT:    # kill: def $cx killed $cx def $ecx
-; CHECK-WIN-NEXT:    movd %ecx, %xmm0
-; CHECK-WIN-NEXT:    pinsrw $1, %edx, %xmm0
-; CHECK-WIN-NEXT:    pinsrw $2, %r8d, %xmm0
-; CHECK-WIN-NEXT:    pxor %xmm1, %xmm1
-; CHECK-WIN-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK-WIN-NEXT:    pxor %xmm0, %xmm0
+; CHECK-WIN-NEXT:    pinsrw $0, %ecx, %xmm0
+; CHECK-WIN-NEXT:    pinsrw $2, %edx, %xmm0
+; CHECK-WIN-NEXT:    pinsrw $4, %r8d, %xmm0
 ; CHECK-WIN-NEXT:    retq
   %c = zext <3 x i16> %a to <3 x i32>
   ret <3 x i32> %c

Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll?rev=346530&r1=346529&r2=346530&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll Fri Nov  9 10:04:34 2018
@@ -1259,29 +1259,29 @@ define <4 x float> @sitofp_2i64_to_4f32_
 ;
 ; VEX-LABEL: sitofp_2i64_to_4f32_zero:
 ; VEX:       # %bb.0:
-; VEX-NEXT:    vpextrq $1, %xmm0, %rax
-; VEX-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
 ; VEX-NEXT:    vmovq %xmm0, %rax
+; VEX-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
+; VEX-NEXT:    vpextrq $1, %xmm0, %rax
 ; VEX-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm0
-; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
 ; VEX-NEXT:    retq
 ;
 ; AVX512F-LABEL: sitofp_2i64_to_4f32_zero:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
-; AVX512F-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
+; AVX512F-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
+; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: sitofp_2i64_to_4f32_zero:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
-; AVX512VL-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
+; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;
@@ -2058,7 +2058,7 @@ define <4 x float> @uitofp_2i64_to_2f32(
 ;
 ; VEX-LABEL: uitofp_2i64_to_2f32:
 ; VEX:       # %bb.0:
-; VEX-NEXT:    vpextrq $1, %xmm0, %rax
+; VEX-NEXT:    vmovq %xmm0, %rax
 ; VEX-NEXT:    testq %rax, %rax
 ; VEX-NEXT:    js .LBB40_1
 ; VEX-NEXT:  # %bb.2:
@@ -2072,12 +2072,12 @@ define <4 x float> @uitofp_2i64_to_2f32(
 ; VEX-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
 ; VEX-NEXT:    vaddss %xmm1, %xmm1, %xmm1
 ; VEX-NEXT:  .LBB40_3:
-; VEX-NEXT:    vmovq %xmm0, %rax
+; VEX-NEXT:    vpextrq $1, %xmm0, %rax
 ; VEX-NEXT:    testq %rax, %rax
 ; VEX-NEXT:    js .LBB40_4
 ; VEX-NEXT:  # %bb.5:
 ; VEX-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm0
-; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
 ; VEX-NEXT:    retq
 ; VEX-NEXT:  .LBB40_4:
 ; VEX-NEXT:    movq %rax, %rcx
@@ -2086,25 +2086,25 @@ define <4 x float> @uitofp_2i64_to_2f32(
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm0
 ; VEX-NEXT:    vaddss %xmm0, %xmm0, %xmm0
-; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
 ; VEX-NEXT:    retq
 ;
 ; AVX512F-LABEL: uitofp_2i64_to_2f32:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
-; AVX512F-NEXT:    vcvtusi2ssq %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
+; AVX512F-NEXT:    vcvtusi2ssq %rax, %xmm1, %xmm1
+; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512F-NEXT:    vcvtusi2ssq %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: uitofp_2i64_to_2f32:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
-; AVX512VL-NEXT:    vcvtusi2ssq %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vcvtusi2ssq %rax, %xmm1, %xmm1
+; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtusi2ssq %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/widen_conv-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_conv-3.ll?rev=346530&r1=346529&r2=346530&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_conv-3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_conv-3.ll Fri Nov  9 10:04:34 2018
@@ -47,15 +47,16 @@ define void @convert_v3i8_to_v3f32(<3 x
 ; X86-SSE2-NEXT:    movd %edx, %xmm0
 ; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; X86-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; X86-SSE2-NEXT:    movzbl 2(%ecx), %ecx
 ; X86-SSE2-NEXT:    movdqa %xmm0, (%esp)
-; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT:    movl (%esp), %edx
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE2-NEXT:    shll $8, %esi
 ; X86-SSE2-NEXT:    shll $8, %edx
-; X86-SSE2-NEXT:    movzbl (%esp), %esi
-; X86-SSE2-NEXT:    orl %edx, %esi
-; X86-SSE2-NEXT:    movd %esi, %xmm0
-; X86-SSE2-NEXT:    pinsrw $1, %ecx, %xmm0
-; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE2-NEXT:    movd %edx, %xmm0
+; X86-SSE2-NEXT:    pinsrw $1, %esi, %xmm0
+; X86-SSE2-NEXT:    movzbl 2(%ecx), %ecx
+; X86-SSE2-NEXT:    shll $8, %ecx
+; X86-SSE2-NEXT:    pinsrw $2, %ecx, %xmm0
 ; X86-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
 ; X86-SSE2-NEXT:    psrad $24, %xmm0
 ; X86-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
@@ -95,15 +96,16 @@ define void @convert_v3i8_to_v3f32(<3 x
 ; X64-SSE2-NEXT:    movq %rax, %xmm0
 ; X64-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; X64-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; X64-SSE2-NEXT:    movzbl 2(%rsi), %eax
 ; X64-SSE2-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
+; X64-SSE2-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
 ; X64-SSE2-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx
 ; X64-SSE2-NEXT:    shll $8, %ecx
-; X64-SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
-; X64-SSE2-NEXT:    orl %ecx, %edx
-; X64-SSE2-NEXT:    movd %edx, %xmm0
-; X64-SSE2-NEXT:    pinsrw $1, %eax, %xmm0
-; X64-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X64-SSE2-NEXT:    shll $8, %eax
+; X64-SSE2-NEXT:    movd %eax, %xmm0
+; X64-SSE2-NEXT:    pinsrw $1, %ecx, %xmm0
+; X64-SSE2-NEXT:    movzbl 2(%rsi), %eax
+; X64-SSE2-NEXT:    shll $8, %eax
+; X64-SSE2-NEXT:    pinsrw $2, %eax, %xmm0
 ; X64-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
 ; X64-SSE2-NEXT:    psrad $24, %xmm0
 ; X64-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0

Modified: llvm/trunk/test/CodeGen/X86/widen_conv-4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_conv-4.ll?rev=346530&r1=346529&r2=346530&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_conv-4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_conv-4.ll Fri Nov  9 10:04:34 2018
@@ -77,7 +77,6 @@ define void @convert_v3i8_to_v3f32(<3 x
 ; X86-SSE2:       # %bb.0: # %entry
 ; X86-SSE2-NEXT:    pushl %ebp
 ; X86-SSE2-NEXT:    movl %esp, %ebp
-; X86-SSE2-NEXT:    pushl %esi
 ; X86-SSE2-NEXT:    andl $-16, %esp
 ; X86-SSE2-NEXT:    subl $32, %esp
 ; X86-SSE2-NEXT:    movl 8(%ebp), %eax
@@ -88,15 +87,11 @@ define void @convert_v3i8_to_v3f32(<3 x
 ; X86-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
 ; X86-SSE2-NEXT:    movzbl 2(%ecx), %ecx
 ; X86-SSE2-NEXT:    movdqa %xmm0, (%esp)
-; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-SSE2-NEXT:    shll $8, %edx
-; X86-SSE2-NEXT:    movzbl (%esp), %esi
-; X86-SSE2-NEXT:    orl %edx, %esi
-; X86-SSE2-NEXT:    movd %esi, %xmm0
-; X86-SSE2-NEXT:    pinsrw $1, %ecx, %xmm0
-; X86-SSE2-NEXT:    pxor %xmm1, %xmm1
-; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; X86-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X86-SSE2-NEXT:    movzbl (%esp), %edx
+; X86-SSE2-NEXT:    movd %edx, %xmm0
+; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-SSE2-NEXT:    pinsrw $2, %edx, %xmm0
+; X86-SSE2-NEXT:    pinsrw $4, %ecx, %xmm0
 ; X86-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
 ; X86-SSE2-NEXT:    movss %xmm0, (%eax)
 ; X86-SSE2-NEXT:    movaps %xmm0, %xmm1
@@ -104,8 +99,7 @@ define void @convert_v3i8_to_v3f32(<3 x
 ; X86-SSE2-NEXT:    movss %xmm1, 8(%eax)
 ; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; X86-SSE2-NEXT:    movss %xmm0, 4(%eax)
-; X86-SSE2-NEXT:    leal -4(%ebp), %esp
-; X86-SSE2-NEXT:    popl %esi
+; X86-SSE2-NEXT:    movl %ebp, %esp
 ; X86-SSE2-NEXT:    popl %ebp
 ; X86-SSE2-NEXT:    retl
 ;
@@ -135,15 +129,11 @@ define void @convert_v3i8_to_v3f32(<3 x
 ; X64-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
 ; X64-SSE2-NEXT:    movzbl 2(%rsi), %eax
 ; X64-SSE2-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; X64-SSE2-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx
-; X64-SSE2-NEXT:    shll $8, %ecx
-; X64-SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
-; X64-SSE2-NEXT:    orl %ecx, %edx
-; X64-SSE2-NEXT:    movd %edx, %xmm0
-; X64-SSE2-NEXT:    pinsrw $1, %eax, %xmm0
-; X64-SSE2-NEXT:    pxor %xmm1, %xmm1
-; X64-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; X64-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X64-SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-SSE2-NEXT:    movd %ecx, %xmm0
+; X64-SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-SSE2-NEXT:    pinsrw $2, %ecx, %xmm0
+; X64-SSE2-NEXT:    pinsrw $4, %eax, %xmm0
 ; X64-SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
 ; X64-SSE2-NEXT:    movlps %xmm0, (%rdi)
 ; X64-SSE2-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]




More information about the llvm-commits mailing list