[llvm] r305091 - [X86][SSE] Add support for PACKSS nodes to faux shuffle extraction

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 9 10:29:52 PDT 2017


Author: rksimon
Date: Fri Jun  9 12:29:52 2017
New Revision: 305091

URL: http://llvm.org/viewvc/llvm-project?rev=305091&view=rev
Log:
[X86][SSE] Add support for PACKSS nodes to faux shuffle extraction

If the inputs won't saturate during packing, we can treat the PACKSS as a truncation shuffle.
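
As a reference for the log message above, a minimal scalar sketch of why the no-saturation case behaves like a truncation (illustrative only, not LLVM code; the helper name packss_elt is made up): PACKSSWB saturates each signed 16-bit input to the signed 8-bit range, so whenever an input already has at least 9 known sign bits (i.e. it fits in int8_t) the saturating pack and a plain truncation produce the same byte, which is the property the new getFauxShuffleMask case relies on.

  #include <cassert>
  #include <cstdint>

  // Scalar model of one PACKSSWB lane: saturate a signed 16-bit value to 8 bits.
  static int8_t packss_elt(int16_t v) {
    if (v > INT8_MAX) return INT8_MAX;
    if (v < INT8_MIN) return INT8_MIN;
    return static_cast<int8_t>(v);
  }

  int main() {
    // Every value that cannot saturate packs to the same byte as a plain truncation...
    for (int v = INT8_MIN; v <= INT8_MAX; ++v)
      assert(packss_elt(static_cast<int16_t>(v)) == static_cast<int8_t>(v));
    // ...while a saturating value does not, so the shuffle model is only valid
    // when the sign-bit check proves saturation is impossible.
    assert(packss_elt(300) != static_cast<int8_t>(300 & 0xFF));
    return 0;
  }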

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-compare-results.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=305091&r1=305090&r2=305091&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Jun  9 12:29:52 2017
@@ -1,4 +1,4 @@
-
+
 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -5816,7 +5816,8 @@ static bool setTargetShuffleZeroElements
 // The decoded shuffle mask may contain a different number of elements to the
 // destination value type.
 static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
-                               SmallVectorImpl<SDValue> &Ops) {
+                               SmallVectorImpl<SDValue> &Ops,
+                               SelectionDAG &DAG) {
   Mask.clear();
   Ops.clear();
 
@@ -5924,6 +5925,19 @@ static bool getFauxShuffleMask(SDValue N
       Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
     return true;
   }
+  case X86ISD::PACKSS: {
+    // If we know input saturation won't happen we can treat this
+    // as a truncation shuffle.
+    if (DAG.ComputeNumSignBits(N.getOperand(0)) <= NumBitsPerElt ||
+        DAG.ComputeNumSignBits(N.getOperand(1)) <= NumBitsPerElt)
+      return false;
+
+    Ops.push_back(N.getOperand(0));
+    Ops.push_back(N.getOperand(1));
+    for (unsigned i = 0; i != NumElts; ++i)
+      Mask.push_back(i * 2);
+    return true;
+  }
   case X86ISD::VSHLI:
   case X86ISD::VSRLI: {
     uint64_t ShiftVal = N.getConstantOperandVal(1);
@@ -5998,9 +6012,10 @@ static void resolveTargetShuffleInputsAn
 /// Returns true if the target shuffle mask was decoded.
 static bool resolveTargetShuffleInputs(SDValue Op,
                                        SmallVectorImpl<SDValue> &Inputs,
-                                       SmallVectorImpl<int> &Mask) {
+                                       SmallVectorImpl<int> &Mask,
+                                       SelectionDAG &DAG) {
   if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
-    if (!getFauxShuffleMask(Op, Mask, Inputs))
+    if (!getFauxShuffleMask(Op, Mask, Inputs, DAG))
       return false;
 
   resolveTargetShuffleInputsAndMask(Inputs, Mask);
@@ -26760,6 +26775,17 @@ unsigned X86TargetLowering::ComputeNumSi
     return Tmp;
   }
 
+  case X86ISD::VSHLI: {
+    SDValue Src = Op.getOperand(0);
+    unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
+    APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+    if (ShiftVal.uge(VTBits))
+      return VTBits; // Shifted all bits out --> zero.
+    if (ShiftVal.uge(Tmp))
+      return 1; // Shifted all sign bits out --> unknown.
+    return Tmp - ShiftVal.getZExtValue();
+  }
+
   case X86ISD::VSRAI: {
     SDValue Src = Op.getOperand(0);
     unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
@@ -27908,7 +27934,7 @@ static bool combineX86ShufflesRecursivel
   // Extract target shuffle mask and resolve sentinels and inputs.
   SmallVector<int, 64> OpMask;
   SmallVector<SDValue, 2> OpInputs;
-  if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask))
+  if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG))
     return false;
 
   assert(OpInputs.size() <= 2 && "Too many shuffle inputs");
@@ -29450,7 +29476,7 @@ static SDValue combineExtractWithShuffle
   // Resolve the target shuffle inputs and mask.
   SmallVector<int, 16> Mask;
   SmallVector<SDValue, 2> Ops;
-  if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask))
+  if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask, DAG))
     return SDValue();
 
   // Attempt to narrow/widen the shuffle mask to the correct size.
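
For the X86ISD::VSHLI case added to the X86 ComputeNumSignBits handling above, a standalone sketch of the Tmp - ShiftVal rule (assumptions: numSignBits is a hypothetical scalar stand-in for SelectionDAG::ComputeNumSignBits, working on a single 16-bit lane): a left shift by C discards at most C copies of the sign bit, so a lane known to have Tmp sign bits still has at least Tmp - C of them when C < Tmp.

  #include <cassert>
  #include <cstdint>

  // Count how many leading bits of a 16-bit lane equal its sign bit
  // (the sign bit itself is included, so the result is always >= 1).
  static unsigned numSignBits(uint16_t x) {
    const unsigned sign = (x >> 15) & 1u;
    unsigned n = 0;
    for (int bit = 15; bit >= 0 && ((x >> bit) & 1u) == sign; --bit)
      ++n;
    return n;
  }

  int main() {
    const uint16_t X = 0xFFFE;                    // -2 as i16: 15 known sign bits
    const unsigned C = 4;                         // analogue of the VSHLI immediate
    const unsigned Tmp = numSignBits(X);
    assert(Tmp == 15 && C < Tmp);
    const uint16_t Shifted = static_cast<uint16_t>(X << C);  // 0xFFE0
    assert(numSignBits(Shifted) >= Tmp - C);      // at least Tmp - C sign bits survive
    return 0;
  }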

Modified: llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-results.ll?rev=305091&r1=305090&r2=305091&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-results.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-results.ll Fri Jun  9 12:29:52 2017
@@ -5345,217 +5345,213 @@ define <64 x i1> @test_cmp_v64i16(<64 x
 ;
 ; AVX1-LABEL: test_cmp_v64i16:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm9
-; AVX1-NEXT:    vpcmpgtw %xmm8, %xmm9, %xmm8
-; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm8, %xmm0, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm4
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm0, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpgtw %xmm5, %xmm1, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
 ; AVX1-NEXT:    vpcmpgtw %xmm5, %xmm1, %xmm1
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm0
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm4, %xmm0
+; AVX1-NEXT:    vpcmpgtw %xmm6, %xmm2, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm6
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
 ; AVX1-NEXT:    vpcmpgtw %xmm6, %xmm2, %xmm2
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm2, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm0
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm4, %xmm0
+; AVX1-NEXT:    vpcmpgtw %xmm7, %xmm3, %xmm6
+; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm7
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
 ; AVX1-NEXT:    vpcmpgtw %xmm7, %xmm3, %xmm3
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm3, %xmm3
-; AVX1-NEXT:    vpextrb $15, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $14, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $13, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $12, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $11, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $10, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $9, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $8, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $7, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $6, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $5, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $4, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $3, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $2, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $1, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $0, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $15, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $14, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $14, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $12, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $13, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $10, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $12, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $8, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $11, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $6, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $10, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $4, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $9, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $2, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $8, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $0, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $7, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $14, %xmm2, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $6, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $12, %xmm2, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $5, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $10, %xmm2, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $4, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $8, %xmm2, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $3, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $6, %xmm2, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $2, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $4, %xmm2, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $1, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $2, %xmm2, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $0, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $14, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $12, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $10, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $8, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $6, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $4, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $2, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $0, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $15, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $14, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $13, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $12, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $11, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $10, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $9, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $8, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $7, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $6, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $5, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $4, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $3, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $2, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $1, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $0, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $15, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $14, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $14, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $12, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $13, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $10, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $12, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $8, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $11, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $6, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $10, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $4, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $9, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $2, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $8, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $0, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $7, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $6, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $5, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $4, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $3, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $2, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $14, %xmm8, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $12, %xmm8, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $1, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $10, %xmm8, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $8, %xmm8, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $6, %xmm8, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $4, %xmm8, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $2, %xmm8, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $0, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    movq %rdi, %rax
 ; AVX1-NEXT:    vzeroupper
@@ -5565,207 +5561,203 @@ define <64 x i1> @test_cmp_v64i16(<64 x
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vpcmpgtw %ymm4, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
-; AVX2-NEXT:    vpacksswb %xmm4, %xmm0, %xmm0
 ; AVX2-NEXT:    vpcmpgtw %ymm5, %ymm1, %ymm1
-; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm4
-; AVX2-NEXT:    vpacksswb %xmm4, %xmm1, %xmm1
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm5
 ; AVX2-NEXT:    vpcmpgtw %ymm6, %ymm2, %ymm2
-; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm4
-; AVX2-NEXT:    vpacksswb %xmm4, %xmm2, %xmm2
+; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm6
 ; AVX2-NEXT:    vpcmpgtw %ymm7, %ymm3, %ymm3
-; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
-; AVX2-NEXT:    vpacksswb %xmm4, %xmm3, %xmm3
-; AVX2-NEXT:    vpextrb $15, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm7
+; AVX2-NEXT:    vpextrb $14, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $14, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $12, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $13, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $10, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $12, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $8, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $11, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $6, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $10, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $4, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $9, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $2, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $8, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $0, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $7, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $14, %xmm3, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $6, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $12, %xmm3, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $5, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $10, %xmm3, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $4, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $8, %xmm3, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $3, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $6, %xmm3, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $2, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $4, %xmm3, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $1, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $2, %xmm3, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $0, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $15, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $14, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $14, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $12, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $13, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $10, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $12, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $8, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $11, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $6, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $10, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $4, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $9, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $2, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $8, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $0, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $7, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $14, %xmm2, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $6, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $12, %xmm2, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $5, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $10, %xmm2, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $4, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $8, %xmm2, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $3, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $6, %xmm2, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $2, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $4, %xmm2, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $1, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $2, %xmm2, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $0, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $15, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $14, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $14, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $12, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $13, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $10, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $12, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $8, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $11, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $6, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $10, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $4, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $9, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $2, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $8, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $0, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $7, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $14, %xmm1, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $6, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $12, %xmm1, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $5, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $10, %xmm1, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $4, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $8, %xmm1, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $3, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $6, %xmm1, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $2, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $4, %xmm1, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $1, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $2, %xmm1, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $0, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $14, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $12, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $10, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $8, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $6, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $4, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $2, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $0, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    movq %rdi, %rax
 ; AVX2-NEXT:    vzeroupper



