[llvm] r238735 - AVX-512: Implemented vector shuffle lowering for v8i64 and v8f64 types.

Chandler Carruth chandlerc at google.com
Mon Jun 1 03:07:09 PDT 2015


On Mon, Jun 1, 2015 at 2:57 AM Elena Demikhovsky <
elena.demikhovsky at intel.com> wrote:

> Author: delena
> Date: Mon Jun  1 04:49:53 2015
> New Revision: 238735
>
> URL: http://llvm.org/viewvc/llvm-project?rev=238735&view=rev
> Log:
> AVX-512: Implemented vector shuffle lowering for v8i64 and v8f64 types.
> I removed vector-shuffle-512-v8.ll; it is an auto-generated test that is
> no longer valid.
>
>
> Added:
>     llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll
> Removed:
>     llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
> Modified:
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=238735&r1=238734&r2=238735&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jun  1 04:49:53 2015
> @@ -6260,6 +6260,42 @@ is128BitLaneRepeatedShuffleMask(MVT VT,
>    return true;
>  }
>
> +/// \brief Test whether a shuffle mask is equivalent within each 256-bit
> lane.
> +///
> +/// This checks a shuffle mask to see if it is performing the same
> +/// 256-bit lane-relative shuffle in each 256-bit lane. This trivially
> implies
> +/// that it is also not lane-crossing. It may however involve a blend
> from the
> +/// same lane of a second vector.
> +///
> +/// The specific repeated shuffle mask is populated in \p RepeatedMask,
> as it is
> +/// non-trivial to compute in the face of undef lanes. The representation
> is
> +/// *not* suitable for use with existing 256-bit shuffles as it will
> contain
> +/// entries from both V1 and V2 inputs to the wider mask.
> +static bool
> +is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
> +                                SmallVectorImpl<int> &RepeatedMask) {
> +  int LaneSize = 256 / VT.getScalarSizeInBits();
> +  RepeatedMask.resize(LaneSize, -1);
> +  int Size = Mask.size();
> +  for (int i = 0; i < Size; ++i) {
> +    if (Mask[i] < 0)
> +      continue;
> +    if ((Mask[i] % Size) / LaneSize != i / LaneSize)
> +      // This entry crosses lanes, so there is no way to model this
> shuffle.
> +      return false;
> +
> +    // Ok, handle the in-lane shuffles by detecting if and when they
> repeat.
> +    if (RepeatedMask[i % LaneSize] == -1)
> +      // This is the first non-undef entry in this slot of a 256-bit lane.
> +      RepeatedMask[i % LaneSize] =
> +          Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size;
> +    else if (RepeatedMask[i % LaneSize] + (i / LaneSize) * LaneSize !=
> Mask[i])
> +      // Found a mismatch with the repeated mask.
> +      return false;
> +  }
> +  return true;
> +}
> +
>  /// \brief Checks whether a shuffle mask is equivalent to an explicit
> list of
>  /// arguments.
>  ///
> @@ -10054,12 +10090,15 @@ static SDValue lower256BitVectorShuffle(
>  }
>
>  /// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
> -static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
> +static SDValue lowerV8X64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
>                                         const X86Subtarget *Subtarget,
>                                         SelectionDAG &DAG) {
>    SDLoc DL(Op);
> -  assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
> -  assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
> +  MVT VT = Op.getSimpleValueType();
> +  assert((V1.getSimpleValueType() == MVT::v8f64 ||
> +          V1.getSimpleValueType() == MVT::v8i64) && "Bad operand type!");
> +  assert((V2.getSimpleValueType() == MVT::v8f64 ||
> +          V2.getSimpleValueType() == MVT::v8i64) && "Bad operand type!");
>    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
>    ArrayRef<int> Mask = SVOp->getMask();
>    assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
> @@ -10067,12 +10106,65 @@ static SDValue lowerV8F64VectorShuffle(S
>    // X86 has dedicated unpack instructions that can handle specific blend
>    // operations: UNPCKH and UNPCKL.
>    if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
> -    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
> +    return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
>    if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
> -    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
> +    return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
>
> -  // FIXME: Implement direct support for this type!
> -  return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG);
> +  // VSHUFPD instruction - mask 0/1, 8/9, 2/3, 10/11, 4/5, 12/13, 6/7,
> 14/15
> +  bool ShufpdMask = true;
> +  unsigned Immediate = 0;
> +  for (int i = 0; i < 8; ++i) {
> +    if (Mask[i] < 0)
> +      continue;
> +    int Val = (i & 6) + 8 * (i & 1);
> +    if (Mask[i] < Val ||  Mask[i] > Val+1) {
> +      ShufpdMask = false;
> +      break;
> +    }
> +    Immediate |= (Mask[i]%2) << i;
> +  }
> +  if (ShufpdMask)
>

Why are you only matching specific masks? Especially for things like SHUFP
which are actually fully generic shuffles within their lanes. We have loads
of really nice generic shuffle lowering logic for 128-bit and 256-bit
shuffles that should be generalized for 512-bit. Adding code to detect
single-instruction patterns only and lower just those will take us back to
exactly the bad shuffle lowering we had for the other vector types.

Please revert this patch and take the approach of finding optimal lowerings
for high-level *categories* of incoming shuffle patterns. Please especially
re-use the reasonably comprehensive SHUFP matching logic that both 128-bit
and 256-bit vector shuffle lowering uses.


> +      return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
> +                         DAG.getConstant(Immediate, DL, MVT::i8));
> +
> +  // PERMILPD instruction - mask 0/1, 0/1, 2/3, 2/3, 4/5, 4/5, 6/7, 6/7
> +  if (isSingleInputShuffleMask(Mask)) {
> +    bool PermilMask = true;
> +    unsigned Immediate = 0;
> +    for (int i = 0; i < 8; ++i) {
> +      if (Mask[i] < 0)
> +        continue;
> +      int Val = (i & 6);
> +      if (Mask[i] < Val ||  Mask[i] > Val+1) {
> +        PermilMask = false;
> +        break;
> +      }
> +      Immediate |= (Mask[i]%2) << i;
> +    }
> +    if (PermilMask)
> +      return DAG.getNode(X86ISD::VPERMILPI, DL, VT, V1,
> +                         DAG.getConstant(Immediate, DL, MVT::i8));
> +
> +    SmallVector<int, 4> RepeatedMask;
> +    if (is256BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) {
> +      unsigned Immediate = 0;
> +      for (int i = 0; i < 4; ++i)
> +        if (RepeatedMask[i] > 0)
> +          Immediate |= (RepeatedMask[i] & 3) << (i*2);
> +      return DAG.getNode(X86ISD::VPERMI, DL, VT, V1,
> +                         DAG.getConstant(Immediate, DL, MVT::i8));
> +    }
> +  }
> +  SDValue VPermMask[8];
> +  for (int i = 0; i < 8; ++i)
> +    VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i64)
> +                               : DAG.getConstant(Mask[i], DL, MVT::i64);
> +  SDValue MaskNode = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i64,
> +                                 VPermMask);
> +  if (isSingleInputShuffleMask(Mask))
> +    return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
> +
> +  return DAG.getNode(X86ISD::VPERMV3, DL, VT, MaskNode, V1, V2);
>  }
>
>  /// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
> @@ -10104,28 +10196,6 @@ static SDValue lowerV16F32VectorShuffle(
>    return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG);
>  }
>
> -/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
> -static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
> -                                       const X86Subtarget *Subtarget,
> -                                       SelectionDAG &DAG) {
> -  SDLoc DL(Op);
> -  assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
> -  assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
> -  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
> -  ArrayRef<int> Mask = SVOp->getMask();
> -  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
> -
> -  // X86 has dedicated unpack instructions that can handle specific blend
> -  // operations: UNPCKH and UNPCKL.
> -  if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
> -    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2);
> -  if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
> -    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2);
> -
> -  // FIXME: Implement direct support for this type!
> -  return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG);
> -}
> -
>  /// \brief Handle lowering of 16-lane 32-bit integer shuffles.
>  static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue
> V2,
>                                         const X86Subtarget *Subtarget,
> @@ -10212,11 +10282,10 @@ static SDValue lower512BitVectorShuffle(
>    // the requisite ISA extensions for that element type are available.
>    switch (VT.SimpleTy) {
>    case MVT::v8f64:
> -    return lowerV8F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
> +  case MVT::v8i64:
> +    return lowerV8X64VectorShuffle(Op, V1, V2, Subtarget, DAG);
>    case MVT::v16f32:
>      return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
> -  case MVT::v8i64:
> -    return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
>    case MVT::v16i32:
>      return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
>    case MVT::v32i16:
> @@ -20482,7 +20551,7 @@ static SmallVector<int, 4> getPSHUFShuff
>  #ifndef NDEBUG
>      for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes;
> ++i)
>        for (int j = 0; j < LaneElts; ++j)
> -        assert(Mask[j] == Mask[i * LaneElts + j] - LaneElts &&
> +        assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) &&
>                 "Mask doesn't repeat in high 128-bit lanes!");
>  #endif
>      Mask.resize(LaneElts);
>
> Added: llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll?rev=238735&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll Mon Jun  1 04:49:53 2015
> @@ -0,0 +1,98 @@
> +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
> +
> +; CHECK-LABEL: test1:
> +; CHECK: vpermps
> +; CHECK: ret
> +define <16 x float> @test1(<16 x float> %a) nounwind {
> +  %c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32
> 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1,  i32 0,
> i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
> +  ret <16 x float> %c
> +}
> +
> +; CHECK-LABEL: test2:
> +; CHECK: vpermd
> +; CHECK: ret
> +define <16 x i32> @test2(<16 x i32> %a) nounwind {
> +  %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 2,
> i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1,  i32 0, i32
> 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
> +  ret <16 x i32> %c
> +}
> +
> +; CHECK-LABEL: test3:
> +; CHECK: vpermq
> +; CHECK: ret
> +define <8 x i64> @test3(<8 x i64> %a) nounwind {
> +  %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32
> 5, i32 1, i32 undef, i32 7, i32 undef, i32 3, i32 1>
> +  ret <8 x i64> %c
> +}
> +
> +; CHECK-LABEL: test4:
> +; CHECK: vpermpd
> +; CHECK: ret
> +define <8 x double> @test4(<8 x double> %a) nounwind {
> +  %c = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32> <i32
> 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
> +  ret <8 x double> %c
> +}
> +
> +; CHECK-LABEL: test5:
> +; CHECK: vpermt2pd
> +; CHECK: ret
> +define <8 x double> @test5(<8 x double> %a, <8 x double> %b) nounwind {
> +  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2,
> i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
> +  ret <8 x double> %c
> +}
> +
> +; CHECK-LABEL: test6:
> +; CHECK: vpermq $30
> +; CHECK: ret
> +define <8 x i64> @test6(<8 x i64> %a) nounwind {
> +  %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32
> 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
> +  ret <8 x i64> %c
> +}
> +
> +; CHECK-LABEL: test7:
> +; CHECK: vpermt2q
> +; CHECK: ret
> +define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
> +  %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8,
> i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
> +  ret <8 x i64> %c
> +}
> +
> +; CHECK-LABEL: test14
> +; CHECK: vpermilpd $203, %zmm
> +; CHECK: ret
> +define <8 x double> @test14(<8 x double> %a) {
> + %b = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32><i32 1,
> i32 1, i32 2, i32 3, i32 4, i32 4, i32 7, i32 7>
> + ret <8 x double> %b
> +}
> +
> +; CHECK-LABEL: test17
> +; CHECK: vshufpd $19, %zmm1, %zmm0
> +; CHECK: ret
> +define <8 x double> @test17(<8 x double> %a, <8 x double> %b) nounwind {
> +  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1,
> i32 9, i32 2, i32 10, i32 5, i32 undef, i32 undef, i32 undef>
> +  ret <8 x double> %c
> +}
> +
> +; CHECK-LABEL: test20
> +; CHECK: vpunpckhqdq  %zmm
> +; CHECK: ret
> +define <8 x i64> @test20(<8 x i64> %a, <8 x i64> %c) {
> + %b = shufflevector <8 x i64> %a, <8 x i64> %c, <8 x i32><i32 1, i32 9,
> i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
> + ret <8 x i64> %b
> +}
> +
> +; CHECK-LABEL: test21
> +; CHECK: vbroadcastsd  %xmm0, %zmm
> +; CHECK: ret
> +define <8 x double> @test21(<8 x double> %a, <8 x double> %b) {
> +  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
> +  ret <8 x double> %shuffle
> +}
> +
> +; CHECK-LABEL: test22
> +; CHECK: vpbroadcastq  %xmm0, %zmm
> +; CHECK: ret
> +define <8 x i64> @test22(<8 x i64> %a, <8 x i64> %b) {
> +  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
> +  ret <8 x i64> %shuffle
> +}
> +
>
> Removed: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll?rev=238734&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll (removed)
> @@ -1,1452 +0,0 @@
> -; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s
> --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
> -; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s
> --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
> -
> -target triple = "x86_64-unknown-unknown"
> -
> -define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00000000:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00000010:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00000200:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,0,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00003000:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,0,0,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00040000:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
> -; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00500000:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2,3]
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,0]
> -; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_06000000:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2],ymm0[3]
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,2,0,0]
> -; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_70000000:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3]
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[3,0,0,0]
> -; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_01014545:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
> -; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00112233:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,1,1]
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,3,3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00001111:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_81a3c5e7(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_81a3c5e7:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
> -; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_08080808(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_08080808:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
> -; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_08084c4c(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_08084c4c:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
> -; ALL-NEXT:    vinsertf128 $1, %xmm2, %ymm2, %ymm2
> -; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
> -; ALL-NEXT:    vbroadcastsd %xmm3, %ymm3
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3]
> -; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
> -; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_8823cc67(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_8823cc67:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
> -; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
> -; ALL-NEXT:    vbroadcastsd %xmm3, %ymm3
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3]
> -; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_9832dc76(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_9832dc76:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm1[0,1],ymm0[2,3]
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[1,0,3,2]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm1
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_9810dc54(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_9810dc54:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm2
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[1,0,3,2]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm1
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_08194c5d(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_08194c5d:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,0,2,1]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm3
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm3[0,1,1,3]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_2a3b6e7f(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_2a3b6e7f:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,2,3]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm3
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm3[2,1,3,3]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,3]
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_08192a3b(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_08192a3b:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm1[0,2,2,3]
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm0[2,1,3,3]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_08991abb(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_08991abb:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm1[0,0,1,1]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm0[0],ymm2[1,2,3]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3]
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,2,3,3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_091b2d3f(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_091b2d3f:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm0[2,1,3,3]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_09ab1def(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_09ab1def:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm3 = ymm0[1,0,2,2]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1,2,3]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00014445(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00014445:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,1]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00204464(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00204464:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,2,0]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_03004744(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_03004744:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,3,0,0]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_10005444(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_10005444:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,0]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_22006644(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_22006644:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[2,2,0,0]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_33307774(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_33307774:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,3,3,0]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_32107654(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_32107654:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,2,1,0]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00234467(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00234467:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,3]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00224466:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_10325476(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_10325476:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_11335577(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_11335577:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_10235467(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_10235467:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,2,3]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_10225466(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_10225466:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,2,2]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00015444(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00015444:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,1]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00204644(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00204644:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,2,0]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,0,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_03004474(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_03004474:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,3,0,0]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,3,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_10004444(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_10004444:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,0]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_22006446(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_22006446:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[2,2,0,0]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,0,0,2]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_33307474(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_33307474:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,3,3,0]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,0,3,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_32104567(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_32104567:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,2,1,0]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00236744:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,3]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00226644:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_10324567(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_10324567:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_11334567(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_11334567:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_01235467(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_01235467:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_01235466:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,2]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_002u6u44:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,1,0,0]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_00uu66uu(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_00uu66uu:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_103245uu(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_103245uu:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_1133uu67(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_1133uu67:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_0uu354uu(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_0uu354uu:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,2]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_uuu3uu66:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_c348cda0(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_c348cda0:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
> -; ALL-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm2[0,1]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
> -; ALL-NEXT:    vbroadcastsd %xmm1, %ymm4
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm4 = ymm3[0,1,2],ymm4[3]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm4[0],ymm2[1,2],ymm4[3]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm3[0,1],ymm1[2],ymm3[3]
> -; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double>
> %b) {
> -; ALL-LABEL: shuffle_v8f64_f511235a:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm3 = ymm0[0],ymm2[1],ymm0[2,3]
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm3[2,3,1,3]
> -; ALL-NEXT:    vmovddup {{.*#+}} ymm4 = ymm1[0,0,2,2]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0,1,2],ymm4[3]
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,1]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2,3]
> -; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm1
> -; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
> -; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
> -; ALL-NEXT:    vinsertf64x4 $1, %ymm3, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32>
> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
> -  ret <8 x double> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00000000:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpbroadcastq %xmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00000010(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00000010:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00000200(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00000200:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00003000(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00003000:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,0,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00040000(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00040000:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
> -; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6,7]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00500000(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00500000:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,1,0]
> -; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_06000000:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,0,0]
> -; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_70000000:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6,7]
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[3,0,0,0]
> -; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 7,
> i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_01014545:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
> -; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
> -; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00112233(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00112233:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,1,1]
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,3,3]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00001111(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00001111:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,1,1,1]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_81a3c5e7:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
> -; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm2 =
> ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm0 =
> ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8,
> i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_08080808:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
> -; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm0 =
> ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_08084c4c(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_08084c4c:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
> -; ALL-NEXT:    vinserti128 $1, %xmm2, %ymm2, %ymm2
> -; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
> -; ALL-NEXT:    vpbroadcastq %xmm3, %ymm3
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm2 =
> ymm2[0,1],ymm3[2,3],ymm2[4,5],ymm3[6,7]
> -; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
> -; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm0 =
> ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_8823cc67(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_8823cc67:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
> -; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
> -; ALL-NEXT:    vpbroadcastq %xmm3, %ymm3
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
> -; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8,
> i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_9832dc76(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_9832dc76:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm0[4,5,6,7]
> -; ALL-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[2,3,0,1,6,7,4,5]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
> -; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9,
> i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_9810dc54(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_9810dc54:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm2
> -; ALL-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[2,3,0,1,6,7,4,5]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
> -; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9,
> i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_08194c5d(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_08194c5d:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
> -; ALL-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,0,2,1]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
> -; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm3[0,1,1,3]
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm2 =
> ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm0 =
> ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_2a3b6e7f(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_2a3b6e7f:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
> -; ALL-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
> -; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm3[2,1,3,3]
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm2 =
> ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,3,3]
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm0 =
> ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2,
> i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_08192a3b(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_08192a3b:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[0,2,2,3]
> -; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm0[2,1,3,3]
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm2 =
> ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm0 =
> ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_08991abb(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_08991abb:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[0,0,1,1]
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm0[0,1],ymm2[2,3,4,5,6,7]
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,3,3]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_091b2d3f(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_091b2d3f:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
> -; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm0[2,1,3,3]
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm2 =
> ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm0 =
> ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_09ab1def(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_09ab1def:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
> -; ALL-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[2,3,2,3,6,7,6,7]
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3,4,5,6,7]
> -; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00014445(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00014445:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,0,1]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00204464(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00204464:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,0]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_03004744(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_03004744:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,3,0,0]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_10005444(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_10005444:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,0,0]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1,
> i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_22006644(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_22006644:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,2,0,0]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2,
> i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_33307774(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_33307774:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,3,3,0]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3,
> i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_32107654(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_32107654:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,2,1,0]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3,
> i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00234467(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00234467:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,3]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,3]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00224466(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00224466:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_10325476(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_10325476:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1,
> i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_11335577(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_11335577:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1,
> i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_10235467(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_10235467:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,2,3]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,2,3]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1,
> i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_10225466(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_10225466:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,2,2]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,2,2]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1,
> i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00015444(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00015444:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,0,1]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00204644(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00204644:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,0]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_03004474(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_03004474:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,3,0,0]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,3,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_10004444(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_10004444:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,0,0]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1,
> i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_22006446(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_22006446:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,2,0,0]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,0,0,2]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2,
> i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_33307474(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_33307474:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,3,3,0]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,3,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3,
> i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_32104567(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_32104567:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,2,1,0]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3,
> i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00236744(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00236744:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,3]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00226644(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00226644:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_10324567(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_10324567:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1,
> i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_11334567(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_11334567:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1,
> i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_01235467(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_01235467:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[1,0,2,3]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_01235466(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_01235466:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
> -; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[1,0,2,2]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_002u6u44(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_002u6u44:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
> -; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
> -; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,0,0]
> -; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
> -; ALL-NEXT:    retq
> -  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0,
> i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
> -  ret <8 x i64> %shuffle
> -}
> -
> -define <8 x i64> @shuffle_v8i64_00uu66uu(<8 x i64> %a, <8 x i64> %b) {
> -; ALL-LABEL: shuffle_v8i64_00uu66uu:
> -; ALL:       # BB#0:
> -; ALL-NEXT:    vpbroadc
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150601/205a4ca4/attachment.html>


More information about the llvm-commits mailing list