[llvm] r350155 - [PowerPC] Complete the custom legalization of vector int to fp conversion

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 29 05:40:48 PST 2018


Author: nemanjai
Date: Sat Dec 29 05:40:48 2018
New Revision: 350155

URL: http://llvm.org/viewvc/llvm-project?rev=350155&view=rev
Log:
[PowerPC] Complete the custom legalization of vector int to fp conversion

A recent patch added custom legalization of the vector conversion
v2i16 -> v2f64. This patch rounds that out for the remaining cases in which the
input vector has an illegal type that is narrower than the result vector's type.
Specifically, it handles the following conversions:

v2i8 -> v2f64
v4i8 -> v4f32
v4i16 -> v4f32

Differential revision: https://reviews.llvm.org/D54663

Modified:
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vsx.ll

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=350155&r1=350154&r2=350155&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Sat Dec 29 05:40:48 2018
@@ -788,8 +788,17 @@ PPCTargetLowering::PPCTargetLowering(con
       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
 
+      // Custom handling for partial vectors of integers converted to
+      // floating point. We already have optimal handling for v2i32 through
+      // the DAG combine, so those aren't necessary.
+      setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
+      setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
       setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
+      setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+      setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
+      setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
       setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
+      setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
 
       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
@@ -7288,43 +7297,49 @@ static SDValue widenVec(SelectionDAG &DA
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
 }
 
-SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op,
-                                                SelectionDAG &DAG,
+SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                 const SDLoc &dl) const {
 
   unsigned Opc = Op.getOpcode();
   assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
          "Unexpected conversion type");
-  assert(Op.getValueType() == MVT::v2f64 && "Supports v2f64 only.");
+  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
+         "Supports conversions to v2f64/v4f32 only.");
 
-  // CPU's prior to P9 don't have a way to sign-extend in vectors.
   bool SignedConv = Opc == ISD::SINT_TO_FP;
-  if (SignedConv && !Subtarget.hasP9Altivec())
-    return SDValue();
+  bool FourEltRes = Op.getValueType() == MVT::v4f32;
 
   SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
   EVT WideVT = Wide.getValueType();
   unsigned WideNumElts = WideVT.getVectorNumElements();
+  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
 
   SmallVector<int, 16> ShuffV;
   for (unsigned i = 0; i < WideNumElts; ++i)
     ShuffV.push_back(i + WideNumElts);
 
-  if (Subtarget.isLittleEndian()) {
-    ShuffV[0] = 0;
-    ShuffV[WideNumElts / 2] = 1;
-  }
-  else {
-    ShuffV[WideNumElts / 2 - 1] = 0;
-    ShuffV[WideNumElts - 1] = 1;
-  }
+  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
+  int SaveElts = FourEltRes ? 4 : 2;
+  if (Subtarget.isLittleEndian())
+    for (int i = 0; i < SaveElts; i++)
+      ShuffV[i * Stride] = i;
+  else
+    for (int i = 1; i <= SaveElts; i++)
+      ShuffV[i * Stride - 1] = i - 1;
 
-  SDValue ShuffleSrc2 = SignedConv ? DAG.getUNDEF(WideVT) :
-                                     DAG.getConstant(0, dl, WideVT);
+  SDValue ShuffleSrc2 =
+      SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
   SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
-  unsigned ExtendOp = SignedConv ? (unsigned) PPCISD::SExtVElems :
-                                   (unsigned) ISD::BITCAST;
-  SDValue Extend = DAG.getNode(ExtendOp, dl, MVT::v2i64, Arrange);
+  unsigned ExtendOp =
+      SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;
+
+  SDValue Extend;
+  if (!Subtarget.hasP9Altivec() && SignedConv) {
+    Arrange = DAG.getBitcast(IntermediateVT, Arrange);
+    Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
+                         DAG.getValueType(Op.getOperand(0).getValueType()));
+  } else
+    Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);
 
   return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
 }
@@ -7333,8 +7348,10 @@ SDValue PPCTargetLowering::LowerINT_TO_F
                                           SelectionDAG &DAG) const {
   SDLoc dl(Op);
 
-  if (Op.getValueType() == MVT::v2f64 &&
-      Op.getOperand(0).getValueType() == MVT::v2i16)
+  EVT InVT = Op.getOperand(0).getValueType();
+  EVT OutVT = Op.getValueType();
+  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
+      isOperationCustom(Op.getOpcode(), InVT))
     return LowerINT_TO_FPVector(Op, DAG, dl);
 
   // Conversions to f128 are legal.

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td?rev=350155&r1=350154&r2=350155&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td Sat Dec 29 05:40:48 2018
@@ -1217,6 +1217,27 @@ def ScalarLoads {
   dag Li32 = (i32 (load xoaddr:$src));
 }
 
+def DWToSPExtractConv {
+  dag El0US1 = (f32 (PPCfcfidus
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+  dag El1US1 = (f32 (PPCfcfidus
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+  dag El0US2 = (f32 (PPCfcfidus
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+  dag El1US2 = (f32 (PPCfcfidus
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+  dag El0SS1 = (f32 (PPCfcfids
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+  dag El1SS1 = (f32 (PPCfcfids
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+  dag El0SS2 = (f32 (PPCfcfids
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+  dag El1SS2 = (f32 (PPCfcfids
+                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+  dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2));
+  dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
+}
+
 // The following VSX instructions were introduced in Power ISA 2.07
 /* FIXME: if the operands are v2i64, these patterns will not match.
    we should define new patterns or otherwise match the same patterns
@@ -1452,35 +1473,27 @@ let AddedComplexity = 400 in { // Prefer
   } // UseVSXReg = 1
 
   let Predicates = [IsLittleEndian] in {
-  def : Pat<(f32 (PPCfcfids
-                   (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
-            (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
-  def : Pat<(f32 (PPCfcfids
-                   (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
+  def : Pat<DWToSPExtractConv.El0SS1,
+            (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+  def : Pat<DWToSPExtractConv.El1SS1,
             (f32 (XSCVSXDSP (COPY_TO_REGCLASS
-                              (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
-  def : Pat<(f32 (PPCfcfidus
-                   (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
-            (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
-  def : Pat<(f32 (PPCfcfidus
-                   (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
+                              (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
+  def : Pat<DWToSPExtractConv.El0US1,
+            (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+  def : Pat<DWToSPExtractConv.El1US1,
             (f32 (XSCVUXDSP (COPY_TO_REGCLASS
-                              (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+                              (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
   }
 
   let Predicates = [IsBigEndian] in {
-  def : Pat<(f32 (PPCfcfids
-                   (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
-            (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
-  def : Pat<(f32 (PPCfcfids
-                   (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
-            (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
-  def : Pat<(f32 (PPCfcfidus
-                   (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
-            (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
-  def : Pat<(f32 (PPCfcfidus
-                   (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
-            (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+  def : Pat<DWToSPExtractConv.El0SS1,
+            (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+  def : Pat<DWToSPExtractConv.El1SS1,
+            (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+  def : Pat<DWToSPExtractConv.El0US1,
+            (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+  def : Pat<DWToSPExtractConv.El1US1,
+            (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
   }
 
   // Instructions for converting float to i64 feeding a store.
@@ -3814,6 +3827,15 @@ let AddedComplexity = 400 in {
                                               (XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
   }
 
+  let Predicates = [IsBigEndian, HasP8Vector] in {
+    def : Pat<DWToSPExtractConv.BVU,
+              (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
+                              (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>;
+    def : Pat<DWToSPExtractConv.BVS,
+              (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
+                              (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
+  }
+
   // Big endian, available on all targets with VSX
   let Predicates = [IsBigEndian, HasVSX] in {
     def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3842,6 +3864,15 @@ let AddedComplexity = 400 in {
               (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
   }
 
+  let Predicates = [IsLittleEndian, HasP8Vector] in {
+    def : Pat<DWToSPExtractConv.BVU,
+              (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
+                              (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>;
+    def : Pat<DWToSPExtractConv.BVS,
+              (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
+                              (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+  }
+
   let Predicates = [IsLittleEndian, HasVSX] in {
   // Little endian, available on all targets with VSX
     def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll?rev=350155&r1=350154&r2=350155&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll Sat Dec 29 05:40:48 2018
@@ -80,90 +80,37 @@ entry:
 define <4 x float> @test4elt(i64 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt:
 ; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI1_0 at toc@ha
 ; CHECK-P8-NEXT:    mtvsrd f0, r3
-; CHECK-P8-NEXT:    mfvsrd r3, f0
-; CHECK-P8-NEXT:    clrldi r4, r3, 48
-; CHECK-P8-NEXT:    rldicl r5, r3, 32, 48
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 16, 31
-; CHECK-P8-NEXT:    mtvsrwz f0, r4
-; CHECK-P8-NEXT:    rldicl r4, r3, 48, 48
-; CHECK-P8-NEXT:    rldicl r3, r3, 16, 48
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
-; CHECK-P8-NEXT:    rlwinm r3, r3, 0, 16, 31
-; CHECK-P8-NEXT:    mtvsrwz f1, r5
-; CHECK-P8-NEXT:    mtvsrwz f2, r4
-; CHECK-P8-NEXT:    mtvsrwz f3, r3
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
-; CHECK-P8-NEXT:    xscvuxdsp f1, f1
-; CHECK-P8-NEXT:    xscvuxdsp f2, f2
-; CHECK-P8-NEXT:    xscvuxdsp f3, f3
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    addi r3, r4, .LCPI1_0 at toc@l
+; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI1_0 at toc@ha
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    li r4, 4
-; CHECK-P9-NEXT:    li r5, 2
-; CHECK-P9-NEXT:    li r6, 6
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
-; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
-; CHECK-P9-NEXT:    vextuhrx r5, r5, v2
-; CHECK-P9-NEXT:    vextuhrx r6, r6, v2
-; CHECK-P9-NEXT:    rlwinm r3, r3, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-P9-NEXT:    mtvsrwz f0, r3
-; CHECK-P9-NEXT:    mtvsrwz f1, r4
-; CHECK-P9-NEXT:    mtvsrwz f2, r5
-; CHECK-P9-NEXT:    mtvsrwz f3, r6
-; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvuxdsp f1, f1
-; CHECK-P9-NEXT:    xscvuxdsp f2, f2
-; CHECK-P9-NEXT:    xscvuxdsp f3, f3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI1_0 at toc@l
+; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    lxvx v2, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v4, v3, v2
+; CHECK-P9-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    li r4, 6
-; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    li r3, 2
-; CHECK-BE-NEXT:    li r5, 4
-; CHECK-BE-NEXT:    li r6, 0
-; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
-; CHECK-BE-NEXT:    vextuhlx r3, r3, v2
-; CHECK-BE-NEXT:    vextuhlx r5, r5, v2
-; CHECK-BE-NEXT:    vextuhlx r6, r6, v2
-; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r3, r3, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-BE-NEXT:    mtvsrwz f0, r4
-; CHECK-BE-NEXT:    mtvsrwz f1, r3
-; CHECK-BE-NEXT:    mtvsrwz f2, r5
-; CHECK-BE-NEXT:    mtvsrwz f3, r6
-; CHECK-BE-NEXT:    xscvuxdsp f0, f0
-; CHECK-BE-NEXT:    xscvuxdsp f1, f1
-; CHECK-BE-NEXT:    xscvuxdsp f2, f2
-; CHECK-BE-NEXT:    xscvuxdsp f3, f3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
-; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_0 at toc@ha
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_0 at toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
+; CHECK-BE-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <4 x i16>
@@ -174,166 +121,54 @@ entry:
 define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, <8 x i16> %a) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    mfvsrd r5, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_0 at toc@ha
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_1 at toc@ha
+; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    addi r4, r5, .LCPI2_1 at toc@l
+; CHECK-P8-NEXT:    lvx v5, 0, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    clrldi r6, r5, 48
-; CHECK-P8-NEXT:    rldicl r7, r5, 32, 48
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-P8-NEXT:    mfvsrd r8, f0
-; CHECK-P8-NEXT:    rlwinm r7, r7, 0, 16, 31
-; CHECK-P8-NEXT:    mtvsrwz f1, r6
-; CHECK-P8-NEXT:    rldicl r6, r5, 48, 48
-; CHECK-P8-NEXT:    rldicl r5, r5, 16, 48
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 16, 31
-; CHECK-P8-NEXT:    mtvsrwz f0, r7
-; CHECK-P8-NEXT:    mtvsrwz f2, r6
-; CHECK-P8-NEXT:    clrldi r6, r8, 48
-; CHECK-P8-NEXT:    mtvsrwz f3, r5
-; CHECK-P8-NEXT:    rldicl r5, r8, 32, 48
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-P8-NEXT:    mtvsrwz f4, r6
-; CHECK-P8-NEXT:    rldicl r6, r8, 48, 48
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 16, 31
-; CHECK-P8-NEXT:    mtvsrwz f5, r5
-; CHECK-P8-NEXT:    rlwinm r5, r6, 0, 16, 31
-; CHECK-P8-NEXT:    mtvsrwz f6, r5
-; CHECK-P8-NEXT:    rldicl r5, r8, 16, 48
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 16, 31
-; CHECK-P8-NEXT:    xscvuxdsp f1, f1
-; CHECK-P8-NEXT:    mtvsrwz f7, r5
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
-; CHECK-P8-NEXT:    xscvuxdsp f2, f2
-; CHECK-P8-NEXT:    xscvuxdsp f4, f4
-; CHECK-P8-NEXT:    xscvuxdsp f5, f5
-; CHECK-P8-NEXT:    xscvuxdsp f6, f6
-; CHECK-P8-NEXT:    xscvuxdsp f7, f7
-; CHECK-P8-NEXT:    xscvuxdsp f3, f3
-; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs1
-; CHECK-P8-NEXT:    xxmrghd vs1, vs5, vs4
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xxmrghd vs4, vs7, vs6
-; CHECK-P8-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P8-NEXT:    xvcvdpsp v4, vs4
-; CHECK-P8-NEXT:    xvcvdpsp v5, vs2
-; CHECK-P8-NEXT:    vmrgew v3, v4, v3
-; CHECK-P8-NEXT:    vmrgew v2, v5, v2
+; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P8-NEXT:    vperm v2, v4, v2, v5
+; CHECK-P8-NEXT:    xvcvuxwsp v3, v3
+; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-P8-NEXT:    stvx v3, 0, r3
 ; CHECK-P8-NEXT:    stvx v2, r3, r4
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li r4, 8
-; CHECK-P9-NEXT:    li r5, 12
-; CHECK-P9-NEXT:    li r6, 10
-; CHECK-P9-NEXT:    li r7, 14
-; CHECK-P9-NEXT:    li r8, 0
-; CHECK-P9-NEXT:    li r9, 4
-; CHECK-P9-NEXT:    li r10, 2
-; CHECK-P9-NEXT:    li r11, 6
-; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
-; CHECK-P9-NEXT:    vextuhrx r5, r5, v2
-; CHECK-P9-NEXT:    vextuhrx r6, r6, v2
-; CHECK-P9-NEXT:    vextuhrx r7, r7, v2
-; CHECK-P9-NEXT:    vextuhrx r8, r8, v2
-; CHECK-P9-NEXT:    vextuhrx r9, r9, v2
-; CHECK-P9-NEXT:    vextuhrx r10, r10, v2
-; CHECK-P9-NEXT:    vextuhrx r11, r11, v2
-; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r7, r7, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r8, r8, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r9, r9, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r10, r10, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r11, r11, 0, 16, 31
-; CHECK-P9-NEXT:    mtvsrwz f0, r4
-; CHECK-P9-NEXT:    mtvsrwz f1, r5
-; CHECK-P9-NEXT:    mtvsrwz f2, r6
-; CHECK-P9-NEXT:    mtvsrwz f3, r7
-; CHECK-P9-NEXT:    mtvsrwz f4, r8
-; CHECK-P9-NEXT:    mtvsrwz f5, r9
-; CHECK-P9-NEXT:    mtvsrwz f6, r10
-; CHECK-P9-NEXT:    mtvsrwz f7, r11
-; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvuxdsp f1, f1
-; CHECK-P9-NEXT:    xscvuxdsp f2, f2
-; CHECK-P9-NEXT:    xscvuxdsp f3, f3
-; CHECK-P9-NEXT:    xscvuxdsp f4, f4
-; CHECK-P9-NEXT:    xscvuxdsp f5, f5
-; CHECK-P9-NEXT:    xscvuxdsp f6, f6
-; CHECK-P9-NEXT:    xscvuxdsp f7, f7
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
-; CHECK-P9-NEXT:    vmrgew v3, v5, v4
-; CHECK-P9-NEXT:    stxv v3, 0(r3)
-; CHECK-P9-NEXT:    stxv v2, 16(r3)
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_0 at toc@ha
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI2_1 at toc@ha
+; CHECK-P9-NEXT:    xxlxor v5, v5, v5
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI2_1 at toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    lxvx v4, 0, r5
+; CHECK-P9-NEXT:    vperm v3, v5, v2, v3
+; CHECK-P9-NEXT:    vperm v2, v5, v2, v4
+; CHECK-P9-NEXT:    xvcvuxwsp vs0, v3
+; CHECK-P9-NEXT:    xvcvuxwsp vs1, v2
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    li r4, 12
-; CHECK-BE-NEXT:    li r5, 8
-; CHECK-BE-NEXT:    li r6, 10
-; CHECK-BE-NEXT:    li r7, 14
-; CHECK-BE-NEXT:    li r8, 6
-; CHECK-BE-NEXT:    li r9, 2
-; CHECK-BE-NEXT:    li r10, 4
-; CHECK-BE-NEXT:    li r11, 0
-; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
-; CHECK-BE-NEXT:    vextuhlx r5, r5, v2
-; CHECK-BE-NEXT:    vextuhlx r6, r6, v2
-; CHECK-BE-NEXT:    vextuhlx r7, r7, v2
-; CHECK-BE-NEXT:    vextuhlx r8, r8, v2
-; CHECK-BE-NEXT:    vextuhlx r9, r9, v2
-; CHECK-BE-NEXT:    vextuhlx r10, r10, v2
-; CHECK-BE-NEXT:    vextuhlx r11, r11, v2
-; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r7, r7, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r8, r8, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r9, r9, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r10, r10, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r11, r11, 0, 16, 31
-; CHECK-BE-NEXT:    mtvsrwz f0, r4
-; CHECK-BE-NEXT:    mtvsrwz f1, r5
-; CHECK-BE-NEXT:    mtvsrwz f2, r6
-; CHECK-BE-NEXT:    mtvsrwz f3, r7
-; CHECK-BE-NEXT:    mtvsrwz f4, r8
-; CHECK-BE-NEXT:    mtvsrwz f5, r9
-; CHECK-BE-NEXT:    mtvsrwz f6, r10
-; CHECK-BE-NEXT:    mtvsrwz f7, r11
-; CHECK-BE-NEXT:    xscvuxdsp f0, f0
-; CHECK-BE-NEXT:    xscvuxdsp f1, f1
-; CHECK-BE-NEXT:    xscvuxdsp f2, f2
-; CHECK-BE-NEXT:    xscvuxdsp f3, f3
-; CHECK-BE-NEXT:    xscvuxdsp f4, f4
-; CHECK-BE-NEXT:    xscvuxdsp f5, f5
-; CHECK-BE-NEXT:    xscvuxdsp f6, f6
-; CHECK-BE-NEXT:    xscvuxdsp f7, f7
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs2, vs3
-; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
-; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
-; CHECK-BE-NEXT:    vmrgew v2, v2, v3
-; CHECK-BE-NEXT:    vmrgew v3, v5, v4
-; CHECK-BE-NEXT:    stxv v3, 0(r3)
-; CHECK-BE-NEXT:    stxv v2, 16(r3)
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_0 at toc@ha
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI2_1 at toc@ha
+; CHECK-BE-NEXT:    xxlxor v5, v5, v5
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI2_1 at toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    lxvx v4, 0, r5
+; CHECK-BE-NEXT:    vperm v3, v2, v5, v3
+; CHECK-BE-NEXT:    vperm v2, v5, v2, v4
+; CHECK-BE-NEXT:    xvcvuxwsp vs0, v3
+; CHECK-BE-NEXT:    xvcvuxwsp vs1, v2
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = uitofp <8 x i16> %a to <8 x float>
@@ -344,341 +179,80 @@ entry:
 define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i16>* nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_0 at toc@ha
+; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_1 at toc@ha
+; CHECK-P8-NEXT:    xxlxor v3, v3, v3
+; CHECK-P8-NEXT:    lvx v4, 0, r4
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_0 at toc@l
+; CHECK-P8-NEXT:    addi r6, r6, .LCPI3_1 at toc@l
+; CHECK-P8-NEXT:    lvx v2, 0, r5
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
-; CHECK-P8-NEXT:    lvx v2, r4, r5
-; CHECK-P8-NEXT:    mfvsrd r7, v3
-; CHECK-P8-NEXT:    xxswapd vs8, v3
-; CHECK-P8-NEXT:    mfvsrd r6, v2
-; CHECK-P8-NEXT:    xxswapd vs2, v2
-; CHECK-P8-NEXT:    clrldi r4, r6, 48
-; CHECK-P8-NEXT:    rldicl r8, r6, 32, 48
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
-; CHECK-P8-NEXT:    rlwinm r8, r8, 0, 16, 31
-; CHECK-P8-NEXT:    mtvsrwz f0, r4
-; CHECK-P8-NEXT:    rldicl r4, r6, 48, 48
-; CHECK-P8-NEXT:    rldicl r6, r6, 16, 48
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
-; CHECK-P8-NEXT:    mtvsrwz f1, r8
-; CHECK-P8-NEXT:    clrldi r8, r7, 48
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-P8-NEXT:    mtvsrwz f3, r4
-; CHECK-P8-NEXT:    rlwinm r4, r8, 0, 16, 31
-; CHECK-P8-NEXT:    mtvsrwz f4, r6
-; CHECK-P8-NEXT:    rldicl r6, r7, 32, 48
-; CHECK-P8-NEXT:    mtvsrwz f5, r4
-; CHECK-P8-NEXT:    rldicl r4, r7, 48, 48
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-P8-NEXT:    mfvsrd r8, f2
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
-; CHECK-P8-NEXT:    mtvsrwz f2, r6
-; CHECK-P8-NEXT:    rldicl r6, r7, 16, 48
-; CHECK-P8-NEXT:    mtvsrwz f6, r4
-; CHECK-P8-NEXT:    clrldi r4, r8, 48
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
-; CHECK-P8-NEXT:    mtvsrwz f7, r6
-; CHECK-P8-NEXT:    rldicl r6, r8, 32, 48
-; CHECK-P8-NEXT:    mtvsrwz f9, r4
-; CHECK-P8-NEXT:    rldicl r4, r8, 48, 48
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
-; CHECK-P8-NEXT:    mtvsrwz f10, r6
-; CHECK-P8-NEXT:    rldicl r6, r8, 16, 48
-; CHECK-P8-NEXT:    mtvsrwz f11, r4
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-P8-NEXT:    mfvsrd r4, f8
-; CHECK-P8-NEXT:    mtvsrwz f8, r6
-; CHECK-P8-NEXT:    clrldi r6, r4, 48
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-P8-NEXT:    xscvuxdsp f1, f1
-; CHECK-P8-NEXT:    xscvuxdsp f3, f3
-; CHECK-P8-NEXT:    xscvuxdsp f4, f4
-; CHECK-P8-NEXT:    mtvsrwz f12, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 32, 48
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-P8-NEXT:    xscvuxdsp f5, f5
-; CHECK-P8-NEXT:    mtvsrwz f13, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 48, 48
-; CHECK-P8-NEXT:    rldicl r4, r4, 16, 48
-; CHECK-P8-NEXT:    xscvuxdsp f2, f2
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 16, 31
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    mtvsrwz v2, r6
-; CHECK-P8-NEXT:    mtvsrwz v3, r4
-; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
+; CHECK-P8-NEXT:    lvx v0, 0, r6
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    lvx v5, r4, r5
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xscvuxdsp f6, f6
-; CHECK-P8-NEXT:    xscvuxdsp f7, f7
-; CHECK-P8-NEXT:    xscvuxdsp f9, f9
-; CHECK-P8-NEXT:    xscvuxdsp f10, f10
-; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs5
-; CHECK-P8-NEXT:    xscvuxdsp f11, f11
-; CHECK-P8-NEXT:    xscvuxdsp f8, f8
-; CHECK-P8-NEXT:    xscvuxdsp f12, f12
-; CHECK-P8-NEXT:    xscvuxdsp f13, f13
-; CHECK-P8-NEXT:    xxmrghd vs5, vs7, vs6
-; CHECK-P8-NEXT:    xscvuxdsp f1, v2
-; CHECK-P8-NEXT:    xscvuxdsp f4, v3
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xxmrghd vs0, vs10, vs9
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs3
-; CHECK-P8-NEXT:    xxmrghd vs3, vs8, vs11
-; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P8-NEXT:    xxmrghd vs2, vs13, vs12
-; CHECK-P8-NEXT:    xvcvdpsp v5, vs5
-; CHECK-P8-NEXT:    xvcvdpsp v0, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs4, vs1
-; CHECK-P8-NEXT:    xvcvdpsp v1, vs3
-; CHECK-P8-NEXT:    xvcvdpsp v6, vs2
-; CHECK-P8-NEXT:    vmrgew v2, v3, v2
-; CHECK-P8-NEXT:    xvcvdpsp v7, vs1
-; CHECK-P8-NEXT:    vmrgew v3, v5, v4
-; CHECK-P8-NEXT:    vmrgew v4, v1, v0
-; CHECK-P8-NEXT:    stvx v2, r3, r4
-; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    vmrgew v5, v7, v6
+; CHECK-P8-NEXT:    vperm v1, v3, v4, v2
+; CHECK-P8-NEXT:    vperm v2, v3, v5, v2
+; CHECK-P8-NEXT:    vperm v5, v3, v5, v0
+; CHECK-P8-NEXT:    vperm v3, v3, v4, v0
+; CHECK-P8-NEXT:    xvcvuxwsp v4, v1
+; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
+; CHECK-P8-NEXT:    xvcvuxwsp v5, v5
+; CHECK-P8-NEXT:    xvcvuxwsp v3, v3
+; CHECK-P8-NEXT:    stvx v4, 0, r3
+; CHECK-P8-NEXT:    stvx v2, r3, r6
+; CHECK-P8-NEXT:    stvx v5, r3, r4
 ; CHECK-P8-NEXT:    stvx v3, r3, r5
-; CHECK-P8-NEXT:    stvx v4, r3, r4
-; CHECK-P8-NEXT:    stvx v5, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv v3, 0(r4)
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI3_0 at toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI3_1 at toc@ha
 ; CHECK-P9-NEXT:    lxv v2, 16(r4)
-; CHECK-P9-NEXT:    li r4, 0
-; CHECK-P9-NEXT:    li r5, 4
-; CHECK-P9-NEXT:    li r6, 2
-; CHECK-P9-NEXT:    li r7, 6
-; CHECK-P9-NEXT:    li r8, 8
-; CHECK-P9-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    li r9, 12
-; CHECK-P9-NEXT:    li r10, 10
-; CHECK-P9-NEXT:    li r11, 14
-; CHECK-P9-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    vextuhrx r12, r4, v3
-; CHECK-P9-NEXT:    vextuhrx r0, r5, v3
-; CHECK-P9-NEXT:    vextuhrx r30, r6, v3
-; CHECK-P9-NEXT:    vextuhrx r29, r7, v3
-; CHECK-P9-NEXT:    vextuhrx r28, r8, v3
-; CHECK-P9-NEXT:    vextuhrx r27, r9, v3
-; CHECK-P9-NEXT:    vextuhrx r26, r10, v3
-; CHECK-P9-NEXT:    vextuhrx r25, r11, v3
-; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
-; CHECK-P9-NEXT:    vextuhrx r5, r5, v2
-; CHECK-P9-NEXT:    vextuhrx r6, r6, v2
-; CHECK-P9-NEXT:    vextuhrx r7, r7, v2
-; CHECK-P9-NEXT:    vextuhrx r8, r8, v2
-; CHECK-P9-NEXT:    vextuhrx r9, r9, v2
-; CHECK-P9-NEXT:    vextuhrx r10, r10, v2
-; CHECK-P9-NEXT:    vextuhrx r11, r11, v2
-; CHECK-P9-NEXT:    rlwinm r12, r12, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r0, r0, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r30, r30, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r29, r29, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r28, r28, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r27, r27, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r26, r26, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r25, r25, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r7, r7, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r8, r8, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r9, r9, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r10, r10, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r11, r11, 0, 16, 31
-; CHECK-P9-NEXT:    mtvsrwz f0, r12
-; CHECK-P9-NEXT:    mtvsrwz f1, r0
-; CHECK-P9-NEXT:    mtvsrwz f2, r30
-; CHECK-P9-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz f3, r29
-; CHECK-P9-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz f4, r28
-; CHECK-P9-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz f5, r27
-; CHECK-P9-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz f6, r26
-; CHECK-P9-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz f7, r25
-; CHECK-P9-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz f8, r4
-; CHECK-P9-NEXT:    mtvsrwz f9, r5
-; CHECK-P9-NEXT:    mtvsrwz f10, r6
-; CHECK-P9-NEXT:    mtvsrwz f11, r7
-; CHECK-P9-NEXT:    mtvsrwz f12, r8
-; CHECK-P9-NEXT:    mtvsrwz f13, r9
-; CHECK-P9-NEXT:    mtvsrwz v2, r10
-; CHECK-P9-NEXT:    mtvsrwz v3, r11
-; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvuxdsp f1, f1
-; CHECK-P9-NEXT:    xscvuxdsp f2, f2
-; CHECK-P9-NEXT:    xscvuxdsp f3, f3
-; CHECK-P9-NEXT:    xscvuxdsp f4, f4
-; CHECK-P9-NEXT:    xscvuxdsp f5, f5
-; CHECK-P9-NEXT:    xscvuxdsp f6, f6
-; CHECK-P9-NEXT:    xscvuxdsp f7, f7
-; CHECK-P9-NEXT:    xscvuxdsp f8, f8
-; CHECK-P9-NEXT:    xscvuxdsp f9, f9
-; CHECK-P9-NEXT:    xscvuxdsp f10, f10
-; CHECK-P9-NEXT:    xscvuxdsp f11, f11
-; CHECK-P9-NEXT:    xscvuxdsp f12, f12
-; CHECK-P9-NEXT:    xscvuxdsp f13, f13
-; CHECK-P9-NEXT:    xscvuxdsp f31, v2
-; CHECK-P9-NEXT:    xscvuxdsp f30, v3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P9-NEXT:    xxmrghd vs4, vs9, vs8
-; CHECK-P9-NEXT:    xxmrghd vs5, vs11, vs10
-; CHECK-P9-NEXT:    xxmrghd vs6, vs13, vs12
-; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
-; CHECK-P9-NEXT:    xvcvdpsp v0, vs4
-; CHECK-P9-NEXT:    xvcvdpsp v1, vs5
-; CHECK-P9-NEXT:    xvcvdpsp v6, vs6
-; CHECK-P9-NEXT:    xvcvdpsp v7, vs7
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
-; CHECK-P9-NEXT:    vmrgew v3, v5, v4
-; CHECK-P9-NEXT:    vmrgew v4, v1, v0
-; CHECK-P9-NEXT:    vmrgew v5, v7, v6
-; CHECK-P9-NEXT:    stxv v3, 16(r3)
-; CHECK-P9-NEXT:    stxv v2, 0(r3)
-; CHECK-P9-NEXT:    stxv v5, 48(r3)
-; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    lxv v3, 0(r4)
+; CHECK-P9-NEXT:    xxlxor v0, v0, v0
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI3_0@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI3_1@toc@l
+; CHECK-P9-NEXT:    lxvx v4, 0, r5
+; CHECK-P9-NEXT:    lxvx v5, 0, r6
+; CHECK-P9-NEXT:    vperm v1, v0, v3, v4
+; CHECK-P9-NEXT:    vperm v3, v0, v3, v5
+; CHECK-P9-NEXT:    vperm v4, v0, v2, v4
+; CHECK-P9-NEXT:    vperm v2, v0, v2, v5
+; CHECK-P9-NEXT:    xvcvuxwsp vs0, v1
+; CHECK-P9-NEXT:    xvcvuxwsp vs1, v3
+; CHECK-P9-NEXT:    xvcvuxwsp vs2, v4
+; CHECK-P9-NEXT:    xvcvuxwsp vs3, v2
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v3, 0(r4)
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI3_1@toc@ha
 ; CHECK-BE-NEXT:    lxv v2, 16(r4)
-; CHECK-BE-NEXT:    li r4, 6
-; CHECK-BE-NEXT:    li r5, 2
-; CHECK-BE-NEXT:    li r6, 4
-; CHECK-BE-NEXT:    li r7, 0
-; CHECK-BE-NEXT:    li r8, 14
-; CHECK-BE-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    li r9, 10
-; CHECK-BE-NEXT:    li r10, 12
-; CHECK-BE-NEXT:    li r11, 8
-; CHECK-BE-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    vextuhlx r12, r4, v3
-; CHECK-BE-NEXT:    vextuhlx r0, r5, v3
-; CHECK-BE-NEXT:    vextuhlx r30, r6, v3
-; CHECK-BE-NEXT:    vextuhlx r29, r7, v3
-; CHECK-BE-NEXT:    vextuhlx r28, r8, v3
-; CHECK-BE-NEXT:    vextuhlx r27, r9, v3
-; CHECK-BE-NEXT:    vextuhlx r26, r10, v3
-; CHECK-BE-NEXT:    vextuhlx r25, r11, v3
-; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
-; CHECK-BE-NEXT:    vextuhlx r5, r5, v2
-; CHECK-BE-NEXT:    vextuhlx r6, r6, v2
-; CHECK-BE-NEXT:    vextuhlx r7, r7, v2
-; CHECK-BE-NEXT:    vextuhlx r8, r8, v2
-; CHECK-BE-NEXT:    vextuhlx r9, r9, v2
-; CHECK-BE-NEXT:    vextuhlx r10, r10, v2
-; CHECK-BE-NEXT:    vextuhlx r11, r11, v2
-; CHECK-BE-NEXT:    rlwinm r12, r12, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r0, r0, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r30, r30, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r29, r29, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r28, r28, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r27, r27, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r26, r26, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r25, r25, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r7, r7, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r8, r8, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r9, r9, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r10, r10, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r11, r11, 0, 16, 31
-; CHECK-BE-NEXT:    mtvsrwz f0, r12
-; CHECK-BE-NEXT:    mtvsrwz f1, r0
-; CHECK-BE-NEXT:    mtvsrwz f2, r30
-; CHECK-BE-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz f3, r29
-; CHECK-BE-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz f4, r28
-; CHECK-BE-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz f5, r27
-; CHECK-BE-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz f6, r26
-; CHECK-BE-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz f7, r25
-; CHECK-BE-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz f8, r4
-; CHECK-BE-NEXT:    mtvsrwz f9, r5
-; CHECK-BE-NEXT:    mtvsrwz f10, r6
-; CHECK-BE-NEXT:    mtvsrwz f11, r7
-; CHECK-BE-NEXT:    mtvsrwz f12, r8
-; CHECK-BE-NEXT:    mtvsrwz f13, r9
-; CHECK-BE-NEXT:    mtvsrwz v2, r10
-; CHECK-BE-NEXT:    mtvsrwz v3, r11
-; CHECK-BE-NEXT:    xscvuxdsp f0, f0
-; CHECK-BE-NEXT:    xscvuxdsp f1, f1
-; CHECK-BE-NEXT:    xscvuxdsp f2, f2
-; CHECK-BE-NEXT:    xscvuxdsp f3, f3
-; CHECK-BE-NEXT:    xscvuxdsp f4, f4
-; CHECK-BE-NEXT:    xscvuxdsp f5, f5
-; CHECK-BE-NEXT:    xscvuxdsp f6, f6
-; CHECK-BE-NEXT:    xscvuxdsp f7, f7
-; CHECK-BE-NEXT:    xscvuxdsp f8, f8
-; CHECK-BE-NEXT:    xscvuxdsp f9, f9
-; CHECK-BE-NEXT:    xscvuxdsp f10, f10
-; CHECK-BE-NEXT:    xscvuxdsp f11, f11
-; CHECK-BE-NEXT:    xscvuxdsp f12, f12
-; CHECK-BE-NEXT:    xscvuxdsp f13, f13
-; CHECK-BE-NEXT:    xscvuxdsp f31, v2
-; CHECK-BE-NEXT:    xscvuxdsp f30, v3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-BE-NEXT:    xxmrghd vs4, vs9, vs8
-; CHECK-BE-NEXT:    xxmrghd vs5, vs11, vs10
-; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
-; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
-; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
-; CHECK-BE-NEXT:    xvcvdpsp v0, vs4
-; CHECK-BE-NEXT:    xvcvdpsp v1, vs5
-; CHECK-BE-NEXT:    xvcvdpsp v6, vs6
-; CHECK-BE-NEXT:    xvcvdpsp v7, vs7
-; CHECK-BE-NEXT:    vmrgew v2, v3, v2
-; CHECK-BE-NEXT:    vmrgew v3, v5, v4
-; CHECK-BE-NEXT:    vmrgew v4, v1, v0
-; CHECK-BE-NEXT:    vmrgew v5, v7, v6
-; CHECK-BE-NEXT:    stxv v3, 16(r3)
-; CHECK-BE-NEXT:    stxv v2, 0(r3)
-; CHECK-BE-NEXT:    stxv v5, 48(r3)
-; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
+; CHECK-BE-NEXT:    xxlxor v0, v0, v0
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI3_0@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI3_1@toc@l
+; CHECK-BE-NEXT:    lxvx v4, 0, r5
+; CHECK-BE-NEXT:    lxvx v5, 0, r6
+; CHECK-BE-NEXT:    vperm v1, v3, v0, v4
+; CHECK-BE-NEXT:    vperm v3, v0, v3, v5
+; CHECK-BE-NEXT:    vperm v4, v2, v0, v4
+; CHECK-BE-NEXT:    vperm v2, v0, v2, v5
+; CHECK-BE-NEXT:    xvcvuxwsp vs0, v1
+; CHECK-BE-NEXT:    xvcvuxwsp vs1, v3
+; CHECK-BE-NEXT:    xvcvuxwsp vs2, v4
+; CHECK-BE-NEXT:    xvcvuxwsp vs3, v2
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i16>, <16 x i16>* %0, align 32
@@ -759,89 +333,30 @@ define <4 x float> @test4elt_signed(i64
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtvsrd f0, r3
-; CHECK-P8-NEXT:    mfvsrd r3, f0
-; CHECK-P8-NEXT:    clrldi r4, r3, 48
-; CHECK-P8-NEXT:    rldicl r5, r3, 32, 48
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    extsh r5, r5
-; CHECK-P8-NEXT:    mtvsrwa f0, r4
-; CHECK-P8-NEXT:    rldicl r4, r3, 48, 48
-; CHECK-P8-NEXT:    rldicl r3, r3, 16, 48
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    extsh r3, r3
-; CHECK-P8-NEXT:    mtvsrwa f1, r5
-; CHECK-P8-NEXT:    mtvsrwa f2, r4
-; CHECK-P8-NEXT:    mtvsrwa f3, r3
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
-; CHECK-P8-NEXT:    xscvsxdsp f1, f1
-; CHECK-P8-NEXT:    xscvsxdsp f2, f2
-; CHECK-P8-NEXT:    xscvsxdsp f3, f3
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    vspltisw v3, 8
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    vadduwm v3, v3, v3
+; CHECK-P8-NEXT:    vmrglh v2, v2, v2
+; CHECK-P8-NEXT:    vslw v2, v2, v3
+; CHECK-P8-NEXT:    vsraw v2, v2, v3
+; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    li r4, 4
-; CHECK-P9-NEXT:    li r5, 2
-; CHECK-P9-NEXT:    li r6, 6
 ; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
-; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
-; CHECK-P9-NEXT:    vextuhrx r5, r5, v2
-; CHECK-P9-NEXT:    vextuhrx r6, r6, v2
-; CHECK-P9-NEXT:    extsh r3, r3
-; CHECK-P9-NEXT:    extsh r4, r4
-; CHECK-P9-NEXT:    extsh r5, r5
-; CHECK-P9-NEXT:    extsh r6, r6
-; CHECK-P9-NEXT:    mtvsrwa f0, r3
-; CHECK-P9-NEXT:    mtvsrwa f1, r4
-; CHECK-P9-NEXT:    mtvsrwa f2, r5
-; CHECK-P9-NEXT:    mtvsrwa f3, r6
-; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvsxdsp f1, f1
-; CHECK-P9-NEXT:    xscvsxdsp f2, f2
-; CHECK-P9-NEXT:    xscvsxdsp f3, f3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    vmrglh v2, v2, v2
+; CHECK-P9-NEXT:    vextsh2w v2, v2
+; CHECK-P9-NEXT:    xvcvsxwsp v2, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    li r4, 6
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    li r3, 2
-; CHECK-BE-NEXT:    li r5, 4
-; CHECK-BE-NEXT:    li r6, 0
-; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
-; CHECK-BE-NEXT:    vextuhlx r3, r3, v2
-; CHECK-BE-NEXT:    vextuhlx r5, r5, v2
-; CHECK-BE-NEXT:    vextuhlx r6, r6, v2
-; CHECK-BE-NEXT:    extsh r4, r4
-; CHECK-BE-NEXT:    extsh r3, r3
-; CHECK-BE-NEXT:    extsh r5, r5
-; CHECK-BE-NEXT:    extsh r6, r6
-; CHECK-BE-NEXT:    mtvsrwa f0, r4
-; CHECK-BE-NEXT:    mtvsrwa f1, r3
-; CHECK-BE-NEXT:    mtvsrwa f2, r5
-; CHECK-BE-NEXT:    mtvsrwa f3, r6
-; CHECK-BE-NEXT:    xscvsxdsp f0, f0
-; CHECK-BE-NEXT:    xscvsxdsp f1, f1
-; CHECK-BE-NEXT:    xscvsxdsp f2, f2
-; CHECK-BE-NEXT:    xscvsxdsp f3, f3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
-; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v2
+; CHECK-BE-NEXT:    vextsh2w v2, v2
+; CHECK-BE-NEXT:    xvcvsxwsp v2, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <4 x i16>
@@ -852,166 +367,47 @@ entry:
 define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, <8 x i16> %a) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    mfvsrd r5, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    vmrglh v4, v2, v2
+; CHECK-P8-NEXT:    vspltisw v3, 8
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    clrldi r6, r5, 48
-; CHECK-P8-NEXT:    rldicl r7, r5, 32, 48
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    mfvsrd r8, f0
-; CHECK-P8-NEXT:    extsh r7, r7
-; CHECK-P8-NEXT:    mtvsrwa f1, r6
-; CHECK-P8-NEXT:    rldicl r6, r5, 48, 48
-; CHECK-P8-NEXT:    rldicl r5, r5, 16, 48
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    extsh r5, r5
-; CHECK-P8-NEXT:    mtvsrwa f0, r7
-; CHECK-P8-NEXT:    mtvsrwa f2, r6
-; CHECK-P8-NEXT:    clrldi r6, r8, 48
-; CHECK-P8-NEXT:    mtvsrwa f3, r5
-; CHECK-P8-NEXT:    rldicl r5, r8, 32, 48
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    mtvsrwa f4, r6
-; CHECK-P8-NEXT:    rldicl r6, r8, 48, 48
-; CHECK-P8-NEXT:    extsh r5, r5
-; CHECK-P8-NEXT:    mtvsrwa f5, r5
-; CHECK-P8-NEXT:    extsh r5, r6
-; CHECK-P8-NEXT:    mtvsrwa f6, r5
-; CHECK-P8-NEXT:    rldicl r5, r8, 16, 48
-; CHECK-P8-NEXT:    extsh r5, r5
-; CHECK-P8-NEXT:    xscvsxdsp f1, f1
-; CHECK-P8-NEXT:    mtvsrwa f7, r5
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
-; CHECK-P8-NEXT:    xscvsxdsp f2, f2
-; CHECK-P8-NEXT:    xscvsxdsp f4, f4
-; CHECK-P8-NEXT:    xscvsxdsp f5, f5
-; CHECK-P8-NEXT:    xscvsxdsp f6, f6
-; CHECK-P8-NEXT:    xscvsxdsp f7, f7
-; CHECK-P8-NEXT:    xscvsxdsp f3, f3
-; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs1
-; CHECK-P8-NEXT:    xxmrghd vs1, vs5, vs4
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xxmrghd vs4, vs7, vs6
-; CHECK-P8-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P8-NEXT:    xvcvdpsp v4, vs4
-; CHECK-P8-NEXT:    xvcvdpsp v5, vs2
-; CHECK-P8-NEXT:    vmrgew v3, v4, v3
-; CHECK-P8-NEXT:    vmrgew v2, v5, v2
+; CHECK-P8-NEXT:    vmrghh v2, v2, v2
+; CHECK-P8-NEXT:    vadduwm v3, v3, v3
+; CHECK-P8-NEXT:    vslw v4, v4, v3
+; CHECK-P8-NEXT:    vslw v2, v2, v3
+; CHECK-P8-NEXT:    vsraw v4, v4, v3
+; CHECK-P8-NEXT:    vsraw v2, v2, v3
+; CHECK-P8-NEXT:    xvcvsxwsp v3, v4
+; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
 ; CHECK-P8-NEXT:    stvx v3, 0, r3
 ; CHECK-P8-NEXT:    stvx v2, r3, r4
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li r4, 8
-; CHECK-P9-NEXT:    li r5, 12
-; CHECK-P9-NEXT:    li r6, 10
-; CHECK-P9-NEXT:    li r7, 14
-; CHECK-P9-NEXT:    li r8, 0
-; CHECK-P9-NEXT:    li r9, 4
-; CHECK-P9-NEXT:    li r10, 2
-; CHECK-P9-NEXT:    li r11, 6
-; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
-; CHECK-P9-NEXT:    vextuhrx r5, r5, v2
-; CHECK-P9-NEXT:    vextuhrx r6, r6, v2
-; CHECK-P9-NEXT:    vextuhrx r7, r7, v2
-; CHECK-P9-NEXT:    vextuhrx r8, r8, v2
-; CHECK-P9-NEXT:    vextuhrx r9, r9, v2
-; CHECK-P9-NEXT:    vextuhrx r10, r10, v2
-; CHECK-P9-NEXT:    vextuhrx r11, r11, v2
-; CHECK-P9-NEXT:    extsh r4, r4
-; CHECK-P9-NEXT:    extsh r5, r5
-; CHECK-P9-NEXT:    extsh r6, r6
-; CHECK-P9-NEXT:    extsh r7, r7
-; CHECK-P9-NEXT:    extsh r8, r8
-; CHECK-P9-NEXT:    extsh r9, r9
-; CHECK-P9-NEXT:    extsh r10, r10
-; CHECK-P9-NEXT:    extsh r11, r11
-; CHECK-P9-NEXT:    mtvsrwa f0, r4
-; CHECK-P9-NEXT:    mtvsrwa f1, r5
-; CHECK-P9-NEXT:    mtvsrwa f2, r6
-; CHECK-P9-NEXT:    mtvsrwa f3, r7
-; CHECK-P9-NEXT:    mtvsrwa f4, r8
-; CHECK-P9-NEXT:    mtvsrwa f5, r9
-; CHECK-P9-NEXT:    mtvsrwa f6, r10
-; CHECK-P9-NEXT:    mtvsrwa f7, r11
-; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvsxdsp f1, f1
-; CHECK-P9-NEXT:    xscvsxdsp f2, f2
-; CHECK-P9-NEXT:    xscvsxdsp f3, f3
-; CHECK-P9-NEXT:    xscvsxdsp f4, f4
-; CHECK-P9-NEXT:    xscvsxdsp f5, f5
-; CHECK-P9-NEXT:    xscvsxdsp f6, f6
-; CHECK-P9-NEXT:    xscvsxdsp f7, f7
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
-; CHECK-P9-NEXT:    vmrgew v3, v5, v4
-; CHECK-P9-NEXT:    stxv v3, 0(r3)
-; CHECK-P9-NEXT:    stxv v2, 16(r3)
+; CHECK-P9-NEXT:    vmrglh v3, v2, v2
+; CHECK-P9-NEXT:    vmrghh v2, v2, v2
+; CHECK-P9-NEXT:    vextsh2w v3, v3
+; CHECK-P9-NEXT:    vextsh2w v2, v2
+; CHECK-P9-NEXT:    xvcvsxwsp vs0, v3
+; CHECK-P9-NEXT:    xvcvsxwsp vs1, v2
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    li r4, 12
-; CHECK-BE-NEXT:    li r5, 8
-; CHECK-BE-NEXT:    li r6, 10
-; CHECK-BE-NEXT:    li r7, 14
-; CHECK-BE-NEXT:    li r8, 6
-; CHECK-BE-NEXT:    li r9, 2
-; CHECK-BE-NEXT:    li r10, 4
-; CHECK-BE-NEXT:    li r11, 0
-; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
-; CHECK-BE-NEXT:    vextuhlx r5, r5, v2
-; CHECK-BE-NEXT:    vextuhlx r6, r6, v2
-; CHECK-BE-NEXT:    vextuhlx r7, r7, v2
-; CHECK-BE-NEXT:    vextuhlx r8, r8, v2
-; CHECK-BE-NEXT:    vextuhlx r9, r9, v2
-; CHECK-BE-NEXT:    vextuhlx r10, r10, v2
-; CHECK-BE-NEXT:    vextuhlx r11, r11, v2
-; CHECK-BE-NEXT:    extsh r4, r4
-; CHECK-BE-NEXT:    extsh r5, r5
-; CHECK-BE-NEXT:    extsh r6, r6
-; CHECK-BE-NEXT:    extsh r7, r7
-; CHECK-BE-NEXT:    extsh r8, r8
-; CHECK-BE-NEXT:    extsh r9, r9
-; CHECK-BE-NEXT:    extsh r10, r10
-; CHECK-BE-NEXT:    extsh r11, r11
-; CHECK-BE-NEXT:    mtvsrwa f0, r4
-; CHECK-BE-NEXT:    mtvsrwa f1, r5
-; CHECK-BE-NEXT:    mtvsrwa f2, r6
-; CHECK-BE-NEXT:    mtvsrwa f3, r7
-; CHECK-BE-NEXT:    mtvsrwa f4, r8
-; CHECK-BE-NEXT:    mtvsrwa f5, r9
-; CHECK-BE-NEXT:    mtvsrwa f6, r10
-; CHECK-BE-NEXT:    mtvsrwa f7, r11
-; CHECK-BE-NEXT:    xscvsxdsp f0, f0
-; CHECK-BE-NEXT:    xscvsxdsp f1, f1
-; CHECK-BE-NEXT:    xscvsxdsp f2, f2
-; CHECK-BE-NEXT:    xscvsxdsp f3, f3
-; CHECK-BE-NEXT:    xscvsxdsp f4, f4
-; CHECK-BE-NEXT:    xscvsxdsp f5, f5
-; CHECK-BE-NEXT:    xscvsxdsp f6, f6
-; CHECK-BE-NEXT:    xscvsxdsp f7, f7
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs2, vs3
-; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
-; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
-; CHECK-BE-NEXT:    vmrgew v2, v2, v3
-; CHECK-BE-NEXT:    vmrgew v3, v5, v4
-; CHECK-BE-NEXT:    stxv v3, 0(r3)
-; CHECK-BE-NEXT:    stxv v2, 16(r3)
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_0@toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    vmrghh v2, v2, v2
+; CHECK-BE-NEXT:    vextsh2w v3, v3
+; CHECK-BE-NEXT:    vextsh2w v2, v2
+; CHECK-BE-NEXT:    xvcvsxwsp vs0, v3
+; CHECK-BE-NEXT:    xvcvsxwsp vs1, v2
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = sitofp <8 x i16> %a to <8 x float>
@@ -1023,340 +419,80 @@ define void @test16elt_signed(<16 x floa
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
-; CHECK-P8-NEXT:    lvx v2, r4, r5
-; CHECK-P8-NEXT:    mfvsrd r7, v3
-; CHECK-P8-NEXT:    xxswapd vs8, v3
-; CHECK-P8-NEXT:    mfvsrd r6, v2
-; CHECK-P8-NEXT:    xxswapd vs2, v2
-; CHECK-P8-NEXT:    clrldi r4, r6, 48
-; CHECK-P8-NEXT:    rldicl r8, r6, 32, 48
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    extsh r8, r8
-; CHECK-P8-NEXT:    mtvsrwa f0, r4
-; CHECK-P8-NEXT:    rldicl r4, r6, 48, 48
-; CHECK-P8-NEXT:    rldicl r6, r6, 16, 48
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    mtvsrwa f1, r8
-; CHECK-P8-NEXT:    clrldi r8, r7, 48
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    mtvsrwa f3, r4
-; CHECK-P8-NEXT:    extsh r4, r8
-; CHECK-P8-NEXT:    mtvsrwa f4, r6
-; CHECK-P8-NEXT:    rldicl r6, r7, 32, 48
-; CHECK-P8-NEXT:    mtvsrwa f5, r4
-; CHECK-P8-NEXT:    rldicl r4, r7, 48, 48
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    mfvsrd r8, f2
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    mtvsrwa f2, r6
-; CHECK-P8-NEXT:    rldicl r6, r7, 16, 48
-; CHECK-P8-NEXT:    mtvsrwa f6, r4
-; CHECK-P8-NEXT:    clrldi r4, r8, 48
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    mtvsrwa f7, r6
-; CHECK-P8-NEXT:    rldicl r6, r8, 32, 48
-; CHECK-P8-NEXT:    mtvsrwa f9, r4
-; CHECK-P8-NEXT:    rldicl r4, r8, 48, 48
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    mtvsrwa f10, r6
-; CHECK-P8-NEXT:    rldicl r6, r8, 16, 48
-; CHECK-P8-NEXT:    mtvsrwa f11, r4
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    mfvsrd r4, f8
-; CHECK-P8-NEXT:    mtvsrwa f8, r6
-; CHECK-P8-NEXT:    clrldi r6, r4, 48
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    xscvsxdsp f1, f1
-; CHECK-P8-NEXT:    xscvsxdsp f3, f3
-; CHECK-P8-NEXT:    xscvsxdsp f4, f4
-; CHECK-P8-NEXT:    mtvsrwa f12, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 32, 48
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    xscvsxdsp f5, f5
-; CHECK-P8-NEXT:    mtvsrwa f13, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 48, 48
-; CHECK-P8-NEXT:    rldicl r4, r4, 16, 48
-; CHECK-P8-NEXT:    xscvsxdsp f2, f2
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    mtvsrwa v2, r6
-; CHECK-P8-NEXT:    mtvsrwa v3, r4
-; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
+; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    vspltisw v5, 8
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    lvx v3, r4, r5
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xscvsxdsp f6, f6
-; CHECK-P8-NEXT:    xscvsxdsp f7, f7
-; CHECK-P8-NEXT:    xscvsxdsp f9, f9
-; CHECK-P8-NEXT:    xscvsxdsp f10, f10
-; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs5
-; CHECK-P8-NEXT:    xscvsxdsp f11, f11
-; CHECK-P8-NEXT:    xscvsxdsp f8, f8
-; CHECK-P8-NEXT:    xscvsxdsp f12, f12
-; CHECK-P8-NEXT:    xscvsxdsp f13, f13
-; CHECK-P8-NEXT:    xxmrghd vs5, vs7, vs6
-; CHECK-P8-NEXT:    xscvsxdsp f1, v2
-; CHECK-P8-NEXT:    xscvsxdsp f4, v3
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xxmrghd vs0, vs10, vs9
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs3
-; CHECK-P8-NEXT:    xxmrghd vs3, vs8, vs11
-; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P8-NEXT:    xxmrghd vs2, vs13, vs12
-; CHECK-P8-NEXT:    xvcvdpsp v5, vs5
-; CHECK-P8-NEXT:    xvcvdpsp v0, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs4, vs1
-; CHECK-P8-NEXT:    xvcvdpsp v1, vs3
-; CHECK-P8-NEXT:    xvcvdpsp v6, vs2
-; CHECK-P8-NEXT:    vmrgew v2, v3, v2
-; CHECK-P8-NEXT:    xvcvdpsp v7, vs1
-; CHECK-P8-NEXT:    vmrgew v3, v5, v4
-; CHECK-P8-NEXT:    vmrgew v4, v1, v0
-; CHECK-P8-NEXT:    stvx v2, r3, r4
-; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    vmrgew v5, v7, v6
-; CHECK-P8-NEXT:    stvx v3, r3, r5
-; CHECK-P8-NEXT:    stvx v4, r3, r4
-; CHECK-P8-NEXT:    stvx v5, 0, r3
+; CHECK-P8-NEXT:    vmrglh v4, v2, v2
+; CHECK-P8-NEXT:    vmrglh v0, v3, v3
+; CHECK-P8-NEXT:    vmrghh v3, v3, v3
+; CHECK-P8-NEXT:    vmrghh v2, v2, v2
+; CHECK-P8-NEXT:    vadduwm v5, v5, v5
+; CHECK-P8-NEXT:    vslw v4, v4, v5
+; CHECK-P8-NEXT:    vslw v0, v0, v5
+; CHECK-P8-NEXT:    vslw v3, v3, v5
+; CHECK-P8-NEXT:    vslw v2, v2, v5
+; CHECK-P8-NEXT:    vsraw v4, v4, v5
+; CHECK-P8-NEXT:    vsraw v0, v0, v5
+; CHECK-P8-NEXT:    vsraw v3, v3, v5
+; CHECK-P8-NEXT:    vsraw v2, v2, v5
+; CHECK-P8-NEXT:    xvcvsxwsp v4, v4
+; CHECK-P8-NEXT:    xvcvsxwsp v5, v0
+; CHECK-P8-NEXT:    xvcvsxwsp v3, v3
+; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
+; CHECK-P8-NEXT:    stvx v4, 0, r3
+; CHECK-P8-NEXT:    stvx v5, r3, r6
+; CHECK-P8-NEXT:    stvx v3, r3, r4
+; CHECK-P8-NEXT:    stvx v2, r3, r5
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv v3, 0(r4)
 ; CHECK-P9-NEXT:    lxv v2, 16(r4)
-; CHECK-P9-NEXT:    li r4, 0
-; CHECK-P9-NEXT:    li r5, 4
-; CHECK-P9-NEXT:    li r6, 2
-; CHECK-P9-NEXT:    li r7, 6
-; CHECK-P9-NEXT:    li r8, 8
-; CHECK-P9-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    li r9, 12
-; CHECK-P9-NEXT:    li r10, 10
-; CHECK-P9-NEXT:    li r11, 14
-; CHECK-P9-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    vextuhrx r12, r4, v3
-; CHECK-P9-NEXT:    vextuhrx r0, r5, v3
-; CHECK-P9-NEXT:    vextuhrx r30, r6, v3
-; CHECK-P9-NEXT:    vextuhrx r29, r7, v3
-; CHECK-P9-NEXT:    vextuhrx r28, r8, v3
-; CHECK-P9-NEXT:    vextuhrx r27, r9, v3
-; CHECK-P9-NEXT:    vextuhrx r26, r10, v3
-; CHECK-P9-NEXT:    vextuhrx r25, r11, v3
-; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
-; CHECK-P9-NEXT:    vextuhrx r5, r5, v2
-; CHECK-P9-NEXT:    vextuhrx r6, r6, v2
-; CHECK-P9-NEXT:    vextuhrx r7, r7, v2
-; CHECK-P9-NEXT:    vextuhrx r8, r8, v2
-; CHECK-P9-NEXT:    vextuhrx r9, r9, v2
-; CHECK-P9-NEXT:    vextuhrx r10, r10, v2
-; CHECK-P9-NEXT:    vextuhrx r11, r11, v2
-; CHECK-P9-NEXT:    extsh r12, r12
-; CHECK-P9-NEXT:    extsh r0, r0
-; CHECK-P9-NEXT:    extsh r30, r30
-; CHECK-P9-NEXT:    extsh r29, r29
-; CHECK-P9-NEXT:    extsh r28, r28
-; CHECK-P9-NEXT:    extsh r27, r27
-; CHECK-P9-NEXT:    extsh r26, r26
-; CHECK-P9-NEXT:    extsh r25, r25
-; CHECK-P9-NEXT:    extsh r4, r4
-; CHECK-P9-NEXT:    extsh r5, r5
-; CHECK-P9-NEXT:    extsh r6, r6
-; CHECK-P9-NEXT:    extsh r7, r7
-; CHECK-P9-NEXT:    extsh r8, r8
-; CHECK-P9-NEXT:    extsh r9, r9
-; CHECK-P9-NEXT:    extsh r10, r10
-; CHECK-P9-NEXT:    extsh r11, r11
-; CHECK-P9-NEXT:    mtvsrwa f0, r12
-; CHECK-P9-NEXT:    mtvsrwa f1, r0
-; CHECK-P9-NEXT:    mtvsrwa f2, r30
-; CHECK-P9-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa f3, r29
-; CHECK-P9-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa f4, r28
-; CHECK-P9-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa f5, r27
-; CHECK-P9-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa f6, r26
-; CHECK-P9-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa f7, r25
-; CHECK-P9-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa f8, r4
-; CHECK-P9-NEXT:    mtvsrwa f9, r5
-; CHECK-P9-NEXT:    mtvsrwa f10, r6
-; CHECK-P9-NEXT:    mtvsrwa f11, r7
-; CHECK-P9-NEXT:    mtvsrwa f12, r8
-; CHECK-P9-NEXT:    mtvsrwa f13, r9
-; CHECK-P9-NEXT:    mtvsrwa v2, r10
-; CHECK-P9-NEXT:    mtvsrwa v3, r11
-; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvsxdsp f1, f1
-; CHECK-P9-NEXT:    xscvsxdsp f2, f2
-; CHECK-P9-NEXT:    xscvsxdsp f3, f3
-; CHECK-P9-NEXT:    xscvsxdsp f4, f4
-; CHECK-P9-NEXT:    xscvsxdsp f5, f5
-; CHECK-P9-NEXT:    xscvsxdsp f6, f6
-; CHECK-P9-NEXT:    xscvsxdsp f7, f7
-; CHECK-P9-NEXT:    xscvsxdsp f8, f8
-; CHECK-P9-NEXT:    xscvsxdsp f9, f9
-; CHECK-P9-NEXT:    xscvsxdsp f10, f10
-; CHECK-P9-NEXT:    xscvsxdsp f11, f11
-; CHECK-P9-NEXT:    xscvsxdsp f12, f12
-; CHECK-P9-NEXT:    xscvsxdsp f13, f13
-; CHECK-P9-NEXT:    xscvsxdsp f31, v2
-; CHECK-P9-NEXT:    xscvsxdsp f30, v3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P9-NEXT:    xxmrghd vs4, vs9, vs8
-; CHECK-P9-NEXT:    xxmrghd vs5, vs11, vs10
-; CHECK-P9-NEXT:    xxmrghd vs6, vs13, vs12
-; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
-; CHECK-P9-NEXT:    xvcvdpsp v0, vs4
-; CHECK-P9-NEXT:    xvcvdpsp v1, vs5
-; CHECK-P9-NEXT:    xvcvdpsp v6, vs6
-; CHECK-P9-NEXT:    xvcvdpsp v7, vs7
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
-; CHECK-P9-NEXT:    vmrgew v3, v5, v4
-; CHECK-P9-NEXT:    vmrgew v4, v1, v0
-; CHECK-P9-NEXT:    vmrgew v5, v7, v6
-; CHECK-P9-NEXT:    stxv v3, 16(r3)
-; CHECK-P9-NEXT:    stxv v2, 0(r3)
-; CHECK-P9-NEXT:    stxv v5, 48(r3)
-; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    lxv v3, 0(r4)
+; CHECK-P9-NEXT:    vmrglh v4, v3, v3
+; CHECK-P9-NEXT:    vmrghh v3, v3, v3
+; CHECK-P9-NEXT:    vmrglh v5, v2, v2
+; CHECK-P9-NEXT:    vmrghh v2, v2, v2
+; CHECK-P9-NEXT:    vextsh2w v4, v4
+; CHECK-P9-NEXT:    vextsh2w v3, v3
+; CHECK-P9-NEXT:    vextsh2w v5, v5
+; CHECK-P9-NEXT:    vextsh2w v2, v2
+; CHECK-P9-NEXT:    xvcvsxwsp vs0, v4
+; CHECK-P9-NEXT:    xvcvsxwsp vs1, v3
+; CHECK-P9-NEXT:    xvcvsxwsp vs2, v5
+; CHECK-P9-NEXT:    xvcvsxwsp vs3, v2
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v3, 0(r4)
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_0@toc@ha
 ; CHECK-BE-NEXT:    lxv v2, 16(r4)
-; CHECK-BE-NEXT:    li r4, 6
-; CHECK-BE-NEXT:    li r5, 2
-; CHECK-BE-NEXT:    li r6, 4
-; CHECK-BE-NEXT:    li r7, 0
-; CHECK-BE-NEXT:    li r8, 14
-; CHECK-BE-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    li r9, 10
-; CHECK-BE-NEXT:    li r10, 12
-; CHECK-BE-NEXT:    li r11, 8
-; CHECK-BE-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    vextuhlx r12, r4, v3
-; CHECK-BE-NEXT:    vextuhlx r0, r5, v3
-; CHECK-BE-NEXT:    vextuhlx r30, r6, v3
-; CHECK-BE-NEXT:    vextuhlx r29, r7, v3
-; CHECK-BE-NEXT:    vextuhlx r28, r8, v3
-; CHECK-BE-NEXT:    vextuhlx r27, r9, v3
-; CHECK-BE-NEXT:    vextuhlx r26, r10, v3
-; CHECK-BE-NEXT:    vextuhlx r25, r11, v3
-; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
-; CHECK-BE-NEXT:    vextuhlx r5, r5, v2
-; CHECK-BE-NEXT:    vextuhlx r6, r6, v2
-; CHECK-BE-NEXT:    vextuhlx r7, r7, v2
-; CHECK-BE-NEXT:    vextuhlx r8, r8, v2
-; CHECK-BE-NEXT:    vextuhlx r9, r9, v2
-; CHECK-BE-NEXT:    vextuhlx r10, r10, v2
-; CHECK-BE-NEXT:    vextuhlx r11, r11, v2
-; CHECK-BE-NEXT:    extsh r12, r12
-; CHECK-BE-NEXT:    extsh r0, r0
-; CHECK-BE-NEXT:    extsh r30, r30
-; CHECK-BE-NEXT:    extsh r29, r29
-; CHECK-BE-NEXT:    extsh r28, r28
-; CHECK-BE-NEXT:    extsh r27, r27
-; CHECK-BE-NEXT:    extsh r26, r26
-; CHECK-BE-NEXT:    extsh r25, r25
-; CHECK-BE-NEXT:    extsh r4, r4
-; CHECK-BE-NEXT:    extsh r5, r5
-; CHECK-BE-NEXT:    extsh r6, r6
-; CHECK-BE-NEXT:    extsh r7, r7
-; CHECK-BE-NEXT:    extsh r8, r8
-; CHECK-BE-NEXT:    extsh r9, r9
-; CHECK-BE-NEXT:    extsh r10, r10
-; CHECK-BE-NEXT:    extsh r11, r11
-; CHECK-BE-NEXT:    mtvsrwa f0, r12
-; CHECK-BE-NEXT:    mtvsrwa f1, r0
-; CHECK-BE-NEXT:    mtvsrwa f2, r30
-; CHECK-BE-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa f3, r29
-; CHECK-BE-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa f4, r28
-; CHECK-BE-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa f5, r27
-; CHECK-BE-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa f6, r26
-; CHECK-BE-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa f7, r25
-; CHECK-BE-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa f8, r4
-; CHECK-BE-NEXT:    mtvsrwa f9, r5
-; CHECK-BE-NEXT:    mtvsrwa f10, r6
-; CHECK-BE-NEXT:    mtvsrwa f11, r7
-; CHECK-BE-NEXT:    mtvsrwa f12, r8
-; CHECK-BE-NEXT:    mtvsrwa f13, r9
-; CHECK-BE-NEXT:    mtvsrwa v2, r10
-; CHECK-BE-NEXT:    mtvsrwa v3, r11
-; CHECK-BE-NEXT:    xscvsxdsp f0, f0
-; CHECK-BE-NEXT:    xscvsxdsp f1, f1
-; CHECK-BE-NEXT:    xscvsxdsp f2, f2
-; CHECK-BE-NEXT:    xscvsxdsp f3, f3
-; CHECK-BE-NEXT:    xscvsxdsp f4, f4
-; CHECK-BE-NEXT:    xscvsxdsp f5, f5
-; CHECK-BE-NEXT:    xscvsxdsp f6, f6
-; CHECK-BE-NEXT:    xscvsxdsp f7, f7
-; CHECK-BE-NEXT:    xscvsxdsp f8, f8
-; CHECK-BE-NEXT:    xscvsxdsp f9, f9
-; CHECK-BE-NEXT:    xscvsxdsp f10, f10
-; CHECK-BE-NEXT:    xscvsxdsp f11, f11
-; CHECK-BE-NEXT:    xscvsxdsp f12, f12
-; CHECK-BE-NEXT:    xscvsxdsp f13, f13
-; CHECK-BE-NEXT:    xscvsxdsp f31, v2
-; CHECK-BE-NEXT:    xscvsxdsp f30, v3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-BE-NEXT:    xxmrghd vs4, vs9, vs8
-; CHECK-BE-NEXT:    xxmrghd vs5, vs11, vs10
-; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
-; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
-; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
-; CHECK-BE-NEXT:    xvcvdpsp v0, vs4
-; CHECK-BE-NEXT:    xvcvdpsp v1, vs5
-; CHECK-BE-NEXT:    xvcvdpsp v6, vs6
-; CHECK-BE-NEXT:    xvcvdpsp v7, vs7
-; CHECK-BE-NEXT:    vmrgew v2, v3, v2
-; CHECK-BE-NEXT:    vmrgew v3, v5, v4
-; CHECK-BE-NEXT:    vmrgew v4, v1, v0
-; CHECK-BE-NEXT:    vmrgew v5, v7, v6
-; CHECK-BE-NEXT:    stxv v3, 16(r3)
-; CHECK-BE-NEXT:    stxv v2, 0(r3)
-; CHECK-BE-NEXT:    stxv v5, 48(r3)
-; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
+; CHECK-BE-NEXT:    xxlxor v5, v5, v5
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_0@toc@l
+; CHECK-BE-NEXT:    lxvx v4, 0, r5
+; CHECK-BE-NEXT:    vperm v0, v5, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v5, v2, v4
+; CHECK-BE-NEXT:    vmrghh v3, v3, v3
+; CHECK-BE-NEXT:    vmrghh v2, v2, v2
+; CHECK-BE-NEXT:    vextsh2w v5, v0
+; CHECK-BE-NEXT:    vextsh2w v4, v4
+; CHECK-BE-NEXT:    vextsh2w v3, v3
+; CHECK-BE-NEXT:    vextsh2w v2, v2
+; CHECK-BE-NEXT:    xvcvsxwsp vs0, v5
+; CHECK-BE-NEXT:    xvcvsxwsp vs1, v4
+; CHECK-BE-NEXT:    xvcvsxwsp vs2, v3
+; CHECK-BE-NEXT:    xvcvsxwsp vs3, v2
+; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    stxv vs2, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 48(r3)
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i16>, <16 x i16>* %0, align 32

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll?rev=350155&r1=350154&r2=350155&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll Sat Dec 29 05:40:48 2018
@@ -369,17 +369,19 @@ entry:
 define <2 x double> @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
 ; CHECK-P8-NEXT:    mtvsrd f0, r3
-; CHECK-P8-NEXT:    mfvsrd r3, f0
-; CHECK-P8-NEXT:    clrldi r4, r3, 48
-; CHECK-P8-NEXT:    rldicl r3, r3, 48, 48
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    extsh r3, r3
-; CHECK-P8-NEXT:    mtvsrwa f0, r4
-; CHECK-P8-NEXT:    mtvsrwa f1, r3
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
-; CHECK-P8-NEXT:    xscvsxddp f1, f1
-; CHECK-P8-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-P8-NEXT:    addi r3, r4, .LCPI4_0@toc@l
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    addis r3, r2, .LCPI4_1@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, .LCPI4_1@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    vsld v2, v2, v3
+; CHECK-P8-NEXT:    vsrad v2, v2, v3
+; CHECK-P8-NEXT:    xvcvsxddp v2, v2
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
@@ -412,27 +414,27 @@ entry:
 define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, i64 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
 ; CHECK-P8-NEXT:    mtvsrd f0, r4
-; CHECK-P8-NEXT:    mfvsrd r4, f0
-; CHECK-P8-NEXT:    clrldi r5, r4, 48
-; CHECK-P8-NEXT:    rldicl r6, r4, 48, 48
-; CHECK-P8-NEXT:    extsh r5, r5
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    mtvsrwa f0, r5
-; CHECK-P8-NEXT:    rldicl r5, r4, 32, 48
-; CHECK-P8-NEXT:    rldicl r4, r4, 16, 48
-; CHECK-P8-NEXT:    extsh r5, r5
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    mtvsrwa f1, r6
-; CHECK-P8-NEXT:    mtvsrwa f2, r5
-; CHECK-P8-NEXT:    mtvsrwa f3, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_2@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI5_0@toc@l
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_2@toc@l
+; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    lvx v4, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_1@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_1@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
-; CHECK-P8-NEXT:    xscvsxddp f1, f1
-; CHECK-P8-NEXT:    xscvsxddp f2, f2
-; CHECK-P8-NEXT:    xscvsxddp f3, f3
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P8-NEXT:    vperm v3, v3, v3, v4
+; CHECK-P8-NEXT:    xxswapd v4, vs0
+; CHECK-P8-NEXT:    vsld v2, v2, v4
+; CHECK-P8-NEXT:    vsld v3, v3, v4
+; CHECK-P8-NEXT:    vsrad v2, v2, v4
+; CHECK-P8-NEXT:    vsrad v3, v3, v4
+; CHECK-P8-NEXT:    xvcvsxddp vs0, v2
+; CHECK-P8-NEXT:    xvcvsxddp vs1, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
@@ -488,56 +490,49 @@ entry:
 define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, <8 x i16> %a) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    mfvsrd r4, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    clrldi r5, r4, 48
-; CHECK-P8-NEXT:    rldicl r6, r4, 48, 48
-; CHECK-P8-NEXT:    extsh r5, r5
-; CHECK-P8-NEXT:    mfvsrd r7, f0
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    mtvsrwa f1, r5
-; CHECK-P8-NEXT:    rldicl r5, r4, 32, 48
-; CHECK-P8-NEXT:    rldicl r4, r4, 16, 48
-; CHECK-P8-NEXT:    extsh r5, r5
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    mtvsrwa f0, r6
-; CHECK-P8-NEXT:    mtvsrwa f2, r5
-; CHECK-P8-NEXT:    clrldi r5, r7, 48
-; CHECK-P8-NEXT:    mtvsrwa f3, r4
-; CHECK-P8-NEXT:    extsh r4, r5
-; CHECK-P8-NEXT:    rldicl r5, r7, 16, 48
-; CHECK-P8-NEXT:    mtvsrwa f4, r4
-; CHECK-P8-NEXT:    rldicl r4, r7, 48, 48
-; CHECK-P8-NEXT:    extsh r5, r5
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    mtvsrwa f7, r5
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    mtvsrwa f5, r4
-; CHECK-P8-NEXT:    rldicl r4, r7, 32, 48
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    xscvsxddp f1, f1
-; CHECK-P8-NEXT:    mtvsrwa f6, r4
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_2@toc@ha
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
+; CHECK-P8-NEXT:    addis r6, r2, .LCPI6_3@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_2@toc@l
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_0@toc@l
+; CHECK-P8-NEXT:    addi r6, r6, .LCPI6_3@toc@l
+; CHECK-P8-NEXT:    lvx v4, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_4@toc@ha
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lvx v5, 0, r6
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_1@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_4@toc@l
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_1@toc@l
+; CHECK-P8-NEXT:    lvx v0, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
-; CHECK-P8-NEXT:    xscvsxddp f2, f2
-; CHECK-P8-NEXT:    xscvsxddp f3, f3
-; CHECK-P8-NEXT:    xscvsxddp f4, f4
-; CHECK-P8-NEXT:    xscvsxddp f5, f5
-; CHECK-P8-NEXT:    xscvsxddp f6, f6
-; CHECK-P8-NEXT:    xscvsxddp f7, f7
-; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs1
-; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
+; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v0
+; CHECK-P8-NEXT:    xxswapd v0, vs0
+; CHECK-P8-NEXT:    vsld v3, v3, v0
+; CHECK-P8-NEXT:    vsld v4, v4, v0
+; CHECK-P8-NEXT:    vsld v5, v5, v0
+; CHECK-P8-NEXT:    vsld v2, v2, v0
+; CHECK-P8-NEXT:    vsrad v3, v3, v0
+; CHECK-P8-NEXT:    vsrad v2, v2, v0
+; CHECK-P8-NEXT:    vsrad v4, v4, v0
+; CHECK-P8-NEXT:    vsrad v5, v5, v0
+; CHECK-P8-NEXT:    xvcvsxddp vs2, v2
+; CHECK-P8-NEXT:    xvcvsxddp vs0, v3
+; CHECK-P8-NEXT:    xvcvsxddp vs1, v5
+; CHECK-P8-NEXT:    xvcvsxddp vs3, v4
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxmrghd vs2, vs5, vs4
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
 ; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
@@ -613,111 +608,79 @@ entry:
 define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, <16 x i16>* nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
-; CHECK-P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    lvx v2, r4, r5
-; CHECK-P8-NEXT:    mfvsrd r7, v3
-; CHECK-P8-NEXT:    xxswapd vs8, v3
-; CHECK-P8-NEXT:    mfvsrd r6, v2
-; CHECK-P8-NEXT:    xxswapd vs2, v2
-; CHECK-P8-NEXT:    clrldi r4, r6, 48
-; CHECK-P8-NEXT:    rldicl r8, r6, 48, 48
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    extsh r8, r8
-; CHECK-P8-NEXT:    mtvsrwa f0, r4
-; CHECK-P8-NEXT:    rldicl r4, r6, 32, 48
-; CHECK-P8-NEXT:    rldicl r6, r6, 16, 48
-; CHECK-P8-NEXT:    mtvsrwa f1, r8
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    clrldi r8, r7, 48
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    mtvsrwa f3, r4
-; CHECK-P8-NEXT:    extsh r4, r8
-; CHECK-P8-NEXT:    mtvsrwa f4, r6
-; CHECK-P8-NEXT:    rldicl r6, r7, 48, 48
-; CHECK-P8-NEXT:    mtvsrwa f5, r4
-; CHECK-P8-NEXT:    rldicl r4, r7, 32, 48
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    mfvsrd r8, f2
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    mtvsrwa f2, r6
-; CHECK-P8-NEXT:    rldicl r6, r7, 16, 48
-; CHECK-P8-NEXT:    mtvsrwa f6, r4
-; CHECK-P8-NEXT:    clrldi r4, r8, 48
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    mtvsrwa f7, r6
-; CHECK-P8-NEXT:    rldicl r6, r8, 48, 48
-; CHECK-P8-NEXT:    mtvsrwa f9, r4
-; CHECK-P8-NEXT:    rldicl r4, r8, 32, 48
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    mtvsrwa f10, r6
-; CHECK-P8-NEXT:    rldicl r6, r8, 16, 48
-; CHECK-P8-NEXT:    mtvsrwa f11, r4
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    mfvsrd r4, f8
-; CHECK-P8-NEXT:    mtvsrwa f8, r6
-; CHECK-P8-NEXT:    clrldi r6, r4, 48
-; CHECK-P8-NEXT:    xscvsxddp f3, f3
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    xscvsxddp f4, f4
-; CHECK-P8-NEXT:    mtvsrwa f12, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 48, 48
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
-; CHECK-P8-NEXT:    mtvsrwa f13, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 32, 48
-; CHECK-P8-NEXT:    rldicl r4, r4, 16, 48
-; CHECK-P8-NEXT:    xscvsxddp f1, f1
-; CHECK-P8-NEXT:    extsh r6, r6
-; CHECK-P8-NEXT:    extsh r4, r4
-; CHECK-P8-NEXT:    xscvsxddp f5, f5
-; CHECK-P8-NEXT:    xscvsxddp f2, f2
-; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
-; CHECK-P8-NEXT:    mtvsrwa v2, r6
-; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    mtvsrwa v3, r4
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_0@toc@ha
+; CHECK-P8-NEXT:    addis r6, r2, .LCPI7_2@toc@ha
+; CHECK-P8-NEXT:    lvx v4, 0, r4
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_0@toc@l
+; CHECK-P8-NEXT:    addi r6, r6, .LCPI7_2@toc@l
+; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_3@toc@ha
+; CHECK-P8-NEXT:    lvx v3, 0, r6
+; CHECK-P8-NEXT:    addis r6, r2, .LCPI7_4@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_3@toc@l
+; CHECK-P8-NEXT:    addi r6, r6, .LCPI7_4@toc@l
+; CHECK-P8-NEXT:    lvx v5, 0, r5
+; CHECK-P8-NEXT:    lvx v0, 0, r6
+; CHECK-P8-NEXT:    li r6, 16
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_1@toc@ha
+; CHECK-P8-NEXT:    lvx v7, r4, r6
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_1@toc@l
+; CHECK-P8-NEXT:    vperm v1, v4, v4, v2
 ; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    xscvsxddp f6, f6
-; CHECK-P8-NEXT:    xscvsxddp f7, f7
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xscvsxddp f9, f9
-; CHECK-P8-NEXT:    xscvsxddp f10, f10
-; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs5
-; CHECK-P8-NEXT:    xscvsxddp f11, f11
-; CHECK-P8-NEXT:    xxswapd vs2, vs3
-; CHECK-P8-NEXT:    xscvsxddp f8, f8
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xscvsxddp f12, f12
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xscvsxddp f13, f13
-; CHECK-P8-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P8-NEXT:    xscvsxddp f4, v2
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    li r4, 96
-; CHECK-P8-NEXT:    xscvsxddp f31, v3
-; CHECK-P8-NEXT:    xxmrghd vs5, vs10, vs9
+; CHECK-P8-NEXT:    vperm v6, v4, v4, v3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    li r5, 96
+; CHECK-P8-NEXT:    vperm v8, v4, v4, v5
+; CHECK-P8-NEXT:    vperm v4, v4, v4, v0
+; CHECK-P8-NEXT:    vperm v5, v7, v7, v5
+; CHECK-P8-NEXT:    xxswapd v9, vs0
+; CHECK-P8-NEXT:    vperm v0, v7, v7, v0
+; CHECK-P8-NEXT:    vperm v2, v7, v7, v2
+; CHECK-P8-NEXT:    vperm v3, v7, v7, v3
+; CHECK-P8-NEXT:    vsld v1, v1, v9
+; CHECK-P8-NEXT:    vsld v6, v6, v9
+; CHECK-P8-NEXT:    vsld v5, v5, v9
+; CHECK-P8-NEXT:    vsld v0, v0, v9
+; CHECK-P8-NEXT:    vsld v2, v2, v9
+; CHECK-P8-NEXT:    vsld v3, v3, v9
+; CHECK-P8-NEXT:    vsrad v5, v5, v9
+; CHECK-P8-NEXT:    vsrad v0, v0, v9
+; CHECK-P8-NEXT:    vsld v7, v8, v9
+; CHECK-P8-NEXT:    vsld v4, v4, v9
+; CHECK-P8-NEXT:    vsrad v2, v2, v9
+; CHECK-P8-NEXT:    vsrad v3, v3, v9
+; CHECK-P8-NEXT:    xvcvsxddp vs2, v5
+; CHECK-P8-NEXT:    xvcvsxddp vs3, v0
+; CHECK-P8-NEXT:    vsrad v1, v1, v9
+; CHECK-P8-NEXT:    vsrad v6, v6, v9
+; CHECK-P8-NEXT:    vsrad v7, v7, v9
+; CHECK-P8-NEXT:    vsrad v4, v4, v9
+; CHECK-P8-NEXT:    xvcvsxddp vs1, v2
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xvcvsxddp vs4, v3
 ; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xxmrghd vs6, vs8, vs11
-; CHECK-P8-NEXT:    xxmrghd vs7, vs13, vs12
+; CHECK-P8-NEXT:    xvcvsxddp vs0, v7
+; CHECK-P8-NEXT:    xvcvsxddp vs5, v4
+; CHECK-P8-NEXT:    xvcvsxddp vs6, v1
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
 ; CHECK-P8-NEXT:    li r4, 80
-; CHECK-P8-NEXT:    xxswapd vs0, vs6
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
-; CHECK-P8-NEXT:    li r6, 64
-; CHECK-P8-NEXT:    xxmrghd vs2, vs31, vs4
-; CHECK-P8-NEXT:    xxswapd vs4, vs5
-; CHECK-P8-NEXT:    xxswapd vs5, vs7
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    stxvd2x vs4, r3, r6
+; CHECK-P8-NEXT:    xvcvsxddp vs7, v6
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs5, 0, r3
-; CHECK-P8-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT:    li r5, 64
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs4, vs4
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    xxswapd vs3, vs6
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxswapd vs2, vs7
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    stxvd2x vs5, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll?rev=350155&r1=350154&r2=350155&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll Sat Dec 29 05:40:48 2018
@@ -62,51 +62,35 @@ define <4 x float> @test4elt(<4 x i64>*
 ; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
-; CHECK-P8-NEXT:    xxswapd vs3, vs1
-; CHECK-P8-NEXT:    xscvuxdsp f1, f1
-; CHECK-P8-NEXT:    xxswapd vs2, vs0
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
-; CHECK-P8-NEXT:    xscvuxdsp f3, f3
-; CHECK-P8-NEXT:    xscvuxdsp f2, f2
-; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs1
-; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xvcvuxdsp vs1, v3
+; CHECK-P8-NEXT:    xvcvuxdsp vs0, v2
+; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P8-NEXT:    vpkudum v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r3)
-; CHECK-P9-NEXT:    lxv vs1, 0(r3)
-; CHECK-P9-NEXT:    xxswapd vs2, vs1
-; CHECK-P9-NEXT:    xxswapd vs3, vs0
-; CHECK-P9-NEXT:    xscvuxdsp f1, f1
-; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvuxdsp f2, f2
-; CHECK-P9-NEXT:    xscvuxdsp f3, f3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs1
-; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs2
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    lxv v2, 16(r3)
+; CHECK-P9-NEXT:    lxv v3, 0(r3)
+; CHECK-P9-NEXT:    xvcvuxdsp vs0, v3
+; CHECK-P9-NEXT:    xvcvuxdsp vs1, v2
+; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P9-NEXT:    vpkudum v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    xxswapd vs3, vs0
-; CHECK-BE-NEXT:    xscvuxdsp f1, f1
-; CHECK-BE-NEXT:    xscvuxdsp f0, f0
-; CHECK-BE-NEXT:    xscvuxdsp f2, f2
-; CHECK-BE-NEXT:    xscvuxdsp f3, f3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
-; CHECK-BE-NEXT:    vmrgew v2, v2, v3
+; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxv v3, 16(r3)
+; CHECK-BE-NEXT:    xvcvuxdsp vs0, v3
+; CHECK-BE-NEXT:    xvcvuxdsp vs1, v2
+; CHECK-BE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-BE-NEXT:    vpkudum v2, v3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <4 x i64>, <4 x i64>* %0, align 32
@@ -124,92 +108,60 @@ define void @test8elt(<8 x float>* noali
 ; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
-; CHECK-P8-NEXT:    xxswapd vs7, vs3
-; CHECK-P8-NEXT:    xscvuxdsp f3, f3
-; CHECK-P8-NEXT:    xxswapd vs4, vs0
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
-; CHECK-P8-NEXT:    xxswapd vs5, vs1
-; CHECK-P8-NEXT:    xscvuxdsp f1, f1
-; CHECK-P8-NEXT:    xxswapd vs6, vs2
-; CHECK-P8-NEXT:    xscvuxdsp f2, f2
-; CHECK-P8-NEXT:    xscvuxdsp f4, f4
-; CHECK-P8-NEXT:    xscvuxdsp f5, f5
-; CHECK-P8-NEXT:    xscvuxdsp f6, f6
-; CHECK-P8-NEXT:    xscvuxdsp f7, f7
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
-; CHECK-P8-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P8-NEXT:    xxmrghd vs0, vs6, vs7
-; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P8-NEXT:    xvcvdpsp v5, vs0
-; CHECK-P8-NEXT:    vmrgew v2, v4, v2
-; CHECK-P8-NEXT:    vmrgew v3, v5, v3
+; CHECK-P8-NEXT:    xxswapd v5, vs3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    xvcvuxdsp vs3, v5
+; CHECK-P8-NEXT:    xvcvuxdsp vs0, v2
+; CHECK-P8-NEXT:    xvcvuxdsp vs1, v3
+; CHECK-P8-NEXT:    xvcvuxdsp vs2, v4
+; CHECK-P8-NEXT:    xxsldwi v5, vs3, vs3, 3
+; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P8-NEXT:    xxsldwi v4, vs2, vs2, 3
+; CHECK-P8-NEXT:    vpkudum v2, v3, v2
+; CHECK-P8-NEXT:    vpkudum v3, v4, v5
 ; CHECK-P8-NEXT:    stvx v2, r3, r5
 ; CHECK-P8-NEXT:    stvx v3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 48(r4)
-; CHECK-P9-NEXT:    lxv vs1, 32(r4)
-; CHECK-P9-NEXT:    lxv vs2, 16(r4)
-; CHECK-P9-NEXT:    lxv vs3, 0(r4)
-; CHECK-P9-NEXT:    xxswapd vs4, vs3
-; CHECK-P9-NEXT:    xxswapd vs5, vs2
-; CHECK-P9-NEXT:    xxswapd vs6, vs1
-; CHECK-P9-NEXT:    xxswapd vs7, vs0
-; CHECK-P9-NEXT:    xscvuxdsp f3, f3
-; CHECK-P9-NEXT:    xscvuxdsp f2, f2
-; CHECK-P9-NEXT:    xscvuxdsp f1, f1
-; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvuxdsp f4, f4
-; CHECK-P9-NEXT:    xscvuxdsp f5, f5
-; CHECK-P9-NEXT:    xscvuxdsp f6, f6
-; CHECK-P9-NEXT:    xscvuxdsp f7, f7
-; CHECK-P9-NEXT:    xxmrghd vs2, vs2, vs3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs1
-; CHECK-P9-NEXT:    xxmrghd vs4, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v5, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs4
-; CHECK-P9-NEXT:    xvcvdpsp v4, vs3
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
-; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    lxv v2, 48(r4)
+; CHECK-P9-NEXT:    lxv v3, 32(r4)
+; CHECK-P9-NEXT:    lxv v4, 16(r4)
+; CHECK-P9-NEXT:    lxv v5, 0(r4)
+; CHECK-P9-NEXT:    xvcvuxdsp vs0, v5
+; CHECK-P9-NEXT:    xvcvuxdsp vs1, v4
+; CHECK-P9-NEXT:    xvcvuxdsp vs2, v3
+; CHECK-P9-NEXT:    xvcvuxdsp vs3, v2
+; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxsldwi v4, vs2, vs2, 3
+; CHECK-P9-NEXT:    xxsldwi v5, vs3, vs3, 3
+; CHECK-P9-NEXT:    vpkudum v2, v3, v2
+; CHECK-P9-NEXT:    vpkudum v3, v5, v4
 ; CHECK-P9-NEXT:    stxv v3, 16(r3)
 ; CHECK-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 32(r4)
-; CHECK-BE-NEXT:    lxv vs1, 48(r4)
-; CHECK-BE-NEXT:    lxv vs2, 0(r4)
-; CHECK-BE-NEXT:    lxv vs3, 16(r4)
-; CHECK-BE-NEXT:    xxswapd vs4, vs3
-; CHECK-BE-NEXT:    xxswapd vs5, vs2
-; CHECK-BE-NEXT:    xxswapd vs6, vs1
-; CHECK-BE-NEXT:    xxswapd vs7, vs0
-; CHECK-BE-NEXT:    xscvuxdsp f3, f3
-; CHECK-BE-NEXT:    xscvuxdsp f2, f2
-; CHECK-BE-NEXT:    xscvuxdsp f1, f1
-; CHECK-BE-NEXT:    xscvuxdsp f0, f0
-; CHECK-BE-NEXT:    xscvuxdsp f4, f4
-; CHECK-BE-NEXT:    xscvuxdsp f5, f5
-; CHECK-BE-NEXT:    xscvuxdsp f6, f6
-; CHECK-BE-NEXT:    xscvuxdsp f7, f7
-; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
-; CHECK-BE-NEXT:    xxmrghd vs3, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghd vs1, vs7, vs6
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v4, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs3
-; CHECK-BE-NEXT:    xvcvdpsp v5, vs1
-; CHECK-BE-NEXT:    vmrgew v2, v2, v3
-; CHECK-BE-NEXT:    vmrgew v3, v4, v5
+; CHECK-BE-NEXT:    lxv v2, 32(r4)
+; CHECK-BE-NEXT:    lxv v3, 48(r4)
+; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-NEXT:    xvcvuxdsp vs0, v5
+; CHECK-BE-NEXT:    xvcvuxdsp vs1, v4
+; CHECK-BE-NEXT:    xvcvuxdsp vs2, v3
+; CHECK-BE-NEXT:    xvcvuxdsp vs3, v2
+; CHECK-BE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxsldwi v4, vs2, vs2, 3
+; CHECK-BE-NEXT:    xxsldwi v5, vs3, vs3, 3
+; CHECK-BE-NEXT:    vpkudum v2, v3, v2
+; CHECK-BE-NEXT:    vpkudum v3, v5, v4
 ; CHECK-BE-NEXT:    stxv v3, 16(r3)
 ; CHECK-BE-NEXT:    stxv v2, 0(r3)
 ; CHECK-BE-NEXT:    blr
@@ -223,69 +175,49 @@ entry:
 define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i64>* nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r7, 64
 ; CHECK-P8-NEXT:    li r5, 32
 ; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    lxvd2x vs11, 0, r4
-; CHECK-P8-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    lxvd2x vs8, r4, r7
-; CHECK-P8-NEXT:    li r7, 80
-; CHECK-P8-NEXT:    lxvd2x vs6, r4, r5
-; CHECK-P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    lxvd2x vs7, r4, r6
+; CHECK-P8-NEXT:    li r7, 64
+; CHECK-P8-NEXT:    lxvd2x vs4, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    li r7, 96
+; CHECK-P8-NEXT:    li r7, 80
 ; CHECK-P8-NEXT:    lxvd2x vs3, r4, r7
+; CHECK-P8-NEXT:    li r7, 96
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r7
 ; CHECK-P8-NEXT:    li r7, 112
-; CHECK-P8-NEXT:    xscvuxdsp f30, f11
-; CHECK-P8-NEXT:    xxswapd vs11, vs11
-; CHECK-P8-NEXT:    lxvd2x vs4, r4, r7
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r7
 ; CHECK-P8-NEXT:    li r7, 16
-; CHECK-P8-NEXT:    xscvuxdsp f0, f6
-; CHECK-P8-NEXT:    xxswapd vs6, vs6
-; CHECK-P8-NEXT:    xscvuxdsp f1, f7
-; CHECK-P8-NEXT:    lxvd2x vs9, r4, r7
-; CHECK-P8-NEXT:    xxswapd vs7, vs7
-; CHECK-P8-NEXT:    xscvuxdsp f5, f8
-; CHECK-P8-NEXT:    xxswapd vs8, vs8
-; CHECK-P8-NEXT:    xscvuxdsp f10, f2
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xscvuxdsp f12, f3
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xscvuxdsp f13, f4
-; CHECK-P8-NEXT:    xxswapd vs4, vs4
-; CHECK-P8-NEXT:    xscvuxdsp f31, f9
-; CHECK-P8-NEXT:    xxswapd vs9, vs9
-; CHECK-P8-NEXT:    xscvuxdsp f6, f6
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xscvuxdsp f7, f7
-; CHECK-P8-NEXT:    xscvuxdsp f8, f8
-; CHECK-P8-NEXT:    xxmrghd vs5, vs10, vs5
-; CHECK-P8-NEXT:    xscvuxdsp f2, f2
-; CHECK-P8-NEXT:    xscvuxdsp f3, f3
-; CHECK-P8-NEXT:    xxmrghd vs10, vs13, vs12
-; CHECK-P8-NEXT:    xscvuxdsp f4, f4
-; CHECK-P8-NEXT:    xscvuxdsp f1, f9
-; CHECK-P8-NEXT:    xscvuxdsp f9, f11
-; CHECK-P8-NEXT:    xxmrghd vs11, vs31, vs30
-; CHECK-P8-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-P8-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xxmrghd vs0, vs7, vs6
-; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs8
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs5
-; CHECK-P8-NEXT:    xvcvdpsp v4, vs10
-; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
-; CHECK-P8-NEXT:    xvcvdpsp v5, vs11
-; CHECK-P8-NEXT:    xvcvdpsp v0, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs1, vs9
-; CHECK-P8-NEXT:    xvcvdpsp v1, vs2
-; CHECK-P8-NEXT:    xvcvdpsp v6, vs3
-; CHECK-P8-NEXT:    xvcvdpsp v7, vs1
-; CHECK-P8-NEXT:    vmrgew v2, v0, v2
-; CHECK-P8-NEXT:    vmrgew v3, v1, v3
-; CHECK-P8-NEXT:    vmrgew v4, v6, v4
-; CHECK-P8-NEXT:    vmrgew v5, v7, v5
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    xxswapd v5, vs3
+; CHECK-P8-NEXT:    xvcvuxdsp vs3, v2
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xvcvuxdsp vs0, v3
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xvcvuxdsp vs1, v4
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    xvcvuxdsp vs2, v5
+; CHECK-P8-NEXT:    xxswapd v5, vs4
+; CHECK-P8-NEXT:    xvcvuxdsp vs4, v2
+; CHECK-P8-NEXT:    xvcvuxdsp vs5, v3
+; CHECK-P8-NEXT:    xvcvuxdsp vs6, v4
+; CHECK-P8-NEXT:    xxsldwi v2, vs3, vs3, 3
+; CHECK-P8-NEXT:    xvcvuxdsp vs7, v5
+; CHECK-P8-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-P8-NEXT:    xxsldwi v4, vs1, vs1, 3
+; CHECK-P8-NEXT:    xxsldwi v5, vs2, vs2, 3
+; CHECK-P8-NEXT:    xxsldwi v0, vs4, vs4, 3
+; CHECK-P8-NEXT:    vpkudum v2, v3, v2
+; CHECK-P8-NEXT:    xxsldwi v1, vs5, vs5, 3
+; CHECK-P8-NEXT:    xxsldwi v6, vs6, vs6, 3
+; CHECK-P8-NEXT:    vpkudum v3, v5, v4
+; CHECK-P8-NEXT:    xxsldwi v7, vs7, vs7, 3
+; CHECK-P8-NEXT:    vpkudum v4, v1, v0
+; CHECK-P8-NEXT:    vpkudum v5, v6, v7
 ; CHECK-P8-NEXT:    stvx v2, r3, r7
 ; CHECK-P8-NEXT:    stvx v3, r3, r5
 ; CHECK-P8-NEXT:    stvx v4, r3, r6
@@ -294,130 +226,74 @@ define void @test16elt(<16 x float>* noa
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs4, 48(r4)
-; CHECK-P9-NEXT:    lxv vs5, 32(r4)
-; CHECK-P9-NEXT:    lxv vs6, 16(r4)
-; CHECK-P9-NEXT:    lxv vs7, 0(r4)
-; CHECK-P9-NEXT:    lxv vs8, 112(r4)
-; CHECK-P9-NEXT:    lxv vs9, 96(r4)
-; CHECK-P9-NEXT:    lxv vs10, 80(r4)
-; CHECK-P9-NEXT:    lxv vs11, 64(r4)
-; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    xxswapd vs0, vs7
-; CHECK-P9-NEXT:    xxswapd vs1, vs6
-; CHECK-P9-NEXT:    xxswapd vs2, vs5
-; CHECK-P9-NEXT:    xxswapd vs3, vs4
-; CHECK-P9-NEXT:    xxswapd vs12, vs11
-; CHECK-P9-NEXT:    xxswapd vs13, vs10
-; CHECK-P9-NEXT:    xxswapd vs31, vs9
-; CHECK-P9-NEXT:    xxswapd vs30, vs8
-; CHECK-P9-NEXT:    xscvuxdsp f7, f7
-; CHECK-P9-NEXT:    xscvuxdsp f6, f6
-; CHECK-P9-NEXT:    xscvuxdsp f5, f5
-; CHECK-P9-NEXT:    xscvuxdsp f4, f4
-; CHECK-P9-NEXT:    xscvuxdsp f11, f11
-; CHECK-P9-NEXT:    xscvuxdsp f10, f10
-; CHECK-P9-NEXT:    xscvuxdsp f9, f9
-; CHECK-P9-NEXT:    xscvuxdsp f8, f8
-; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvuxdsp f1, f1
-; CHECK-P9-NEXT:    xscvuxdsp f2, f2
-; CHECK-P9-NEXT:    xscvuxdsp f3, f3
-; CHECK-P9-NEXT:    xscvuxdsp f12, f12
-; CHECK-P9-NEXT:    xscvuxdsp f13, f13
-; CHECK-P9-NEXT:    xscvuxdsp f31, f31
-; CHECK-P9-NEXT:    xscvuxdsp f30, f30
-; CHECK-P9-NEXT:    xxmrghd vs6, vs6, vs7
-; CHECK-P9-NEXT:    xxmrghd vs4, vs4, vs5
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs10, vs11
-; CHECK-P9-NEXT:    xxmrghd vs3, vs8, vs9
-; CHECK-P9-NEXT:    xxmrghd vs5, vs13, vs12
-; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs6
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs4
-; CHECK-P9-NEXT:    xvcvdpsp v4, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v5, vs1
-; CHECK-P9-NEXT:    xvcvdpsp v0, vs5
-; CHECK-P9-NEXT:    xvcvdpsp v1, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v6, vs7
-; CHECK-P9-NEXT:    xvcvdpsp v7, vs3
-; CHECK-P9-NEXT:    vmrgew v2, v2, v4
-; CHECK-P9-NEXT:    vmrgew v3, v3, v5
-; CHECK-P9-NEXT:    vmrgew v4, v1, v0
-; CHECK-P9-NEXT:    vmrgew v5, v7, v6
-; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    lxv v2, 48(r4)
+; CHECK-P9-NEXT:    lxv v3, 32(r4)
+; CHECK-P9-NEXT:    lxv v4, 16(r4)
+; CHECK-P9-NEXT:    lxv v5, 0(r4)
+; CHECK-P9-NEXT:    lxv v0, 112(r4)
+; CHECK-P9-NEXT:    lxv v1, 96(r4)
+; CHECK-P9-NEXT:    lxv v6, 80(r4)
+; CHECK-P9-NEXT:    lxv v7, 64(r4)
+; CHECK-P9-NEXT:    xvcvuxdsp vs0, v5
+; CHECK-P9-NEXT:    xvcvuxdsp vs1, v4
+; CHECK-P9-NEXT:    xvcvuxdsp vs2, v3
+; CHECK-P9-NEXT:    xvcvuxdsp vs3, v2
+; CHECK-P9-NEXT:    xvcvuxdsp vs4, v7
+; CHECK-P9-NEXT:    xvcvuxdsp vs5, v6
+; CHECK-P9-NEXT:    xvcvuxdsp vs6, v1
+; CHECK-P9-NEXT:    xvcvuxdsp vs7, v0
+; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxsldwi v4, vs2, vs2, 3
+; CHECK-P9-NEXT:    xxsldwi v5, vs3, vs3, 3
+; CHECK-P9-NEXT:    xxsldwi v0, vs4, vs4, 3
+; CHECK-P9-NEXT:    xxsldwi v1, vs5, vs5, 3
+; CHECK-P9-NEXT:    xxsldwi v6, vs6, vs6, 3
+; CHECK-P9-NEXT:    xxsldwi v7, vs7, vs7, 3
+; CHECK-P9-NEXT:    vpkudum v2, v3, v2
+; CHECK-P9-NEXT:    vpkudum v3, v5, v4
+; CHECK-P9-NEXT:    vpkudum v4, v1, v0
+; CHECK-P9-NEXT:    vpkudum v5, v7, v6
 ; CHECK-P9-NEXT:    stxv v3, 16(r3)
 ; CHECK-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-P9-NEXT:    stxv v5, 48(r3)
+; CHECK-P9-NEXT:    stxv v4, 32(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs2, 32(r4)
-; CHECK-BE-NEXT:    lxv vs3, 48(r4)
-; CHECK-BE-NEXT:    lxv vs4, 0(r4)
-; CHECK-BE-NEXT:    lxv vs5, 16(r4)
-; CHECK-BE-NEXT:    lxv vs6, 96(r4)
-; CHECK-BE-NEXT:    lxv vs7, 112(r4)
-; CHECK-BE-NEXT:    lxv vs8, 64(r4)
-; CHECK-BE-NEXT:    lxv vs9, 80(r4)
-; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    xxswapd vs0, vs5
-; CHECK-BE-NEXT:    xxswapd vs1, vs4
-; CHECK-BE-NEXT:    xxswapd vs10, vs3
-; CHECK-BE-NEXT:    xxswapd vs11, vs2
-; CHECK-BE-NEXT:    xxswapd vs12, vs9
-; CHECK-BE-NEXT:    xxswapd vs13, vs8
-; CHECK-BE-NEXT:    xxswapd vs31, vs7
-; CHECK-BE-NEXT:    xxswapd vs30, vs6
-; CHECK-BE-NEXT:    xscvuxdsp f5, f5
-; CHECK-BE-NEXT:    xscvuxdsp f4, f4
-; CHECK-BE-NEXT:    xscvuxdsp f3, f3
-; CHECK-BE-NEXT:    xscvuxdsp f2, f2
-; CHECK-BE-NEXT:    xscvuxdsp f9, f9
-; CHECK-BE-NEXT:    xscvuxdsp f8, f8
-; CHECK-BE-NEXT:    xscvuxdsp f7, f7
-; CHECK-BE-NEXT:    xscvuxdsp f6, f6
-; CHECK-BE-NEXT:    xscvuxdsp f0, f0
-; CHECK-BE-NEXT:    xscvuxdsp f1, f1
-; CHECK-BE-NEXT:    xscvuxdsp f10, f10
-; CHECK-BE-NEXT:    xscvuxdsp f11, f11
-; CHECK-BE-NEXT:    xscvuxdsp f12, f12
-; CHECK-BE-NEXT:    xscvuxdsp f13, f13
-; CHECK-BE-NEXT:    xscvuxdsp f31, f31
-; CHECK-BE-NEXT:    xscvuxdsp f30, f30
-; CHECK-BE-NEXT:    xxmrghd vs4, vs4, vs5
-; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs3
-; CHECK-BE-NEXT:    xxmrghd vs3, vs8, vs9
-; CHECK-BE-NEXT:    xxmrghd vs5, vs6, vs7
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs11, vs10
-; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
-; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs4
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v0, vs3
-; CHECK-BE-NEXT:    xvcvdpsp v6, vs5
-; CHECK-BE-NEXT:    xvcvdpsp v4, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v5, vs1
-; CHECK-BE-NEXT:    xvcvdpsp v1, vs6
-; CHECK-BE-NEXT:    xvcvdpsp v7, vs7
-; CHECK-BE-NEXT:    vmrgew v2, v2, v4
-; CHECK-BE-NEXT:    vmrgew v3, v3, v5
-; CHECK-BE-NEXT:    vmrgew v4, v0, v1
-; CHECK-BE-NEXT:    vmrgew v5, v6, v7
-; CHECK-BE-NEXT:    stxv v5, 48(r3)
-; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    lxv v2, 32(r4)
+; CHECK-BE-NEXT:    lxv v3, 48(r4)
+; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-NEXT:    lxv v0, 96(r4)
+; CHECK-BE-NEXT:    lxv v1, 112(r4)
+; CHECK-BE-NEXT:    lxv v6, 64(r4)
+; CHECK-BE-NEXT:    lxv v7, 80(r4)
+; CHECK-BE-NEXT:    xvcvuxdsp vs0, v5
+; CHECK-BE-NEXT:    xvcvuxdsp vs1, v4
+; CHECK-BE-NEXT:    xvcvuxdsp vs2, v3
+; CHECK-BE-NEXT:    xvcvuxdsp vs3, v2
+; CHECK-BE-NEXT:    xvcvuxdsp vs4, v7
+; CHECK-BE-NEXT:    xvcvuxdsp vs5, v6
+; CHECK-BE-NEXT:    xvcvuxdsp vs6, v1
+; CHECK-BE-NEXT:    xvcvuxdsp vs7, v0
+; CHECK-BE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxsldwi v4, vs2, vs2, 3
+; CHECK-BE-NEXT:    xxsldwi v5, vs3, vs3, 3
+; CHECK-BE-NEXT:    xxsldwi v0, vs4, vs4, 3
+; CHECK-BE-NEXT:    xxsldwi v1, vs5, vs5, 3
+; CHECK-BE-NEXT:    xxsldwi v6, vs6, vs6, 3
+; CHECK-BE-NEXT:    xxsldwi v7, vs7, vs7, 3
+; CHECK-BE-NEXT:    vpkudum v2, v3, v2
+; CHECK-BE-NEXT:    vpkudum v3, v5, v4
+; CHECK-BE-NEXT:    vpkudum v4, v1, v0
+; CHECK-BE-NEXT:    vpkudum v5, v7, v6
 ; CHECK-BE-NEXT:    stxv v3, 16(r3)
 ; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    stxv v4, 32(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i64>, <16 x i64>* %0, align 128
@@ -479,51 +355,35 @@ define <4 x float> @test4elt_signed(<4 x
 ; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
-; CHECK-P8-NEXT:    xxswapd vs3, vs1
-; CHECK-P8-NEXT:    xscvsxdsp f1, f1
-; CHECK-P8-NEXT:    xxswapd vs2, vs0
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
-; CHECK-P8-NEXT:    xscvsxdsp f3, f3
-; CHECK-P8-NEXT:    xscvsxdsp f2, f2
-; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs1
-; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xvcvsxdsp vs1, v3
+; CHECK-P8-NEXT:    xvcvsxdsp vs0, v2
+; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P8-NEXT:    vpkudum v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r3)
-; CHECK-P9-NEXT:    lxv vs1, 0(r3)
-; CHECK-P9-NEXT:    xxswapd vs2, vs1
-; CHECK-P9-NEXT:    xxswapd vs3, vs0
-; CHECK-P9-NEXT:    xscvsxdsp f1, f1
-; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvsxdsp f2, f2
-; CHECK-P9-NEXT:    xscvsxdsp f3, f3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs1
-; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs2
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    lxv v2, 16(r3)
+; CHECK-P9-NEXT:    lxv v3, 0(r3)
+; CHECK-P9-NEXT:    xvcvsxdsp vs0, v3
+; CHECK-P9-NEXT:    xvcvsxdsp vs1, v2
+; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P9-NEXT:    vpkudum v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    xxswapd vs3, vs0
-; CHECK-BE-NEXT:    xscvsxdsp f1, f1
-; CHECK-BE-NEXT:    xscvsxdsp f0, f0
-; CHECK-BE-NEXT:    xscvsxdsp f2, f2
-; CHECK-BE-NEXT:    xscvsxdsp f3, f3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
-; CHECK-BE-NEXT:    vmrgew v2, v2, v3
+; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxv v3, 16(r3)
+; CHECK-BE-NEXT:    xvcvsxdsp vs0, v3
+; CHECK-BE-NEXT:    xvcvsxdsp vs1, v2
+; CHECK-BE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-BE-NEXT:    vpkudum v2, v3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <4 x i64>, <4 x i64>* %0, align 32
@@ -541,92 +401,60 @@ define void @test8elt_signed(<8 x float>
 ; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
-; CHECK-P8-NEXT:    xxswapd vs7, vs3
-; CHECK-P8-NEXT:    xscvsxdsp f3, f3
-; CHECK-P8-NEXT:    xxswapd vs4, vs0
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
-; CHECK-P8-NEXT:    xxswapd vs5, vs1
-; CHECK-P8-NEXT:    xscvsxdsp f1, f1
-; CHECK-P8-NEXT:    xxswapd vs6, vs2
-; CHECK-P8-NEXT:    xscvsxdsp f2, f2
-; CHECK-P8-NEXT:    xscvsxdsp f4, f4
-; CHECK-P8-NEXT:    xscvsxdsp f5, f5
-; CHECK-P8-NEXT:    xscvsxdsp f6, f6
-; CHECK-P8-NEXT:    xscvsxdsp f7, f7
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
-; CHECK-P8-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P8-NEXT:    xxmrghd vs0, vs6, vs7
-; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P8-NEXT:    xvcvdpsp v5, vs0
-; CHECK-P8-NEXT:    vmrgew v2, v4, v2
-; CHECK-P8-NEXT:    vmrgew v3, v5, v3
+; CHECK-P8-NEXT:    xxswapd v5, vs3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    xvcvsxdsp vs3, v5
+; CHECK-P8-NEXT:    xvcvsxdsp vs0, v2
+; CHECK-P8-NEXT:    xvcvsxdsp vs1, v3
+; CHECK-P8-NEXT:    xvcvsxdsp vs2, v4
+; CHECK-P8-NEXT:    xxsldwi v5, vs3, vs3, 3
+; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P8-NEXT:    xxsldwi v4, vs2, vs2, 3
+; CHECK-P8-NEXT:    vpkudum v2, v3, v2
+; CHECK-P8-NEXT:    vpkudum v3, v4, v5
 ; CHECK-P8-NEXT:    stvx v2, r3, r5
 ; CHECK-P8-NEXT:    stvx v3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 48(r4)
-; CHECK-P9-NEXT:    lxv vs1, 32(r4)
-; CHECK-P9-NEXT:    lxv vs2, 16(r4)
-; CHECK-P9-NEXT:    lxv vs3, 0(r4)
-; CHECK-P9-NEXT:    xxswapd vs4, vs3
-; CHECK-P9-NEXT:    xxswapd vs5, vs2
-; CHECK-P9-NEXT:    xxswapd vs6, vs1
-; CHECK-P9-NEXT:    xxswapd vs7, vs0
-; CHECK-P9-NEXT:    xscvsxdsp f3, f3
-; CHECK-P9-NEXT:    xscvsxdsp f2, f2
-; CHECK-P9-NEXT:    xscvsxdsp f1, f1
-; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvsxdsp f4, f4
-; CHECK-P9-NEXT:    xscvsxdsp f5, f5
-; CHECK-P9-NEXT:    xscvsxdsp f6, f6
-; CHECK-P9-NEXT:    xscvsxdsp f7, f7
-; CHECK-P9-NEXT:    xxmrghd vs2, vs2, vs3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs1
-; CHECK-P9-NEXT:    xxmrghd vs4, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v5, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs4
-; CHECK-P9-NEXT:    xvcvdpsp v4, vs3
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
-; CHECK-P9-NEXT:    vmrgew v3, v5, v4
+; CHECK-P9-NEXT:    lxv v2, 48(r4)
+; CHECK-P9-NEXT:    lxv v3, 32(r4)
+; CHECK-P9-NEXT:    lxv v4, 16(r4)
+; CHECK-P9-NEXT:    lxv v5, 0(r4)
+; CHECK-P9-NEXT:    xvcvsxdsp vs0, v5
+; CHECK-P9-NEXT:    xvcvsxdsp vs1, v4
+; CHECK-P9-NEXT:    xvcvsxdsp vs2, v3
+; CHECK-P9-NEXT:    xvcvsxdsp vs3, v2
+; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxsldwi v4, vs2, vs2, 3
+; CHECK-P9-NEXT:    xxsldwi v5, vs3, vs3, 3
+; CHECK-P9-NEXT:    vpkudum v2, v3, v2
+; CHECK-P9-NEXT:    vpkudum v3, v5, v4
 ; CHECK-P9-NEXT:    stxv v3, 16(r3)
 ; CHECK-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 32(r4)
-; CHECK-BE-NEXT:    lxv vs1, 48(r4)
-; CHECK-BE-NEXT:    lxv vs2, 0(r4)
-; CHECK-BE-NEXT:    lxv vs3, 16(r4)
-; CHECK-BE-NEXT:    xxswapd vs4, vs3
-; CHECK-BE-NEXT:    xxswapd vs5, vs2
-; CHECK-BE-NEXT:    xxswapd vs6, vs1
-; CHECK-BE-NEXT:    xxswapd vs7, vs0
-; CHECK-BE-NEXT:    xscvsxdsp f3, f3
-; CHECK-BE-NEXT:    xscvsxdsp f2, f2
-; CHECK-BE-NEXT:    xscvsxdsp f1, f1
-; CHECK-BE-NEXT:    xscvsxdsp f0, f0
-; CHECK-BE-NEXT:    xscvsxdsp f4, f4
-; CHECK-BE-NEXT:    xscvsxdsp f5, f5
-; CHECK-BE-NEXT:    xscvsxdsp f6, f6
-; CHECK-BE-NEXT:    xscvsxdsp f7, f7
-; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
-; CHECK-BE-NEXT:    xxmrghd vs3, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghd vs1, vs7, vs6
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v4, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs3
-; CHECK-BE-NEXT:    xvcvdpsp v5, vs1
-; CHECK-BE-NEXT:    vmrgew v2, v2, v3
-; CHECK-BE-NEXT:    vmrgew v3, v4, v5
+; CHECK-BE-NEXT:    lxv v2, 32(r4)
+; CHECK-BE-NEXT:    lxv v3, 48(r4)
+; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-NEXT:    xvcvsxdsp vs0, v5
+; CHECK-BE-NEXT:    xvcvsxdsp vs1, v4
+; CHECK-BE-NEXT:    xvcvsxdsp vs2, v3
+; CHECK-BE-NEXT:    xvcvsxdsp vs3, v2
+; CHECK-BE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxsldwi v4, vs2, vs2, 3
+; CHECK-BE-NEXT:    xxsldwi v5, vs3, vs3, 3
+; CHECK-BE-NEXT:    vpkudum v2, v3, v2
+; CHECK-BE-NEXT:    vpkudum v3, v5, v4
 ; CHECK-BE-NEXT:    stxv v3, 16(r3)
 ; CHECK-BE-NEXT:    stxv v2, 0(r3)
 ; CHECK-BE-NEXT:    blr
@@ -640,69 +468,49 @@ entry:
 define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result, <16 x i64>* nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r7, 64
 ; CHECK-P8-NEXT:    li r5, 32
 ; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    lxvd2x vs11, 0, r4
-; CHECK-P8-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    lxvd2x vs8, r4, r7
-; CHECK-P8-NEXT:    li r7, 80
-; CHECK-P8-NEXT:    lxvd2x vs6, r4, r5
-; CHECK-P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    lxvd2x vs7, r4, r6
+; CHECK-P8-NEXT:    li r7, 64
+; CHECK-P8-NEXT:    lxvd2x vs4, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    li r7, 96
+; CHECK-P8-NEXT:    li r7, 80
 ; CHECK-P8-NEXT:    lxvd2x vs3, r4, r7
+; CHECK-P8-NEXT:    li r7, 96
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r7
 ; CHECK-P8-NEXT:    li r7, 112
-; CHECK-P8-NEXT:    xscvsxdsp f30, f11
-; CHECK-P8-NEXT:    xxswapd vs11, vs11
-; CHECK-P8-NEXT:    lxvd2x vs4, r4, r7
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r7
 ; CHECK-P8-NEXT:    li r7, 16
-; CHECK-P8-NEXT:    xscvsxdsp f0, f6
-; CHECK-P8-NEXT:    xxswapd vs6, vs6
-; CHECK-P8-NEXT:    xscvsxdsp f1, f7
-; CHECK-P8-NEXT:    lxvd2x vs9, r4, r7
-; CHECK-P8-NEXT:    xxswapd vs7, vs7
-; CHECK-P8-NEXT:    xscvsxdsp f5, f8
-; CHECK-P8-NEXT:    xxswapd vs8, vs8
-; CHECK-P8-NEXT:    xscvsxdsp f10, f2
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xscvsxdsp f12, f3
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xscvsxdsp f13, f4
-; CHECK-P8-NEXT:    xxswapd vs4, vs4
-; CHECK-P8-NEXT:    xscvsxdsp f31, f9
-; CHECK-P8-NEXT:    xxswapd vs9, vs9
-; CHECK-P8-NEXT:    xscvsxdsp f6, f6
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xscvsxdsp f7, f7
-; CHECK-P8-NEXT:    xscvsxdsp f8, f8
-; CHECK-P8-NEXT:    xxmrghd vs5, vs10, vs5
-; CHECK-P8-NEXT:    xscvsxdsp f2, f2
-; CHECK-P8-NEXT:    xscvsxdsp f3, f3
-; CHECK-P8-NEXT:    xxmrghd vs10, vs13, vs12
-; CHECK-P8-NEXT:    xscvsxdsp f4, f4
-; CHECK-P8-NEXT:    xscvsxdsp f1, f9
-; CHECK-P8-NEXT:    xscvsxdsp f9, f11
-; CHECK-P8-NEXT:    xxmrghd vs11, vs31, vs30
-; CHECK-P8-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-P8-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xxmrghd vs0, vs7, vs6
-; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs8
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs5
-; CHECK-P8-NEXT:    xvcvdpsp v4, vs10
-; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
-; CHECK-P8-NEXT:    xvcvdpsp v5, vs11
-; CHECK-P8-NEXT:    xvcvdpsp v0, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs1, vs9
-; CHECK-P8-NEXT:    xvcvdpsp v1, vs2
-; CHECK-P8-NEXT:    xvcvdpsp v6, vs3
-; CHECK-P8-NEXT:    xvcvdpsp v7, vs1
-; CHECK-P8-NEXT:    vmrgew v2, v0, v2
-; CHECK-P8-NEXT:    vmrgew v3, v1, v3
-; CHECK-P8-NEXT:    vmrgew v4, v6, v4
-; CHECK-P8-NEXT:    vmrgew v5, v7, v5
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    xxswapd v5, vs3
+; CHECK-P8-NEXT:    xvcvsxdsp vs3, v2
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xvcvsxdsp vs0, v3
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xvcvsxdsp vs1, v4
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    xvcvsxdsp vs2, v5
+; CHECK-P8-NEXT:    xxswapd v5, vs4
+; CHECK-P8-NEXT:    xvcvsxdsp vs4, v2
+; CHECK-P8-NEXT:    xvcvsxdsp vs5, v3
+; CHECK-P8-NEXT:    xvcvsxdsp vs6, v4
+; CHECK-P8-NEXT:    xxsldwi v2, vs3, vs3, 3
+; CHECK-P8-NEXT:    xvcvsxdsp vs7, v5
+; CHECK-P8-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-P8-NEXT:    xxsldwi v4, vs1, vs1, 3
+; CHECK-P8-NEXT:    xxsldwi v5, vs2, vs2, 3
+; CHECK-P8-NEXT:    xxsldwi v0, vs4, vs4, 3
+; CHECK-P8-NEXT:    vpkudum v2, v3, v2
+; CHECK-P8-NEXT:    xxsldwi v1, vs5, vs5, 3
+; CHECK-P8-NEXT:    xxsldwi v6, vs6, vs6, 3
+; CHECK-P8-NEXT:    vpkudum v3, v5, v4
+; CHECK-P8-NEXT:    xxsldwi v7, vs7, vs7, 3
+; CHECK-P8-NEXT:    vpkudum v4, v1, v0
+; CHECK-P8-NEXT:    vpkudum v5, v6, v7
 ; CHECK-P8-NEXT:    stvx v2, r3, r7
 ; CHECK-P8-NEXT:    stvx v3, r3, r5
 ; CHECK-P8-NEXT:    stvx v4, r3, r6
@@ -711,130 +519,74 @@ define void @test16elt_signed(<16 x floa
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs4, 48(r4)
-; CHECK-P9-NEXT:    lxv vs5, 32(r4)
-; CHECK-P9-NEXT:    lxv vs6, 16(r4)
-; CHECK-P9-NEXT:    lxv vs7, 0(r4)
-; CHECK-P9-NEXT:    lxv vs8, 112(r4)
-; CHECK-P9-NEXT:    lxv vs9, 96(r4)
-; CHECK-P9-NEXT:    lxv vs10, 80(r4)
-; CHECK-P9-NEXT:    lxv vs11, 64(r4)
-; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    xxswapd vs0, vs7
-; CHECK-P9-NEXT:    xxswapd vs1, vs6
-; CHECK-P9-NEXT:    xxswapd vs2, vs5
-; CHECK-P9-NEXT:    xxswapd vs3, vs4
-; CHECK-P9-NEXT:    xxswapd vs12, vs11
-; CHECK-P9-NEXT:    xxswapd vs13, vs10
-; CHECK-P9-NEXT:    xxswapd vs31, vs9
-; CHECK-P9-NEXT:    xxswapd vs30, vs8
-; CHECK-P9-NEXT:    xscvsxdsp f7, f7
-; CHECK-P9-NEXT:    xscvsxdsp f6, f6
-; CHECK-P9-NEXT:    xscvsxdsp f5, f5
-; CHECK-P9-NEXT:    xscvsxdsp f4, f4
-; CHECK-P9-NEXT:    xscvsxdsp f11, f11
-; CHECK-P9-NEXT:    xscvsxdsp f10, f10
-; CHECK-P9-NEXT:    xscvsxdsp f9, f9
-; CHECK-P9-NEXT:    xscvsxdsp f8, f8
-; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvsxdsp f1, f1
-; CHECK-P9-NEXT:    xscvsxdsp f2, f2
-; CHECK-P9-NEXT:    xscvsxdsp f3, f3
-; CHECK-P9-NEXT:    xscvsxdsp f12, f12
-; CHECK-P9-NEXT:    xscvsxdsp f13, f13
-; CHECK-P9-NEXT:    xscvsxdsp f31, f31
-; CHECK-P9-NEXT:    xscvsxdsp f30, f30
-; CHECK-P9-NEXT:    xxmrghd vs6, vs6, vs7
-; CHECK-P9-NEXT:    xxmrghd vs4, vs4, vs5
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs10, vs11
-; CHECK-P9-NEXT:    xxmrghd vs3, vs8, vs9
-; CHECK-P9-NEXT:    xxmrghd vs5, vs13, vs12
-; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs6
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs4
-; CHECK-P9-NEXT:    xvcvdpsp v4, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v5, vs1
-; CHECK-P9-NEXT:    xvcvdpsp v0, vs5
-; CHECK-P9-NEXT:    xvcvdpsp v1, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v6, vs7
-; CHECK-P9-NEXT:    xvcvdpsp v7, vs3
-; CHECK-P9-NEXT:    vmrgew v2, v2, v4
-; CHECK-P9-NEXT:    vmrgew v3, v3, v5
-; CHECK-P9-NEXT:    vmrgew v4, v1, v0
-; CHECK-P9-NEXT:    vmrgew v5, v7, v6
-; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    lxv v2, 48(r4)
+; CHECK-P9-NEXT:    lxv v3, 32(r4)
+; CHECK-P9-NEXT:    lxv v4, 16(r4)
+; CHECK-P9-NEXT:    lxv v5, 0(r4)
+; CHECK-P9-NEXT:    lxv v0, 112(r4)
+; CHECK-P9-NEXT:    lxv v1, 96(r4)
+; CHECK-P9-NEXT:    lxv v6, 80(r4)
+; CHECK-P9-NEXT:    lxv v7, 64(r4)
+; CHECK-P9-NEXT:    xvcvsxdsp vs0, v5
+; CHECK-P9-NEXT:    xvcvsxdsp vs1, v4
+; CHECK-P9-NEXT:    xvcvsxdsp vs2, v3
+; CHECK-P9-NEXT:    xvcvsxdsp vs3, v2
+; CHECK-P9-NEXT:    xvcvsxdsp vs4, v7
+; CHECK-P9-NEXT:    xvcvsxdsp vs5, v6
+; CHECK-P9-NEXT:    xvcvsxdsp vs6, v1
+; CHECK-P9-NEXT:    xvcvsxdsp vs7, v0
+; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxsldwi v4, vs2, vs2, 3
+; CHECK-P9-NEXT:    xxsldwi v5, vs3, vs3, 3
+; CHECK-P9-NEXT:    xxsldwi v0, vs4, vs4, 3
+; CHECK-P9-NEXT:    xxsldwi v1, vs5, vs5, 3
+; CHECK-P9-NEXT:    xxsldwi v6, vs6, vs6, 3
+; CHECK-P9-NEXT:    xxsldwi v7, vs7, vs7, 3
+; CHECK-P9-NEXT:    vpkudum v2, v3, v2
+; CHECK-P9-NEXT:    vpkudum v3, v5, v4
+; CHECK-P9-NEXT:    vpkudum v4, v1, v0
+; CHECK-P9-NEXT:    vpkudum v5, v7, v6
 ; CHECK-P9-NEXT:    stxv v3, 16(r3)
 ; CHECK-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-P9-NEXT:    stxv v5, 48(r3)
+; CHECK-P9-NEXT:    stxv v4, 32(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs2, 32(r4)
-; CHECK-BE-NEXT:    lxv vs3, 48(r4)
-; CHECK-BE-NEXT:    lxv vs4, 0(r4)
-; CHECK-BE-NEXT:    lxv vs5, 16(r4)
-; CHECK-BE-NEXT:    lxv vs6, 96(r4)
-; CHECK-BE-NEXT:    lxv vs7, 112(r4)
-; CHECK-BE-NEXT:    lxv vs8, 64(r4)
-; CHECK-BE-NEXT:    lxv vs9, 80(r4)
-; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    xxswapd vs0, vs5
-; CHECK-BE-NEXT:    xxswapd vs1, vs4
-; CHECK-BE-NEXT:    xxswapd vs10, vs3
-; CHECK-BE-NEXT:    xxswapd vs11, vs2
-; CHECK-BE-NEXT:    xxswapd vs12, vs9
-; CHECK-BE-NEXT:    xxswapd vs13, vs8
-; CHECK-BE-NEXT:    xxswapd vs31, vs7
-; CHECK-BE-NEXT:    xxswapd vs30, vs6
-; CHECK-BE-NEXT:    xscvsxdsp f5, f5
-; CHECK-BE-NEXT:    xscvsxdsp f4, f4
-; CHECK-BE-NEXT:    xscvsxdsp f3, f3
-; CHECK-BE-NEXT:    xscvsxdsp f2, f2
-; CHECK-BE-NEXT:    xscvsxdsp f9, f9
-; CHECK-BE-NEXT:    xscvsxdsp f8, f8
-; CHECK-BE-NEXT:    xscvsxdsp f7, f7
-; CHECK-BE-NEXT:    xscvsxdsp f6, f6
-; CHECK-BE-NEXT:    xscvsxdsp f0, f0
-; CHECK-BE-NEXT:    xscvsxdsp f1, f1
-; CHECK-BE-NEXT:    xscvsxdsp f10, f10
-; CHECK-BE-NEXT:    xscvsxdsp f11, f11
-; CHECK-BE-NEXT:    xscvsxdsp f12, f12
-; CHECK-BE-NEXT:    xscvsxdsp f13, f13
-; CHECK-BE-NEXT:    xscvsxdsp f31, f31
-; CHECK-BE-NEXT:    xscvsxdsp f30, f30
-; CHECK-BE-NEXT:    xxmrghd vs4, vs4, vs5
-; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs3
-; CHECK-BE-NEXT:    xxmrghd vs3, vs8, vs9
-; CHECK-BE-NEXT:    xxmrghd vs5, vs6, vs7
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs11, vs10
-; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
-; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs4
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v0, vs3
-; CHECK-BE-NEXT:    xvcvdpsp v6, vs5
-; CHECK-BE-NEXT:    xvcvdpsp v4, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v5, vs1
-; CHECK-BE-NEXT:    xvcvdpsp v1, vs6
-; CHECK-BE-NEXT:    xvcvdpsp v7, vs7
-; CHECK-BE-NEXT:    vmrgew v2, v2, v4
-; CHECK-BE-NEXT:    vmrgew v3, v3, v5
-; CHECK-BE-NEXT:    vmrgew v4, v0, v1
-; CHECK-BE-NEXT:    vmrgew v5, v6, v7
-; CHECK-BE-NEXT:    stxv v5, 48(r3)
-; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    lxv v2, 32(r4)
+; CHECK-BE-NEXT:    lxv v3, 48(r4)
+; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-NEXT:    lxv v0, 96(r4)
+; CHECK-BE-NEXT:    lxv v1, 112(r4)
+; CHECK-BE-NEXT:    lxv v6, 64(r4)
+; CHECK-BE-NEXT:    lxv v7, 80(r4)
+; CHECK-BE-NEXT:    xvcvsxdsp vs0, v5
+; CHECK-BE-NEXT:    xvcvsxdsp vs1, v4
+; CHECK-BE-NEXT:    xvcvsxdsp vs2, v3
+; CHECK-BE-NEXT:    xvcvsxdsp vs3, v2
+; CHECK-BE-NEXT:    xvcvsxdsp vs4, v7
+; CHECK-BE-NEXT:    xvcvsxdsp vs5, v6
+; CHECK-BE-NEXT:    xvcvsxdsp vs6, v1
+; CHECK-BE-NEXT:    xvcvsxdsp vs7, v0
+; CHECK-BE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxsldwi v4, vs2, vs2, 3
+; CHECK-BE-NEXT:    xxsldwi v5, vs3, vs3, 3
+; CHECK-BE-NEXT:    xxsldwi v0, vs4, vs4, 3
+; CHECK-BE-NEXT:    xxsldwi v1, vs5, vs5, 3
+; CHECK-BE-NEXT:    xxsldwi v6, vs6, vs6, 3
+; CHECK-BE-NEXT:    xxsldwi v7, vs7, vs7, 3
+; CHECK-BE-NEXT:    vpkudum v2, v3, v2
+; CHECK-BE-NEXT:    vpkudum v3, v5, v4
+; CHECK-BE-NEXT:    vpkudum v4, v1, v0
+; CHECK-BE-NEXT:    vpkudum v5, v7, v6
 ; CHECK-BE-NEXT:    stxv v3, 16(r3)
 ; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    stxv v4, 32(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i64>, <16 x i64>* %0, align 128

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll?rev=350155&r1=350154&r2=350155&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll Sat Dec 29 05:40:48 2018
@@ -80,89 +80,36 @@ entry:
 define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt:
 ; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
 ; CHECK-P8-NEXT:    mtvsrd f0, r3
-; CHECK-P8-NEXT:    mfvsrd r3, f0
-; CHECK-P8-NEXT:    clrldi r4, r3, 56
-; CHECK-P8-NEXT:    rldicl r5, r3, 48, 56
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f0, r4
-; CHECK-P8-NEXT:    rldicl r4, r3, 56, 56
-; CHECK-P8-NEXT:    rldicl r3, r3, 40, 56
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r3, r3, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f1, r5
-; CHECK-P8-NEXT:    mtvsrwz f2, r4
-; CHECK-P8-NEXT:    mtvsrwz f3, r3
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
-; CHECK-P8-NEXT:    xscvuxdsp f1, f1
-; CHECK-P8-NEXT:    xscvuxdsp f2, f2
-; CHECK-P8-NEXT:    xscvuxdsp f3, f3
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    addi r3, r4, .LCPI1_0@toc@l
+; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrws v2, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    li r4, 2
-; CHECK-P9-NEXT:    li r5, 1
-; CHECK-P9-NEXT:    li r6, 3
-; CHECK-P9-NEXT:    vextubrx r3, r3, v2
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
-; CHECK-P9-NEXT:    vextubrx r5, r5, v2
-; CHECK-P9-NEXT:    vextubrx r6, r6, v2
-; CHECK-P9-NEXT:    rlwinm r3, r3, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P9-NEXT:    mtvsrwz f0, r3
-; CHECK-P9-NEXT:    mtvsrwz f1, r4
-; CHECK-P9-NEXT:    mtvsrwz f2, r5
-; CHECK-P9-NEXT:    mtvsrwz f3, r6
-; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvuxdsp f1, f1
-; CHECK-P9-NEXT:    xscvuxdsp f2, f2
-; CHECK-P9-NEXT:    xscvuxdsp f3, f3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
+; CHECK-P9-NEXT:    mtvsrws v3, r3
+; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI1_0@toc@l
+; CHECK-P9-NEXT:    lxvx v2, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v4, v3, v2
+; CHECK-P9-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrws v2, r3
-; CHECK-BE-NEXT:    li r3, 3
-; CHECK-BE-NEXT:    li r4, 1
-; CHECK-BE-NEXT:    li r5, 2
-; CHECK-BE-NEXT:    li r6, 0
-; CHECK-BE-NEXT:    vextublx r3, r3, v2
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
-; CHECK-BE-NEXT:    vextublx r5, r5, v2
-; CHECK-BE-NEXT:    vextublx r6, r6, v2
-; CHECK-BE-NEXT:    rlwinm r3, r3, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-BE-NEXT:    mtvsrwz f0, r3
-; CHECK-BE-NEXT:    mtvsrwz f1, r4
-; CHECK-BE-NEXT:    mtvsrwz f2, r5
-; CHECK-BE-NEXT:    mtvsrwz f3, r6
-; CHECK-BE-NEXT:    xscvuxdsp f0, f0
-; CHECK-BE-NEXT:    xscvuxdsp f1, f1
-; CHECK-BE-NEXT:    xscvuxdsp f2, f2
-; CHECK-BE-NEXT:    xscvuxdsp f3, f3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
-; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
+; CHECK-BE-NEXT:    mtvsrws v3, r3
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
+; CHECK-BE-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i32 %a.coerce to <4 x i8>
@@ -173,168 +120,59 @@ entry:
 define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, i64 %a.coerce) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
 ; CHECK-P8-NEXT:    mtvsrd f0, r4
-; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    mfvsrd r4, f0
-; CHECK-P8-NEXT:    clrldi r6, r4, 56
-; CHECK-P8-NEXT:    rldicl r7, r4, 48, 56
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f0, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 56, 56
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f1, r7
-; CHECK-P8-NEXT:    rldicl r7, r4, 40, 56
-; CHECK-P8-NEXT:    mtvsrwz f2, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 32, 56
-; CHECK-P8-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f3, r7
-; CHECK-P8-NEXT:    rldicl r7, r4, 16, 56
-; CHECK-P8-NEXT:    mtvsrwz f4, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 24, 56
-; CHECK-P8-NEXT:    rldicl r4, r4, 8, 56
-; CHECK-P8-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f5, r7
-; CHECK-P8-NEXT:    mtvsrwz f6, r6
-; CHECK-P8-NEXT:    mtvsrwz f7, r4
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
-; CHECK-P8-NEXT:    xscvuxdsp f1, f1
-; CHECK-P8-NEXT:    xscvuxdsp f2, f2
-; CHECK-P8-NEXT:    xscvuxdsp f3, f3
-; CHECK-P8-NEXT:    xscvuxdsp f4, f4
-; CHECK-P8-NEXT:    xscvuxdsp f5, f5
-; CHECK-P8-NEXT:    xscvuxdsp f6, f6
-; CHECK-P8-NEXT:    xscvuxdsp f7, f7
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P8-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P8-NEXT:    xvcvdpsp v5, vs3
-; CHECK-P8-NEXT:    vmrgew v2, v3, v2
-; CHECK-P8-NEXT:    vmrgew v3, v5, v4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_1@toc@l
+; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    lvx v5, 0, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    vperm v2, v4, v3, v2
+; CHECK-P8-NEXT:    vperm v3, v4, v3, v5
+; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
+; CHECK-P8-NEXT:    xvcvuxwsp v3, v3
 ; CHECK-P8-NEXT:    stvx v2, 0, r3
-; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    stvx v3, r3, r4
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI2_1@toc@ha
 ; CHECK-P9-NEXT:    mtvsrd f0, r4
-; CHECK-P9-NEXT:    li r4, 0
-; CHECK-P9-NEXT:    li r5, 2
-; CHECK-P9-NEXT:    li r6, 1
-; CHECK-P9-NEXT:    li r7, 3
-; CHECK-P9-NEXT:    li r8, 4
-; CHECK-P9-NEXT:    li r9, 6
-; CHECK-P9-NEXT:    li r10, 5
-; CHECK-P9-NEXT:    li r11, 7
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
-; CHECK-P9-NEXT:    vextubrx r5, r5, v2
-; CHECK-P9-NEXT:    vextubrx r6, r6, v2
-; CHECK-P9-NEXT:    vextubrx r7, r7, v2
-; CHECK-P9-NEXT:    vextubrx r8, r8, v2
-; CHECK-P9-NEXT:    vextubrx r9, r9, v2
-; CHECK-P9-NEXT:    vextubrx r10, r10, v2
-; CHECK-P9-NEXT:    vextubrx r11, r11, v2
-; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r8, r8, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r9, r9, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r10, r10, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r11, r11, 0, 24, 31
-; CHECK-P9-NEXT:    mtvsrwz f0, r4
-; CHECK-P9-NEXT:    mtvsrwz f1, r5
-; CHECK-P9-NEXT:    mtvsrwz f2, r6
-; CHECK-P9-NEXT:    mtvsrwz f3, r7
-; CHECK-P9-NEXT:    mtvsrwz f4, r8
-; CHECK-P9-NEXT:    mtvsrwz f5, r9
-; CHECK-P9-NEXT:    mtvsrwz f6, r10
-; CHECK-P9-NEXT:    mtvsrwz f7, r11
-; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvuxdsp f1, f1
-; CHECK-P9-NEXT:    xscvuxdsp f2, f2
-; CHECK-P9-NEXT:    xscvuxdsp f3, f3
-; CHECK-P9-NEXT:    xscvuxdsp f4, f4
-; CHECK-P9-NEXT:    xscvuxdsp f5, f5
-; CHECK-P9-NEXT:    xscvuxdsp f6, f6
-; CHECK-P9-NEXT:    xscvuxdsp f7, f7
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
-; CHECK-P9-NEXT:    vmrgew v3, v5, v4
-; CHECK-P9-NEXT:    stxv v3, 16(r3)
-; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    xxlxor v5, v5, v5
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI2_0@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI2_1@toc@l
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    lxvx v2, 0, r5
+; CHECK-P9-NEXT:    lxvx v3, 0, r6
+; CHECK-P9-NEXT:    vperm v2, v5, v4, v2
+; CHECK-P9-NEXT:    vperm v3, v5, v4, v3
+; CHECK-P9-NEXT:    xvcvuxwsp vs0, v2
+; CHECK-P9-NEXT:    xvcvuxwsp vs1, v3
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    li r5, 3
-; CHECK-BE-NEXT:    mtvsrd v2, r4
-; CHECK-BE-NEXT:    li r4, 1
-; CHECK-BE-NEXT:    li r6, 2
-; CHECK-BE-NEXT:    li r7, 0
-; CHECK-BE-NEXT:    li r8, 7
-; CHECK-BE-NEXT:    li r9, 5
-; CHECK-BE-NEXT:    li r10, 6
-; CHECK-BE-NEXT:    li r11, 4
-; CHECK-BE-NEXT:    vextublx r5, r5, v2
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
-; CHECK-BE-NEXT:    vextublx r6, r6, v2
-; CHECK-BE-NEXT:    vextublx r7, r7, v2
-; CHECK-BE-NEXT:    vextublx r8, r8, v2
-; CHECK-BE-NEXT:    vextublx r9, r9, v2
-; CHECK-BE-NEXT:    vextublx r10, r10, v2
-; CHECK-BE-NEXT:    vextublx r11, r11, v2
-; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r8, r8, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r9, r9, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r10, r10, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r11, r11, 0, 24, 31
-; CHECK-BE-NEXT:    mtvsrwz f0, r5
-; CHECK-BE-NEXT:    mtvsrwz f1, r4
-; CHECK-BE-NEXT:    mtvsrwz f2, r6
-; CHECK-BE-NEXT:    mtvsrwz f3, r7
-; CHECK-BE-NEXT:    mtvsrwz f4, r8
-; CHECK-BE-NEXT:    mtvsrwz f5, r9
-; CHECK-BE-NEXT:    mtvsrwz f6, r10
-; CHECK-BE-NEXT:    mtvsrwz f7, r11
-; CHECK-BE-NEXT:    xscvuxdsp f0, f0
-; CHECK-BE-NEXT:    xscvuxdsp f1, f1
-; CHECK-BE-NEXT:    xscvuxdsp f2, f2
-; CHECK-BE-NEXT:    xscvuxdsp f3, f3
-; CHECK-BE-NEXT:    xscvuxdsp f4, f4
-; CHECK-BE-NEXT:    xscvuxdsp f5, f5
-; CHECK-BE-NEXT:    xscvuxdsp f6, f6
-; CHECK-BE-NEXT:    xscvuxdsp f7, f7
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
-; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
-; CHECK-BE-NEXT:    vmrgew v2, v3, v2
-; CHECK-BE-NEXT:    vmrgew v3, v5, v4
-; CHECK-BE-NEXT:    stxv v2, 0(r3)
-; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI2_1@toc@ha
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    xxlxor v5, v5, v5
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI2_0@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI2_1@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r5
+; CHECK-BE-NEXT:    lxvx v3, 0, r6
+; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
+; CHECK-BE-NEXT:    vperm v3, v5, v4, v3
+; CHECK-BE-NEXT:    xvcvuxwsp vs0, v2
+; CHECK-BE-NEXT:    xvcvuxwsp vs1, v3
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <8 x i8>
@@ -346,348 +184,92 @@ entry:
 define void @test16elt(<16 x float>* noalias nocapture sret %agg.result, <16 x i8> %a) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    mfvsrd r4, v2
-; CHECK-P8-NEXT:    xxswapd vs2, v2
-; CHECK-P8-NEXT:    clrldi r5, r4, 56
-; CHECK-P8-NEXT:    rldicl r6, r4, 48, 56
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f0, r5
-; CHECK-P8-NEXT:    rldicl r5, r4, 40, 56
-; CHECK-P8-NEXT:    rldicl r7, r4, 56, 56
-; CHECK-P8-NEXT:    mtvsrwz f1, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 32, 56
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f4, r5
-; CHECK-P8-NEXT:    rldicl r5, r4, 16, 56
-; CHECK-P8-NEXT:    mtvsrwz f3, r7
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f5, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 24, 56
-; CHECK-P8-NEXT:    rldicl r4, r4, 8, 56
-; CHECK-P8-NEXT:    mfvsrd r7, f2
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f2, r5
-; CHECK-P8-NEXT:    rlwinm r5, r6, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f6, r5
-; CHECK-P8-NEXT:    clrldi r5, r7, 56
-; CHECK-P8-NEXT:    mtvsrwz f7, r4
-; CHECK-P8-NEXT:    rldicl r4, r7, 48, 56
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f8, r5
-; CHECK-P8-NEXT:    rldicl r5, r7, 56, 56
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f9, r4
-; CHECK-P8-NEXT:    rlwinm r4, r5, 0, 24, 31
-; CHECK-P8-NEXT:    rldicl r5, r7, 8, 56
-; CHECK-P8-NEXT:    mtvsrwz f10, r4
-; CHECK-P8-NEXT:    rldicl r4, r7, 40, 56
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
-; CHECK-P8-NEXT:    mtvsrwz f11, r4
-; CHECK-P8-NEXT:    rldicl r4, r7, 32, 56
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P8-NEXT:    xscvuxdsp f1, f1
-; CHECK-P8-NEXT:    xscvuxdsp f3, f3
-; CHECK-P8-NEXT:    xscvuxdsp f4, f4
-; CHECK-P8-NEXT:    mtvsrwz f12, r4
-; CHECK-P8-NEXT:    rldicl r4, r7, 16, 56
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P8-NEXT:    xscvuxdsp f5, f5
-; CHECK-P8-NEXT:    mtvsrwz f13, r4
-; CHECK-P8-NEXT:    rldicl r4, r7, 24, 56
-; CHECK-P8-NEXT:    xscvuxdsp f2, f2
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_2@toc@ha
+; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_0@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_2@toc@l
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
+; CHECK-P8-NEXT:    lvx v5, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_1@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_3@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_1@toc@l
+; CHECK-P8-NEXT:    lvx v0, 0, r4
+; CHECK-P8-NEXT:    lvx v1, 0, r5
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    mtvsrwz v3, r5
-; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
 ; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    xscvuxdsp f6, f6
-; CHECK-P8-NEXT:    xscvuxdsp f7, f7
-; CHECK-P8-NEXT:    xscvuxdsp f8, f8
-; CHECK-P8-NEXT:    xscvuxdsp f9, f9
-; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs5
-; CHECK-P8-NEXT:    xscvuxdsp f10, f10
-; CHECK-P8-NEXT:    xscvuxdsp f11, f11
-; CHECK-P8-NEXT:    xscvuxdsp f12, f12
-; CHECK-P8-NEXT:    xscvuxdsp f13, f13
-; CHECK-P8-NEXT:    xxmrghd vs5, vs7, vs6
-; CHECK-P8-NEXT:    xscvuxdsp f1, v2
-; CHECK-P8-NEXT:    xscvuxdsp f4, v3
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xxmrghd vs0, vs9, vs8
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs3
-; CHECK-P8-NEXT:    xxmrghd vs3, vs11, vs10
-; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P8-NEXT:    xxmrghd vs2, vs13, vs12
-; CHECK-P8-NEXT:    xvcvdpsp v5, vs5
-; CHECK-P8-NEXT:    xvcvdpsp v0, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs4, vs1
-; CHECK-P8-NEXT:    xvcvdpsp v1, vs3
-; CHECK-P8-NEXT:    xvcvdpsp v6, vs2
-; CHECK-P8-NEXT:    vmrgew v2, v3, v2
-; CHECK-P8-NEXT:    xvcvdpsp v7, vs1
-; CHECK-P8-NEXT:    vmrgew v3, v5, v4
-; CHECK-P8-NEXT:    vmrgew v4, v1, v0
-; CHECK-P8-NEXT:    stvx v2, r3, r5
-; CHECK-P8-NEXT:    vmrgew v5, v7, v6
-; CHECK-P8-NEXT:    stvx v3, r3, r4
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stvx v4, 0, r3
+; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
+; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P8-NEXT:    vperm v0, v4, v2, v0
+; CHECK-P8-NEXT:    vperm v2, v4, v2, v1
+; CHECK-P8-NEXT:    xvcvuxwsp v4, v5
+; CHECK-P8-NEXT:    xvcvuxwsp v3, v3
+; CHECK-P8-NEXT:    xvcvuxwsp v5, v0
+; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
+; CHECK-P8-NEXT:    stvx v4, r3, r5
+; CHECK-P8-NEXT:    stvx v3, 0, r3
 ; CHECK-P8-NEXT:    stvx v5, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stvx v2, r3, r4
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    li r4, 0
-; CHECK-P9-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    li r5, 2
-; CHECK-P9-NEXT:    li r6, 1
-; CHECK-P9-NEXT:    li r7, 3
-; CHECK-P9-NEXT:    li r8, 4
-; CHECK-P9-NEXT:    li r9, 6
-; CHECK-P9-NEXT:    li r10, 5
-; CHECK-P9-NEXT:    li r11, 7
-; CHECK-P9-NEXT:    li r12, 8
-; CHECK-P9-NEXT:    li r0, 10
-; CHECK-P9-NEXT:    li r30, 9
-; CHECK-P9-NEXT:    li r29, 11
-; CHECK-P9-NEXT:    li r28, 12
-; CHECK-P9-NEXT:    li r27, 14
-; CHECK-P9-NEXT:    li r26, 13
-; CHECK-P9-NEXT:    li r25, 15
-; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
-; CHECK-P9-NEXT:    vextubrx r5, r5, v2
-; CHECK-P9-NEXT:    vextubrx r6, r6, v2
-; CHECK-P9-NEXT:    vextubrx r7, r7, v2
-; CHECK-P9-NEXT:    vextubrx r8, r8, v2
-; CHECK-P9-NEXT:    vextubrx r9, r9, v2
-; CHECK-P9-NEXT:    vextubrx r10, r10, v2
-; CHECK-P9-NEXT:    vextubrx r11, r11, v2
-; CHECK-P9-NEXT:    vextubrx r12, r12, v2
-; CHECK-P9-NEXT:    vextubrx r0, r0, v2
-; CHECK-P9-NEXT:    vextubrx r30, r30, v2
-; CHECK-P9-NEXT:    vextubrx r29, r29, v2
-; CHECK-P9-NEXT:    vextubrx r28, r28, v2
-; CHECK-P9-NEXT:    vextubrx r27, r27, v2
-; CHECK-P9-NEXT:    vextubrx r26, r26, v2
-; CHECK-P9-NEXT:    vextubrx r25, r25, v2
-; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r8, r8, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r9, r9, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r10, r10, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r11, r11, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r12, r12, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r0, r0, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r30, r30, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r29, r29, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r28, r28, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r27, r27, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r26, r26, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r25, r25, 0, 24, 31
-; CHECK-P9-NEXT:    mtvsrwz f0, r4
-; CHECK-P9-NEXT:    mtvsrwz f1, r5
-; CHECK-P9-NEXT:    mtvsrwz f2, r6
-; CHECK-P9-NEXT:    mtvsrwz f3, r7
-; CHECK-P9-NEXT:    mtvsrwz f4, r8
-; CHECK-P9-NEXT:    mtvsrwz f5, r9
-; CHECK-P9-NEXT:    mtvsrwz f6, r10
-; CHECK-P9-NEXT:    mtvsrwz f7, r11
-; CHECK-P9-NEXT:    mtvsrwz f8, r12
-; CHECK-P9-NEXT:    mtvsrwz f9, r0
-; CHECK-P9-NEXT:    mtvsrwz f10, r30
-; CHECK-P9-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz f11, r29
-; CHECK-P9-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz f12, r28
-; CHECK-P9-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz f13, r27
-; CHECK-P9-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz v2, r26
-; CHECK-P9-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz v3, r25
-; CHECK-P9-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvuxdsp f1, f1
-; CHECK-P9-NEXT:    xscvuxdsp f2, f2
-; CHECK-P9-NEXT:    xscvuxdsp f3, f3
-; CHECK-P9-NEXT:    xscvuxdsp f4, f4
-; CHECK-P9-NEXT:    xscvuxdsp f5, f5
-; CHECK-P9-NEXT:    xscvuxdsp f6, f6
-; CHECK-P9-NEXT:    xscvuxdsp f7, f7
-; CHECK-P9-NEXT:    xscvuxdsp f8, f8
-; CHECK-P9-NEXT:    xscvuxdsp f9, f9
-; CHECK-P9-NEXT:    xscvuxdsp f10, f10
-; CHECK-P9-NEXT:    xscvuxdsp f11, f11
-; CHECK-P9-NEXT:    xscvuxdsp f12, f12
-; CHECK-P9-NEXT:    xscvuxdsp f13, f13
-; CHECK-P9-NEXT:    xscvuxdsp f31, v2
-; CHECK-P9-NEXT:    xscvuxdsp f30, v3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P9-NEXT:    xxmrghd vs4, vs9, vs8
-; CHECK-P9-NEXT:    xxmrghd vs5, vs11, vs10
-; CHECK-P9-NEXT:    xxmrghd vs6, vs13, vs12
-; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
-; CHECK-P9-NEXT:    xvcvdpsp v0, vs4
-; CHECK-P9-NEXT:    xvcvdpsp v1, vs5
-; CHECK-P9-NEXT:    xvcvdpsp v6, vs6
-; CHECK-P9-NEXT:    xvcvdpsp v7, vs7
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
-; CHECK-P9-NEXT:    vmrgew v3, v5, v4
-; CHECK-P9-NEXT:    vmrgew v4, v1, v0
-; CHECK-P9-NEXT:    vmrgew v5, v7, v6
-; CHECK-P9-NEXT:    stxv v3, 16(r3)
-; CHECK-P9-NEXT:    stxv v2, 0(r3)
-; CHECK-P9-NEXT:    stxv v5, 48(r3)
-; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI3_1@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI3_2@toc@ha
+; CHECK-P9-NEXT:    addis r7, r2, .LCPI3_3@toc@ha
+; CHECK-P9-NEXT:    xxlxor v1, v1, v1
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_0@toc@l
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI3_1@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI3_2@toc@l
+; CHECK-P9-NEXT:    addi r7, r7, .LCPI3_3@toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    lxvx v4, 0, r5
+; CHECK-P9-NEXT:    lxvx v5, 0, r6
+; CHECK-P9-NEXT:    lxvx v0, 0, r7
+; CHECK-P9-NEXT:    vperm v3, v1, v2, v3
+; CHECK-P9-NEXT:    vperm v4, v1, v2, v4
+; CHECK-P9-NEXT:    vperm v5, v1, v2, v5
+; CHECK-P9-NEXT:    vperm v2, v1, v2, v0
+; CHECK-P9-NEXT:    xvcvuxwsp vs0, v3
+; CHECK-P9-NEXT:    xvcvuxwsp vs1, v4
+; CHECK-P9-NEXT:    xvcvuxwsp vs2, v5
+; CHECK-P9-NEXT:    xvcvuxwsp vs3, v2
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    li r4, 3
-; CHECK-BE-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    li r5, 1
-; CHECK-BE-NEXT:    li r6, 2
-; CHECK-BE-NEXT:    li r7, 0
-; CHECK-BE-NEXT:    li r8, 7
-; CHECK-BE-NEXT:    li r9, 5
-; CHECK-BE-NEXT:    li r10, 6
-; CHECK-BE-NEXT:    li r11, 4
-; CHECK-BE-NEXT:    li r12, 11
-; CHECK-BE-NEXT:    li r0, 9
-; CHECK-BE-NEXT:    li r30, 10
-; CHECK-BE-NEXT:    li r29, 8
-; CHECK-BE-NEXT:    li r28, 15
-; CHECK-BE-NEXT:    li r27, 13
-; CHECK-BE-NEXT:    li r26, 14
-; CHECK-BE-NEXT:    li r25, 12
-; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
-; CHECK-BE-NEXT:    vextublx r5, r5, v2
-; CHECK-BE-NEXT:    vextublx r6, r6, v2
-; CHECK-BE-NEXT:    vextublx r7, r7, v2
-; CHECK-BE-NEXT:    vextublx r8, r8, v2
-; CHECK-BE-NEXT:    vextublx r9, r9, v2
-; CHECK-BE-NEXT:    vextublx r10, r10, v2
-; CHECK-BE-NEXT:    vextublx r11, r11, v2
-; CHECK-BE-NEXT:    vextublx r12, r12, v2
-; CHECK-BE-NEXT:    vextublx r0, r0, v2
-; CHECK-BE-NEXT:    vextublx r30, r30, v2
-; CHECK-BE-NEXT:    vextublx r29, r29, v2
-; CHECK-BE-NEXT:    vextublx r28, r28, v2
-; CHECK-BE-NEXT:    vextublx r27, r27, v2
-; CHECK-BE-NEXT:    vextublx r26, r26, v2
-; CHECK-BE-NEXT:    vextublx r25, r25, v2
-; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r8, r8, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r9, r9, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r10, r10, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r11, r11, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r12, r12, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r0, r0, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r30, r30, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r29, r29, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r28, r28, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r27, r27, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r26, r26, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r25, r25, 0, 24, 31
-; CHECK-BE-NEXT:    mtvsrwz f0, r4
-; CHECK-BE-NEXT:    mtvsrwz f1, r5
-; CHECK-BE-NEXT:    mtvsrwz f2, r6
-; CHECK-BE-NEXT:    mtvsrwz f3, r7
-; CHECK-BE-NEXT:    mtvsrwz f4, r8
-; CHECK-BE-NEXT:    mtvsrwz f5, r9
-; CHECK-BE-NEXT:    mtvsrwz f6, r10
-; CHECK-BE-NEXT:    mtvsrwz f7, r11
-; CHECK-BE-NEXT:    mtvsrwz f8, r12
-; CHECK-BE-NEXT:    mtvsrwz f9, r0
-; CHECK-BE-NEXT:    mtvsrwz f10, r30
-; CHECK-BE-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz f11, r29
-; CHECK-BE-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz f12, r28
-; CHECK-BE-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz f13, r27
-; CHECK-BE-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz v2, r26
-; CHECK-BE-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz v3, r25
-; CHECK-BE-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    xscvuxdsp f0, f0
-; CHECK-BE-NEXT:    xscvuxdsp f1, f1
-; CHECK-BE-NEXT:    xscvuxdsp f2, f2
-; CHECK-BE-NEXT:    xscvuxdsp f3, f3
-; CHECK-BE-NEXT:    xscvuxdsp f4, f4
-; CHECK-BE-NEXT:    xscvuxdsp f5, f5
-; CHECK-BE-NEXT:    xscvuxdsp f6, f6
-; CHECK-BE-NEXT:    xscvuxdsp f7, f7
-; CHECK-BE-NEXT:    xscvuxdsp f8, f8
-; CHECK-BE-NEXT:    xscvuxdsp f9, f9
-; CHECK-BE-NEXT:    xscvuxdsp f10, f10
-; CHECK-BE-NEXT:    xscvuxdsp f11, f11
-; CHECK-BE-NEXT:    xscvuxdsp f12, f12
-; CHECK-BE-NEXT:    xscvuxdsp f13, f13
-; CHECK-BE-NEXT:    xscvuxdsp f31, v2
-; CHECK-BE-NEXT:    xscvuxdsp f30, v3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-BE-NEXT:    xxmrghd vs4, vs9, vs8
-; CHECK-BE-NEXT:    xxmrghd vs5, vs11, vs10
-; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
-; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
-; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
-; CHECK-BE-NEXT:    xvcvdpsp v0, vs4
-; CHECK-BE-NEXT:    xvcvdpsp v1, vs5
-; CHECK-BE-NEXT:    xvcvdpsp v6, vs6
-; CHECK-BE-NEXT:    xvcvdpsp v7, vs7
-; CHECK-BE-NEXT:    vmrgew v2, v3, v2
-; CHECK-BE-NEXT:    vmrgew v3, v5, v4
-; CHECK-BE-NEXT:    vmrgew v4, v1, v0
-; CHECK-BE-NEXT:    vmrgew v5, v7, v6
-; CHECK-BE-NEXT:    stxv v3, 16(r3)
-; CHECK-BE-NEXT:    stxv v2, 0(r3)
-; CHECK-BE-NEXT:    stxv v5, 48(r3)
-; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI3_1@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI3_2@toc@ha
+; CHECK-BE-NEXT:    addis r7, r2, .LCPI3_3@toc@ha
+; CHECK-BE-NEXT:    xxlxor v1, v1, v1
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI3_1@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI3_2@toc@l
+; CHECK-BE-NEXT:    addi r7, r7, .LCPI3_3@toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    lxvx v4, 0, r5
+; CHECK-BE-NEXT:    lxvx v5, 0, r6
+; CHECK-BE-NEXT:    lxvx v0, 0, r7
+; CHECK-BE-NEXT:    vperm v3, v2, v1, v3
+; CHECK-BE-NEXT:    vperm v4, v1, v2, v4
+; CHECK-BE-NEXT:    vperm v5, v1, v2, v5
+; CHECK-BE-NEXT:    vperm v2, v1, v2, v0
+; CHECK-BE-NEXT:    xvcvuxwsp vs0, v3
+; CHECK-BE-NEXT:    xvcvuxwsp vs1, v4
+; CHECK-BE-NEXT:    xvcvuxwsp vs2, v5
+; CHECK-BE-NEXT:    xvcvuxwsp vs3, v2
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = uitofp <16 x i8> %a to <16 x float>
@@ -766,89 +348,39 @@ entry:
 define <4 x float> @test4elt_signed(i32 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
 ; CHECK-P8-NEXT:    mtvsrd f0, r3
-; CHECK-P8-NEXT:    mfvsrd r3, f0
-; CHECK-P8-NEXT:    clrldi r4, r3, 56
-; CHECK-P8-NEXT:    rldicl r5, r3, 48, 56
-; CHECK-P8-NEXT:    extsb r4, r4
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    mtvsrwa f0, r4
-; CHECK-P8-NEXT:    rldicl r4, r3, 56, 56
-; CHECK-P8-NEXT:    rldicl r3, r3, 40, 56
-; CHECK-P8-NEXT:    extsb r4, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    mtvsrwa f1, r5
-; CHECK-P8-NEXT:    mtvsrwa f2, r4
-; CHECK-P8-NEXT:    mtvsrwa f3, r3
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
-; CHECK-P8-NEXT:    xscvsxdsp f1, f1
-; CHECK-P8-NEXT:    xscvsxdsp f2, f2
-; CHECK-P8-NEXT:    xscvsxdsp f3, f3
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P8-NEXT:    vmrgew v2, v3, v2
+; CHECK-P8-NEXT:    addi r3, r4, .LCPI5_0@toc@l
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P8-NEXT:    vspltisw v3, 12
+; CHECK-P8-NEXT:    vadduwm v3, v3, v3
+; CHECK-P8-NEXT:    vslw v2, v2, v3
+; CHECK-P8-NEXT:    vsraw v2, v2, v3
+; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrws v2, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    li r4, 2
-; CHECK-P9-NEXT:    li r5, 1
-; CHECK-P9-NEXT:    li r6, 3
-; CHECK-P9-NEXT:    vextubrx r3, r3, v2
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
-; CHECK-P9-NEXT:    vextubrx r5, r5, v2
-; CHECK-P9-NEXT:    vextubrx r6, r6, v2
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    extsb r4, r4
-; CHECK-P9-NEXT:    extsb r5, r5
-; CHECK-P9-NEXT:    extsb r6, r6
-; CHECK-P9-NEXT:    mtvsrwa f0, r3
-; CHECK-P9-NEXT:    mtvsrwa f1, r4
-; CHECK-P9-NEXT:    mtvsrwa f2, r5
-; CHECK-P9-NEXT:    mtvsrwa f3, r6
-; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvsxdsp f1, f1
-; CHECK-P9-NEXT:    xscvsxdsp f2, f2
-; CHECK-P9-NEXT:    xscvsxdsp f3, f3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
+; CHECK-P9-NEXT:    mtvsrws v3, r3
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI5_0@toc@l
+; CHECK-P9-NEXT:    lxvx v2, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P9-NEXT:    vextsb2w v2, v2
+; CHECK-P9-NEXT:    xvcvsxwsp v2, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrws v2, r3
-; CHECK-BE-NEXT:    li r3, 3
-; CHECK-BE-NEXT:    li r4, 1
-; CHECK-BE-NEXT:    li r5, 2
-; CHECK-BE-NEXT:    li r6, 0
-; CHECK-BE-NEXT:    vextublx r3, r3, v2
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
-; CHECK-BE-NEXT:    vextublx r5, r5, v2
-; CHECK-BE-NEXT:    vextublx r6, r6, v2
-; CHECK-BE-NEXT:    extsb r3, r3
-; CHECK-BE-NEXT:    extsb r4, r4
-; CHECK-BE-NEXT:    extsb r5, r5
-; CHECK-BE-NEXT:    extsb r6, r6
-; CHECK-BE-NEXT:    mtvsrwa f0, r3
-; CHECK-BE-NEXT:    mtvsrwa f1, r4
-; CHECK-BE-NEXT:    mtvsrwa f2, r5
-; CHECK-BE-NEXT:    mtvsrwa f3, r6
-; CHECK-BE-NEXT:    xscvsxdsp f0, f0
-; CHECK-BE-NEXT:    xscvsxdsp f1, f1
-; CHECK-BE-NEXT:    xscvsxdsp f2, f2
-; CHECK-BE-NEXT:    xscvsxdsp f3, f3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
-; CHECK-BE-NEXT:    vmrgew v2, v3, v2
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
+; CHECK-BE-NEXT:    mtvsrws v3, r3
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI5_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v3, v3, v2
+; CHECK-BE-NEXT:    vextsb2w v2, v2
+; CHECK-BE-NEXT:    xvcvsxwsp v2, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i32 %a.coerce to <4 x i8>
@@ -859,168 +391,67 @@ entry:
 define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, i64 %a.coerce) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
 ; CHECK-P8-NEXT:    mtvsrd f0, r4
-; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    mfvsrd r4, f0
-; CHECK-P8-NEXT:    clrldi r6, r4, 56
-; CHECK-P8-NEXT:    rldicl r7, r4, 48, 56
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    extsb r7, r7
-; CHECK-P8-NEXT:    mtvsrwa f0, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 56, 56
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    mtvsrwa f1, r7
-; CHECK-P8-NEXT:    rldicl r7, r4, 40, 56
-; CHECK-P8-NEXT:    mtvsrwa f2, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 32, 56
-; CHECK-P8-NEXT:    extsb r7, r7
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    mtvsrwa f3, r7
-; CHECK-P8-NEXT:    rldicl r7, r4, 16, 56
-; CHECK-P8-NEXT:    mtvsrwa f4, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 24, 56
-; CHECK-P8-NEXT:    rldicl r4, r4, 8, 56
-; CHECK-P8-NEXT:    extsb r7, r7
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    extsb r4, r4
-; CHECK-P8-NEXT:    mtvsrwa f5, r7
-; CHECK-P8-NEXT:    mtvsrwa f6, r6
-; CHECK-P8-NEXT:    mtvsrwa f7, r4
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
-; CHECK-P8-NEXT:    xscvsxdsp f1, f1
-; CHECK-P8-NEXT:    xscvsxdsp f2, f2
-; CHECK-P8-NEXT:    xscvsxdsp f3, f3
-; CHECK-P8-NEXT:    xscvsxdsp f4, f4
-; CHECK-P8-NEXT:    xscvsxdsp f5, f5
-; CHECK-P8-NEXT:    xscvsxdsp f6, f6
-; CHECK-P8-NEXT:    xscvsxdsp f7, f7
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P8-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P8-NEXT:    xvcvdpsp v5, vs3
-; CHECK-P8-NEXT:    vmrgew v2, v3, v2
-; CHECK-P8-NEXT:    vmrgew v3, v5, v4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_1@toc@ha
+; CHECK-P8-NEXT:    vspltisw v5, 12
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_0@toc@l
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_1@toc@l
+; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    lvx v4, 0, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P8-NEXT:    vperm v3, v3, v3, v4
+; CHECK-P8-NEXT:    vadduwm v4, v5, v5
+; CHECK-P8-NEXT:    vslw v2, v2, v4
+; CHECK-P8-NEXT:    vslw v3, v3, v4
+; CHECK-P8-NEXT:    vsraw v2, v2, v4
+; CHECK-P8-NEXT:    vsraw v3, v3, v4
+; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
+; CHECK-P8-NEXT:    xvcvsxwsp v3, v3
 ; CHECK-P8-NEXT:    stvx v2, 0, r3
-; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    stvx v3, r3, r4
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI6_1@toc@ha
 ; CHECK-P9-NEXT:    mtvsrd f0, r4
-; CHECK-P9-NEXT:    li r4, 0
-; CHECK-P9-NEXT:    li r5, 2
-; CHECK-P9-NEXT:    li r6, 1
-; CHECK-P9-NEXT:    li r7, 3
-; CHECK-P9-NEXT:    li r8, 4
-; CHECK-P9-NEXT:    li r9, 6
-; CHECK-P9-NEXT:    li r10, 5
-; CHECK-P9-NEXT:    li r11, 7
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
-; CHECK-P9-NEXT:    vextubrx r5, r5, v2
-; CHECK-P9-NEXT:    vextubrx r6, r6, v2
-; CHECK-P9-NEXT:    vextubrx r7, r7, v2
-; CHECK-P9-NEXT:    vextubrx r8, r8, v2
-; CHECK-P9-NEXT:    vextubrx r9, r9, v2
-; CHECK-P9-NEXT:    vextubrx r10, r10, v2
-; CHECK-P9-NEXT:    vextubrx r11, r11, v2
-; CHECK-P9-NEXT:    extsb r4, r4
-; CHECK-P9-NEXT:    extsb r5, r5
-; CHECK-P9-NEXT:    extsb r6, r6
-; CHECK-P9-NEXT:    extsb r7, r7
-; CHECK-P9-NEXT:    extsb r8, r8
-; CHECK-P9-NEXT:    extsb r9, r9
-; CHECK-P9-NEXT:    extsb r10, r10
-; CHECK-P9-NEXT:    extsb r11, r11
-; CHECK-P9-NEXT:    mtvsrwa f0, r4
-; CHECK-P9-NEXT:    mtvsrwa f1, r5
-; CHECK-P9-NEXT:    mtvsrwa f2, r6
-; CHECK-P9-NEXT:    mtvsrwa f3, r7
-; CHECK-P9-NEXT:    mtvsrwa f4, r8
-; CHECK-P9-NEXT:    mtvsrwa f5, r9
-; CHECK-P9-NEXT:    mtvsrwa f6, r10
-; CHECK-P9-NEXT:    mtvsrwa f7, r11
-; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvsxdsp f1, f1
-; CHECK-P9-NEXT:    xscvsxdsp f2, f2
-; CHECK-P9-NEXT:    xscvsxdsp f3, f3
-; CHECK-P9-NEXT:    xscvsxdsp f4, f4
-; CHECK-P9-NEXT:    xscvsxdsp f5, f5
-; CHECK-P9-NEXT:    xscvsxdsp f6, f6
-; CHECK-P9-NEXT:    xscvsxdsp f7, f7
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
-; CHECK-P9-NEXT:    vmrgew v3, v5, v4
-; CHECK-P9-NEXT:    stxv v3, 16(r3)
-; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI6_0@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI6_1@toc@l
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    lxvx v2, 0, r5
+; CHECK-P9-NEXT:    lxvx v3, 0, r6
+; CHECK-P9-NEXT:    vperm v2, v4, v4, v2
+; CHECK-P9-NEXT:    vperm v3, v4, v4, v3
+; CHECK-P9-NEXT:    vextsb2w v2, v2
+; CHECK-P9-NEXT:    vextsb2w v3, v3
+; CHECK-P9-NEXT:    xvcvsxwsp vs0, v2
+; CHECK-P9-NEXT:    xvcvsxwsp vs1, v3
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    li r5, 3
-; CHECK-BE-NEXT:    mtvsrd v2, r4
-; CHECK-BE-NEXT:    li r4, 1
-; CHECK-BE-NEXT:    li r6, 2
-; CHECK-BE-NEXT:    li r7, 0
-; CHECK-BE-NEXT:    li r8, 7
-; CHECK-BE-NEXT:    li r9, 5
-; CHECK-BE-NEXT:    li r10, 6
-; CHECK-BE-NEXT:    li r11, 4
-; CHECK-BE-NEXT:    vextublx r5, r5, v2
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
-; CHECK-BE-NEXT:    vextublx r6, r6, v2
-; CHECK-BE-NEXT:    vextublx r7, r7, v2
-; CHECK-BE-NEXT:    vextublx r8, r8, v2
-; CHECK-BE-NEXT:    vextublx r9, r9, v2
-; CHECK-BE-NEXT:    vextublx r10, r10, v2
-; CHECK-BE-NEXT:    vextublx r11, r11, v2
-; CHECK-BE-NEXT:    extsb r5, r5
-; CHECK-BE-NEXT:    extsb r4, r4
-; CHECK-BE-NEXT:    extsb r6, r6
-; CHECK-BE-NEXT:    extsb r7, r7
-; CHECK-BE-NEXT:    extsb r8, r8
-; CHECK-BE-NEXT:    extsb r9, r9
-; CHECK-BE-NEXT:    extsb r10, r10
-; CHECK-BE-NEXT:    extsb r11, r11
-; CHECK-BE-NEXT:    mtvsrwa f0, r5
-; CHECK-BE-NEXT:    mtvsrwa f1, r4
-; CHECK-BE-NEXT:    mtvsrwa f2, r6
-; CHECK-BE-NEXT:    mtvsrwa f3, r7
-; CHECK-BE-NEXT:    mtvsrwa f4, r8
-; CHECK-BE-NEXT:    mtvsrwa f5, r9
-; CHECK-BE-NEXT:    mtvsrwa f6, r10
-; CHECK-BE-NEXT:    mtvsrwa f7, r11
-; CHECK-BE-NEXT:    xscvsxdsp f0, f0
-; CHECK-BE-NEXT:    xscvsxdsp f1, f1
-; CHECK-BE-NEXT:    xscvsxdsp f2, f2
-; CHECK-BE-NEXT:    xscvsxdsp f3, f3
-; CHECK-BE-NEXT:    xscvsxdsp f4, f4
-; CHECK-BE-NEXT:    xscvsxdsp f5, f5
-; CHECK-BE-NEXT:    xscvsxdsp f6, f6
-; CHECK-BE-NEXT:    xscvsxdsp f7, f7
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
-; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
-; CHECK-BE-NEXT:    vmrgew v2, v3, v2
-; CHECK-BE-NEXT:    vmrgew v3, v5, v4
-; CHECK-BE-NEXT:    stxv v2, 0(r3)
-; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI6_1@toc@ha
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    xxlxor v5, v5, v5
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI6_0@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI6_1@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r5
+; CHECK-BE-NEXT:    lxvx v3, 0, r6
+; CHECK-BE-NEXT:    vperm v2, v5, v4, v2
+; CHECK-BE-NEXT:    vperm v3, v4, v4, v3
+; CHECK-BE-NEXT:    vextsb2w v2, v2
+; CHECK-BE-NEXT:    vextsb2w v3, v3
+; CHECK-BE-NEXT:    xvcvsxwsp vs0, v2
+; CHECK-BE-NEXT:    xvcvsxwsp vs1, v3
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <8 x i8>
@@ -1032,348 +463,108 @@ entry:
 define void @test16elt_signed(<16 x float>* noalias nocapture sret %agg.result, <16 x i8> %a) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    mfvsrd r4, v2
-; CHECK-P8-NEXT:    xxswapd vs2, v2
-; CHECK-P8-NEXT:    clrldi r5, r4, 56
-; CHECK-P8-NEXT:    rldicl r6, r4, 48, 56
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    mtvsrwa f0, r5
-; CHECK-P8-NEXT:    rldicl r5, r4, 40, 56
-; CHECK-P8-NEXT:    rldicl r7, r4, 56, 56
-; CHECK-P8-NEXT:    mtvsrwa f1, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 32, 56
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    extsb r7, r7
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    mtvsrwa f4, r5
-; CHECK-P8-NEXT:    rldicl r5, r4, 16, 56
-; CHECK-P8-NEXT:    mtvsrwa f3, r7
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    mtvsrwa f5, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 24, 56
-; CHECK-P8-NEXT:    rldicl r4, r4, 8, 56
-; CHECK-P8-NEXT:    mfvsrd r7, f2
-; CHECK-P8-NEXT:    extsb r4, r4
-; CHECK-P8-NEXT:    mtvsrwa f2, r5
-; CHECK-P8-NEXT:    extsb r5, r6
-; CHECK-P8-NEXT:    mtvsrwa f6, r5
-; CHECK-P8-NEXT:    clrldi r5, r7, 56
-; CHECK-P8-NEXT:    mtvsrwa f7, r4
-; CHECK-P8-NEXT:    rldicl r4, r7, 48, 56
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    mtvsrwa f8, r5
-; CHECK-P8-NEXT:    rldicl r5, r7, 56, 56
-; CHECK-P8-NEXT:    extsb r4, r4
-; CHECK-P8-NEXT:    mtvsrwa f9, r4
-; CHECK-P8-NEXT:    extsb r4, r5
-; CHECK-P8-NEXT:    rldicl r5, r7, 8, 56
-; CHECK-P8-NEXT:    mtvsrwa f10, r4
-; CHECK-P8-NEXT:    rldicl r4, r7, 40, 56
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    extsb r4, r4
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
-; CHECK-P8-NEXT:    mtvsrwa f11, r4
-; CHECK-P8-NEXT:    rldicl r4, r7, 32, 56
-; CHECK-P8-NEXT:    extsb r4, r4
-; CHECK-P8-NEXT:    xscvsxdsp f1, f1
-; CHECK-P8-NEXT:    xscvsxdsp f3, f3
-; CHECK-P8-NEXT:    xscvsxdsp f4, f4
-; CHECK-P8-NEXT:    mtvsrwa f12, r4
-; CHECK-P8-NEXT:    rldicl r4, r7, 16, 56
-; CHECK-P8-NEXT:    extsb r4, r4
-; CHECK-P8-NEXT:    xscvsxdsp f5, f5
-; CHECK-P8-NEXT:    mtvsrwa f13, r4
-; CHECK-P8-NEXT:    rldicl r4, r7, 24, 56
-; CHECK-P8-NEXT:    xscvsxdsp f2, f2
-; CHECK-P8-NEXT:    extsb r4, r4
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    mtvsrwa v2, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_2@toc@ha
+; CHECK-P8-NEXT:    vspltisw v1, 12
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_0@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_2@toc@l
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_3@toc@ha
+; CHECK-P8-NEXT:    lvx v4, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_1@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_3@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_1@toc@l
+; CHECK-P8-NEXT:    lvx v5, 0, r4
+; CHECK-P8-NEXT:    lvx v0, 0, r5
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    mtvsrwa v3, r5
-; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
 ; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    xscvsxdsp f6, f6
-; CHECK-P8-NEXT:    xscvsxdsp f7, f7
-; CHECK-P8-NEXT:    xscvsxdsp f8, f8
-; CHECK-P8-NEXT:    xscvsxdsp f9, f9
-; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs5
-; CHECK-P8-NEXT:    xscvsxdsp f10, f10
-; CHECK-P8-NEXT:    xscvsxdsp f11, f11
-; CHECK-P8-NEXT:    xscvsxdsp f12, f12
-; CHECK-P8-NEXT:    xscvsxdsp f13, f13
-; CHECK-P8-NEXT:    xxmrghd vs5, vs7, vs6
-; CHECK-P8-NEXT:    xscvsxdsp f1, v2
-; CHECK-P8-NEXT:    xscvsxdsp f4, v3
-; CHECK-P8-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P8-NEXT:    xxmrghd vs0, vs9, vs8
-; CHECK-P8-NEXT:    xvcvdpsp v3, vs3
-; CHECK-P8-NEXT:    xxmrghd vs3, vs11, vs10
-; CHECK-P8-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P8-NEXT:    xxmrghd vs2, vs13, vs12
-; CHECK-P8-NEXT:    xvcvdpsp v5, vs5
-; CHECK-P8-NEXT:    xvcvdpsp v0, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs4, vs1
-; CHECK-P8-NEXT:    xvcvdpsp v1, vs3
-; CHECK-P8-NEXT:    xvcvdpsp v6, vs2
-; CHECK-P8-NEXT:    vmrgew v2, v3, v2
-; CHECK-P8-NEXT:    xvcvdpsp v7, vs1
-; CHECK-P8-NEXT:    vmrgew v3, v5, v4
-; CHECK-P8-NEXT:    vmrgew v4, v1, v0
-; CHECK-P8-NEXT:    stvx v2, r3, r5
-; CHECK-P8-NEXT:    vmrgew v5, v7, v6
-; CHECK-P8-NEXT:    stvx v3, r3, r4
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stvx v4, 0, r3
+; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
+; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v0
+; CHECK-P8-NEXT:    vadduwm v0, v1, v1
+; CHECK-P8-NEXT:    vslw v3, v3, v0
+; CHECK-P8-NEXT:    vslw v4, v4, v0
+; CHECK-P8-NEXT:    vslw v5, v5, v0
+; CHECK-P8-NEXT:    vslw v2, v2, v0
+; CHECK-P8-NEXT:    vsraw v3, v3, v0
+; CHECK-P8-NEXT:    vsraw v4, v4, v0
+; CHECK-P8-NEXT:    vsraw v5, v5, v0
+; CHECK-P8-NEXT:    vsraw v2, v2, v0
+; CHECK-P8-NEXT:    xvcvsxwsp v3, v3
+; CHECK-P8-NEXT:    xvcvsxwsp v4, v4
+; CHECK-P8-NEXT:    xvcvsxwsp v5, v5
+; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
+; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    stvx v4, r3, r5
 ; CHECK-P8-NEXT:    stvx v5, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stvx v2, r3, r4
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    li r4, 0
-; CHECK-P9-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    li r5, 2
-; CHECK-P9-NEXT:    li r6, 1
-; CHECK-P9-NEXT:    li r7, 3
-; CHECK-P9-NEXT:    li r8, 4
-; CHECK-P9-NEXT:    li r9, 6
-; CHECK-P9-NEXT:    li r10, 5
-; CHECK-P9-NEXT:    li r11, 7
-; CHECK-P9-NEXT:    li r12, 8
-; CHECK-P9-NEXT:    li r0, 10
-; CHECK-P9-NEXT:    li r30, 9
-; CHECK-P9-NEXT:    li r29, 11
-; CHECK-P9-NEXT:    li r28, 12
-; CHECK-P9-NEXT:    li r27, 14
-; CHECK-P9-NEXT:    li r26, 13
-; CHECK-P9-NEXT:    li r25, 15
-; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
-; CHECK-P9-NEXT:    vextubrx r5, r5, v2
-; CHECK-P9-NEXT:    vextubrx r6, r6, v2
-; CHECK-P9-NEXT:    vextubrx r7, r7, v2
-; CHECK-P9-NEXT:    vextubrx r8, r8, v2
-; CHECK-P9-NEXT:    vextubrx r9, r9, v2
-; CHECK-P9-NEXT:    vextubrx r10, r10, v2
-; CHECK-P9-NEXT:    vextubrx r11, r11, v2
-; CHECK-P9-NEXT:    vextubrx r12, r12, v2
-; CHECK-P9-NEXT:    vextubrx r0, r0, v2
-; CHECK-P9-NEXT:    vextubrx r30, r30, v2
-; CHECK-P9-NEXT:    vextubrx r29, r29, v2
-; CHECK-P9-NEXT:    vextubrx r28, r28, v2
-; CHECK-P9-NEXT:    vextubrx r27, r27, v2
-; CHECK-P9-NEXT:    vextubrx r26, r26, v2
-; CHECK-P9-NEXT:    vextubrx r25, r25, v2
-; CHECK-P9-NEXT:    extsb r4, r4
-; CHECK-P9-NEXT:    extsb r5, r5
-; CHECK-P9-NEXT:    extsb r6, r6
-; CHECK-P9-NEXT:    extsb r7, r7
-; CHECK-P9-NEXT:    extsb r8, r8
-; CHECK-P9-NEXT:    extsb r9, r9
-; CHECK-P9-NEXT:    extsb r10, r10
-; CHECK-P9-NEXT:    extsb r11, r11
-; CHECK-P9-NEXT:    extsb r12, r12
-; CHECK-P9-NEXT:    extsb r0, r0
-; CHECK-P9-NEXT:    extsb r30, r30
-; CHECK-P9-NEXT:    extsb r29, r29
-; CHECK-P9-NEXT:    extsb r28, r28
-; CHECK-P9-NEXT:    extsb r27, r27
-; CHECK-P9-NEXT:    extsb r26, r26
-; CHECK-P9-NEXT:    extsb r25, r25
-; CHECK-P9-NEXT:    mtvsrwa f0, r4
-; CHECK-P9-NEXT:    mtvsrwa f1, r5
-; CHECK-P9-NEXT:    mtvsrwa f2, r6
-; CHECK-P9-NEXT:    mtvsrwa f3, r7
-; CHECK-P9-NEXT:    mtvsrwa f4, r8
-; CHECK-P9-NEXT:    mtvsrwa f5, r9
-; CHECK-P9-NEXT:    mtvsrwa f6, r10
-; CHECK-P9-NEXT:    mtvsrwa f7, r11
-; CHECK-P9-NEXT:    mtvsrwa f8, r12
-; CHECK-P9-NEXT:    mtvsrwa f9, r0
-; CHECK-P9-NEXT:    mtvsrwa f10, r30
-; CHECK-P9-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa f11, r29
-; CHECK-P9-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa f12, r28
-; CHECK-P9-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa f13, r27
-; CHECK-P9-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa v2, r26
-; CHECK-P9-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa v3, r25
-; CHECK-P9-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvsxdsp f1, f1
-; CHECK-P9-NEXT:    xscvsxdsp f2, f2
-; CHECK-P9-NEXT:    xscvsxdsp f3, f3
-; CHECK-P9-NEXT:    xscvsxdsp f4, f4
-; CHECK-P9-NEXT:    xscvsxdsp f5, f5
-; CHECK-P9-NEXT:    xscvsxdsp f6, f6
-; CHECK-P9-NEXT:    xscvsxdsp f7, f7
-; CHECK-P9-NEXT:    xscvsxdsp f8, f8
-; CHECK-P9-NEXT:    xscvsxdsp f9, f9
-; CHECK-P9-NEXT:    xscvsxdsp f10, f10
-; CHECK-P9-NEXT:    xscvsxdsp f11, f11
-; CHECK-P9-NEXT:    xscvsxdsp f12, f12
-; CHECK-P9-NEXT:    xscvsxdsp f13, f13
-; CHECK-P9-NEXT:    xscvsxdsp f31, v2
-; CHECK-P9-NEXT:    xscvsxdsp f30, v3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P9-NEXT:    xxmrghd vs4, vs9, vs8
-; CHECK-P9-NEXT:    xxmrghd vs5, vs11, vs10
-; CHECK-P9-NEXT:    xxmrghd vs6, vs13, vs12
-; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xvcvdpsp v2, vs0
-; CHECK-P9-NEXT:    xvcvdpsp v3, vs1
-; CHECK-P9-NEXT:    xvcvdpsp v4, vs2
-; CHECK-P9-NEXT:    xvcvdpsp v5, vs3
-; CHECK-P9-NEXT:    xvcvdpsp v0, vs4
-; CHECK-P9-NEXT:    xvcvdpsp v1, vs5
-; CHECK-P9-NEXT:    xvcvdpsp v6, vs6
-; CHECK-P9-NEXT:    xvcvdpsp v7, vs7
-; CHECK-P9-NEXT:    vmrgew v2, v3, v2
-; CHECK-P9-NEXT:    vmrgew v3, v5, v4
-; CHECK-P9-NEXT:    vmrgew v4, v1, v0
-; CHECK-P9-NEXT:    vmrgew v5, v7, v6
-; CHECK-P9-NEXT:    stxv v3, 16(r3)
-; CHECK-P9-NEXT:    stxv v2, 0(r3)
-; CHECK-P9-NEXT:    stxv v5, 48(r3)
-; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI7_1@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI7_2@toc@ha
+; CHECK-P9-NEXT:    addis r7, r2, .LCPI7_3@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_0@toc@l
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI7_1@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI7_2@toc@l
+; CHECK-P9-NEXT:    addi r7, r7, .LCPI7_3@toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    lxvx v4, 0, r5
+; CHECK-P9-NEXT:    lxvx v5, 0, r6
+; CHECK-P9-NEXT:    lxvx v0, 0, r7
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    vperm v4, v2, v2, v4
+; CHECK-P9-NEXT:    vperm v5, v2, v2, v5
+; CHECK-P9-NEXT:    vperm v2, v2, v2, v0
+; CHECK-P9-NEXT:    vextsb2w v3, v3
+; CHECK-P9-NEXT:    vextsb2w v4, v4
+; CHECK-P9-NEXT:    vextsb2w v5, v5
+; CHECK-P9-NEXT:    vextsb2w v2, v2
+; CHECK-P9-NEXT:    xvcvsxwsp vs0, v3
+; CHECK-P9-NEXT:    xvcvsxwsp vs1, v4
+; CHECK-P9-NEXT:    xvcvsxwsp vs2, v5
+; CHECK-P9-NEXT:    xvcvsxwsp vs3, v2
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    li r4, 3
-; CHECK-BE-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    li r5, 1
-; CHECK-BE-NEXT:    li r6, 2
-; CHECK-BE-NEXT:    li r7, 0
-; CHECK-BE-NEXT:    li r8, 7
-; CHECK-BE-NEXT:    li r9, 5
-; CHECK-BE-NEXT:    li r10, 6
-; CHECK-BE-NEXT:    li r11, 4
-; CHECK-BE-NEXT:    li r12, 11
-; CHECK-BE-NEXT:    li r0, 9
-; CHECK-BE-NEXT:    li r30, 10
-; CHECK-BE-NEXT:    li r29, 8
-; CHECK-BE-NEXT:    li r28, 15
-; CHECK-BE-NEXT:    li r27, 13
-; CHECK-BE-NEXT:    li r26, 14
-; CHECK-BE-NEXT:    li r25, 12
-; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
-; CHECK-BE-NEXT:    vextublx r5, r5, v2
-; CHECK-BE-NEXT:    vextublx r6, r6, v2
-; CHECK-BE-NEXT:    vextublx r7, r7, v2
-; CHECK-BE-NEXT:    vextublx r8, r8, v2
-; CHECK-BE-NEXT:    vextublx r9, r9, v2
-; CHECK-BE-NEXT:    vextublx r10, r10, v2
-; CHECK-BE-NEXT:    vextublx r11, r11, v2
-; CHECK-BE-NEXT:    vextublx r12, r12, v2
-; CHECK-BE-NEXT:    vextublx r0, r0, v2
-; CHECK-BE-NEXT:    vextublx r30, r30, v2
-; CHECK-BE-NEXT:    vextublx r29, r29, v2
-; CHECK-BE-NEXT:    vextublx r28, r28, v2
-; CHECK-BE-NEXT:    vextublx r27, r27, v2
-; CHECK-BE-NEXT:    vextublx r26, r26, v2
-; CHECK-BE-NEXT:    vextublx r25, r25, v2
-; CHECK-BE-NEXT:    extsb r4, r4
-; CHECK-BE-NEXT:    extsb r5, r5
-; CHECK-BE-NEXT:    extsb r6, r6
-; CHECK-BE-NEXT:    extsb r7, r7
-; CHECK-BE-NEXT:    extsb r8, r8
-; CHECK-BE-NEXT:    extsb r9, r9
-; CHECK-BE-NEXT:    extsb r10, r10
-; CHECK-BE-NEXT:    extsb r11, r11
-; CHECK-BE-NEXT:    extsb r12, r12
-; CHECK-BE-NEXT:    extsb r0, r0
-; CHECK-BE-NEXT:    extsb r30, r30
-; CHECK-BE-NEXT:    extsb r29, r29
-; CHECK-BE-NEXT:    extsb r28, r28
-; CHECK-BE-NEXT:    extsb r27, r27
-; CHECK-BE-NEXT:    extsb r26, r26
-; CHECK-BE-NEXT:    extsb r25, r25
-; CHECK-BE-NEXT:    mtvsrwa f0, r4
-; CHECK-BE-NEXT:    mtvsrwa f1, r5
-; CHECK-BE-NEXT:    mtvsrwa f2, r6
-; CHECK-BE-NEXT:    mtvsrwa f3, r7
-; CHECK-BE-NEXT:    mtvsrwa f4, r8
-; CHECK-BE-NEXT:    mtvsrwa f5, r9
-; CHECK-BE-NEXT:    mtvsrwa f6, r10
-; CHECK-BE-NEXT:    mtvsrwa f7, r11
-; CHECK-BE-NEXT:    mtvsrwa f8, r12
-; CHECK-BE-NEXT:    mtvsrwa f9, r0
-; CHECK-BE-NEXT:    mtvsrwa f10, r30
-; CHECK-BE-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa f11, r29
-; CHECK-BE-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa f12, r28
-; CHECK-BE-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa f13, r27
-; CHECK-BE-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa v2, r26
-; CHECK-BE-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa v3, r25
-; CHECK-BE-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    xscvsxdsp f0, f0
-; CHECK-BE-NEXT:    xscvsxdsp f1, f1
-; CHECK-BE-NEXT:    xscvsxdsp f2, f2
-; CHECK-BE-NEXT:    xscvsxdsp f3, f3
-; CHECK-BE-NEXT:    xscvsxdsp f4, f4
-; CHECK-BE-NEXT:    xscvsxdsp f5, f5
-; CHECK-BE-NEXT:    xscvsxdsp f6, f6
-; CHECK-BE-NEXT:    xscvsxdsp f7, f7
-; CHECK-BE-NEXT:    xscvsxdsp f8, f8
-; CHECK-BE-NEXT:    xscvsxdsp f9, f9
-; CHECK-BE-NEXT:    xscvsxdsp f10, f10
-; CHECK-BE-NEXT:    xscvsxdsp f11, f11
-; CHECK-BE-NEXT:    xscvsxdsp f12, f12
-; CHECK-BE-NEXT:    xscvsxdsp f13, f13
-; CHECK-BE-NEXT:    xscvsxdsp f31, v2
-; CHECK-BE-NEXT:    xscvsxdsp f30, v3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-BE-NEXT:    xxmrghd vs4, vs9, vs8
-; CHECK-BE-NEXT:    xxmrghd vs5, vs11, vs10
-; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
-; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    xvcvdpsp v2, vs0
-; CHECK-BE-NEXT:    xvcvdpsp v3, vs1
-; CHECK-BE-NEXT:    xvcvdpsp v4, vs2
-; CHECK-BE-NEXT:    xvcvdpsp v5, vs3
-; CHECK-BE-NEXT:    xvcvdpsp v0, vs4
-; CHECK-BE-NEXT:    xvcvdpsp v1, vs5
-; CHECK-BE-NEXT:    xvcvdpsp v6, vs6
-; CHECK-BE-NEXT:    xvcvdpsp v7, vs7
-; CHECK-BE-NEXT:    vmrgew v2, v3, v2
-; CHECK-BE-NEXT:    vmrgew v3, v5, v4
-; CHECK-BE-NEXT:    vmrgew v4, v1, v0
-; CHECK-BE-NEXT:    vmrgew v5, v7, v6
-; CHECK-BE-NEXT:    stxv v3, 16(r3)
-; CHECK-BE-NEXT:    stxv v2, 0(r3)
-; CHECK-BE-NEXT:    stxv v5, 48(r3)
-; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_1@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI7_2@toc@ha
+; CHECK-BE-NEXT:    addis r7, r2, .LCPI7_3@toc@ha
+; CHECK-BE-NEXT:    xxlxor v1, v1, v1
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_0@toc@l
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_1@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI7_2@toc@l
+; CHECK-BE-NEXT:    addi r7, r7, .LCPI7_3@toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    lxvx v4, 0, r5
+; CHECK-BE-NEXT:    lxvx v5, 0, r6
+; CHECK-BE-NEXT:    lxvx v0, 0, r7
+; CHECK-BE-NEXT:    vperm v3, v1, v2, v3
+; CHECK-BE-NEXT:    vperm v4, v1, v2, v4
+; CHECK-BE-NEXT:    vperm v5, v2, v2, v5
+; CHECK-BE-NEXT:    vperm v2, v2, v2, v0
+; CHECK-BE-NEXT:    vextsb2w v3, v3
+; CHECK-BE-NEXT:    vextsb2w v4, v4
+; CHECK-BE-NEXT:    vextsb2w v5, v5
+; CHECK-BE-NEXT:    vextsb2w v2, v2
+; CHECK-BE-NEXT:    xvcvsxwsp vs0, v3
+; CHECK-BE-NEXT:    xvcvsxwsp vs1, v4
+; CHECK-BE-NEXT:    xvcvsxwsp vs2, v5
+; CHECK-BE-NEXT:    xvcvsxwsp vs3, v2
+; CHECK-BE-NEXT:    stxv vs1, 48(r3)
+; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs2, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = sitofp <16 x i8> %a to <16 x float>

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll?rev=350155&r1=350154&r2=350155&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll Sat Dec 29 05:40:48 2018
@@ -12,49 +12,36 @@
 define <2 x double> @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
 ; CHECK-P8-NEXT:    mtvsrd f0, r3
-; CHECK-P8-NEXT:    mfvsrd r3, f0
-; CHECK-P8-NEXT:    clrldi r4, r3, 56
-; CHECK-P8-NEXT:    rldicl r3, r3, 56, 56
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r3, r3, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f0, r4
-; CHECK-P8-NEXT:    mtvsrwz f1, r3
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
-; CHECK-P8-NEXT:    xscvuxddp f1, f1
-; CHECK-P8-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-P8-NEXT:    addi r3, r4, .LCPI0_0@toc@l
+; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-P8-NEXT:    xvcvuxddp v2, v2
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrws v2, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    li r4, 1
-; CHECK-P9-NEXT:    vextubrx r3, r3, v2
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
-; CHECK-P9-NEXT:    rlwinm r3, r3, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P9-NEXT:    mtvsrwz f0, r3
-; CHECK-P9-NEXT:    mtvsrwz f1, r4
-; CHECK-P9-NEXT:    xscvuxddp f0, f0
-; CHECK-P9-NEXT:    xscvuxddp f1, f1
-; CHECK-P9-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
+; CHECK-P9-NEXT:    mtvsrws v3, r3
+; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI0_0@toc@l
+; CHECK-P9-NEXT:    lxvx v2, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v4, v3, v2
+; CHECK-P9-NEXT:    xvcvuxddp v2, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrws v2, r3
-; CHECK-BE-NEXT:    li r3, 1
-; CHECK-BE-NEXT:    li r4, 0
-; CHECK-BE-NEXT:    vextublx r3, r3, v2
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
-; CHECK-BE-NEXT:    rlwinm r3, r3, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-BE-NEXT:    mtvsrwz f0, r3
-; CHECK-BE-NEXT:    mtvsrwz f1, r4
-; CHECK-BE-NEXT:    xscvuxddp f0, f0
-; CHECK-BE-NEXT:    xscvuxddp f1, f1
-; CHECK-BE-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
+; CHECK-BE-NEXT:    mtvsrws v3, r3
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI0_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
+; CHECK-BE-NEXT:    xvcvuxddp v2, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i16 %a.coerce to <2 x i8>
@@ -65,27 +52,20 @@ entry:
 define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i32 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt:
 ; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI1_0@toc@ha
 ; CHECK-P8-NEXT:    mtvsrd f0, r4
-; CHECK-P8-NEXT:    mfvsrd r4, f0
-; CHECK-P8-NEXT:    clrldi r5, r4, 56
-; CHECK-P8-NEXT:    rldicl r6, r4, 56, 56
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f0, r5
-; CHECK-P8-NEXT:    rldicl r5, r4, 48, 56
-; CHECK-P8-NEXT:    rldicl r4, r4, 40, 56
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f1, r6
-; CHECK-P8-NEXT:    mtvsrwz f2, r5
-; CHECK-P8-NEXT:    mtvsrwz f3, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI1_1@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI1_0@toc@l
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI1_1@toc@l
+; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    lvx v5, 0, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
-; CHECK-P8-NEXT:    xscvuxddp f1, f1
-; CHECK-P8-NEXT:    xscvuxddp f2, f2
-; CHECK-P8-NEXT:    xscvuxddp f3, f3
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    vperm v2, v4, v3, v2
+; CHECK-P8-NEXT:    vperm v3, v4, v3, v5
+; CHECK-P8-NEXT:    xvcvuxddp vs0, v2
+; CHECK-P8-NEXT:    xvcvuxddp vs1, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
@@ -94,58 +74,36 @@ define void @test4elt(<4 x double>* noal
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrws v2, r4
-; CHECK-P9-NEXT:    li r4, 0
-; CHECK-P9-NEXT:    li r5, 1
-; CHECK-P9-NEXT:    li r6, 2
-; CHECK-P9-NEXT:    li r7, 3
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
-; CHECK-P9-NEXT:    vextubrx r5, r5, v2
-; CHECK-P9-NEXT:    vextubrx r6, r6, v2
-; CHECK-P9-NEXT:    vextubrx r7, r7, v2
-; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-P9-NEXT:    mtvsrwz f0, r4
-; CHECK-P9-NEXT:    mtvsrwz f1, r5
-; CHECK-P9-NEXT:    mtvsrwz f2, r6
-; CHECK-P9-NEXT:    mtvsrwz f3, r7
-; CHECK-P9-NEXT:    xscvuxddp f0, f0
-; CHECK-P9-NEXT:    xscvuxddp f1, f1
-; CHECK-P9-NEXT:    xscvuxddp f2, f2
-; CHECK-P9-NEXT:    xscvuxddp f3, f3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI1_0@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI1_1@toc@ha
+; CHECK-P9-NEXT:    mtvsrws v4, r4
+; CHECK-P9-NEXT:    xxlxor v5, v5, v5
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI1_0@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI1_1@toc@l
+; CHECK-P9-NEXT:    lxvx v2, 0, r5
+; CHECK-P9-NEXT:    lxvx v3, 0, r6
+; CHECK-P9-NEXT:    vperm v2, v5, v4, v2
+; CHECK-P9-NEXT:    vperm v3, v5, v4, v3
+; CHECK-P9-NEXT:    xvcvuxddp vs0, v2
+; CHECK-P9-NEXT:    xvcvuxddp vs1, v3
 ; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrws v2, r4
-; CHECK-BE-NEXT:    li r4, 1
-; CHECK-BE-NEXT:    li r5, 0
-; CHECK-BE-NEXT:    li r6, 3
-; CHECK-BE-NEXT:    li r7, 2
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
-; CHECK-BE-NEXT:    vextublx r5, r5, v2
-; CHECK-BE-NEXT:    vextublx r6, r6, v2
-; CHECK-BE-NEXT:    vextublx r7, r7, v2
-; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-BE-NEXT:    mtvsrwz f0, r4
-; CHECK-BE-NEXT:    mtvsrwz f1, r5
-; CHECK-BE-NEXT:    mtvsrwz f2, r6
-; CHECK-BE-NEXT:    mtvsrwz f3, r7
-; CHECK-BE-NEXT:    xscvuxddp f0, f0
-; CHECK-BE-NEXT:    xscvuxddp f1, f1
-; CHECK-BE-NEXT:    xscvuxddp f2, f2
-; CHECK-BE-NEXT:    xscvuxddp f3, f3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI1_0@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI1_1@toc@ha
+; CHECK-BE-NEXT:    mtvsrws v4, r4
+; CHECK-BE-NEXT:    xxlxor v5, v5, v5
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI1_0@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI1_1@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r5
+; CHECK-BE-NEXT:    lxvx v3, 0, r6
+; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
+; CHECK-BE-NEXT:    vperm v3, v5, v4, v3
+; CHECK-BE-NEXT:    xvcvuxddp vs0, v2
+; CHECK-BE-NEXT:    xvcvuxddp vs1, v3
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    blr
@@ -159,105 +117,67 @@ entry:
 define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, i64 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
 ; CHECK-P8-NEXT:    mtvsrd f0, r4
-; CHECK-P8-NEXT:    mfvsrd r4, f0
-; CHECK-P8-NEXT:    clrldi r5, r4, 56
-; CHECK-P8-NEXT:    rldicl r6, r4, 56, 56
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f0, r5
-; CHECK-P8-NEXT:    rldicl r5, r4, 48, 56
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f1, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 40, 56
-; CHECK-P8-NEXT:    mtvsrwz f2, r5
-; CHECK-P8-NEXT:    rldicl r5, r4, 32, 56
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f3, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 24, 56
-; CHECK-P8-NEXT:    mtvsrwz f4, r5
-; CHECK-P8-NEXT:    rldicl r5, r4, 16, 56
-; CHECK-P8-NEXT:    rldicl r4, r4, 8, 56
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f5, r6
-; CHECK-P8-NEXT:    mtvsrwz f6, r5
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    mtvsrwz f7, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_2@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_2@toc@l
+; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_3@toc@ha
+; CHECK-P8-NEXT:    lvx v5, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_3@toc@l
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_1@toc@l
+; CHECK-P8-NEXT:    lvx v0, 0, r5
+; CHECK-P8-NEXT:    lvx v1, 0, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xscvuxddp f4, f4
-; CHECK-P8-NEXT:    xscvuxddp f5, f5
-; CHECK-P8-NEXT:    xscvuxddp f6, f6
-; CHECK-P8-NEXT:    xscvuxddp f7, f7
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
-; CHECK-P8-NEXT:    xscvuxddp f1, f1
-; CHECK-P8-NEXT:    xscvuxddp f2, f2
-; CHECK-P8-NEXT:    xscvuxddp f3, f3
-; CHECK-P8-NEXT:    xxmrghd vs4, vs5, vs4
-; CHECK-P8-NEXT:    xxmrghd vs5, vs7, vs6
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P8-NEXT:    xxswapd vs2, vs5
-; CHECK-P8-NEXT:    xxswapd vs3, vs4
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    vperm v2, v4, v3, v2
+; CHECK-P8-NEXT:    vperm v5, v4, v3, v5
+; CHECK-P8-NEXT:    vperm v0, v4, v3, v0
+; CHECK-P8-NEXT:    vperm v3, v4, v3, v1
+; CHECK-P8-NEXT:    xvcvuxddp vs0, v2
+; CHECK-P8-NEXT:    xvcvuxddp vs1, v5
+; CHECK-P8-NEXT:    xvcvuxddp vs2, v0
+; CHECK-P8-NEXT:    xvcvuxddp vs3, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI2_1@toc@ha
+; CHECK-P9-NEXT:    addis r7, r2, .LCPI2_2@toc@ha
+; CHECK-P9-NEXT:    addis r8, r2, .LCPI2_3@toc@ha
 ; CHECK-P9-NEXT:    mtvsrd f0, r4
-; CHECK-P9-NEXT:    li r4, 0
-; CHECK-P9-NEXT:    li r5, 1
-; CHECK-P9-NEXT:    li r6, 2
-; CHECK-P9-NEXT:    li r7, 3
-; CHECK-P9-NEXT:    li r8, 4
-; CHECK-P9-NEXT:    li r9, 5
-; CHECK-P9-NEXT:    li r10, 6
-; CHECK-P9-NEXT:    li r11, 7
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
-; CHECK-P9-NEXT:    vextubrx r5, r5, v2
-; CHECK-P9-NEXT:    vextubrx r6, r6, v2
-; CHECK-P9-NEXT:    vextubrx r7, r7, v2
-; CHECK-P9-NEXT:    vextubrx r8, r8, v2
-; CHECK-P9-NEXT:    vextubrx r9, r9, v2
-; CHECK-P9-NEXT:    vextubrx r10, r10, v2
-; CHECK-P9-NEXT:    vextubrx r11, r11, v2
-; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r8, r8, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r9, r9, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r10, r10, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r11, r11, 0, 24, 31
-; CHECK-P9-NEXT:    mtvsrwz f0, r4
-; CHECK-P9-NEXT:    mtvsrwz f1, r5
-; CHECK-P9-NEXT:    mtvsrwz f2, r6
-; CHECK-P9-NEXT:    mtvsrwz f3, r7
-; CHECK-P9-NEXT:    mtvsrwz f4, r8
-; CHECK-P9-NEXT:    mtvsrwz f5, r9
-; CHECK-P9-NEXT:    mtvsrwz f6, r10
-; CHECK-P9-NEXT:    mtvsrwz f7, r11
-; CHECK-P9-NEXT:    xscvuxddp f0, f0
-; CHECK-P9-NEXT:    xscvuxddp f1, f1
-; CHECK-P9-NEXT:    xscvuxddp f2, f2
-; CHECK-P9-NEXT:    xscvuxddp f3, f3
-; CHECK-P9-NEXT:    xscvuxddp f4, f4
-; CHECK-P9-NEXT:    xscvuxddp f5, f5
-; CHECK-P9-NEXT:    xscvuxddp f6, f6
-; CHECK-P9-NEXT:    xscvuxddp f7, f7
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    xxlxor v1, v1, v1
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI2_0@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI2_1@toc@l
+; CHECK-P9-NEXT:    addi r7, r7, .LCPI2_2@toc@l
+; CHECK-P9-NEXT:    addi r8, r8, .LCPI2_3@toc@l
+; CHECK-P9-NEXT:    xxswapd v0, vs0
+; CHECK-P9-NEXT:    lxvx v2, 0, r5
+; CHECK-P9-NEXT:    lxvx v3, 0, r6
+; CHECK-P9-NEXT:    lxvx v4, 0, r7
+; CHECK-P9-NEXT:    lxvx v5, 0, r8
+; CHECK-P9-NEXT:    vperm v2, v1, v0, v2
+; CHECK-P9-NEXT:    vperm v3, v1, v0, v3
+; CHECK-P9-NEXT:    vperm v4, v1, v0, v4
+; CHECK-P9-NEXT:    vperm v5, v1, v0, v5
+; CHECK-P9-NEXT:    xvcvuxddp vs0, v2
+; CHECK-P9-NEXT:    xvcvuxddp vs1, v3
+; CHECK-P9-NEXT:    xvcvuxddp vs2, v4
+; CHECK-P9-NEXT:    xvcvuxddp vs3, v5
 ; CHECK-P9-NEXT:    stxv vs3, 48(r3)
 ; CHECK-P9-NEXT:    stxv vs2, 32(r3)
 ; CHECK-P9-NEXT:    stxv vs1, 16(r3)
@@ -266,55 +186,32 @@ define void @test8elt(<8 x double>* noal
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    li r5, 1
-; CHECK-BE-NEXT:    mtvsrd v2, r4
-; CHECK-BE-NEXT:    li r4, 0
-; CHECK-BE-NEXT:    li r6, 3
-; CHECK-BE-NEXT:    li r7, 2
-; CHECK-BE-NEXT:    li r8, 5
-; CHECK-BE-NEXT:    li r9, 4
-; CHECK-BE-NEXT:    li r10, 7
-; CHECK-BE-NEXT:    li r11, 6
-; CHECK-BE-NEXT:    vextublx r5, r5, v2
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
-; CHECK-BE-NEXT:    vextublx r6, r6, v2
-; CHECK-BE-NEXT:    vextublx r7, r7, v2
-; CHECK-BE-NEXT:    vextublx r8, r8, v2
-; CHECK-BE-NEXT:    vextublx r9, r9, v2
-; CHECK-BE-NEXT:    vextublx r10, r10, v2
-; CHECK-BE-NEXT:    vextublx r11, r11, v2
-; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r8, r8, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r9, r9, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r10, r10, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r11, r11, 0, 24, 31
-; CHECK-BE-NEXT:    mtvsrwz f0, r5
-; CHECK-BE-NEXT:    mtvsrwz f1, r4
-; CHECK-BE-NEXT:    mtvsrwz f2, r6
-; CHECK-BE-NEXT:    mtvsrwz f3, r7
-; CHECK-BE-NEXT:    mtvsrwz f4, r8
-; CHECK-BE-NEXT:    mtvsrwz f5, r9
-; CHECK-BE-NEXT:    mtvsrwz f6, r10
-; CHECK-BE-NEXT:    mtvsrwz f7, r11
-; CHECK-BE-NEXT:    xscvuxddp f0, f0
-; CHECK-BE-NEXT:    xscvuxddp f1, f1
-; CHECK-BE-NEXT:    xscvuxddp f2, f2
-; CHECK-BE-NEXT:    xscvuxddp f3, f3
-; CHECK-BE-NEXT:    xscvuxddp f4, f4
-; CHECK-BE-NEXT:    xscvuxddp f5, f5
-; CHECK-BE-NEXT:    xscvuxddp f6, f6
-; CHECK-BE-NEXT:    xscvuxddp f7, f7
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI2_1@toc@ha
+; CHECK-BE-NEXT:    addis r7, r2, .LCPI2_2@toc@ha
+; CHECK-BE-NEXT:    addis r8, r2, .LCPI2_3@toc@ha
+; CHECK-BE-NEXT:    mtvsrd v0, r4
+; CHECK-BE-NEXT:    xxlxor v1, v1, v1
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI2_0@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI2_1@toc@l
+; CHECK-BE-NEXT:    addi r7, r7, .LCPI2_2@toc@l
+; CHECK-BE-NEXT:    addi r8, r8, .LCPI2_3@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r5
+; CHECK-BE-NEXT:    lxvx v3, 0, r6
+; CHECK-BE-NEXT:    lxvx v4, 0, r7
+; CHECK-BE-NEXT:    lxvx v5, 0, r8
+; CHECK-BE-NEXT:    vperm v2, v0, v1, v2
+; CHECK-BE-NEXT:    vperm v3, v1, v0, v3
+; CHECK-BE-NEXT:    vperm v4, v1, v0, v4
+; CHECK-BE-NEXT:    vperm v5, v1, v0, v5
+; CHECK-BE-NEXT:    xvcvuxddp vs0, v2
+; CHECK-BE-NEXT:    xvcvuxddp vs1, v3
+; CHECK-BE-NEXT:    xvcvuxddp vs2, v4
+; CHECK-BE-NEXT:    xvcvuxddp vs3, v5
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    stxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
-; CHECK-BE-NEXT:    stxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <8 x i8>
@@ -326,338 +223,176 @@ entry:
 define void @test16elt(<16 x double>* noalias nocapture sret %agg.result, <16 x i8> %a) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    mfvsrd r5, v2
-; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_1@toc@ha
+; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_0@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_1@toc@l
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
+; CHECK-P8-NEXT:    lvx v5, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_4@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_2@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_4@toc@l
+; CHECK-P8-NEXT:    lvx v0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_6@toc@ha
+; CHECK-P8-NEXT:    lvx v1, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_7@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_6@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_7@toc@l
+; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P8-NEXT:    lvx v6, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_5@toc@ha
+; CHECK-P8-NEXT:    lvx v7, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_3@toc@ha
+; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_5@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_3@toc@l
+; CHECK-P8-NEXT:    vperm v0, v4, v2, v0
+; CHECK-P8-NEXT:    lvx v8, 0, r4
+; CHECK-P8-NEXT:    lvx v9, 0, r5
+; CHECK-P8-NEXT:    vperm v1, v4, v2, v1
 ; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    clrldi r6, r5, 56
-; CHECK-P8-NEXT:    rldicl r7, r5, 56, 56
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f0, r6
-; CHECK-P8-NEXT:    rldicl r6, r5, 40, 56
-; CHECK-P8-NEXT:    rldicl r8, r5, 48, 56
-; CHECK-P8-NEXT:    mtvsrwz f1, r7
-; CHECK-P8-NEXT:    rldicl r7, r5, 32, 56
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r8, r8, 0, 24, 31
-; CHECK-P8-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f4, r6
-; CHECK-P8-NEXT:    rldicl r6, r5, 24, 56
-; CHECK-P8-NEXT:    mtvsrwz f3, r8
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f5, r7
-; CHECK-P8-NEXT:    rldicl r7, r5, 16, 56
-; CHECK-P8-NEXT:    rldicl r5, r5, 8, 56
-; CHECK-P8-NEXT:    mfvsrd r8, f2
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f2, r6
-; CHECK-P8-NEXT:    rlwinm r6, r7, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f6, r6
-; CHECK-P8-NEXT:    clrldi r6, r8, 56
-; CHECK-P8-NEXT:    mtvsrwz f7, r5
-; CHECK-P8-NEXT:    rldicl r5, r8, 56, 56
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f8, r6
-; CHECK-P8-NEXT:    rldicl r6, r8, 48, 56
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f9, r5
-; CHECK-P8-NEXT:    rldicl r5, r8, 40, 56
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f10, r6
-; CHECK-P8-NEXT:    rldicl r6, r8, 32, 56
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f11, r5
-; CHECK-P8-NEXT:    rldicl r5, r8, 24, 56
-; CHECK-P8-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz f12, r6
-; CHECK-P8-NEXT:    rldicl r6, r8, 16, 56
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    xscvuxddp f6, f6
-; CHECK-P8-NEXT:    xscvuxddp f7, f7
-; CHECK-P8-NEXT:    mtvsrwz f13, r5
-; CHECK-P8-NEXT:    rlwinm r5, r6, 0, 24, 31
-; CHECK-P8-NEXT:    mtvsrwz v2, r5
-; CHECK-P8-NEXT:    rldicl r5, r8, 8, 56
-; CHECK-P8-NEXT:    xscvuxddp f5, f5
-; CHECK-P8-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P8-NEXT:    xscvuxddp f2, f2
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
-; CHECK-P8-NEXT:    xscvuxddp f1, f1
-; CHECK-P8-NEXT:    xxmrghd vs6, vs7, vs6
-; CHECK-P8-NEXT:    mtvsrwz v3, r5
-; CHECK-P8-NEXT:    li r5, 64
-; CHECK-P8-NEXT:    xscvuxddp f3, f3
-; CHECK-P8-NEXT:    xscvuxddp f4, f4
-; CHECK-P8-NEXT:    xscvuxddp f31, v2
-; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs5
-; CHECK-P8-NEXT:    xscvuxddp f7, v3
-; CHECK-P8-NEXT:    xscvuxddp f8, f8
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xscvuxddp f9, f9
-; CHECK-P8-NEXT:    xxswapd vs1, vs6
-; CHECK-P8-NEXT:    xscvuxddp f10, f10
+; CHECK-P8-NEXT:    li r5, 96
+; CHECK-P8-NEXT:    vperm v6, v4, v2, v6
+; CHECK-P8-NEXT:    vperm v7, v4, v2, v7
+; CHECK-P8-NEXT:    vperm v8, v4, v2, v8
+; CHECK-P8-NEXT:    vperm v2, v4, v2, v9
+; CHECK-P8-NEXT:    xvcvuxddp vs0, v0
+; CHECK-P8-NEXT:    xvcvuxddp vs1, v1
+; CHECK-P8-NEXT:    xvcvuxddp vs2, v6
+; CHECK-P8-NEXT:    xvcvuxddp vs3, v7
+; CHECK-P8-NEXT:    xvcvuxddp vs4, v8
+; CHECK-P8-NEXT:    xvcvuxddp vs5, v2
+; CHECK-P8-NEXT:    xvcvuxddp vs6, v3
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xvcvuxddp vs7, v5
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xscvuxddp f12, f12
-; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
-; CHECK-P8-NEXT:    xscvuxddp f13, f13
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT:    li r4, 96
-; CHECK-P8-NEXT:    xscvuxddp f11, f11
-; CHECK-P8-NEXT:    xxmrghd vs6, vs7, vs31
 ; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxmrghd vs4, vs9, vs8
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    li r4, 80
-; CHECK-P8-NEXT:    xxswapd vs2, vs6
+; CHECK-P8-NEXT:    xxswapd vs4, vs4
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    li r5, 64
+; CHECK-P8-NEXT:    xxswapd vs2, vs7
+; CHECK-P8-NEXT:    xxswapd vs3, vs6
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
 ; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    xxmrghd vs5, vs13, vs12
-; CHECK-P8-NEXT:    xxswapd vs4, vs4
-; CHECK-P8-NEXT:    xxmrghd vs1, vs11, vs10
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs5, r3, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    xxswapd vs5, vs5
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    stxvd2x vs5, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs4, 0, r3
-; CHECK-P8-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    li r4, 0
-; CHECK-P9-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    li r5, 1
-; CHECK-P9-NEXT:    li r6, 2
-; CHECK-P9-NEXT:    li r7, 3
-; CHECK-P9-NEXT:    li r8, 4
-; CHECK-P9-NEXT:    li r9, 5
-; CHECK-P9-NEXT:    li r10, 6
-; CHECK-P9-NEXT:    li r11, 7
-; CHECK-P9-NEXT:    li r12, 8
-; CHECK-P9-NEXT:    li r0, 9
-; CHECK-P9-NEXT:    li r30, 10
-; CHECK-P9-NEXT:    li r29, 11
-; CHECK-P9-NEXT:    li r28, 12
-; CHECK-P9-NEXT:    li r27, 13
-; CHECK-P9-NEXT:    li r26, 14
-; CHECK-P9-NEXT:    li r25, 15
-; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
-; CHECK-P9-NEXT:    vextubrx r5, r5, v2
-; CHECK-P9-NEXT:    vextubrx r6, r6, v2
-; CHECK-P9-NEXT:    vextubrx r7, r7, v2
-; CHECK-P9-NEXT:    vextubrx r8, r8, v2
-; CHECK-P9-NEXT:    vextubrx r9, r9, v2
-; CHECK-P9-NEXT:    vextubrx r10, r10, v2
-; CHECK-P9-NEXT:    vextubrx r11, r11, v2
-; CHECK-P9-NEXT:    vextubrx r12, r12, v2
-; CHECK-P9-NEXT:    vextubrx r0, r0, v2
-; CHECK-P9-NEXT:    vextubrx r30, r30, v2
-; CHECK-P9-NEXT:    vextubrx r29, r29, v2
-; CHECK-P9-NEXT:    vextubrx r28, r28, v2
-; CHECK-P9-NEXT:    vextubrx r27, r27, v2
-; CHECK-P9-NEXT:    vextubrx r26, r26, v2
-; CHECK-P9-NEXT:    vextubrx r25, r25, v2
-; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r8, r8, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r9, r9, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r10, r10, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r11, r11, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r12, r12, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r0, r0, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r30, r30, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r29, r29, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r28, r28, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r27, r27, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r26, r26, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r25, r25, 0, 24, 31
-; CHECK-P9-NEXT:    mtvsrwz f0, r4
-; CHECK-P9-NEXT:    mtvsrwz f1, r5
-; CHECK-P9-NEXT:    mtvsrwz f2, r6
-; CHECK-P9-NEXT:    mtvsrwz f3, r7
-; CHECK-P9-NEXT:    mtvsrwz f4, r8
-; CHECK-P9-NEXT:    mtvsrwz f5, r9
-; CHECK-P9-NEXT:    mtvsrwz f6, r10
-; CHECK-P9-NEXT:    mtvsrwz f7, r11
-; CHECK-P9-NEXT:    mtvsrwz f8, r12
-; CHECK-P9-NEXT:    mtvsrwz f9, r0
-; CHECK-P9-NEXT:    mtvsrwz f10, r30
-; CHECK-P9-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz f11, r29
-; CHECK-P9-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz f12, r28
-; CHECK-P9-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz f13, r27
-; CHECK-P9-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz v2, r26
-; CHECK-P9-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwz v3, r25
-; CHECK-P9-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xscvuxddp f0, f0
-; CHECK-P9-NEXT:    xscvuxddp f1, f1
-; CHECK-P9-NEXT:    xscvuxddp f2, f2
-; CHECK-P9-NEXT:    xscvuxddp f3, f3
-; CHECK-P9-NEXT:    xscvuxddp f4, f4
-; CHECK-P9-NEXT:    xscvuxddp f5, f5
-; CHECK-P9-NEXT:    xscvuxddp f6, f6
-; CHECK-P9-NEXT:    xscvuxddp f7, f7
-; CHECK-P9-NEXT:    xscvuxddp f8, f8
-; CHECK-P9-NEXT:    xscvuxddp f9, f9
-; CHECK-P9-NEXT:    xscvuxddp f10, f10
-; CHECK-P9-NEXT:    xscvuxddp f11, f11
-; CHECK-P9-NEXT:    xscvuxddp f12, f12
-; CHECK-P9-NEXT:    xscvuxddp f13, f13
-; CHECK-P9-NEXT:    xscvuxddp f31, v2
-; CHECK-P9-NEXT:    xscvuxddp f30, v3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P9-NEXT:    xxmrghd vs4, vs9, vs8
-; CHECK-P9-NEXT:    xxmrghd vs5, vs11, vs10
-; CHECK-P9-NEXT:    xxmrghd vs6, vs13, vs12
-; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
-; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI3_1@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI3_2@toc@ha
+; CHECK-P9-NEXT:    addis r7, r2, .LCPI3_3@toc@ha
+; CHECK-P9-NEXT:    addis r8, r2, .LCPI3_4@toc@ha
+; CHECK-P9-NEXT:    addis r9, r2, .LCPI3_5@toc@ha
+; CHECK-P9-NEXT:    addis r10, r2, .LCPI3_6@toc@ha
+; CHECK-P9-NEXT:    addis r11, r2, .LCPI3_7@toc@ha
+; CHECK-P9-NEXT:    xxlxor v9, v9, v9
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_0@toc@l
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI3_1@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI3_2@toc@l
+; CHECK-P9-NEXT:    addi r7, r7, .LCPI3_3@toc@l
+; CHECK-P9-NEXT:    addi r8, r8, .LCPI3_4@toc@l
+; CHECK-P9-NEXT:    addi r9, r9, .LCPI3_5@toc@l
+; CHECK-P9-NEXT:    addi r10, r10, .LCPI3_6@toc@l
+; CHECK-P9-NEXT:    addi r11, r11, .LCPI3_7@toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    lxvx v4, 0, r5
+; CHECK-P9-NEXT:    lxvx v5, 0, r6
+; CHECK-P9-NEXT:    lxvx v0, 0, r7
+; CHECK-P9-NEXT:    lxvx v1, 0, r8
+; CHECK-P9-NEXT:    lxvx v6, 0, r9
+; CHECK-P9-NEXT:    lxvx v7, 0, r10
+; CHECK-P9-NEXT:    lxvx v8, 0, r11
+; CHECK-P9-NEXT:    vperm v3, v9, v2, v3
+; CHECK-P9-NEXT:    vperm v4, v9, v2, v4
+; CHECK-P9-NEXT:    vperm v5, v9, v2, v5
+; CHECK-P9-NEXT:    vperm v0, v9, v2, v0
+; CHECK-P9-NEXT:    vperm v1, v9, v2, v1
+; CHECK-P9-NEXT:    vperm v6, v9, v2, v6
+; CHECK-P9-NEXT:    vperm v7, v9, v2, v7
+; CHECK-P9-NEXT:    vperm v2, v9, v2, v8
+; CHECK-P9-NEXT:    xvcvuxddp vs0, v3
+; CHECK-P9-NEXT:    xvcvuxddp vs1, v4
+; CHECK-P9-NEXT:    xvcvuxddp vs2, v5
+; CHECK-P9-NEXT:    xvcvuxddp vs3, v0
+; CHECK-P9-NEXT:    xvcvuxddp vs4, v1
+; CHECK-P9-NEXT:    xvcvuxddp vs5, v6
+; CHECK-P9-NEXT:    xvcvuxddp vs6, v7
+; CHECK-P9-NEXT:    xvcvuxddp vs7, v2
 ; CHECK-P9-NEXT:    stxv vs3, 48(r3)
 ; CHECK-P9-NEXT:    stxv vs2, 32(r3)
 ; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    stxv vs7, 112(r3)
 ; CHECK-P9-NEXT:    stxv vs6, 96(r3)
 ; CHECK-P9-NEXT:    stxv vs5, 80(r3)
+; CHECK-P9-NEXT:    stxv vs4, 64(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    li r4, 1
-; CHECK-BE-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    li r5, 0
-; CHECK-BE-NEXT:    li r6, 3
-; CHECK-BE-NEXT:    li r7, 2
-; CHECK-BE-NEXT:    li r8, 5
-; CHECK-BE-NEXT:    li r9, 4
-; CHECK-BE-NEXT:    li r10, 7
-; CHECK-BE-NEXT:    li r11, 6
-; CHECK-BE-NEXT:    li r12, 9
-; CHECK-BE-NEXT:    li r0, 8
-; CHECK-BE-NEXT:    li r30, 11
-; CHECK-BE-NEXT:    li r29, 10
-; CHECK-BE-NEXT:    li r28, 13
-; CHECK-BE-NEXT:    li r27, 12
-; CHECK-BE-NEXT:    li r26, 15
-; CHECK-BE-NEXT:    li r25, 14
-; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
-; CHECK-BE-NEXT:    vextublx r5, r5, v2
-; CHECK-BE-NEXT:    vextublx r6, r6, v2
-; CHECK-BE-NEXT:    vextublx r7, r7, v2
-; CHECK-BE-NEXT:    vextublx r8, r8, v2
-; CHECK-BE-NEXT:    vextublx r9, r9, v2
-; CHECK-BE-NEXT:    vextublx r10, r10, v2
-; CHECK-BE-NEXT:    vextublx r11, r11, v2
-; CHECK-BE-NEXT:    vextublx r12, r12, v2
-; CHECK-BE-NEXT:    vextublx r0, r0, v2
-; CHECK-BE-NEXT:    vextublx r30, r30, v2
-; CHECK-BE-NEXT:    vextublx r29, r29, v2
-; CHECK-BE-NEXT:    vextublx r28, r28, v2
-; CHECK-BE-NEXT:    vextublx r27, r27, v2
-; CHECK-BE-NEXT:    vextublx r26, r26, v2
-; CHECK-BE-NEXT:    vextublx r25, r25, v2
-; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r6, r6, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r7, r7, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r8, r8, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r9, r9, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r10, r10, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r11, r11, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r12, r12, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r0, r0, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r30, r30, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r29, r29, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r28, r28, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r27, r27, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r26, r26, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r25, r25, 0, 24, 31
-; CHECK-BE-NEXT:    mtvsrwz f0, r4
-; CHECK-BE-NEXT:    mtvsrwz f1, r5
-; CHECK-BE-NEXT:    mtvsrwz f2, r6
-; CHECK-BE-NEXT:    mtvsrwz f3, r7
-; CHECK-BE-NEXT:    mtvsrwz f4, r8
-; CHECK-BE-NEXT:    mtvsrwz f5, r9
-; CHECK-BE-NEXT:    mtvsrwz f6, r10
-; CHECK-BE-NEXT:    mtvsrwz f7, r11
-; CHECK-BE-NEXT:    mtvsrwz f8, r12
-; CHECK-BE-NEXT:    mtvsrwz f9, r0
-; CHECK-BE-NEXT:    mtvsrwz f10, r30
-; CHECK-BE-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz f11, r29
-; CHECK-BE-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz f12, r28
-; CHECK-BE-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz f13, r27
-; CHECK-BE-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz v2, r26
-; CHECK-BE-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwz v3, r25
-; CHECK-BE-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    xscvuxddp f0, f0
-; CHECK-BE-NEXT:    xscvuxddp f1, f1
-; CHECK-BE-NEXT:    xscvuxddp f2, f2
-; CHECK-BE-NEXT:    xscvuxddp f3, f3
-; CHECK-BE-NEXT:    xscvuxddp f4, f4
-; CHECK-BE-NEXT:    xscvuxddp f5, f5
-; CHECK-BE-NEXT:    xscvuxddp f6, f6
-; CHECK-BE-NEXT:    xscvuxddp f7, f7
-; CHECK-BE-NEXT:    xscvuxddp f8, f8
-; CHECK-BE-NEXT:    xscvuxddp f9, f9
-; CHECK-BE-NEXT:    xscvuxddp f10, f10
-; CHECK-BE-NEXT:    xscvuxddp f11, f11
-; CHECK-BE-NEXT:    xscvuxddp f12, f12
-; CHECK-BE-NEXT:    xscvuxddp f13, f13
-; CHECK-BE-NEXT:    xscvuxddp f31, v2
-; CHECK-BE-NEXT:    xscvuxddp f30, v3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-BE-NEXT:    xxmrghd vs4, vs9, vs8
-; CHECK-BE-NEXT:    xxmrghd vs5, vs11, vs10
-; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
-; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
-; CHECK-BE-NEXT:    stxv vs4, 64(r3)
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI3_1@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI3_2@toc@ha
+; CHECK-BE-NEXT:    addis r7, r2, .LCPI3_3@toc@ha
+; CHECK-BE-NEXT:    addis r8, r2, .LCPI3_4@toc@ha
+; CHECK-BE-NEXT:    addis r9, r2, .LCPI3_5@toc@ha
+; CHECK-BE-NEXT:    addis r10, r2, .LCPI3_6@toc@ha
+; CHECK-BE-NEXT:    addis r11, r2, .LCPI3_7@toc@ha
+; CHECK-BE-NEXT:    xxlxor v9, v9, v9
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI3_1@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI3_2@toc@l
+; CHECK-BE-NEXT:    addi r7, r7, .LCPI3_3@toc@l
+; CHECK-BE-NEXT:    addi r8, r8, .LCPI3_4@toc@l
+; CHECK-BE-NEXT:    addi r9, r9, .LCPI3_5@toc@l
+; CHECK-BE-NEXT:    addi r10, r10, .LCPI3_6@toc@l
+; CHECK-BE-NEXT:    addi r11, r11, .LCPI3_7@toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    lxvx v4, 0, r5
+; CHECK-BE-NEXT:    lxvx v5, 0, r6
+; CHECK-BE-NEXT:    lxvx v0, 0, r7
+; CHECK-BE-NEXT:    lxvx v1, 0, r8
+; CHECK-BE-NEXT:    lxvx v6, 0, r9
+; CHECK-BE-NEXT:    lxvx v7, 0, r10
+; CHECK-BE-NEXT:    lxvx v8, 0, r11
+; CHECK-BE-NEXT:    vperm v3, v2, v9, v3
+; CHECK-BE-NEXT:    vperm v4, v9, v2, v4
+; CHECK-BE-NEXT:    vperm v5, v9, v2, v5
+; CHECK-BE-NEXT:    vperm v0, v9, v2, v0
+; CHECK-BE-NEXT:    vperm v1, v9, v2, v1
+; CHECK-BE-NEXT:    vperm v6, v9, v2, v6
+; CHECK-BE-NEXT:    vperm v7, v9, v2, v7
+; CHECK-BE-NEXT:    vperm v2, v9, v2, v8
+; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
+; CHECK-BE-NEXT:    xvcvuxddp vs1, v4
+; CHECK-BE-NEXT:    xvcvuxddp vs2, v5
+; CHECK-BE-NEXT:    xvcvuxddp vs3, v0
+; CHECK-BE-NEXT:    xvcvuxddp vs4, v1
+; CHECK-BE-NEXT:    xvcvuxddp vs5, v6
+; CHECK-BE-NEXT:    xvcvuxddp vs6, v7
+; CHECK-BE-NEXT:    xvcvuxddp vs7, v2
 ; CHECK-BE-NEXT:    stxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    stxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    stxv vs7, 112(r3)
 ; CHECK-BE-NEXT:    stxv vs6, 96(r3)
 ; CHECK-BE-NEXT:    stxv vs5, 80(r3)
+; CHECK-BE-NEXT:    stxv vs4, 64(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = uitofp <16 x i8> %a to <16 x double>
@@ -668,49 +403,41 @@ entry:
 define <2 x double> @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
 ; CHECK-P8-NEXT:    mtvsrd f0, r3
-; CHECK-P8-NEXT:    mfvsrd r3, f0
-; CHECK-P8-NEXT:    clrldi r4, r3, 56
-; CHECK-P8-NEXT:    rldicl r3, r3, 56, 56
-; CHECK-P8-NEXT:    extsb r4, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    mtvsrwa f0, r4
-; CHECK-P8-NEXT:    mtvsrwa f1, r3
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
-; CHECK-P8-NEXT:    xscvsxddp f1, f1
-; CHECK-P8-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-P8-NEXT:    addi r3, r4, .LCPI4_0@toc@l
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    addis r3, r2, .LCPI4_1@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, .LCPI4_1@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    vsld v2, v2, v3
+; CHECK-P8-NEXT:    vsrad v2, v2, v3
+; CHECK-P8-NEXT:    xvcvsxddp v2, v2
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrws v2, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    li r4, 1
-; CHECK-P9-NEXT:    vextubrx r3, r3, v2
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    extsb r4, r4
-; CHECK-P9-NEXT:    mtvsrwa f0, r3
-; CHECK-P9-NEXT:    mtvsrwa f1, r4
-; CHECK-P9-NEXT:    xscvsxddp f0, f0
-; CHECK-P9-NEXT:    xscvsxddp f1, f1
-; CHECK-P9-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
+; CHECK-P9-NEXT:    mtvsrws v3, r3
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI4_0@toc@l
+; CHECK-P9-NEXT:    lxvx v2, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P9-NEXT:    vextsb2d v2, v2
+; CHECK-P9-NEXT:    xvcvsxddp v2, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrws v2, r3
-; CHECK-BE-NEXT:    li r3, 1
-; CHECK-BE-NEXT:    li r4, 0
-; CHECK-BE-NEXT:    vextublx r3, r3, v2
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
-; CHECK-BE-NEXT:    extsb r3, r3
-; CHECK-BE-NEXT:    extsb r4, r4
-; CHECK-BE-NEXT:    mtvsrwa f0, r3
-; CHECK-BE-NEXT:    mtvsrwa f1, r4
-; CHECK-BE-NEXT:    xscvsxddp f0, f0
-; CHECK-BE-NEXT:    xscvsxddp f1, f1
-; CHECK-BE-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
+; CHECK-BE-NEXT:    mtvsrws v3, r3
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI4_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v3, v3, v2
+; CHECK-BE-NEXT:    vextsb2d v2, v2
+; CHECK-BE-NEXT:    xvcvsxddp v2, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i16 %a.coerce to <2 x i8>
@@ -721,27 +448,27 @@ entry:
 define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, i32 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
 ; CHECK-P8-NEXT:    mtvsrd f0, r4
-; CHECK-P8-NEXT:    mfvsrd r4, f0
-; CHECK-P8-NEXT:    clrldi r5, r4, 56
-; CHECK-P8-NEXT:    rldicl r6, r4, 56, 56
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    mtvsrwa f0, r5
-; CHECK-P8-NEXT:    rldicl r5, r4, 48, 56
-; CHECK-P8-NEXT:    rldicl r4, r4, 40, 56
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    extsb r4, r4
-; CHECK-P8-NEXT:    mtvsrwa f1, r6
-; CHECK-P8-NEXT:    mtvsrwa f2, r5
-; CHECK-P8-NEXT:    mtvsrwa f3, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_2@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI5_0@toc@l
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_2@toc@l
+; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    lvx v4, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_1@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_1@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
-; CHECK-P8-NEXT:    xscvsxddp f1, f1
-; CHECK-P8-NEXT:    xscvsxddp f2, f2
-; CHECK-P8-NEXT:    xscvsxddp f3, f3
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P8-NEXT:    vperm v3, v3, v3, v4
+; CHECK-P8-NEXT:    xxswapd v4, vs0
+; CHECK-P8-NEXT:    vsld v2, v2, v4
+; CHECK-P8-NEXT:    vsld v3, v3, v4
+; CHECK-P8-NEXT:    vsrad v2, v2, v4
+; CHECK-P8-NEXT:    vsrad v3, v3, v4
+; CHECK-P8-NEXT:    xvcvsxddp vs0, v2
+; CHECK-P8-NEXT:    xvcvsxddp vs1, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
@@ -750,60 +477,41 @@ define void @test4elt_signed(<4 x double
 ;
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrws v2, r4
-; CHECK-P9-NEXT:    li r4, 0
-; CHECK-P9-NEXT:    li r5, 1
-; CHECK-P9-NEXT:    li r6, 2
-; CHECK-P9-NEXT:    li r7, 3
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
-; CHECK-P9-NEXT:    vextubrx r5, r5, v2
-; CHECK-P9-NEXT:    vextubrx r6, r6, v2
-; CHECK-P9-NEXT:    vextubrx r7, r7, v2
-; CHECK-P9-NEXT:    extsb r4, r4
-; CHECK-P9-NEXT:    extsb r5, r5
-; CHECK-P9-NEXT:    extsb r6, r6
-; CHECK-P9-NEXT:    extsb r7, r7
-; CHECK-P9-NEXT:    mtvsrwa f0, r4
-; CHECK-P9-NEXT:    mtvsrwa f1, r5
-; CHECK-P9-NEXT:    mtvsrwa f2, r6
-; CHECK-P9-NEXT:    mtvsrwa f3, r7
-; CHECK-P9-NEXT:    xscvsxddp f0, f0
-; CHECK-P9-NEXT:    xscvsxddp f1, f1
-; CHECK-P9-NEXT:    xscvsxddp f2, f2
-; CHECK-P9-NEXT:    xscvsxddp f3, f3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI5_1@toc@ha
+; CHECK-P9-NEXT:    mtvsrws v4, r4
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI5_0@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI5_1@toc@l
+; CHECK-P9-NEXT:    lxvx v2, 0, r5
+; CHECK-P9-NEXT:    lxvx v3, 0, r6
+; CHECK-P9-NEXT:    vperm v2, v4, v4, v2
+; CHECK-P9-NEXT:    vperm v3, v4, v4, v3
+; CHECK-P9-NEXT:    vextsb2d v2, v2
+; CHECK-P9-NEXT:    vextsb2d v3, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs0, v2
+; CHECK-P9-NEXT:    xvcvsxddp vs1, v3
 ; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrws v2, r4
-; CHECK-BE-NEXT:    li r4, 1
-; CHECK-BE-NEXT:    li r5, 0
-; CHECK-BE-NEXT:    li r6, 3
-; CHECK-BE-NEXT:    li r7, 2
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
-; CHECK-BE-NEXT:    vextublx r5, r5, v2
-; CHECK-BE-NEXT:    vextublx r6, r6, v2
-; CHECK-BE-NEXT:    vextublx r7, r7, v2
-; CHECK-BE-NEXT:    extsb r4, r4
-; CHECK-BE-NEXT:    extsb r5, r5
-; CHECK-BE-NEXT:    extsb r6, r6
-; CHECK-BE-NEXT:    extsb r7, r7
-; CHECK-BE-NEXT:    mtvsrwa f0, r4
-; CHECK-BE-NEXT:    mtvsrwa f1, r5
-; CHECK-BE-NEXT:    mtvsrwa f2, r6
-; CHECK-BE-NEXT:    mtvsrwa f3, r7
-; CHECK-BE-NEXT:    xscvsxddp f0, f0
-; CHECK-BE-NEXT:    xscvsxddp f1, f1
-; CHECK-BE-NEXT:    xscvsxddp f2, f2
-; CHECK-BE-NEXT:    xscvsxddp f3, f3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI5_1@toc@ha
+; CHECK-BE-NEXT:    mtvsrws v4, r4
+; CHECK-BE-NEXT:    xxlxor v5, v5, v5
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI5_0@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI5_1@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r5
+; CHECK-BE-NEXT:    lxvx v3, 0, r6
+; CHECK-BE-NEXT:    vperm v2, v5, v4, v2
+; CHECK-BE-NEXT:    vperm v3, v4, v4, v3
+; CHECK-BE-NEXT:    vextsb2d v2, v2
+; CHECK-BE-NEXT:    vextsb2d v3, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v3
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i32 %a.coerce to <4 x i8>
@@ -816,104 +524,80 @@ define void @test8elt_signed(<8 x double
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtvsrd f0, r4
-; CHECK-P8-NEXT:    mfvsrd r4, f0
-; CHECK-P8-NEXT:    clrldi r5, r4, 56
-; CHECK-P8-NEXT:    rldicl r6, r4, 56, 56
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    mtvsrwa f0, r5
-; CHECK-P8-NEXT:    rldicl r5, r4, 48, 56
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    mtvsrwa f1, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 40, 56
-; CHECK-P8-NEXT:    mtvsrwa f2, r5
-; CHECK-P8-NEXT:    rldicl r5, r4, 32, 56
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    mtvsrwa f3, r6
-; CHECK-P8-NEXT:    rldicl r6, r4, 24, 56
-; CHECK-P8-NEXT:    mtvsrwa f4, r5
-; CHECK-P8-NEXT:    rldicl r5, r4, 16, 56
-; CHECK-P8-NEXT:    rldicl r4, r4, 8, 56
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    extsb r4, r4
-; CHECK-P8-NEXT:    mtvsrwa f5, r6
-; CHECK-P8-NEXT:    mtvsrwa f6, r5
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    mtvsrwa f7, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_2@toc@ha
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
+; CHECK-P8-NEXT:    addis r6, r2, .LCPI6_3@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_2@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_0@toc@l
+; CHECK-P8-NEXT:    addi r6, r6, .LCPI6_3@toc@l
+; CHECK-P8-NEXT:    lvx v4, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_4@toc@ha
+; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    lvx v5, 0, r6
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_1@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_4@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_1@toc@l
+; CHECK-P8-NEXT:    lvx v0, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xscvsxddp f4, f4
-; CHECK-P8-NEXT:    xscvsxddp f5, f5
-; CHECK-P8-NEXT:    xscvsxddp f6, f6
-; CHECK-P8-NEXT:    xscvsxddp f7, f7
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
-; CHECK-P8-NEXT:    xscvsxddp f1, f1
-; CHECK-P8-NEXT:    xscvsxddp f2, f2
-; CHECK-P8-NEXT:    xscvsxddp f3, f3
-; CHECK-P8-NEXT:    xxmrghd vs4, vs5, vs4
-; CHECK-P8-NEXT:    xxmrghd vs5, vs7, vs6
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P8-NEXT:    xxswapd vs2, vs5
-; CHECK-P8-NEXT:    xxswapd vs3, vs4
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P8-NEXT:    vperm v4, v3, v3, v4
+; CHECK-P8-NEXT:    vperm v5, v3, v3, v5
+; CHECK-P8-NEXT:    vperm v3, v3, v3, v0
+; CHECK-P8-NEXT:    xxswapd v0, vs0
+; CHECK-P8-NEXT:    vsld v2, v2, v0
+; CHECK-P8-NEXT:    vsld v4, v4, v0
+; CHECK-P8-NEXT:    vsld v5, v5, v0
+; CHECK-P8-NEXT:    vsld v3, v3, v0
+; CHECK-P8-NEXT:    vsrad v2, v2, v0
+; CHECK-P8-NEXT:    vsrad v3, v3, v0
+; CHECK-P8-NEXT:    vsrad v4, v4, v0
+; CHECK-P8-NEXT:    vsrad v5, v5, v0
+; CHECK-P8-NEXT:    xvcvsxddp vs2, v3
+; CHECK-P8-NEXT:    xvcvsxddp vs0, v2
+; CHECK-P8-NEXT:    xvcvsxddp vs1, v5
+; CHECK-P8-NEXT:    xvcvsxddp vs3, v4
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI6_1@toc@ha
+; CHECK-P9-NEXT:    addis r7, r2, .LCPI6_2@toc@ha
+; CHECK-P9-NEXT:    addis r8, r2, .LCPI6_3@toc@ha
 ; CHECK-P9-NEXT:    mtvsrd f0, r4
-; CHECK-P9-NEXT:    li r4, 0
-; CHECK-P9-NEXT:    li r5, 1
-; CHECK-P9-NEXT:    li r6, 2
-; CHECK-P9-NEXT:    li r7, 3
-; CHECK-P9-NEXT:    li r8, 4
-; CHECK-P9-NEXT:    li r9, 5
-; CHECK-P9-NEXT:    li r10, 6
-; CHECK-P9-NEXT:    li r11, 7
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
-; CHECK-P9-NEXT:    vextubrx r5, r5, v2
-; CHECK-P9-NEXT:    vextubrx r6, r6, v2
-; CHECK-P9-NEXT:    vextubrx r7, r7, v2
-; CHECK-P9-NEXT:    vextubrx r8, r8, v2
-; CHECK-P9-NEXT:    vextubrx r9, r9, v2
-; CHECK-P9-NEXT:    vextubrx r10, r10, v2
-; CHECK-P9-NEXT:    vextubrx r11, r11, v2
-; CHECK-P9-NEXT:    extsb r4, r4
-; CHECK-P9-NEXT:    extsb r5, r5
-; CHECK-P9-NEXT:    extsb r6, r6
-; CHECK-P9-NEXT:    extsb r7, r7
-; CHECK-P9-NEXT:    extsb r8, r8
-; CHECK-P9-NEXT:    extsb r9, r9
-; CHECK-P9-NEXT:    extsb r10, r10
-; CHECK-P9-NEXT:    extsb r11, r11
-; CHECK-P9-NEXT:    mtvsrwa f0, r4
-; CHECK-P9-NEXT:    mtvsrwa f1, r5
-; CHECK-P9-NEXT:    mtvsrwa f2, r6
-; CHECK-P9-NEXT:    mtvsrwa f3, r7
-; CHECK-P9-NEXT:    mtvsrwa f4, r8
-; CHECK-P9-NEXT:    mtvsrwa f5, r9
-; CHECK-P9-NEXT:    mtvsrwa f6, r10
-; CHECK-P9-NEXT:    mtvsrwa f7, r11
-; CHECK-P9-NEXT:    xscvsxddp f0, f0
-; CHECK-P9-NEXT:    xscvsxddp f1, f1
-; CHECK-P9-NEXT:    xscvsxddp f2, f2
-; CHECK-P9-NEXT:    xscvsxddp f3, f3
-; CHECK-P9-NEXT:    xscvsxddp f4, f4
-; CHECK-P9-NEXT:    xscvsxddp f5, f5
-; CHECK-P9-NEXT:    xscvsxddp f6, f6
-; CHECK-P9-NEXT:    xscvsxddp f7, f7
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI6_0@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI6_1@toc@l
+; CHECK-P9-NEXT:    addi r7, r7, .LCPI6_2@toc@l
+; CHECK-P9-NEXT:    addi r8, r8, .LCPI6_3@toc@l
+; CHECK-P9-NEXT:    xxswapd v0, vs0
+; CHECK-P9-NEXT:    lxvx v2, 0, r5
+; CHECK-P9-NEXT:    lxvx v3, 0, r6
+; CHECK-P9-NEXT:    lxvx v4, 0, r7
+; CHECK-P9-NEXT:    lxvx v5, 0, r8
+; CHECK-P9-NEXT:    vperm v2, v0, v0, v2
+; CHECK-P9-NEXT:    vperm v3, v0, v0, v3
+; CHECK-P9-NEXT:    vperm v4, v0, v0, v4
+; CHECK-P9-NEXT:    vperm v5, v0, v0, v5
+; CHECK-P9-NEXT:    vextsb2d v2, v2
+; CHECK-P9-NEXT:    vextsb2d v3, v3
+; CHECK-P9-NEXT:    vextsb2d v4, v4
+; CHECK-P9-NEXT:    vextsb2d v5, v5
+; CHECK-P9-NEXT:    xvcvsxddp vs0, v2
+; CHECK-P9-NEXT:    xvcvsxddp vs1, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs2, v4
+; CHECK-P9-NEXT:    xvcvsxddp vs3, v5
 ; CHECK-P9-NEXT:    stxv vs3, 48(r3)
 ; CHECK-P9-NEXT:    stxv vs2, 32(r3)
 ; CHECK-P9-NEXT:    stxv vs1, 16(r3)
@@ -922,55 +606,36 @@ define void @test8elt_signed(<8 x double
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    li r5, 1
-; CHECK-BE-NEXT:    mtvsrd v2, r4
-; CHECK-BE-NEXT:    li r4, 0
-; CHECK-BE-NEXT:    li r6, 3
-; CHECK-BE-NEXT:    li r7, 2
-; CHECK-BE-NEXT:    li r8, 5
-; CHECK-BE-NEXT:    li r9, 4
-; CHECK-BE-NEXT:    li r10, 7
-; CHECK-BE-NEXT:    li r11, 6
-; CHECK-BE-NEXT:    vextublx r5, r5, v2
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
-; CHECK-BE-NEXT:    vextublx r6, r6, v2
-; CHECK-BE-NEXT:    vextublx r7, r7, v2
-; CHECK-BE-NEXT:    vextublx r8, r8, v2
-; CHECK-BE-NEXT:    vextublx r9, r9, v2
-; CHECK-BE-NEXT:    vextublx r10, r10, v2
-; CHECK-BE-NEXT:    vextublx r11, r11, v2
-; CHECK-BE-NEXT:    extsb r5, r5
-; CHECK-BE-NEXT:    extsb r4, r4
-; CHECK-BE-NEXT:    extsb r6, r6
-; CHECK-BE-NEXT:    extsb r7, r7
-; CHECK-BE-NEXT:    extsb r8, r8
-; CHECK-BE-NEXT:    extsb r9, r9
-; CHECK-BE-NEXT:    extsb r10, r10
-; CHECK-BE-NEXT:    extsb r11, r11
-; CHECK-BE-NEXT:    mtvsrwa f0, r5
-; CHECK-BE-NEXT:    mtvsrwa f1, r4
-; CHECK-BE-NEXT:    mtvsrwa f2, r6
-; CHECK-BE-NEXT:    mtvsrwa f3, r7
-; CHECK-BE-NEXT:    mtvsrwa f4, r8
-; CHECK-BE-NEXT:    mtvsrwa f5, r9
-; CHECK-BE-NEXT:    mtvsrwa f6, r10
-; CHECK-BE-NEXT:    mtvsrwa f7, r11
-; CHECK-BE-NEXT:    xscvsxddp f0, f0
-; CHECK-BE-NEXT:    xscvsxddp f1, f1
-; CHECK-BE-NEXT:    xscvsxddp f2, f2
-; CHECK-BE-NEXT:    xscvsxddp f3, f3
-; CHECK-BE-NEXT:    xscvsxddp f4, f4
-; CHECK-BE-NEXT:    xscvsxddp f5, f5
-; CHECK-BE-NEXT:    xscvsxddp f6, f6
-; CHECK-BE-NEXT:    xscvsxddp f7, f7
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-BE-NEXT:    stxv vs2, 32(r3)
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
-; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI6_1@toc@ha
+; CHECK-BE-NEXT:    addis r7, r2, .LCPI6_2@toc@ha
+; CHECK-BE-NEXT:    addis r8, r2, .LCPI6_3@toc@ha
+; CHECK-BE-NEXT:    mtvsrd v0, r4
+; CHECK-BE-NEXT:    xxlxor v1, v1, v1
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI6_0@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI6_1@toc@l
+; CHECK-BE-NEXT:    addi r7, r7, .LCPI6_2@toc@l
+; CHECK-BE-NEXT:    addi r8, r8, .LCPI6_3@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r5
+; CHECK-BE-NEXT:    lxvx v3, 0, r6
+; CHECK-BE-NEXT:    lxvx v4, 0, r7
+; CHECK-BE-NEXT:    lxvx v5, 0, r8
+; CHECK-BE-NEXT:    vperm v2, v1, v0, v2
+; CHECK-BE-NEXT:    vperm v3, v1, v0, v3
+; CHECK-BE-NEXT:    vperm v4, v0, v0, v4
+; CHECK-BE-NEXT:    vperm v5, v0, v0, v5
+; CHECK-BE-NEXT:    vextsb2d v2, v2
+; CHECK-BE-NEXT:    vextsb2d v3, v3
+; CHECK-BE-NEXT:    vextsb2d v4, v4
+; CHECK-BE-NEXT:    vextsb2d v5, v5
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs2, v4
+; CHECK-BE-NEXT:    xvcvsxddp vs3, v5
+; CHECK-BE-NEXT:    stxv vs1, 48(r3)
+; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs2, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <8 x i8>
@@ -982,338 +647,210 @@ entry:
 define void @test16elt_signed(<16 x double>* noalias nocapture sret %agg.result, <16 x i8> %a) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    mfvsrd r5, v2
-; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_2@toc@ha
+; CHECK-P8-NEXT:    addis r6, r2, .LCPI7_3@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_0@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_2@toc@l
+; CHECK-P8-NEXT:    addi r6, r6, .LCPI7_3@toc@l
+; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_4@toc@ha
+; CHECK-P8-NEXT:    lvx v4, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_5@toc@ha
+; CHECK-P8-NEXT:    lvx v5, 0, r6
+; CHECK-P8-NEXT:    addis r6, r2, .LCPI7_1@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_4@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_5@toc@l
+; CHECK-P8-NEXT:    addi r6, r6, .LCPI7_1@toc@l
+; CHECK-P8-NEXT:    lvx v0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_6@toc@ha
+; CHECK-P8-NEXT:    lvx v1, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_7@toc@ha
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r6
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_6@toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_7@toc@l
+; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P8-NEXT:    lvx v6, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_8@toc@ha
+; CHECK-P8-NEXT:    lvx v7, 0, r5
+; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
+; CHECK-P8-NEXT:    li r5, 96
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_8@toc@l
+; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
+; CHECK-P8-NEXT:    xxswapd v9, vs0
+; CHECK-P8-NEXT:    lvx v8, 0, r4
+; CHECK-P8-NEXT:    vperm v0, v2, v2, v0
 ; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    clrldi r6, r5, 56
-; CHECK-P8-NEXT:    rldicl r7, r5, 56, 56
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    extsb r7, r7
-; CHECK-P8-NEXT:    mtvsrwa f0, r6
-; CHECK-P8-NEXT:    rldicl r6, r5, 40, 56
-; CHECK-P8-NEXT:    rldicl r8, r5, 48, 56
-; CHECK-P8-NEXT:    mtvsrwa f1, r7
-; CHECK-P8-NEXT:    rldicl r7, r5, 32, 56
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    extsb r8, r8
-; CHECK-P8-NEXT:    extsb r7, r7
-; CHECK-P8-NEXT:    mtvsrwa f4, r6
-; CHECK-P8-NEXT:    rldicl r6, r5, 24, 56
-; CHECK-P8-NEXT:    mtvsrwa f3, r8
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    mtvsrwa f5, r7
-; CHECK-P8-NEXT:    rldicl r7, r5, 16, 56
-; CHECK-P8-NEXT:    rldicl r5, r5, 8, 56
-; CHECK-P8-NEXT:    mfvsrd r8, f2
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    mtvsrwa f2, r6
-; CHECK-P8-NEXT:    extsb r6, r7
-; CHECK-P8-NEXT:    mtvsrwa f6, r6
-; CHECK-P8-NEXT:    clrldi r6, r8, 56
-; CHECK-P8-NEXT:    mtvsrwa f7, r5
-; CHECK-P8-NEXT:    rldicl r5, r8, 56, 56
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    mtvsrwa f8, r6
-; CHECK-P8-NEXT:    rldicl r6, r8, 48, 56
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    mtvsrwa f9, r5
-; CHECK-P8-NEXT:    rldicl r5, r8, 40, 56
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    mtvsrwa f10, r6
-; CHECK-P8-NEXT:    rldicl r6, r8, 32, 56
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    mtvsrwa f11, r5
-; CHECK-P8-NEXT:    rldicl r5, r8, 24, 56
-; CHECK-P8-NEXT:    extsb r6, r6
-; CHECK-P8-NEXT:    mtvsrwa f12, r6
-; CHECK-P8-NEXT:    rldicl r6, r8, 16, 56
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    xscvsxddp f6, f6
-; CHECK-P8-NEXT:    xscvsxddp f7, f7
-; CHECK-P8-NEXT:    mtvsrwa f13, r5
-; CHECK-P8-NEXT:    extsb r5, r6
-; CHECK-P8-NEXT:    mtvsrwa v2, r5
-; CHECK-P8-NEXT:    rldicl r5, r8, 8, 56
-; CHECK-P8-NEXT:    xscvsxddp f5, f5
-; CHECK-P8-NEXT:    extsb r5, r5
-; CHECK-P8-NEXT:    xscvsxddp f2, f2
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
-; CHECK-P8-NEXT:    xscvsxddp f1, f1
-; CHECK-P8-NEXT:    xxmrghd vs6, vs7, vs6
-; CHECK-P8-NEXT:    mtvsrwa v3, r5
-; CHECK-P8-NEXT:    li r5, 64
-; CHECK-P8-NEXT:    xscvsxddp f3, f3
-; CHECK-P8-NEXT:    xscvsxddp f4, f4
-; CHECK-P8-NEXT:    xscvsxddp f31, v2
-; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs5
-; CHECK-P8-NEXT:    xscvsxddp f7, v3
-; CHECK-P8-NEXT:    xscvsxddp f8, f8
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xscvsxddp f9, f9
-; CHECK-P8-NEXT:    xxswapd vs1, vs6
-; CHECK-P8-NEXT:    xscvsxddp f10, f10
+; CHECK-P8-NEXT:    vperm v1, v2, v2, v1
+; CHECK-P8-NEXT:    vperm v6, v2, v2, v6
+; CHECK-P8-NEXT:    vperm v7, v2, v2, v7
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v8
+; CHECK-P8-NEXT:    vsld v3, v3, v9
+; CHECK-P8-NEXT:    vsld v0, v0, v9
+; CHECK-P8-NEXT:    vsld v1, v1, v9
+; CHECK-P8-NEXT:    vsld v6, v6, v9
+; CHECK-P8-NEXT:    vsld v7, v7, v9
+; CHECK-P8-NEXT:    vsld v2, v2, v9
+; CHECK-P8-NEXT:    vsrad v7, v7, v9
+; CHECK-P8-NEXT:    vsrad v2, v2, v9
+; CHECK-P8-NEXT:    vsld v4, v4, v9
+; CHECK-P8-NEXT:    vsld v5, v5, v9
+; CHECK-P8-NEXT:    vsrad v6, v6, v9
+; CHECK-P8-NEXT:    vsrad v0, v0, v9
+; CHECK-P8-NEXT:    vsrad v1, v1, v9
+; CHECK-P8-NEXT:    xvcvsxddp vs2, v7
+; CHECK-P8-NEXT:    xvcvsxddp vs3, v2
+; CHECK-P8-NEXT:    vsrad v3, v3, v9
+; CHECK-P8-NEXT:    vsrad v4, v4, v9
+; CHECK-P8-NEXT:    vsrad v5, v5, v9
+; CHECK-P8-NEXT:    xvcvsxddp vs4, v6
+; CHECK-P8-NEXT:    xvcvsxddp vs1, v1
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xscvsxddp f12, f12
-; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
-; CHECK-P8-NEXT:    xscvsxddp f13, f13
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT:    li r4, 96
-; CHECK-P8-NEXT:    xscvsxddp f11, f11
-; CHECK-P8-NEXT:    xxmrghd vs6, vs7, vs31
+; CHECK-P8-NEXT:    xvcvsxddp vs5, v0
 ; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxmrghd vs4, vs9, vs8
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    li r4, 80
-; CHECK-P8-NEXT:    xxswapd vs2, vs6
+; CHECK-P8-NEXT:    xvcvsxddp vs0, v5
+; CHECK-P8-NEXT:    xvcvsxddp vs6, v3
+; CHECK-P8-NEXT:    xvcvsxddp vs7, v4
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    xxswapd vs4, vs4
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
+; CHECK-P8-NEXT:    li r5, 64
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    xxswapd vs3, vs6
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
 ; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    xxmrghd vs5, vs13, vs12
-; CHECK-P8-NEXT:    xxswapd vs4, vs4
-; CHECK-P8-NEXT:    xxmrghd vs1, vs11, vs10
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs2, vs7
+; CHECK-P8-NEXT:    stxvd2x vs5, r3, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    xxswapd vs5, vs5
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    stxvd2x vs5, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs4, 0, r3
-; CHECK-P8-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    li r4, 0
-; CHECK-P9-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    li r5, 1
-; CHECK-P9-NEXT:    li r6, 2
-; CHECK-P9-NEXT:    li r7, 3
-; CHECK-P9-NEXT:    li r8, 4
-; CHECK-P9-NEXT:    li r9, 5
-; CHECK-P9-NEXT:    li r10, 6
-; CHECK-P9-NEXT:    li r11, 7
-; CHECK-P9-NEXT:    li r12, 8
-; CHECK-P9-NEXT:    li r0, 9
-; CHECK-P9-NEXT:    li r30, 10
-; CHECK-P9-NEXT:    li r29, 11
-; CHECK-P9-NEXT:    li r28, 12
-; CHECK-P9-NEXT:    li r27, 13
-; CHECK-P9-NEXT:    li r26, 14
-; CHECK-P9-NEXT:    li r25, 15
-; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
-; CHECK-P9-NEXT:    vextubrx r5, r5, v2
-; CHECK-P9-NEXT:    vextubrx r6, r6, v2
-; CHECK-P9-NEXT:    vextubrx r7, r7, v2
-; CHECK-P9-NEXT:    vextubrx r8, r8, v2
-; CHECK-P9-NEXT:    vextubrx r9, r9, v2
-; CHECK-P9-NEXT:    vextubrx r10, r10, v2
-; CHECK-P9-NEXT:    vextubrx r11, r11, v2
-; CHECK-P9-NEXT:    vextubrx r12, r12, v2
-; CHECK-P9-NEXT:    vextubrx r0, r0, v2
-; CHECK-P9-NEXT:    vextubrx r30, r30, v2
-; CHECK-P9-NEXT:    vextubrx r29, r29, v2
-; CHECK-P9-NEXT:    vextubrx r28, r28, v2
-; CHECK-P9-NEXT:    vextubrx r27, r27, v2
-; CHECK-P9-NEXT:    vextubrx r26, r26, v2
-; CHECK-P9-NEXT:    vextubrx r25, r25, v2
-; CHECK-P9-NEXT:    extsb r4, r4
-; CHECK-P9-NEXT:    extsb r5, r5
-; CHECK-P9-NEXT:    extsb r6, r6
-; CHECK-P9-NEXT:    extsb r7, r7
-; CHECK-P9-NEXT:    extsb r8, r8
-; CHECK-P9-NEXT:    extsb r9, r9
-; CHECK-P9-NEXT:    extsb r10, r10
-; CHECK-P9-NEXT:    extsb r11, r11
-; CHECK-P9-NEXT:    extsb r12, r12
-; CHECK-P9-NEXT:    extsb r0, r0
-; CHECK-P9-NEXT:    extsb r30, r30
-; CHECK-P9-NEXT:    extsb r29, r29
-; CHECK-P9-NEXT:    extsb r28, r28
-; CHECK-P9-NEXT:    extsb r27, r27
-; CHECK-P9-NEXT:    extsb r26, r26
-; CHECK-P9-NEXT:    extsb r25, r25
-; CHECK-P9-NEXT:    mtvsrwa f0, r4
-; CHECK-P9-NEXT:    mtvsrwa f1, r5
-; CHECK-P9-NEXT:    mtvsrwa f2, r6
-; CHECK-P9-NEXT:    mtvsrwa f3, r7
-; CHECK-P9-NEXT:    mtvsrwa f4, r8
-; CHECK-P9-NEXT:    mtvsrwa f5, r9
-; CHECK-P9-NEXT:    mtvsrwa f6, r10
-; CHECK-P9-NEXT:    mtvsrwa f7, r11
-; CHECK-P9-NEXT:    mtvsrwa f8, r12
-; CHECK-P9-NEXT:    mtvsrwa f9, r0
-; CHECK-P9-NEXT:    mtvsrwa f10, r30
-; CHECK-P9-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa f11, r29
-; CHECK-P9-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa f12, r28
-; CHECK-P9-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa f13, r27
-; CHECK-P9-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa v2, r26
-; CHECK-P9-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrwa v3, r25
-; CHECK-P9-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xscvsxddp f0, f0
-; CHECK-P9-NEXT:    xscvsxddp f1, f1
-; CHECK-P9-NEXT:    xscvsxddp f2, f2
-; CHECK-P9-NEXT:    xscvsxddp f3, f3
-; CHECK-P9-NEXT:    xscvsxddp f4, f4
-; CHECK-P9-NEXT:    xscvsxddp f5, f5
-; CHECK-P9-NEXT:    xscvsxddp f6, f6
-; CHECK-P9-NEXT:    xscvsxddp f7, f7
-; CHECK-P9-NEXT:    xscvsxddp f8, f8
-; CHECK-P9-NEXT:    xscvsxddp f9, f9
-; CHECK-P9-NEXT:    xscvsxddp f10, f10
-; CHECK-P9-NEXT:    xscvsxddp f11, f11
-; CHECK-P9-NEXT:    xscvsxddp f12, f12
-; CHECK-P9-NEXT:    xscvsxddp f13, f13
-; CHECK-P9-NEXT:    xscvsxddp f31, v2
-; CHECK-P9-NEXT:    xscvsxddp f30, v3
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-P9-NEXT:    xxmrghd vs4, vs9, vs8
-; CHECK-P9-NEXT:    xxmrghd vs5, vs11, vs10
-; CHECK-P9-NEXT:    xxmrghd vs6, vs13, vs12
-; CHECK-P9-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
-; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI7_1@toc@ha
+; CHECK-P9-NEXT:    addis r6, r2, .LCPI7_2@toc@ha
+; CHECK-P9-NEXT:    addis r7, r2, .LCPI7_3@toc@ha
+; CHECK-P9-NEXT:    addis r8, r2, .LCPI7_4@toc@ha
+; CHECK-P9-NEXT:    addis r9, r2, .LCPI7_5@toc@ha
+; CHECK-P9-NEXT:    addis r10, r2, .LCPI7_6@toc@ha
+; CHECK-P9-NEXT:    addis r11, r2, .LCPI7_7@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_0@toc@l
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI7_1@toc@l
+; CHECK-P9-NEXT:    addi r6, r6, .LCPI7_2@toc@l
+; CHECK-P9-NEXT:    addi r7, r7, .LCPI7_3@toc@l
+; CHECK-P9-NEXT:    addi r8, r8, .LCPI7_4@toc@l
+; CHECK-P9-NEXT:    addi r9, r9, .LCPI7_5@toc@l
+; CHECK-P9-NEXT:    addi r10, r10, .LCPI7_6@toc@l
+; CHECK-P9-NEXT:    addi r11, r11, .LCPI7_7@toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    lxvx v4, 0, r5
+; CHECK-P9-NEXT:    lxvx v5, 0, r6
+; CHECK-P9-NEXT:    lxvx v0, 0, r7
+; CHECK-P9-NEXT:    lxvx v1, 0, r8
+; CHECK-P9-NEXT:    lxvx v6, 0, r9
+; CHECK-P9-NEXT:    lxvx v7, 0, r10
+; CHECK-P9-NEXT:    lxvx v8, 0, r11
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    vperm v4, v2, v2, v4
+; CHECK-P9-NEXT:    vperm v5, v2, v2, v5
+; CHECK-P9-NEXT:    vperm v0, v2, v2, v0
+; CHECK-P9-NEXT:    vperm v1, v2, v2, v1
+; CHECK-P9-NEXT:    vperm v6, v2, v2, v6
+; CHECK-P9-NEXT:    vperm v7, v2, v2, v7
+; CHECK-P9-NEXT:    vperm v2, v2, v2, v8
+; CHECK-P9-NEXT:    vextsb2d v3, v3
+; CHECK-P9-NEXT:    vextsb2d v4, v4
+; CHECK-P9-NEXT:    vextsb2d v5, v5
+; CHECK-P9-NEXT:    vextsb2d v0, v0
+; CHECK-P9-NEXT:    vextsb2d v1, v1
+; CHECK-P9-NEXT:    vextsb2d v6, v6
+; CHECK-P9-NEXT:    vextsb2d v7, v7
+; CHECK-P9-NEXT:    vextsb2d v2, v2
+; CHECK-P9-NEXT:    xvcvsxddp vs0, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs1, v4
+; CHECK-P9-NEXT:    xvcvsxddp vs2, v5
+; CHECK-P9-NEXT:    xvcvsxddp vs3, v0
+; CHECK-P9-NEXT:    xvcvsxddp vs4, v1
+; CHECK-P9-NEXT:    xvcvsxddp vs5, v6
+; CHECK-P9-NEXT:    xvcvsxddp vs6, v7
+; CHECK-P9-NEXT:    xvcvsxddp vs7, v2
 ; CHECK-P9-NEXT:    stxv vs3, 48(r3)
 ; CHECK-P9-NEXT:    stxv vs2, 32(r3)
 ; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    stxv vs7, 112(r3)
 ; CHECK-P9-NEXT:    stxv vs6, 96(r3)
 ; CHECK-P9-NEXT:    stxv vs5, 80(r3)
+; CHECK-P9-NEXT:    stxv vs4, 64(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    std r25, -72(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r26, -64(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r27, -56(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r28, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r29, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    li r4, 1
-; CHECK-BE-NEXT:    std r30, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    li r5, 0
-; CHECK-BE-NEXT:    li r6, 3
-; CHECK-BE-NEXT:    li r7, 2
-; CHECK-BE-NEXT:    li r8, 5
-; CHECK-BE-NEXT:    li r9, 4
-; CHECK-BE-NEXT:    li r10, 7
-; CHECK-BE-NEXT:    li r11, 6
-; CHECK-BE-NEXT:    li r12, 9
-; CHECK-BE-NEXT:    li r0, 8
-; CHECK-BE-NEXT:    li r30, 11
-; CHECK-BE-NEXT:    li r29, 10
-; CHECK-BE-NEXT:    li r28, 13
-; CHECK-BE-NEXT:    li r27, 12
-; CHECK-BE-NEXT:    li r26, 15
-; CHECK-BE-NEXT:    li r25, 14
-; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
-; CHECK-BE-NEXT:    vextublx r5, r5, v2
-; CHECK-BE-NEXT:    vextublx r6, r6, v2
-; CHECK-BE-NEXT:    vextublx r7, r7, v2
-; CHECK-BE-NEXT:    vextublx r8, r8, v2
-; CHECK-BE-NEXT:    vextublx r9, r9, v2
-; CHECK-BE-NEXT:    vextublx r10, r10, v2
-; CHECK-BE-NEXT:    vextublx r11, r11, v2
-; CHECK-BE-NEXT:    vextublx r12, r12, v2
-; CHECK-BE-NEXT:    vextublx r0, r0, v2
-; CHECK-BE-NEXT:    vextublx r30, r30, v2
-; CHECK-BE-NEXT:    vextublx r29, r29, v2
-; CHECK-BE-NEXT:    vextublx r28, r28, v2
-; CHECK-BE-NEXT:    vextublx r27, r27, v2
-; CHECK-BE-NEXT:    vextublx r26, r26, v2
-; CHECK-BE-NEXT:    vextublx r25, r25, v2
-; CHECK-BE-NEXT:    extsb r4, r4
-; CHECK-BE-NEXT:    extsb r5, r5
-; CHECK-BE-NEXT:    extsb r6, r6
-; CHECK-BE-NEXT:    extsb r7, r7
-; CHECK-BE-NEXT:    extsb r8, r8
-; CHECK-BE-NEXT:    extsb r9, r9
-; CHECK-BE-NEXT:    extsb r10, r10
-; CHECK-BE-NEXT:    extsb r11, r11
-; CHECK-BE-NEXT:    extsb r12, r12
-; CHECK-BE-NEXT:    extsb r0, r0
-; CHECK-BE-NEXT:    extsb r30, r30
-; CHECK-BE-NEXT:    extsb r29, r29
-; CHECK-BE-NEXT:    extsb r28, r28
-; CHECK-BE-NEXT:    extsb r27, r27
-; CHECK-BE-NEXT:    extsb r26, r26
-; CHECK-BE-NEXT:    extsb r25, r25
-; CHECK-BE-NEXT:    mtvsrwa f0, r4
-; CHECK-BE-NEXT:    mtvsrwa f1, r5
-; CHECK-BE-NEXT:    mtvsrwa f2, r6
-; CHECK-BE-NEXT:    mtvsrwa f3, r7
-; CHECK-BE-NEXT:    mtvsrwa f4, r8
-; CHECK-BE-NEXT:    mtvsrwa f5, r9
-; CHECK-BE-NEXT:    mtvsrwa f6, r10
-; CHECK-BE-NEXT:    mtvsrwa f7, r11
-; CHECK-BE-NEXT:    mtvsrwa f8, r12
-; CHECK-BE-NEXT:    mtvsrwa f9, r0
-; CHECK-BE-NEXT:    mtvsrwa f10, r30
-; CHECK-BE-NEXT:    ld r30, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa f11, r29
-; CHECK-BE-NEXT:    ld r29, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa f12, r28
-; CHECK-BE-NEXT:    ld r28, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa f13, r27
-; CHECK-BE-NEXT:    ld r27, -56(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa v2, r26
-; CHECK-BE-NEXT:    ld r26, -64(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrwa v3, r25
-; CHECK-BE-NEXT:    ld r25, -72(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    xscvsxddp f0, f0
-; CHECK-BE-NEXT:    xscvsxddp f1, f1
-; CHECK-BE-NEXT:    xscvsxddp f2, f2
-; CHECK-BE-NEXT:    xscvsxddp f3, f3
-; CHECK-BE-NEXT:    xscvsxddp f4, f4
-; CHECK-BE-NEXT:    xscvsxddp f5, f5
-; CHECK-BE-NEXT:    xscvsxddp f6, f6
-; CHECK-BE-NEXT:    xscvsxddp f7, f7
-; CHECK-BE-NEXT:    xscvsxddp f8, f8
-; CHECK-BE-NEXT:    xscvsxddp f9, f9
-; CHECK-BE-NEXT:    xscvsxddp f10, f10
-; CHECK-BE-NEXT:    xscvsxddp f11, f11
-; CHECK-BE-NEXT:    xscvsxddp f12, f12
-; CHECK-BE-NEXT:    xscvsxddp f13, f13
-; CHECK-BE-NEXT:    xscvsxddp f31, v2
-; CHECK-BE-NEXT:    xscvsxddp f30, v3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-BE-NEXT:    xxmrghd vs2, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-BE-NEXT:    xxmrghd vs4, vs9, vs8
-; CHECK-BE-NEXT:    xxmrghd vs5, vs11, vs10
-; CHECK-BE-NEXT:    xxmrghd vs6, vs13, vs12
-; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
-; CHECK-BE-NEXT:    stxv vs4, 64(r3)
-; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs2, 32(r3)
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxv vs7, 112(r3)
-; CHECK-BE-NEXT:    stxv vs6, 96(r3)
-; CHECK-BE-NEXT:    stxv vs5, 80(r3)
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_1@toc@ha
+; CHECK-BE-NEXT:    addis r6, r2, .LCPI7_2@toc@ha
+; CHECK-BE-NEXT:    addis r7, r2, .LCPI7_3@toc@ha
+; CHECK-BE-NEXT:    addis r8, r2, .LCPI7_4@toc@ha
+; CHECK-BE-NEXT:    addis r9, r2, .LCPI7_5@toc@ha
+; CHECK-BE-NEXT:    addis r10, r2, .LCPI7_6@toc@ha
+; CHECK-BE-NEXT:    addis r11, r2, .LCPI7_7@toc@ha
+; CHECK-BE-NEXT:    xxlxor v9, v9, v9
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_0@toc@l
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_1@toc@l
+; CHECK-BE-NEXT:    addi r6, r6, .LCPI7_2@toc@l
+; CHECK-BE-NEXT:    addi r7, r7, .LCPI7_3@toc@l
+; CHECK-BE-NEXT:    addi r8, r8, .LCPI7_4@toc@l
+; CHECK-BE-NEXT:    addi r9, r9, .LCPI7_5@toc@l
+; CHECK-BE-NEXT:    addi r10, r10, .LCPI7_6@toc@l
+; CHECK-BE-NEXT:    addi r11, r11, .LCPI7_7@toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    lxvx v4, 0, r5
+; CHECK-BE-NEXT:    lxvx v5, 0, r6
+; CHECK-BE-NEXT:    lxvx v0, 0, r7
+; CHECK-BE-NEXT:    lxvx v1, 0, r8
+; CHECK-BE-NEXT:    lxvx v6, 0, r9
+; CHECK-BE-NEXT:    lxvx v7, 0, r10
+; CHECK-BE-NEXT:    lxvx v8, 0, r11
+; CHECK-BE-NEXT:    vperm v3, v9, v2, v3
+; CHECK-BE-NEXT:    vperm v4, v9, v2, v4
+; CHECK-BE-NEXT:    vperm v5, v9, v2, v5
+; CHECK-BE-NEXT:    vperm v0, v9, v2, v0
+; CHECK-BE-NEXT:    vperm v1, v2, v2, v1
+; CHECK-BE-NEXT:    vperm v6, v2, v2, v6
+; CHECK-BE-NEXT:    vperm v7, v2, v2, v7
+; CHECK-BE-NEXT:    vperm v2, v2, v2, v8
+; CHECK-BE-NEXT:    vextsb2d v3, v3
+; CHECK-BE-NEXT:    vextsb2d v4, v4
+; CHECK-BE-NEXT:    vextsb2d v5, v5
+; CHECK-BE-NEXT:    vextsb2d v0, v0
+; CHECK-BE-NEXT:    vextsb2d v1, v1
+; CHECK-BE-NEXT:    vextsb2d v6, v6
+; CHECK-BE-NEXT:    vextsb2d v7, v7
+; CHECK-BE-NEXT:    vextsb2d v2, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v4
+; CHECK-BE-NEXT:    xvcvsxddp vs2, v5
+; CHECK-BE-NEXT:    xvcvsxddp vs3, v0
+; CHECK-BE-NEXT:    xvcvsxddp vs4, v1
+; CHECK-BE-NEXT:    xvcvsxddp vs5, v6
+; CHECK-BE-NEXT:    xvcvsxddp vs6, v7
+; CHECK-BE-NEXT:    xvcvsxddp vs7, v2
+; CHECK-BE-NEXT:    stxv vs3, 112(r3)
+; CHECK-BE-NEXT:    stxv vs2, 80(r3)
+; CHECK-BE-NEXT:    stxv vs1, 48(r3)
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs7, 96(r3)
+; CHECK-BE-NEXT:    stxv vs6, 64(r3)
+; CHECK-BE-NEXT:    stxv vs5, 32(r3)
+; CHECK-BE-NEXT:    stxv vs4, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = sitofp <16 x i8> %a to <16 x double>

Modified: llvm/trunk/test/CodeGen/PowerPC/vsx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsx.ll?rev=350155&r1=350154&r2=350155&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vsx.ll Sat Dec 29 05:40:48 2018
@@ -1099,20 +1099,15 @@ define <2 x double> @test69(<2 x i16> %a
   ret <2 x double> %w
 
 ; CHECK-LABEL: @test69
-; CHECK-DAG: lfiwax f0, 0, r3
-; CHECK-DAG: lfiwax f1, 0, r3
-; CHECK-DAG: xscvsxddp f0, f0
-; CHECK-DAG: xscvsxddp f1, f1
-; CHECK: xxmrghd v2, vs1, vs0
+; CHECK-DAG: lxvd2x v2, 0, r3
+; CHECK-DAG: xvcvsxddp v2, v2
 ; CHECK: blr
 
 ; CHECK-LE-LABEL: @test69
-; CHECK-LE: mfvsrd
-; CHECK-LE: mtvsrwa
-; CHECK-LE: mtvsrwa
-; CHECK-LE: xscvsxddp
-; CHECK-LE: xscvsxddp
-; CHECK-LE: xxmrghd
+; CHECK-LE: vperm
+; CHECK-LE: vsld
+; CHECK-LE: vsrad
+; CHECK-LE: xvcvsxddp v2, v2
 ; CHECK-LE: blr
 }
 
@@ -1122,20 +1117,15 @@ define <2 x double> @test70(<2 x i8> %a)
   ret <2 x double> %w
 
 ; CHECK-LABEL: @test70
-; CHECK-DAG: lfiwax f0, 0, r3
-; CHECK-DAG: lfiwax f1, 0, r3
-; CHECK-DAG: xscvsxddp f0, f0
-; CHECK-DAG: xscvsxddp f1, f1
-; CHECK: xxmrghd v2, vs1, vs0
+; CHECK-DAG: lxvd2x v2, 0, r3
+; CHECK-DAG: xvcvsxddp v2, v2
 ; CHECK: blr
 
 ; CHECK-LE-LABEL: @test70
-; CHECK-LE: mfvsrd
-; CHECK-LE: mtvsrwa
-; CHECK-LE: mtvsrwa
-; CHECK-LE: xscvsxddp
-; CHECK-LE: xscvsxddp
-; CHECK-LE: xxmrghd
+; CHECK-LE: vperm
+; CHECK-LE: vsld
+; CHECK-LE: vsrad
+; CHECK-LE: xvcvsxddp v2, v2
 ; CHECK-LE: blr
 }
 




More information about the llvm-commits mailing list