[llvm] 03e7fef - [PowerPC] Canonicalize shuffles on big endian targets as well

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 20 05:29:56 PDT 2021


Author: Nemanja Ivanovic
Date: 2021-04-20T07:29:47-05:00
New Revision: 03e7fefff8caa6891cbb510283fa8c40247a9b0c

URL: https://github.com/llvm/llvm-project/commit/03e7fefff8caa6891cbb510283fa8c40247a9b0c
DIFF: https://github.com/llvm/llvm-project/commit/03e7fefff8caa6891cbb510283fa8c40247a9b0c.diff

LOG: [PowerPC] Canonicalize shuffles on big endian targets as well

Extend shuffle canonicalization and conversion of shuffles fed by vectorized
scalars to big endian subtargets. For big endian subtargets, loads and direct
moves of scalars into vector registers put the data in the correct element for
SCALAR_TO_VECTOR if the data type is 8 bytes wide. However, if the data type is
narrower, the value still ends up in the wrong place - although a different
wrong place than on little endian targets.

This patch extends the combine that keeps values where they are if they feed a
shuffle to big endian targets.

Differential revision: https://reviews.llvm.org/D100478

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
    llvm/test/CodeGen/PowerPC/build-vector-tests.ll
    llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
    llvm/test/CodeGen/PowerPC/fp-strict-round.ll
    llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll
    llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
    llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll
    llvm/test/CodeGen/PowerPC/pr25080.ll
    llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
    llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
    llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
    llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
    llvm/test/CodeGen/PowerPC/vec-itofp.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_int_ext.ll
    llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
    llvm/test/CodeGen/PowerPC/vsx.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c62fedf93fc9..37e156808f6e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9562,7 +9562,8 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
     // which is strictly wider than the loaded value by 8 bytes. So we need to
     // adjust the splat index to point to the correct address in memory.
     if (IsPermutedLoad) {
-      assert(isLittleEndian && "Unexpected permuted load on big endian target");
+      assert((isLittleEndian || IsFourByte) &&
+             "Unexpected size for permuted load on big endian target");
       SplatIdx += IsFourByte ? 2 : 1;
       assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
              "Splat of a value outside of the loaded memory");
@@ -9577,6 +9578,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
       else
         Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
 
+      // If the width of the load is the same as the width of the splat,
+      // loading with an offset would load the wrong memory.
+      if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
+        Offset = 0;
+
       SDValue BasePtr = LD->getBasePtr();
       if (Offset != 0)
         BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
@@ -14200,13 +14206,24 @@ static SDValue isScalarToVec(SDValue Op) {
   return SDValue();
 }
 
+// Fix up the shuffle mask to account for the fact that the result of
+// scalar_to_vector is not in lane zero. This just takes all values in
+// the ranges specified by the min/max indices and adds the number of
+// elements required to ensure each element comes from the respective
+// position in the valid lane.
+// On little endian, that's just the corresponding element in the other
+// half of the vector. On big endian, it is in the same half but right
+// justified rather than left justified in that half.
 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
                                             int LHSMaxIdx, int RHSMinIdx,
-                                            int RHSMaxIdx, int HalfVec) {
+                                            int RHSMaxIdx, int HalfVec,
+                                            unsigned ValidLaneWidth,
+                                            const PPCSubtarget &Subtarget) {
   for (int i = 0, e = ShuffV.size(); i < e; i++) {
     int Idx = ShuffV[i];
     if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
-      ShuffV[i] += HalfVec;
+      ShuffV[i] +=
+          Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
   }
 }
 
@@ -14215,7 +14232,8 @@ static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
 // In such a case, just change the shuffle mask to extract the element
 // from the permuted index.
-static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
+static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
+                               const PPCSubtarget &Subtarget) {
   SDLoc dl(OrigSToV);
   EVT VT = OrigSToV.getValueType();
   assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
@@ -14229,8 +14247,14 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
     // Can't handle non-const element indices or different vector types
     // for the input to the extract and the output of the scalar_to_vector.
     if (Idx && VT == OrigVector.getValueType()) {
-      SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
-      NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
+      unsigned NumElts = VT.getVectorNumElements();
+      assert(
+          NumElts > 1 &&
+          "Cannot produce a permuted scalar_to_vector for one element vector");
+      SmallVector<int, 16> NewMask(NumElts, -1);
+      unsigned ResultInElt = NumElts / 2;
+      ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
+      NewMask[ResultInElt] = Idx->getZExtValue();
       return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
     }
   }
@@ -14246,6 +14270,10 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
 // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
 // to put the value into element zero. Adjust the shuffle mask so that the
 // vector can remain in permuted form (to prevent a swap prior to a shuffle).
+// On big endian targets, this is still useful for SCALAR_TO_VECTOR
+// nodes with elements smaller than doubleword because all the ways
+// of getting scalar data into a vector register put the value in the
+// rightmost element of the left half of the vector.
 SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
                                                 SelectionDAG &DAG) const {
   SDValue LHS = SVN->getOperand(0);
@@ -14254,10 +14282,12 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
   int NumElts = LHS.getValueType().getVectorNumElements();
   SDValue Res(SVN, 0);
   SDLoc dl(SVN);
+  bool IsLittleEndian = Subtarget.isLittleEndian();
 
-  // None of these combines are useful on big endian systems since the ISA
-  // already has a big endian bias.
-  if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
+  // On little endian targets, do these combines on all VSX targets since
+  // canonical shuffles match efficient permutes. On big endian targets,
+  // this is only useful for targets with direct moves.
+  if (!Subtarget.hasDirectMove() && !(IsLittleEndian && Subtarget.hasVSX()))
     return Res;
 
   // If this is not a shuffle of a shuffle and the first element comes from
@@ -14280,6 +14310,18 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
     int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
                             : SToVRHS.getValueType().getVectorNumElements();
     int NumEltsOut = ShuffV.size();
+    unsigned InElemSizeInBits =
+        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits()
+                : SToVRHS.getValueType().getScalarSizeInBits();
+    unsigned OutElemSizeInBits = SToVLHS
+                                     ? LHS.getValueType().getScalarSizeInBits()
+                                     : RHS.getValueType().getScalarSizeInBits();
+
+    // The width of the "valid lane" (i.e. the lane that contains the value that
+    // is vectorized) needs to be expressed in terms of the number of elements
+    // of the shuffle. It is thereby the ratio of the values before and after
+    // any bitcast.
+    unsigned ValidLaneWidth = InElemSizeInBits / OutElemSizeInBits;
 
     // Initially assume that neither input is permuted. These will be adjusted
     // accordingly if either input is.
@@ -14290,18 +14332,25 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
 
     // Get the permuted scalar to vector nodes for the source(s) that come from
     // ISD::SCALAR_TO_VECTOR.
+    // On big endian systems, this only makes sense for element sizes smaller
+    // than 64 bits since for 64-bit elements, all instructions already put
+    // the value into element zero.
     if (SToVLHS) {
+      if (!IsLittleEndian && InElemSizeInBits >= 64)
+        return Res;
       // Set up the values for the shuffle vector fixup.
       LHSMaxIdx = NumEltsOut / NumEltsIn;
-      SToVLHS = getSToVPermuted(SToVLHS, DAG);
+      SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
       if (SToVLHS.getValueType() != LHS.getValueType())
         SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
       LHS = SToVLHS;
     }
     if (SToVRHS) {
+      if (!IsLittleEndian && InElemSizeInBits >= 64)
+        return Res;
       RHSMinIdx = NumEltsOut;
       RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
-      SToVRHS = getSToVPermuted(SToVRHS, DAG);
+      SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
       if (SToVRHS.getValueType() != RHS.getValueType())
         SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
       RHS = SToVRHS;
@@ -14311,10 +14360,9 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
     // The minimum and maximum indices that correspond to element zero for both
     // the LHS and RHS are computed and will control which shuffle mask entries
     // are to be changed. For example, if the RHS is permuted, any shuffle mask
-    // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
-    // HalfVec to refer to the corresponding element in the permuted vector.
+    // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
     fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
-                                    HalfVec);
+                                    HalfVec, ValidLaneWidth, Subtarget);
     Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
 
     // We may have simplified away the shuffle. We won't be able to do anything
@@ -14324,12 +14372,13 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
   }
 
+  SDValue TheSplat = IsLittleEndian ? RHS : LHS;
   // The common case after we commuted the shuffle is that the RHS is a splat
   // and we have elements coming in from the splat at indices that are not
   // conducive to using a merge.
   // Example:
   // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
-  if (!isSplatBV(RHS))
+  if (!isSplatBV(TheSplat))
     return Res;
 
   // We are looking for a mask such that all even elements are from
@@ -14339,24 +14388,41 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
 
   // Adjust the mask so we are pulling in the same index from the splat
   // as the index from the interesting vector in consecutive elements.
-  // Example (even elements from first vector):
-  // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
-  if (Mask[0] < NumElts)
-    for (int i = 1, e = Mask.size(); i < e; i += 2)
-      ShuffV[i] = (ShuffV[i - 1] + NumElts);
-  // Example (odd elements from first vector):
-  // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
-  else
-    for (int i = 0, e = Mask.size(); i < e; i += 2)
-      ShuffV[i] = (ShuffV[i + 1] + NumElts);
+  if (IsLittleEndian) {
+    // Example (even elements from first vector):
+    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
+    if (Mask[0] < NumElts)
+      for (int i = 1, e = Mask.size(); i < e; i += 2)
+        ShuffV[i] = (ShuffV[i - 1] + NumElts);
+    // Example (odd elements from first vector):
+    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
+    else
+      for (int i = 0, e = Mask.size(); i < e; i += 2)
+        ShuffV[i] = (ShuffV[i + 1] + NumElts);
+  } else {
+    // Example (even elements from first vector):
+    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
+    if (Mask[0] < NumElts)
+      for (int i = 0, e = Mask.size(); i < e; i += 2)
+        ShuffV[i] = ShuffV[i + 1] - NumElts;
+    // Example (odd elements from first vector):
+    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
+    else
+      for (int i = 1, e = Mask.size(); i < e; i += 2)
+        ShuffV[i] = ShuffV[i - 1] - NumElts;
+  }
 
   // If the RHS has undefs, we need to remove them since we may have created
   // a shuffle that adds those instead of the splat value.
-  SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
-  RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
+  SDValue SplatVal =
+      cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
+  TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
 
-  Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
-  return Res;
+  if (IsLittleEndian)
+    RHS = TheSplat;
+  else
+    LHS = TheSplat;
+  return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
 }
 
 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 869e06c49365..e57f299dd895 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3088,6 +3088,8 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps, IsBigEndian] in {
   def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>;
   def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
             (STXVW4X $rS, xoaddr:$dst)>;
+  def : Pat<(v2i64 (scalar_to_vector (i64 (load xoaddr:$src)))),
+           (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>;
 } // HasVSX, HasOnlySwappingMemOps, IsBigEndian
 
 // Any Power8 VSX subtarget.
@@ -3181,8 +3183,7 @@ def : Pat<DWToSPExtractConv.El1US1,
           (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
 
 // v4f32 scalar <-> vector conversions (BE)
-def : Pat<(v4f32 (scalar_to_vector f32:$A)),
-          (v4f32 (XSCVDPSPN $A))>;
+defm : ScalToVecWPermute<v4f32, (f32 f32:$A), (XSCVDPSPN $A), (XSCVDPSPN $A)>;
 def : Pat<(f32 (vector_extract v4f32:$S, 0)),
           (f32 (XSCVSPDPN $S))>;
 def : Pat<(f32 (vector_extract v4f32:$S, 1)),
@@ -3228,10 +3229,14 @@ def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
           (v2i64 (SUBREG_TO_REG (i64 1), (LIWAX xoaddr:$src), sub_64))>;
 def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
           (v2i64 (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64))>;
-def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
-          (v4i32 (XXSLDWIs (LIWZX xoaddr:$src), 1))>;
-def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
-          (v4f32 (XXSLDWIs (LIWZX xoaddr:$src), 1))>;
+defm : ScalToVecWPermute<
+  v4i32, (i32 (load xoaddr:$src)),
+  (XXSLDWIs (LIWZX xoaddr:$src), 1),
+  (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>;
+defm : ScalToVecWPermute<
+  v4f32, (f32 (load xoaddr:$src)),
+  (XXSLDWIs (LIWZX xoaddr:$src), 1),
+  (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>;
 
 def : Pat<DWToSPExtractConv.BVU,
           (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
@@ -3272,12 +3277,9 @@ def : Pat<DWToSPExtractConv.El1US1,
                             (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
 
 // v4f32 scalar <-> vector conversions (LE)
-  // The permuted version is no better than the version that puts the value
-  // into the right element because XSCVDPSPN is different from all the other
-  // instructions used for PPCSToV.
   defm : ScalToVecWPermute<v4f32, (f32 f32:$A),
                            (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1),
-                           (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 3)>;
+                           (XSCVDPSPN $A)>;
 def : Pat<(f32 (vector_extract v4f32:$S, 0)),
           (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
 def : Pat<(f32 (vector_extract v4f32:$S, 1)),
@@ -3439,12 +3441,18 @@ def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
 // Big endian VSX subtarget with direct moves.
 let Predicates = [HasVSX, HasDirectMove, IsBigEndian] in {
 // v16i8 scalar <-> vector conversions (BE)
-def : Pat<(v16i8 (scalar_to_vector i32:$A)),
-          (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>;
-def : Pat<(v8i16 (scalar_to_vector i32:$A)),
-          (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>;
-def : Pat<(v4i32 (scalar_to_vector i32:$A)),
-          (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>;
+defm : ScalToVecWPermute<
+  v16i8, (i32 i32:$A),
+  (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64),
+  (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
+defm : ScalToVecWPermute<
+  v8i16, (i32 i32:$A),
+  (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64),
+  (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
+defm : ScalToVecWPermute<
+  v4i32, (i32 i32:$A),
+  (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64),
+  (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
 def : Pat<(v2i64 (scalar_to_vector i64:$A)),
           (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>;
 
@@ -3770,33 +3778,39 @@ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
 // Build vectors from i8 loads
 defm : ScalToVecWPermute<v8i16, ScalarLoads.ZELi8,
                          (VSPLTHs 3, (LXSIBZX xoaddr:$src)),
-                         (VSPLTHs 3, (LXSIBZX xoaddr:$src))>;
+                         (SUBREG_TO_REG (i64 1), (LXSIBZX xoaddr:$src), sub_64)>;
 defm : ScalToVecWPermute<v4i32, ScalarLoads.ZELi8,
                          (XXSPLTWs (LXSIBZX xoaddr:$src), 1),
-                         (XXSPLTWs (LXSIBZX xoaddr:$src), 1)>;
+                         (SUBREG_TO_REG (i64 1), (LXSIBZX xoaddr:$src), sub_64)>;
 defm : ScalToVecWPermute<v2i64, ScalarLoads.ZELi8i64,
                          (XXPERMDIs (LXSIBZX xoaddr:$src), 0),
-                         (XXPERMDIs (LXSIBZX xoaddr:$src), 0)>;
-defm : ScalToVecWPermute<v4i32, ScalarLoads.SELi8,
-                         (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1),
-                         (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1)>;
-defm : ScalToVecWPermute<v2i64, ScalarLoads.SELi8i64,
-                         (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0),
-                         (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0)>;
+                         (SUBREG_TO_REG (i64 1), (LXSIBZX xoaddr:$src), sub_64)>;
+defm : ScalToVecWPermute<
+  v4i32, ScalarLoads.SELi8,
+  (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1),
+  (SUBREG_TO_REG (i64 1), (VEXTSB2Ws (LXSIBZX xoaddr:$src)), sub_64)>;
+defm : ScalToVecWPermute<
+  v2i64, ScalarLoads.SELi8i64,
+  (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0),
+  (SUBREG_TO_REG (i64 1), (VEXTSB2Ds (LXSIBZX xoaddr:$src)), sub_64)>;
 
 // Build vectors from i16 loads
-defm : ScalToVecWPermute<v4i32, ScalarLoads.ZELi16,
-                         (XXSPLTWs (LXSIHZX xoaddr:$src), 1),
-                         (XXSPLTWs (LXSIHZX xoaddr:$src), 1)>;
-defm : ScalToVecWPermute<v2i64, ScalarLoads.ZELi16i64,
-                         (XXPERMDIs (LXSIHZX xoaddr:$src), 0),
-                         (XXPERMDIs (LXSIHZX xoaddr:$src), 0)>;
-defm : ScalToVecWPermute<v4i32, ScalarLoads.SELi16,
-                         (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1),
-                         (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1)>;
-defm : ScalToVecWPermute<v2i64, ScalarLoads.SELi16i64,
-                         (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0),
-                         (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0)>;
+defm : ScalToVecWPermute<
+  v4i32, ScalarLoads.ZELi16,
+  (XXSPLTWs (LXSIHZX xoaddr:$src), 1),
+  (SUBREG_TO_REG (i64 1), (LXSIHZX xoaddr:$src), sub_64)>;
+defm : ScalToVecWPermute<
+  v2i64, ScalarLoads.ZELi16i64,
+  (XXPERMDIs (LXSIHZX xoaddr:$src), 0),
+  (SUBREG_TO_REG (i64 1), (LXSIHZX xoaddr:$src), sub_64)>;
+defm : ScalToVecWPermute<
+  v4i32, ScalarLoads.SELi16,
+  (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1),
+  (SUBREG_TO_REG (i64 1), (VEXTSH2Ws (LXSIHZX xoaddr:$src)), sub_64)>;
+defm : ScalToVecWPermute<
+  v2i64, ScalarLoads.SELi16i64,
+  (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0),
+  (SUBREG_TO_REG (i64 1), (VEXTSH2Ds (LXSIHZX xoaddr:$src)), sub_64)>;
 
 // Load/convert and convert/store patterns for f16.
 def : Pat<(f64 (extloadf16 xoaddr:$src)),
@@ -3938,7 +3952,8 @@ def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)),
                                  VSSRC))>;
 
 // Endianness-neutral patterns for const splats with ISA 3.0 instructions.
-defm : ScalToVecWPermute<v4i32, (i32 i32:$A), (MTVSRWS $A), (MTVSRWS $A)>;
+defm : ScalToVecWPermute<v4i32, (i32 i32:$A), (MTVSRWS $A),
+                         (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
 def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
           (v4i32 (MTVSRWS $A))>;
 def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
@@ -3950,12 +3965,14 @@ def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
                                immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
                                immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)),
           (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
-defm : ScalToVecWPermute<v4i32, FltToIntLoad.A,
-                         (XVCVSPSXWS (LXVWSX xoaddr:$A)),
-                         (XVCVSPSXWS (LXVWSX xoaddr:$A))>;
-defm : ScalToVecWPermute<v4i32, FltToUIntLoad.A,
-                         (XVCVSPUXWS (LXVWSX xoaddr:$A)),
-                         (XVCVSPUXWS (LXVWSX xoaddr:$A))>;
+defm : ScalToVecWPermute<
+  v4i32, FltToIntLoad.A,
+  (XVCVSPSXWS (LXVWSX xoaddr:$A)),
+  (XVCVSPSXWS (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$A), sub_64))>;
+defm : ScalToVecWPermute<
+  v4i32, FltToUIntLoad.A,
+  (XVCVSPUXWS (LXVWSX xoaddr:$A)),
+  (XVCVSPUXWS (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$A), sub_64))>;
 defm : ScalToVecWPermute<
   v4i32, DblToIntLoadP9.A,
   (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), sub_64), 1),
@@ -3991,13 +4008,15 @@ let Predicates = [HasVSX, HasP9Vector, NoP10Vector] in {
 // COPY_TO_REGCLASS. The COPY_TO_REGCLASS makes it appear to need two instructions 
 // to perform the operation, when only one instruction is produced in practice.
 // The NoP10Vector predicate excludes these patterns from Power10 VSX subtargets.
-defm : ScalToVecWPermute<v16i8, ScalarLoads.Li8,
-                         (VSPLTBs 7, (LXSIBZX xoaddr:$src)),
-                         (VSPLTBs 7, (LXSIBZX xoaddr:$src))>;
+defm : ScalToVecWPermute<
+  v16i8, ScalarLoads.Li8,
+  (VSPLTBs 7, (LXSIBZX xoaddr:$src)),
+  (SUBREG_TO_REG (i64 1), (LXSIBZX xoaddr:$src), sub_64)>;
 // Build vectors from i16 loads
-defm : ScalToVecWPermute<v8i16, ScalarLoads.Li16,
-                         (VSPLTHs 3, (LXSIHZX xoaddr:$src)),
-                         (VSPLTHs 3, (LXSIHZX xoaddr:$src))>;
+defm : ScalToVecWPermute<
+  v8i16, ScalarLoads.Li16,
+  (VSPLTHs 3, (LXSIHZX xoaddr:$src)),
+  (SUBREG_TO_REG (i64 1), (LXSIHZX xoaddr:$src), sub_64)>;
 } // HasVSX, HasP9Vector, NoP10Vector
 
 // Any big endian Power9 VSX subtarget
@@ -4005,13 +4024,15 @@ let Predicates = [HasVSX, HasP9Vector, IsBigEndian] in {
 // Power10 VSX subtargets produce a shorter pattern for little endian targets
 // but this is still the best pattern for Power9 and Power10 VSX big endian
 // Build vectors from i8 loads
-defm : ScalToVecWPermute<v16i8, ScalarLoads.Li8,
-                         (VSPLTBs 7, (LXSIBZX xoaddr:$src)),
-                         (VSPLTBs 7, (LXSIBZX xoaddr:$src))>;
+defm : ScalToVecWPermute<
+  v16i8, ScalarLoads.Li8,
+  (VSPLTBs 7, (LXSIBZX xoaddr:$src)),
+  (SUBREG_TO_REG (i64 1), (LXSIBZX xoaddr:$src), sub_64)>;
 // Build vectors from i16 loads
-defm : ScalToVecWPermute<v8i16, ScalarLoads.Li16,
-                         (VSPLTHs 3, (LXSIHZX xoaddr:$src)),
-                         (VSPLTHs 3, (LXSIHZX xoaddr:$src))>;
+defm : ScalToVecWPermute<
+  v8i16, ScalarLoads.Li16,
+  (VSPLTHs 3, (LXSIHZX xoaddr:$src)),
+  (SUBREG_TO_REG (i64 1), (LXSIHZX xoaddr:$src), sub_64)>;
 } // HasVSX, HasP9Vector, NoP10Vector
 
 // Big endian 64Bit Power9 subtarget.

diff  --git a/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
index 921c3be960f6..11325b55e05f 100644
--- a/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
@@ -12,10 +12,8 @@
 define <16 x i8> @buildc(i8 zeroext %a) {
 ; CHECK-LABEL: buildc:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi 4, 1, -16
-; CHECK-NEXT:    stb 3, -16(1)
-; CHECK-NEXT:    lxvw4x 34, 0, 4
-; CHECK-NEXT:    vspltb 2, 2, 0
+; CHECK-NEXT:    mtvsrwz 34, 3
+; CHECK-NEXT:    vspltb 2, 2, 7
 ; CHECK-NEXT:    blr
 entry:
   %splat.splatinsert = insertelement <16 x i8> undef, i8 %a, i32 0
@@ -78,7 +76,7 @@ define <4 x float> @buildf(float %a) {
 ; CHECK-LABEL: buildf:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xscvdpspn 0, 1
-; CHECK-NEXT:    xxspltw 34, 0, 0
+; CHECK-NEXT:    xxspltw 34, 0, 1
 ; CHECK-NEXT:    blr
 entry:
   %splat.splatinsert = insertelement <4 x float> undef, float %a, i32 0

diff  --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index 1cb7d7b62055..ecc75307a596 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -1275,8 +1275,7 @@ define <4 x i32> @spltMemVali(i32* nocapture readonly %ptr) {
 ; P8BE-LABEL: spltMemVali:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    lfiwzx f0, 0, r3
-; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
-; P8BE-NEXT:    xxspltw v2, vs0, 0
+; P8BE-NEXT:    xxspltw v2, vs0, 1
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: spltMemVali:
@@ -1712,14 +1711,16 @@ entry:
 define <4 x i32> @spltMemValConvftoi(float* nocapture readonly %ptr) {
 ; P9BE-LABEL: spltMemValConvftoi:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    lxvwsx vs0, 0, r3
-; P9BE-NEXT:    xvcvspsxws v2, vs0
+; P9BE-NEXT:    lfiwzx f0, 0, r3
+; P9BE-NEXT:    xvcvspsxws vs0, vs0
+; P9BE-NEXT:    xxspltw v2, vs0, 1
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: spltMemValConvftoi:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    lxvwsx vs0, 0, r3
-; P9LE-NEXT:    xvcvspsxws v2, vs0
+; P9LE-NEXT:    lfiwzx f0, 0, r3
+; P9LE-NEXT:    xvcvspsxws vs0, vs0
+; P9LE-NEXT:    xxspltw v2, vs0, 1
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: spltMemValConvftoi:
@@ -2793,8 +2794,7 @@ define <4 x i32> @spltMemValui(i32* nocapture readonly %ptr) {
 ; P8BE-LABEL: spltMemValui:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    lfiwzx f0, 0, r3
-; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
-; P8BE-NEXT:    xxspltw v2, vs0, 0
+; P8BE-NEXT:    xxspltw v2, vs0, 1
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: spltMemValui:
@@ -3231,14 +3231,16 @@ entry:
 define <4 x i32> @spltMemValConvftoui(float* nocapture readonly %ptr) {
 ; P9BE-LABEL: spltMemValConvftoui:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    lxvwsx vs0, 0, r3
-; P9BE-NEXT:    xvcvspuxws v2, vs0
+; P9BE-NEXT:    lfiwzx f0, 0, r3
+; P9BE-NEXT:    xvcvspuxws vs0, vs0
+; P9BE-NEXT:    xxspltw v2, vs0, 1
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: spltMemValConvftoui:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    lxvwsx vs0, 0, r3
-; P9LE-NEXT:    xvcvspuxws v2, vs0
+; P9LE-NEXT:    lfiwzx f0, 0, r3
+; P9LE-NEXT:    xvcvspuxws vs0, vs0
+; P9LE-NEXT:    xxspltw v2, vs0, 1
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: spltMemValConvftoui:

diff  --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 35b590dec1b1..8cf354e38cd7 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -5,6 +5,9 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
 ; RUN: FileCheck %s --check-prefix=CHECK-P9
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-P9-BE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:     -mcpu=pwr8 -mattr=-vsx -ppc-asm-full-reg-names \
 ; RUN:     -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-NOVSX
@@ -23,6 +26,11 @@ define dso_local <16 x i8> @testmrghb(<16 x i8> %a, <16 x i8> %b) local_unnamed_
 ; CHECK-P9-NEXT:    vmrghb v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testmrghb:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    vmrglb v2, v2, v3
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testmrghb:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    vmrghb v2, v3, v2
@@ -47,6 +55,11 @@ define dso_local <16 x i8> @testmrghb2(<16 x i8> %a, <16 x i8> %b) local_unnamed
 ; CHECK-P9-NEXT:    vmrghb v2, v2, v3
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testmrghb2:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    vmrglb v2, v3, v2
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testmrghb2:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
@@ -74,6 +87,11 @@ define dso_local <16 x i8> @testmrghh(<16 x i8> %a, <16 x i8> %b) local_unnamed_
 ; CHECK-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testmrghh:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    vmrglh v2, v2, v3
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testmrghh:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    vmrghh v2, v3, v2
@@ -98,6 +116,11 @@ define dso_local <16 x i8> @testmrghh2(<16 x i8> %a, <16 x i8> %b) local_unnamed
 ; CHECK-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testmrghh2:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testmrghh2:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
@@ -125,6 +148,11 @@ define dso_local <16 x i8> @testmrglb(<16 x i8> %a, <16 x i8> %b) local_unnamed_
 ; CHECK-P9-NEXT:    vmrglb v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testmrglb:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testmrglb:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    vmrglb v2, v3, v2
@@ -149,6 +177,11 @@ define dso_local <16 x i8> @testmrglb2(<16 x i8> %a, <16 x i8> %b) local_unnamed
 ; CHECK-P9-NEXT:    vmrglb v2, v2, v3
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testmrglb2:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    vmrghb v2, v3, v2
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testmrglb2:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
@@ -176,6 +209,11 @@ define dso_local <16 x i8> @testmrglh(<16 x i8> %a, <16 x i8> %b) local_unnamed_
 ; CHECK-P9-NEXT:    vmrglh v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testmrglh:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testmrglh:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    vmrglh v2, v3, v2
@@ -200,6 +238,11 @@ define dso_local <16 x i8> @testmrglh2(<16 x i8> %a, <16 x i8> %b) local_unnamed
 ; CHECK-P9-NEXT:    vmrglh v2, v2, v3
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testmrglh2:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testmrglh2:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
@@ -227,6 +270,11 @@ define dso_local <16 x i8> @testmrghw(<16 x i8> %a, <16 x i8> %b) local_unnamed_
 ; CHECK-P9-NEXT:    vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testmrghw:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    vmrglw v2, v2, v3
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testmrghw:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    vmrghw v2, v3, v2
@@ -251,6 +299,11 @@ define dso_local <16 x i8> @testmrghw2(<16 x i8> %a, <16 x i8> %b) local_unnamed
 ; CHECK-P9-NEXT:    vmrghw v2, v2, v3
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testmrghw2:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testmrghw2:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
@@ -278,6 +331,11 @@ define dso_local <16 x i8> @testmrglw(<16 x i8> %a, <16 x i8> %b) local_unnamed_
 ; CHECK-P9-NEXT:    vmrglw v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testmrglw:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testmrglw:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    vmrglw v2, v3, v2
@@ -302,6 +360,11 @@ define dso_local <16 x i8> @testmrglw2(<16 x i8> %a, <16 x i8> %b) local_unnamed
 ; CHECK-P9-NEXT:    vmrglw v2, v2, v3
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testmrglw2:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testmrglw2:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    addis r3, r2, .LCPI11_0@toc@ha
@@ -334,6 +397,16 @@ define dso_local <8 x i16> @testmrglb3(<8 x i8>* nocapture readonly %a) local_un
 ; CHECK-P9-NEXT:    vmrghb v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testmrglb3:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    lxsd v2, 0(r3)
+; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI12_0@toc@ha
+; CHECK-P9-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI12_0@toc@l
+; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
+; CHECK-P9-BE-NEXT:    vperm v2, v4, v2, v3
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testmrglb3:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    vxor v2, v2, v2
@@ -389,6 +462,20 @@ define dso_local void @no_crash_elt0_from_RHS(<2 x double>* noalias nocapture de
 ; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
 ; CHECK-P9-NEXT:    stxv vs0, 0(r30)
 ;
+; CHECK-P9-BE-LABEL: no_crash_elt0_from_RHS:
+; CHECK-P9-BE:       # %bb.0: # %test_entry
+; CHECK-P9-BE-NEXT:    mflr r0
+; CHECK-P9-BE-NEXT:    std r0, 16(r1)
+; CHECK-P9-BE-NEXT:    stdu r1, -128(r1)
+; CHECK-P9-BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; CHECK-P9-BE-NEXT:    mr r30, r3
+; CHECK-P9-BE-NEXT:    bl dummy
+; CHECK-P9-BE-NEXT:    nop
+; CHECK-P9-BE-NEXT:    xxlxor f0, f0, f0
+; CHECK-P9-BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; CHECK-P9-BE-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-P9-BE-NEXT:    stxv vs0, 0(r30)
+;
 ; CHECK-NOVSX-LABEL: no_crash_elt0_from_RHS:
 ; CHECK-NOVSX:       # %bb.0: # %test_entry
 ; CHECK-NOVSX-NEXT:    mflr r0
@@ -435,9 +522,15 @@ define dso_local <16 x i8> @no_crash_bitcast(i32 %a) {
 ;
 ; CHECK-P9-LABEL: no_crash_bitcast:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: no_crash_bitcast:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-P9-BE-NEXT:    vmrghw v2, v2, v2
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: no_crash_bitcast:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    addis r4, r2, .LCPI14_0@toc@ha
@@ -482,6 +575,17 @@ define dso_local <4 x i32> @replace_undefs_in_splat(<4 x i32> %a) local_unnamed_
 ; CHECK-P9-NEXT:    vmrgow v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: replace_undefs_in_splat:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
+; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI15_0@toc@l
+; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
+; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI15_1@toc@ha
+; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI15_1@toc@l
+; CHECK-P9-BE-NEXT:    lxvx v4, 0, r3
+; CHECK-P9-BE-NEXT:    vperm v2, v2, v4, v3
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: replace_undefs_in_splat:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
@@ -531,6 +635,14 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(i32* nocapture re
 ; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: no_RAUW_in_combine_during_legalize:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    sldi r4, r4, 2
+; CHECK-P9-BE-NEXT:    xxlxor v3, v3, v3
+; CHECK-P9-BE-NEXT:    lxsiwzx v2, r3, r4
+; CHECK-P9-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: no_RAUW_in_combine_during_legalize:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    sldi r4, r4, 2
@@ -577,6 +689,12 @@ define dso_local <4 x i32> @testSplat4Low(<8 x i8>* nocapture readonly %ptr) loc
 ; CHECK-P9-NEXT:    lxvwsx v2, 0, r3
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testSplat4Low:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    addi r3, r3, 4
+; CHECK-P9-BE-NEXT:    lxvwsx v2, 0, r3
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testSplat4Low:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    ld r3, 0(r3)
@@ -611,6 +729,11 @@ define dso_local <4 x i32> @testSplat4hi(<8 x i8>* nocapture readonly %ptr) loca
 ; CHECK-P9-NEXT:    lxvwsx v2, 0, r3
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testSplat4hi:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    lxvwsx v2, 0, r3
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testSplat4hi:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    ld r3, 0(r3)
@@ -644,6 +767,11 @@ define dso_local <2 x i64> @testSplat8(<8 x i8>* nocapture readonly %ptr) local_
 ; CHECK-P9-NEXT:    lxvdsx v2, 0, r3
 ; CHECK-P9-NEXT:    blr
 ;
+; CHECK-P9-BE-LABEL: testSplat8:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    lxvdsx v2, 0, r3
+; CHECK-P9-BE-NEXT:    blr
+;
 ; CHECK-NOVSX-LABEL: testSplat8:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    ld r3, 0(r3)

diff  --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
index 2e226a386549..a88c6201cf9a 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
@@ -482,18 +482,16 @@ define <2 x float> @fptrunc_v2f32_v2f64(<2 x double> %vf1) {
 ; P8-NEXT:    xsrsp f0, f0
 ; P8-NEXT:    xscvdpspn v2, f1
 ; P8-NEXT:    xscvdpspn v3, f0
-; P8-NEXT:    vmrghw v2, v2, v3
+; P8-NEXT:    vmrgow v2, v2, v3
 ; P8-NEXT:    blr
 ;
 ; P9-LABEL: fptrunc_v2f32_v2f64:
 ; P9:       # %bb.0:
 ; P9-NEXT:    xsrsp f0, v2
-; P9-NEXT:    xscvdpspn vs0, f0
-; P9-NEXT:    xxsldwi v3, vs0, vs0, 3
+; P9-NEXT:    xscvdpspn v3, f0
 ; P9-NEXT:    xxswapd vs0, v2
 ; P9-NEXT:    xsrsp f0, f0
-; P9-NEXT:    xscvdpspn vs0, f0
-; P9-NEXT:    xxsldwi v2, vs0, vs0, 3
+; P9-NEXT:    xscvdpspn v2, f0
 ; P9-NEXT:    vmrghw v2, v3, v2
 ; P9-NEXT:    blr
   %res = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(

diff  --git a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll
index a03ab5f9519e..42b449f22776 100644
--- a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll
+++ b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll
@@ -15,8 +15,7 @@ define <16 x i8> @test(i32* %s, i32* %t) {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lfiwzx f0, 0, r3
-; CHECK-NEXT:    xxsldwi vs0, f0, f0, 1
-; CHECK-NEXT:    xxspltw v2, vs0, 0
+; CHECK-NEXT:    xxspltw v2, vs0, 1
 ; CHECK-NEXT:    blr
 entry:
   %0 = bitcast i32* %s to <4 x i8>*

diff  --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
index 1fe63271add4..6a251e076005 100644
--- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:       -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr8 -relocation-model=pic \
 ; RUN:       | FileCheck %s
@@ -16,505 +17,747 @@
 
 ; Function Attrs: norecurse nounwind readnone
 define <16 x i8> @buildc(i8 zeroext %a) {
+; CHECK-LABEL: buildc:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrwz v2, r3
+; CHECK-NEXT:    vspltb v2, v2, 7
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: buildc:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mtvsrd v2, r3
+; CHECK-LE-NEXT:    vspltb v2, v2, 7
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: buildc:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mtvsrwz 34, 3
+; CHECK-AIX-NEXT:    vspltb 2, 2, 7
+; CHECK-AIX-NEXT:    blr
 entry:
   %splat.splatinsert = insertelement <16 x i8> undef, i8 %a, i32 0
   %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
   ret <16 x i8> %splat.splat
-; CHECK-LABEL: buildc
-; CHECK: sldi r3, r3, 56
-; CHECK: mtvsrd v2, r3
 
-; CHECK-LE-LABEL: buildc
-; CHECK-LE: mtvsrd v2, r3
-; CHECK-LE: vspltb v2, v2, 7
 
-; CHECK-AIX-LABEL: buildc:
-; CHECK-AIX: mtvsrd 34, 3
-; CHECK-AIX: vspltb 2, 2, 0
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <8 x i16> @builds(i16 zeroext %a) {
+; CHECK-LABEL: builds:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrwz v2, r3
+; CHECK-NEXT:    vsplth v2, v2, 3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: builds:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mtvsrd v2, r3
+; CHECK-LE-NEXT:    vsplth v2, v2, 3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: builds:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mtvsrwz 34, 3
+; CHECK-AIX-NEXT:    vsplth 2, 2, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %splat.splatinsert = insertelement <8 x i16> undef, i16 %a, i32 0
   %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
   ret <8 x i16> %splat.splat
-; CHECK-LABEL: builds
-; CHECK: sldi r3, r3, 48
-; CHECK: mtvsrd v2, r3
 
-; CHECK-LE-LABEL: builds
-; CHECK-LE: mtvsrd v2, r3
-; CHECK-LE: vsplth v2, v2, 3
 
-; CHECK-AIX-LABEL: builds:
-; CHECK-AIX: mtvsrd 34, 3
-; CHECK-AIX: vsplth 2, 2, 0
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x i32> @buildi(i32 zeroext %a) {
+; CHECK-LABEL: buildi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtfprwz f0, r3
+; CHECK-NEXT:    xxspltw v2, vs0, 1
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: buildi:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mtfprwz f0, r3
+; CHECK-LE-NEXT:    xxspltw v2, vs0, 1
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: buildi:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mtfprwz 0, 3
+; CHECK-AIX-NEXT:    xxspltw 34, 0, 1
+; CHECK-AIX-NEXT:    blr
 entry:
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %a, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %splat.splat
-; CHECK-LABEL: buildi
-; CHECK: mtfprwz f0, r3
-; CHECK: xxspltw v2, vs0, 1
 
-; CHECK-LE-LABEL: buildi
-; CHECK-LE: mtfprwz f0, r3
-; CHECK-LE: xxspltw v2, vs0, 1
 
-; CHECK-AIX-LABEL: buildi:
-; CHECK-AIX: mtfprwz 0, 3
-; CHECK-AIX: xxspltw 34, 0, 1
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <2 x i64> @buildl(i64 %a) {
+; CHECK-LABEL: buildl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    xxspltd v2, vs0, 0
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: buildl:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mtfprd f0, r3
+; CHECK-LE-NEXT:    xxspltd v2, vs0, 0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: buildl:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mtfprd 0, 3
+; CHECK-AIX-NEXT:    xxmrghd 34, 0, 0
+; CHECK-AIX-NEXT:    blr
 entry:
   %splat.splatinsert = insertelement <2 x i64> undef, i64 %a, i32 0
   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   ret <2 x i64> %splat.splat
-; CHECK-LABEL: buildl
-; CHECK: mtfprd f0, r3
 
-; CHECK-LE-LABEL: buildl
-; CHECK-LE: mtfprd f0, r3
-; CHECK-LE: xxspltd v2, vs0, 0
 
-; CHECK-AIX-LABEL: buildl:
-; CHECK-AIX: mtfprd 0, 3
-; CHECK-AIX: xxmrghd 34, 0, 0
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define <4 x float> @buildf(float %a) {
+; CHECK-LABEL: buildf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xscvdpspn vs0, f1
+; CHECK-NEXT:    xxspltw v2, vs0, 0
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: buildf:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xscvdpspn vs0, f1
+; CHECK-LE-NEXT:    xxspltw v2, vs0, 0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: buildf:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xscvdpspn 0, 1
+; CHECK-AIX-NEXT:    xxspltw 34, 0, 0
+; CHECK-AIX-NEXT:    blr
 entry:
   %splat.splatinsert = insertelement <4 x float> undef, float %a, i32 0
   %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
   ret <4 x float> %splat.splat
-; CHECK-LABEL: buildf
-; CHECK: xscvdpspn vs0, f1
-; CHECK: xxspltw v2, vs0, 0
 
-; CHECK-LE-LABEL: buildf
-; CHECK-LE: xscvdpspn vs0, f1
-; CHECK-LE: xxspltw v2, vs0, 0
 
-; CHECK-AIX-LABEL: buildf:
-; CHECK-AIX: xscvdpspn 0, 1
-; CHECK-AIX: xxspltw 34, 0, 0
 }
 
 ; The optimization to remove stack operations from PPCDAGToDAGISel::Select
 ; should still trigger for v2f64, producing an lxvdsx.
 ; Function Attrs: norecurse nounwind readonly
 define <2 x double> @buildd() {
+; CHECK-LABEL: buildd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    lxvdsx v2, 0, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: buildd:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-LE-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-LE-NEXT:    lxvdsx v2, 0, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: buildd:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    ld 3, L..C0(2)
+; CHECK-AIX-NEXT:    lxvdsx 34, 0, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %0 = load double, double* @d, align 8
   %splat.splatinsert = insertelement <2 x double> undef, double %0, i32 0
   %splat.splat = shufflevector <2 x double> %splat.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer
   ret <2 x double> %splat.splat
-; CHECK-LABEL: buildd
-; CHECK: ld r3, .LC0@toc@l(r3)
-; CHECK: lxvdsx v2, 0, r3
 
-; CHECK-LE-LABEL: buildd
-; CHECK-LE: ld r3, .LC0@toc@l(r3)
-; CHECK-LE: lxvdsx v2, 0, r3
 
-; CHECK-AIX-LABEL: buildd:
-; CHECK-AIX: ld 3, L..C0(2)
-; CHECK-AIX: lxvdsx 34, 0, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc0(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 8, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc0:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc0:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 8, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 0
   ret i8 %vecext
-; CHECK-LABEL: @getsc0
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 8, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc0
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: clrldi r3, r3, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc0:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 8, 56
-; CHECK-AIX: extsb 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc1(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 16, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc1:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 56, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc1:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 16, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 1
   ret i8 %vecext
-; CHECK-LABEL: @getsc1
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 16, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc1
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 56, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc1:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 16, 56
-; CHECK-AIX: extsb 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc2(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 24, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 48, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc2:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 24, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 2
   ret i8 %vecext
-; CHECK-LABEL: @getsc2
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 24, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc2
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 48, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc2:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 24, 56
-; CHECK-AIX: extsb 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc3(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 32, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc3:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 40, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc3:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 32, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 3
   ret i8 %vecext
-; CHECK-LABEL: @getsc3
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 32, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc3
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 40, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc3:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 32, 56
-; CHECK-AIX: extsb 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc4(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 40, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc4:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 32, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc4:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 40, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 4
   ret i8 %vecext
-; CHECK-LABEL: @getsc4
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 40, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc4
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 32, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc4:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 40, 56
-; CHECK-AIX: extsb 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc5(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 48, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc5:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 24, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc5:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 48, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 5
   ret i8 %vecext
-; CHECK-LABEL: @getsc5
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 48, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc5
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 24, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc5:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 48, 56
-; CHECK-AIX: extsb 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc6(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 56, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc6:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 16, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc6:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 56, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 6
   ret i8 %vecext
-; CHECK-LABEL: @getsc6
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 56, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc6
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 16, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc6:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 56, 56
-; CHECK-AIX: extsb 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc7(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc7:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 8, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc7:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 7
   ret i8 %vecext
-; CHECK-LABEL: @getsc7
-; CHECK: mfvsrd r3, v2
-; CHECK: clrldi r3, r3, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc7
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 8, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc7:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: clrldi 3, 3, 56
-; CHECK-AIX: extsb 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc8(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 8, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc8:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc8:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 8, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 8
   ret i8 %vecext
-; CHECK-LABEL: @getsc8
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 8, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc8
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: clrldi r3, r3, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc8:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 8, 56
-; CHECK-AIX: extsb 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc9(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc9:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 16, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc9:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 56, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc9:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 16, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 9
   ret i8 %vecext
-; CHECK-LABEL: @getsc9
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 16, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc9
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 56, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc9:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 16, 56
-; CHECK-AIX: extsb 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc10(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc10:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 24, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc10:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 48, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc10:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 24, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 10
   ret i8 %vecext
-; CHECK-LABEL: @getsc10
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 24, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc10
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 48, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc10:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 24, 56
-; CHECK-AIX: extsb 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc11(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc11:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 32, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc11:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 40, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc11:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 32, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 11
   ret i8 %vecext
-; CHECK-LABEL: @getsc11
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 32, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc11
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 40, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc11:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 32, 56
-; CHECK-AIX: extsb 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc12(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc12:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 40, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc12:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 32, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc12:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 40, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 12
   ret i8 %vecext
-; CHECK-LABEL: @getsc12
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 40, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc12
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 32, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc12:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 40, 56
-; CHECK-AIX: extsb 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc13(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc13:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 48, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc13:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 24, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc13:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 48, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 13
   ret i8 %vecext
-; CHECK-LABEL: @getsc13
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 48, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc13
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 24, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc13:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 48, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc14(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc14:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 56, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc14:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 16, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc14:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 56, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 14
   ret i8 %vecext
-; CHECK-LABEL: @getsc14
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 56, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc14
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 16, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc14:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 56, 56
-; CHECK-AIX: extsb 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getsc15(<16 x i8> %vsc) {
+; CHECK-LABEL: getsc15:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsc15:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 8, 56
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsc15:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 15
   ret i8 %vecext
-; CHECK-LABEL: @getsc15
-; CHECK: mffprd r3, f0
-; CHECK: clrldi  r3, r3, 56
-; CHECK: extsb r3, r3
 
-; CHECK-LE-LABEL: @getsc15
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 8, 56
-; CHECK-LE: extsb r3, r3
 
-; CHECK-AIX-LABEL: getsc15:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: clrldi 3, 3, 56
-; CHECK-AIX: extsb 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc0(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 8, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc0:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc0:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 8, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 0
   ret i8 %vecext
-; CHECK-LABEL: @getuc0
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 8, 56
 
-; CHECK-LE-LABEL: @getuc0
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: clrldi r3, r3, 56
 
-; CHECK-AIX-LABEL: getuc0:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 8, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc1(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 16, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc1:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 56, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc1:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 16, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 1
   ret i8 %vecext
-; CHECK-LABEL: @getuc1
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 16, 56
 
-; CHECK-LE-LABEL: @getuc1
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 56, 56
 
-; CHECK-AIX-LABEL: getuc1:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 16, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc2(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 24, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 48, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc2:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 24, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 2
   ret i8 %vecext
-; CHECK-LABEL: @getuc2
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 24, 56
 
-; CHECK-LE-LABEL: @getuc2
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 48, 56
 
-; CHECK-AIX-LABEL: getuc2:
 ; CHECK-AIX mfvsrd 3, 34
 ; CHECK-AIX rldicl 3, 3, 24, 56
 ; CHECK-AIX clrldi 3, 3, 56
@@ -522,273 +765,445 @@ entry:
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc3(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 32, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc3:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 40, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc3:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 32, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 3
   ret i8 %vecext
-; CHECK-LABEL: @getuc3
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 32, 56
 
-; CHECK-LE-LABEL: @getuc3
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 40, 56
 
-; CHECK-AIX-LABEL: getuc3:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 32, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc4(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 40, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc4:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 32, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc4:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 40, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 4
   ret i8 %vecext
-; CHECK-LABEL: @getuc4
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 40, 56
 
-; CHECK-LE-LABEL: @getuc4
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 32, 56
 
-; CHECK-AIX-LABEL: getuc4:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 40, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc5(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 48, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc5:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 24, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc5:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 48, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 5
   ret i8 %vecext
-; CHECK-LABEL: @getuc5
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 48, 56
 
-; CHECK-LE-LABEL: @getuc5
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 24, 56
 
-; CHECK-AIX-LABEL: getuc5:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 48, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc6(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 56, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc6:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 16, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc6:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 56, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 6
   ret i8 %vecext
-; CHECK-LABEL: @getuc6
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 56, 56
 
-; CHECK-LE-LABEL: @getuc6
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 16, 56
 
-; CHECK-AIX-LABEL: getuc6:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 56, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc7(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc7:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 8, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc7:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 7
   ret i8 %vecext
-; CHECK-LABEL: @getuc7
-; CHECK: mfvsrd r3, v2
-; CHECK: clrldi   r3, r3, 56
 
-; CHECK-LE-LABEL: @getuc7
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 8, 56
 
-; CHECK-AIX-LABEL: getuc7:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: clrldi 3, 3, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc8(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 8, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc8:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc8:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 8, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 8
   ret i8 %vecext
-; CHECK-LABEL: @getuc8
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 8, 56
 
-; CHECK-LE-LABEL: @getuc8
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: clrldi r3, r3, 56
 
-; CHECK-AIX-LABEL: getuc8:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 8, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc9(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc9:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 16, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc9:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 56, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc9:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 16, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 9
   ret i8 %vecext
-; CHECK-LABEL: @getuc9
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 16, 56
 
-; CHECK-LE-LABEL: @getuc9
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 56, 56
 
-; CHECK-AIX-LABEL: getuc9:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 16, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc10(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc10:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 24, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc10:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 48, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc10:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 24, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 10
   ret i8 %vecext
-; CHECK-LABEL: @getuc10
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 24, 56
 
-; CHECK-LE-LABEL: @getuc10
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 48, 56
 
-; CHECK-AIX-LABEL: getuc10:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 24, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc11(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc11:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 32, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc11:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 40, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc11:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 32, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 11
   ret i8 %vecext
-; CHECK-LABEL: @getuc11
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 32, 56
 
-; CHECK-LE-LABEL: @getuc11
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 40, 56
 
-; CHECK-AIX-LABEL: getuc11:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 32, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc12(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc12:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 40, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc12:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 32, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc12:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 40, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 12
   ret i8 %vecext
-; CHECK-LABEL: @getuc12
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 40, 56
 
-; CHECK-LE-LABEL: @getuc12
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 32, 56
 
-; CHECK-AIX-LABEL: getuc12:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 40, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc13(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc13:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 48, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc13:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 24, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc13:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 48, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 13
   ret i8 %vecext
-; CHECK-LABEL: @getuc13
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 48, 56
 
-; CHECK-LE-LABEL: @getuc13
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 24, 56
 
-; CHECK-AIX-LABEL: getuc13:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 48, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc14(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc14:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 56, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc14:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 16, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc14:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 56, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 14
   ret i8 %vecext
-; CHECK-LABEL: @getuc14
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 56, 56
 
-; CHECK-LE-LABEL: @getuc14
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 16, 56
 
-; CHECK-AIX-LABEL: getuc14
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 56, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getuc15(<16 x i8> %vuc) {
+; CHECK-LABEL: getuc15:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getuc15:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 8, 56
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getuc15:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 15
   ret i8 %vecext
-; CHECK-LABEL: @getuc15
-; CHECK: mffprd r3, f0
-; CHECK: clrldi   r3, r3, 56
 
-; CHECK-LE-LABEL: @getuc15
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 8, 56
 
-; CHECK-AIX-LABEL: getuc15:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: clrldi 3, 3, 56
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) {
-; CHECK-LABEL: @getvelsc
-; CHECK: andi. r4, r5, 8
-; CHECK: li r3, 7
-; CHECK: lvsl v3, 0, r4
-; CHECK: andc r3, r3, r5
-; CHECK: sldi r3, r3, 3
-; CHECK: vperm v2, v2, v2, v3
-; CHECK: mfvsrd r4, v2
-; CHECK: srd r3, r4, r3
-; CHECK: extsb r3, r3
-
-; CHECK-LE-LABEL: @getvelsc
-; CHECK-LE: li r3, 8
-; CHECK-LE: andc r3, r3, r5
-; CHECK-LE: lvsl v3, 0, r3
-; CHECK-LE: li r3, 7
-; CHECK-LE: and r3, r3, r5
-; CHECK-LE: vperm v2, v2, v2, v3
-; CHECK-LE: sldi r3, r3, 3
-; CHECK-LE: mfvsrd r4, v2
-; CHECK-LE: srd r3, r4, r3
-; CHECK-LE: extsb r3, r3
-
+; CHECK-LABEL: getvelsc:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andi. r4, r5, 8
+; CHECK-NEXT:    li r3, 7
+; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    andc r3, r3, r5
+; CHECK-NEXT:    sldi r3, r3, 3
+; CHECK-NEXT:    vperm v2, v2, v2, v3
+; CHECK-NEXT:    mfvsrd r4, v2
+; CHECK-NEXT:    srd r3, r4, r3
+; CHECK-NEXT:    extsb r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getvelsc:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    li r3, 8
+; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    lvsl v3, 0, r3
+; CHECK-LE-NEXT:    li r3, 7
+; CHECK-LE-NEXT:    and r3, r3, r5
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    sldi r3, r3, 3
+; CHECK-LE-NEXT:    mfvsrd r4, v2
+; CHECK-LE-NEXT:    srd r3, r4, r3
+; CHECK-LE-NEXT:    extsb r3, r3
+; CHECK-LE-NEXT:    blr
+;
 ; CHECK-AIX-LABEL: getvelsc:
-; CHECK-AIX: andi. 5, 3, 8
-; CHECK-AIX: li 4, 7
-; CHECK-AIX: lvsl 3, 0, 5
-; CHECK-AIX: andc 3, 4, 3
-; CHECK-AIX: sldi 3, 3, 3
-; CHECK-AIX: vperm 2, 2, 2, 3
-; CHECK-AIX: mfvsrd 4, 34
-; CHECK-AIX: srd 3, 4, 3
-; CHECK-AIX: extsb 3, 3
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    andi. 5, 3, 8
+; CHECK-AIX-NEXT:    li 4, 7
+; CHECK-AIX-NEXT:    lvsl 3, 0, 5
+; CHECK-AIX-NEXT:    andc 3, 4, 3
+; CHECK-AIX-NEXT:    sldi 3, 3, 3
+; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
+; CHECK-AIX-NEXT:    mfvsrd 4, 34
+; CHECK-AIX-NEXT:    srd 3, 4, 3
+; CHECK-AIX-NEXT:    extsb 3, 3
+; CHECK-AIX-NEXT:    blr
+
+
 entry:
   %vecext = extractelement <16 x i8> %vsc, i32 %i
   ret i8 %vecext
@@ -796,39 +1211,47 @@ entry:
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) {
-; CHECK-LABEL: @getveluc
-; CHECK: andi. r4, r5, 8
-; CHECK: li r3, 7
-; CHECK: lvsl v3, 0, r4
-; CHECK: andc r3, r3, r5
-; CHECK: sldi r3, r3, 3
-; CHECK: vperm v2, v2, v2, v3
-; CHECK: mfvsrd r4, v2
-; CHECK: srd r3, r4, r3
-; CHECK: clrldi  r3, r3, 5
-
-; CHECK-LE-LABEL: @getveluc
-; CHECK-LE: li r3, 8
-; CHECK-LE: andc r3, r3, r5
-; CHECK-LE: lvsl v3, 0, r3
-; CHECK-LE: li r3, 7
-; CHECK-LE: and r3, r3, r5
-; CHECK-LE: vperm v2, v2, v2, v3
-; CHECK-LE: sldi r3, r3, 3
-; CHECK-LE: mfvsrd r4, v2
-; CHECK-LE: srd r3, r4, r3
-; CHECK-LE: clrldi r3, r3, 56
-
+; CHECK-LABEL: getveluc:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andi. r4, r5, 8
+; CHECK-NEXT:    li r3, 7
+; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    andc r3, r3, r5
+; CHECK-NEXT:    sldi r3, r3, 3
+; CHECK-NEXT:    vperm v2, v2, v2, v3
+; CHECK-NEXT:    mfvsrd r4, v2
+; CHECK-NEXT:    srd r3, r4, r3
+; CHECK-NEXT:    clrldi r3, r3, 56
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getveluc:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    li r3, 8
+; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    lvsl v3, 0, r3
+; CHECK-LE-NEXT:    li r3, 7
+; CHECK-LE-NEXT:    and r3, r3, r5
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    sldi r3, r3, 3
+; CHECK-LE-NEXT:    mfvsrd r4, v2
+; CHECK-LE-NEXT:    srd r3, r4, r3
+; CHECK-LE-NEXT:    clrldi r3, r3, 56
+; CHECK-LE-NEXT:    blr
+;
 ; CHECK-AIX-LABEL: getveluc:
-; CHECK-AIX: andi. 5, 3, 8
-; CHECK-AIX: li 4, 7
-; CHECK-AIX: lvsl 3, 0, 5
-; CHECK-AIX: andc 3, 4, 3
-; CHECK-AIX: sldi 3, 3, 3
-; CHECK-AIX: vperm 2, 2, 2, 3
-; CHECK-AIX: mfvsrd 4, 34
-; CHECK-AIX: srd 3, 4, 3
-; CHECK-AIX: clrldi 3, 3, 56
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    andi. 5, 3, 8
+; CHECK-AIX-NEXT:    li 4, 7
+; CHECK-AIX-NEXT:    lvsl 3, 0, 5
+; CHECK-AIX-NEXT:    andc 3, 4, 3
+; CHECK-AIX-NEXT:    sldi 3, 3, 3
+; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
+; CHECK-AIX-NEXT:    mfvsrd 4, 34
+; CHECK-AIX-NEXT:    srd 3, 4, 3
+; CHECK-AIX-NEXT:    clrldi 3, 3, 56
+; CHECK-AIX-NEXT:    blr
+
+
 entry:
   %vecext = extractelement <16 x i8> %vuc, i32 %i
   ret i8 %vecext
@@ -836,354 +1259,538 @@ entry:
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i16 @getss0(<8 x i16> %vss) {
+; CHECK-LABEL: getss0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 16, 48
+; CHECK-NEXT:    extsh r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getss0:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    clrldi r3, r3, 48
+; CHECK-LE-NEXT:    extsh r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getss0:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 16, 48
+; CHECK-AIX-NEXT:    extsh 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vss, i32 0
   ret i16 %vecext
-; CHECK-LABEL: @getss0
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 16, 48
-; CHECK: extsh r3, r3
 
-; CHECK-LE-LABEL: @getss0
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: clrldi r3, r3, 48
-; CHECK-LE: extsh r3, r3
 
-; CHECK-AIX-LABEL: getss0:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 16, 48
-; CHECK-AIX: extsh 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i16 @getss1(<8 x i16> %vss) {
+; CHECK-LABEL: getss1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 32, 48
+; CHECK-NEXT:    extsh r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getss1:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 48, 48
+; CHECK-LE-NEXT:    extsh r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getss1:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 32, 48
+; CHECK-AIX-NEXT:    extsh 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vss, i32 1
   ret i16 %vecext
-; CHECK-LABEL: @getss1
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 32, 48
-; CHECK: extsh r3, r3
 
-; CHECK-LE-LABEL: @getss1
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 48, 48
-; CHECK-LE: extsh r3, r3
 
-; CHECK-AIX-LABEL: getss1:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 32, 48
-; CHECK-AIX: extsh 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i16 @getss2(<8 x i16> %vss) {
+; CHECK-LABEL: getss2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 48, 48
+; CHECK-NEXT:    extsh r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getss2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 32, 48
+; CHECK-LE-NEXT:    extsh r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getss2:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 48, 48
+; CHECK-AIX-NEXT:    extsh 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vss, i32 2
   ret i16 %vecext
-; CHECK-LABEL: @getss2
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 48, 48
-; CHECK: extsh r3, r3
 
-; CHECK-LE-LABEL: @getss2
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 32, 48
-; CHECK-LE: extsh r3, r3
 
-; CHECK-AIX-LABEL: getss2:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 48, 48
-; CHECK-AIX: extsh 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i16 @getss3(<8 x i16> %vss) {
+; CHECK-LABEL: getss3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    extsh r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getss3:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 16, 48
+; CHECK-LE-NEXT:    extsh r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getss3:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    clrldi 3, 3, 48
+; CHECK-AIX-NEXT:    extsh 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vss, i32 3
   ret i16 %vecext
-; CHECK-LABEL: @getss3
-; CHECK: mfvsrd r3, v2
-; CHECK: clrldi r3, r3, 48
-; CHECK: extsh r3, r3
 
-; CHECK-LE-LABEL: @getss3
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 16, 48
-; CHECK-LE: extsh r3, r3
 
-; CHECK-AIX-LABEL: getss3:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: clrldi 3, 3, 48
-; CHECK-AIX: extsh 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i16 @getss4(<8 x i16> %vss) {
+; CHECK-LABEL: getss4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 16, 48
+; CHECK-NEXT:    extsh r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getss4:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    clrldi r3, r3, 48
+; CHECK-LE-NEXT:    extsh r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getss4:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 16, 48
+; CHECK-AIX-NEXT:    extsh 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vss, i32 4
   ret i16 %vecext
-; CHECK-LABEL: @getss4
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 16, 48
-; CHECK: extsh r3, r3
 
-; CHECK-LE-LABEL: @getss4
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: clrldi r3, r3, 48
-; CHECK-LE: extsh r3, r3
 
-; CHECK-AIX-LABEL: getss4:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 16, 48
-; CHECK-AIX: extsh 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i16 @getss5(<8 x i16> %vss) {
+; CHECK-LABEL: getss5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 32, 48
+; CHECK-NEXT:    extsh r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getss5:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 48, 48
+; CHECK-LE-NEXT:    extsh r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getss5:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 32, 48
+; CHECK-AIX-NEXT:    extsh 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vss, i32 5
   ret i16 %vecext
-; CHECK-LABEL: @getss5
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 32, 48
-; CHECK: extsh r3, r3
 
-; CHECK-LE-LABEL: @getss5
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 48, 48
-; CHECK-LE: extsh r3, r3
 
-; CHECK-AIX-LABEL: getss5:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 32, 48
-; CHECK-AIX: extsh 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i16 @getss6(<8 x i16> %vss) {
+; CHECK-LABEL: getss6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 48, 48
+; CHECK-NEXT:    extsh r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getss6:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 32, 48
+; CHECK-LE-NEXT:    extsh r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getss6:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 48, 48
+; CHECK-AIX-NEXT:    extsh 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vss, i32 6
   ret i16 %vecext
-; CHECK-LABEL: @getss6
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 48, 48
-; CHECK: extsh r3, r3
 
-; CHECK-LE-LABEL: @getss6
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 32, 48
-; CHECK-LE: extsh r3, r3
 
-; CHECK-AIX-LABEL: getss6:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 48, 48
-; CHECK-AIX: extsh 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i16 @getss7(<8 x i16> %vss) {
+; CHECK-LABEL: getss7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    extsh r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getss7:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 16, 48
+; CHECK-LE-NEXT:    extsh r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getss7:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    clrldi 3, 3, 48
+; CHECK-AIX-NEXT:    extsh 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vss, i32 7
   ret i16 %vecext
-; CHECK-LABEL: @getss7
-; CHECK: mffprd r3, f0
-; CHECK: clrldi  r3, r3, 48
-; CHECK: extsh r3, r3
 
-; CHECK-LE-LABEL: @getss7
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 16, 48
-; CHECK-LE: extsh r3, r3
 
-; CHECK-AIX-LABEL: getss7:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: clrldi 3, 3, 48
-; CHECK-AIX: extsh 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i16 @getus0(<8 x i16> %vus) {
+; CHECK-LABEL: getus0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 16, 48
+; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getus0:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    clrldi r3, r3, 48
+; CHECK-LE-NEXT:    clrldi r3, r3, 48
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getus0:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 16, 48
+; CHECK-AIX-NEXT:    clrldi 3, 3, 48
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vus, i32 0
   ret i16 %vecext
-; CHECK-LABEL: @getus0
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 16, 48
 
-; CHECK-LE-LABEL: @getus0
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: clrldi r3, r3, 48
 
-; CHECK-AIX-LABEL: getus0:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 16, 48
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i16 @getus1(<8 x i16> %vus) {
+; CHECK-LABEL: getus1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 32, 48
+; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getus1:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 48, 48
+; CHECK-LE-NEXT:    clrldi r3, r3, 48
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getus1:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 32, 48
+; CHECK-AIX-NEXT:    clrldi 3, 3, 48
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vus, i32 1
   ret i16 %vecext
-; CHECK-LABEL: @getus1
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 32, 48
 
-; CHECK-LE-LABEL: @getus1
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 48, 48
 
-; CHECK-AIX-LABEL: getus1:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 32, 48
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i16 @getus2(<8 x i16> %vus) {
+; CHECK-LABEL: getus2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    rldicl r3, r3, 48, 48
+; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getus2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 32, 48
+; CHECK-LE-NEXT:    clrldi r3, r3, 48
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getus2:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    rldicl 3, 3, 48, 48
+; CHECK-AIX-NEXT:    clrldi 3, 3, 48
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vus, i32 2
   ret i16 %vecext
-; CHECK-LABEL: @getus2
-; CHECK: mfvsrd r3, v2
-; CHECK: rldicl r3, r3, 48, 48
 
-; CHECK-LE-LABEL: @getus2
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 32, 48
 
-; CHECK-AIX-LABEL: getus2:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: rldicl 3, 3, 48, 48
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i16 @getus3(<8 x i16> %vus) {
+; CHECK-LABEL: getus3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getus3:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    rldicl r3, r3, 16, 48
+; CHECK-LE-NEXT:    clrldi r3, r3, 48
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getus3:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    clrldi 3, 3, 48
+; CHECK-AIX-NEXT:    clrldi 3, 3, 48
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vus, i32 3
   ret i16 %vecext
-; CHECK-LABEL: @getus3
-; CHECK: mfvsrd r3, v2
-; CHECK: clrldi   r3, r3, 48
 
-; CHECK-LE-LABEL: @getus3
-; CHECK-LE: mffprd r3, f0
-; CHECK-LE: rldicl r3, r3, 16, 48
 
-; CHECK-AIX-LABEL: getus3:
-; CHECK-AIX: mfvsrd 3, 34
-; CHECK-AIX: clrldi 3, 3, 48
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i16 @getus4(<8 x i16> %vus) {
+; CHECK-LABEL: getus4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 16, 48
+; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getus4:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    clrldi r3, r3, 48
+; CHECK-LE-NEXT:    clrldi r3, r3, 48
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getus4:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 16, 48
+; CHECK-AIX-NEXT:    clrldi 3, 3, 48
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vus, i32 4
   ret i16 %vecext
-; CHECK-LABEL: @getus4
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 16, 48
 
-; CHECK-LE-LABEL: @getus4
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: clrldi r3, r3, 48
 
-; CHECK-AIX-LABEL: getus4:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 16, 48
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i16 @getus5(<8 x i16> %vus) {
+; CHECK-LABEL: getus5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 32, 48
+; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getus5:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 48, 48
+; CHECK-LE-NEXT:    clrldi r3, r3, 48
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getus5:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 32, 48
+; CHECK-AIX-NEXT:    clrldi 3, 3, 48
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vus, i32 5
   ret i16 %vecext
-; CHECK-LABEL: @getus5
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 32, 48
 
-; CHECK-LE-LABEL: @getus5
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 48, 48
 
-; CHECK-AIX-LABEL: getus5:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 32, 48
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i16 @getus6(<8 x i16> %vus) {
+; CHECK-LABEL: getus6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    rldicl r3, r3, 48, 48
+; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getus6:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 32, 48
+; CHECK-LE-NEXT:    clrldi r3, r3, 48
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getus6:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    rldicl 3, 3, 48, 48
+; CHECK-AIX-NEXT:    clrldi 3, 3, 48
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vus, i32 6
   ret i16 %vecext
-; CHECK-LABEL: @getus6
-; CHECK: mffprd r3, f0
-; CHECK: rldicl r3, r3, 48, 48
 
-; CHECK-LE-LABEL: @getus6
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 32, 48
 
-; CHECK-AIX-LABEL: getus6:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: rldicl 3, 3, 48, 48
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i16 @getus7(<8 x i16> %vus) {
+; CHECK-LABEL: getus7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getus7:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    rldicl r3, r3, 16, 48
+; CHECK-LE-NEXT:    clrldi r3, r3, 48
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getus7:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    clrldi 3, 3, 48
+; CHECK-AIX-NEXT:    clrldi 3, 3, 48
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %vus, i32 7
   ret i16 %vecext
-; CHECK-LABEL: @getus7
-; CHECK: mffprd r3, f0
-; CHECK: clrldi   r3, r3, 48
 
-; CHECK-LE-LABEL: @getus7
-; CHECK-LE: mfvsrd r3, v2
-; CHECK-LE: rldicl r3, r3, 16, 48
 
-; CHECK-AIX-LABEL: getus7:
-; CHECK-AIX: mffprd 3, 0
-; CHECK-AIX: clrldi 3, 3, 48
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) {
-; CHECK-LABEL: @getvelss
-; CHECK: andi. r4, r5, 4
-; CHECK: li r3, 3
-; CHECK: sldi r4, r4, 1
-; CHECK: andc r3, r3, r5
-; CHECK: lvsl v3, 0, r4
-; CHECK: sldi r3, r3, 4
-; CHECK: vperm v2, v2, v2, v3
-; CHECK: mfvsrd r4, v2
-; CHECK: srd r3, r4, r3
-; CHECK: extsh r3, r3
-
-; CHECK-LE-LABEL: @getvelss
-; CHECK-LE: li r3, 4
-; CHECK-LE: andc r3, r3, r5
-; CHECK-LE: sldi r3, r3, 1
-; CHECK-LE: lvsl v3, 0, r3
-; CHECK-LE: li r3, 3
-; CHECK-LE: and r3, r3, r5
-; CHECK-LE: vperm v2, v2, v2, v3
-; CHECK-LE: sldi r3, r3, 4
-; CHECK-LE: mfvsrd r4, v2
-; CHECK-LE: srd r3, r4, r3
-; CHECK-LE: extsh r3, r3
-
+; CHECK-LABEL: getvelss:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andi. r4, r5, 4
+; CHECK-NEXT:    li r3, 3
+; CHECK-NEXT:    sldi r4, r4, 1
+; CHECK-NEXT:    andc r3, r3, r5
+; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    sldi r3, r3, 4
+; CHECK-NEXT:    vperm v2, v2, v2, v3
+; CHECK-NEXT:    mfvsrd r4, v2
+; CHECK-NEXT:    srd r3, r4, r3
+; CHECK-NEXT:    extsh r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getvelss:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    li r3, 4
+; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    sldi r3, r3, 1
+; CHECK-LE-NEXT:    lvsl v3, 0, r3
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    and r3, r3, r5
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    sldi r3, r3, 4
+; CHECK-LE-NEXT:    mfvsrd r4, v2
+; CHECK-LE-NEXT:    srd r3, r4, r3
+; CHECK-LE-NEXT:    extsh r3, r3
+; CHECK-LE-NEXT:    blr
+;
 ; CHECK-AIX-LABEL: getvelss:
-; CHECK-AIX: andi. 5, 3, 4
-; CHECK-AIX: li 4, 3
-; CHECK-AIX: sldi 5, 5, 1
-; CHECK-AIX: andc 3, 4, 3
-; CHECK-AIX: lvsl 3, 0, 5
-; CHECK-AIX: sldi 3, 3, 4
-; CHECK-AIX: vperm 2, 2, 2, 3
-; CHECK-AIX: mfvsrd 4, 34
-; CHECK-AIX: srd 3, 4, 3
-; CHECK-AIX: extsh 3, 3
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    andi. 5, 3, 4
+; CHECK-AIX-NEXT:    li 4, 3
+; CHECK-AIX-NEXT:    sldi 5, 5, 1
+; CHECK-AIX-NEXT:    andc 3, 4, 3
+; CHECK-AIX-NEXT:    lvsl 3, 0, 5
+; CHECK-AIX-NEXT:    sldi 3, 3, 4
+; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
+; CHECK-AIX-NEXT:    mfvsrd 4, 34
+; CHECK-AIX-NEXT:    srd 3, 4, 3
+; CHECK-AIX-NEXT:    extsh 3, 3
+; CHECK-AIX-NEXT:    blr
+
+
 entry:
   %vecext = extractelement <8 x i16> %vss, i32 %i
   ret i16 %vecext
@@ -1191,42 +1798,50 @@ entry:
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) {
-; CHECK-LABEL: @getvelus
-; CHECK: andi. r4, r5, 4
-; CHECK: li r3, 3
-; CHECK: sldi r4, r4, 1
-; CHECK: andc r3, r3, r5
-; CHECK: lvsl v3, 0, r4
-; CHECK: sldi r3, r3, 4
-; CHECK: vperm v2, v2, v2, v3
-; CHECK: mfvsrd r4, v2
-; CHECK: srd r3, r4, r3
-; CHECK: clrldi  r3, r3, 48
-
-; CHECK-LE-LABEL: @getvelus
-; CHECK-LE: li r3, 4
-; CHECK-LE: andc r3, r3, r5
-; CHECK-LE: sldi r3, r3, 1
-; CHECK-LE: lvsl v3, 0, r3
-; CHECK-LE: li r3, 3
-; CHECK-LE: and r3, r3, r5
-; CHECK-LE: vperm v2, v2, v2, v3
-; CHECK-LE: sldi r3, r3, 4
-; CHECK-LE: mfvsrd r4, v2
-; CHECK-LE: srd r3, r4, r3
-; CHECK-LE: clrldi r3, r3, 48
-
+; CHECK-LABEL: getvelus:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andi. r4, r5, 4
+; CHECK-NEXT:    li r3, 3
+; CHECK-NEXT:    sldi r4, r4, 1
+; CHECK-NEXT:    andc r3, r3, r5
+; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    sldi r3, r3, 4
+; CHECK-NEXT:    vperm v2, v2, v2, v3
+; CHECK-NEXT:    mfvsrd r4, v2
+; CHECK-NEXT:    srd r3, r4, r3
+; CHECK-NEXT:    clrldi r3, r3, 48
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getvelus:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    li r3, 4
+; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    sldi r3, r3, 1
+; CHECK-LE-NEXT:    lvsl v3, 0, r3
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    and r3, r3, r5
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    sldi r3, r3, 4
+; CHECK-LE-NEXT:    mfvsrd r4, v2
+; CHECK-LE-NEXT:    srd r3, r4, r3
+; CHECK-LE-NEXT:    clrldi r3, r3, 48
+; CHECK-LE-NEXT:    blr
+;
 ; CHECK-AIX-LABEL: getvelus:
-; CHECK-AIX: andi. 5, 3, 4
-; CHECK-AIX: li 4, 3
-; CHECK-AIX: sldi 5, 5, 1
-; CHECK-AIX: andc 3, 4, 3
-; CHECK-AIX: lvsl 3, 0, 5
-; CHECK-AIX: sldi 3, 3, 4
-; CHECK-AIX: vperm 2, 2, 2, 3
-; CHECK-AIX: mfvsrd 4, 34
-; CHECK-AIX: srd 3, 4, 3
-; CHECK-AIX: clrldi 3, 3, 48
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    andi. 5, 3, 4
+; CHECK-AIX-NEXT:    li 4, 3
+; CHECK-AIX-NEXT:    sldi 5, 5, 1
+; CHECK-AIX-NEXT:    andc 3, 4, 3
+; CHECK-AIX-NEXT:    lvsl 3, 0, 5
+; CHECK-AIX-NEXT:    sldi 3, 3, 4
+; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
+; CHECK-AIX-NEXT:    mfvsrd 4, 34
+; CHECK-AIX-NEXT:    srd 3, 4, 3
+; CHECK-AIX-NEXT:    clrldi 3, 3, 48
+; CHECK-AIX-NEXT:    blr
+
+
 entry:
   %vecext = extractelement <8 x i16> %vus, i32 %i
   ret i16 %vecext
@@ -1234,381 +1849,715 @@ entry:
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i32 @getsi0(<4 x i32> %vsi) {
+; CHECK-LABEL: getsi0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsi0:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprwz r3, f0
+; CHECK-LE-NEXT:    extsw r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsi0:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxsldwi 0, 34, 34, 3
+; CHECK-AIX-NEXT:    mffprwz 3, 0
+; CHECK-AIX-NEXT:    extsw 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %vsi, i32 0
   ret i32 %vecext
-; CHECK-LABEL: @getsi0
-; CHECK: xxsldwi vs0, v2, v2, 3
-; CHECK: mffprwz r3, f0
-; CHECK: extsw r3, r3
 
-; CHECK-LE-LABEL: @getsi0
-; CHECK-LE: xxswapd vs0, v2
-; CHECK-LE: mffprwz r3, f0
-; CHECK-LE: extsw r3, r3
 
-; CHECK-AIX-LABEL: getsi0:
-; CHECK-AIX: xxsldwi 0, 34, 34, 3
-; CHECK-AIX: mffprwz 3, 0
-; CHECK-AIX: extsw 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i32 @getsi1(<4 x i32> %vsi) {
+; CHECK-LABEL: getsi1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrwz r3, v2
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsi1:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-LE-NEXT:    mffprwz r3, f0
+; CHECK-LE-NEXT:    extsw r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsi1:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrwz 3, 34
+; CHECK-AIX-NEXT:    extsw 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %vsi, i32 1
   ret i32 %vecext
-; CHECK-LABEL: @getsi1
-; CHECK: mfvsrwz r3, v2
-; CHECK: extsw r3, r3
 
-; CHECK-LE-LABEL: @getsi1
-; CHECK-LE: xxsldwi vs0, v2, v2, 1
-; CHECK-LE: mffprwz r3, f0
-; CHECK-LE: extsw r3, r3
 
-; CHECK-AIX-LABEL: getsi1:
-; CHECK-AIX: mfvsrwz 3, 34
-; CHECK-AIX: extsw 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i32 @getsi2(<4 x i32> %vsi) {
+; CHECK-LABEL: getsi2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsi2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrwz r3, v2
+; CHECK-LE-NEXT:    extsw r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsi2:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxsldwi 0, 34, 34, 1
+; CHECK-AIX-NEXT:    mffprwz 3, 0
+; CHECK-AIX-NEXT:    extsw 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %vsi, i32 2
   ret i32 %vecext
-; CHECK-LABEL: @getsi2
-; CHECK: xxsldwi vs0, v2, v2, 1
-; CHECK: mffprwz r3, f0
-; CHECK: extsw r3, r3
 
-; CHECK-LE-LABEL: @getsi2
-; CHECK-LE: mfvsrwz r3, v2
-; CHECK-LE: extsw r3, r3
 
-; CHECK-AIX-LABEL: getsi2:
-; CHECK-AIX: xxsldwi 0, 34, 34, 1
-; CHECK-AIX: mffprwz 3, 0
-; CHECK-AIX: extsw 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i32 @getsi3(<4 x i32> %vsi) {
+; CHECK-LABEL: getsi3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsi3:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-LE-NEXT:    mffprwz r3, f0
+; CHECK-LE-NEXT:    extsw r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsi3:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprwz 3, 0
+; CHECK-AIX-NEXT:    extsw 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %vsi, i32 3
   ret i32 %vecext
-; CHECK-LABEL: @getsi3
-; CHECK: xxswapd vs0, v2
-; CHECK: mffprwz r3, f0
-; CHECK: extsw r3, r3
 
-; CHECK-LE-LABEL: @getsi3
-; CHECK-LE: xxsldwi vs0, v2, v2, 3
-; CHECK-LE: mffprwz r3, f0
-; CHECK-LE: extsw r3, r3
 
-; CHECK-AIX-LABEL: getsi3:
-; CHECK-AIX: xxswapd 0, 34
-; CHECK-AIX: mffprwz 3, 0
-; CHECK-AIX: extsw 3, 3
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i32 @getui0(<4 x i32> %vui) {
+; CHECK-LABEL: getui0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    clrldi r3, r3, 32
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getui0:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprwz r3, f0
+; CHECK-LE-NEXT:    clrldi r3, r3, 32
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getui0:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxsldwi 0, 34, 34, 3
+; CHECK-AIX-NEXT:    mffprwz 3, 0
+; CHECK-AIX-NEXT:    clrldi 3, 3, 32
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %vui, i32 0
   ret i32 %vecext
-; CHECK-LABEL: @getui0
-; CHECK: xxsldwi vs0, v2, v2, 3
-; CHECK: mffprwz r3, f0
 
-; CHECK-LE-LABEL: @getui0
-; CHECK-LE: xxswapd vs0, v2
-; CHECK-LE: mffprwz r3, f0
 
-; CHECK-AIX-LABEL: getui0:
-; CHECK-AIX: xxsldwi 0, 34, 34, 3
-; CHECK-AIX: mffprwz 3, 0
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i32 @getui1(<4 x i32> %vui) {
+; CHECK-LABEL: getui1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrwz r3, v2
+; CHECK-NEXT:    clrldi r3, r3, 32
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getui1:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-LE-NEXT:    mffprwz r3, f0
+; CHECK-LE-NEXT:    clrldi r3, r3, 32
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getui1:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrwz 3, 34
+; CHECK-AIX-NEXT:    clrldi 3, 3, 32
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %vui, i32 1
   ret i32 %vecext
-; CHECK-LABEL: @getui1
-; CHECK: mfvsrwz r3, v2
 
-; CHECK-LE-LABEL: @getui1
-; CHECK-LE: xxsldwi vs0, v2, v2, 1
-; CHECK-LE: mffprwz r3, f0
 
-; CHECK-AIX-LABEL: getui1:
-; CHECK-AIX: mfvsrwz 3, 34
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i32 @getui2(<4 x i32> %vui) {
+; CHECK-LABEL: getui2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    clrldi r3, r3, 32
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getui2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrwz r3, v2
+; CHECK-LE-NEXT:    clrldi r3, r3, 32
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getui2:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxsldwi 0, 34, 34, 1
+; CHECK-AIX-NEXT:    mffprwz 3, 0
+; CHECK-AIX-NEXT:    clrldi 3, 3, 32
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %vui, i32 2
   ret i32 %vecext
-; CHECK-LABEL: @getui2
-; CHECK: xxsldwi vs0, v2, v2, 1
-; CHECK: mffprwz r3, f0
 
-; CHECK-LE-LABEL: @getui2
-; CHECK-LE: mfvsrwz r3, v2
 
-; CHECK-AIX-LABEL: getui2:
-; CHECK-AIX: xxsldwi 0, 34, 34, 1
-; CHECK-AIX: mffprwz 3, 0
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i32 @getui3(<4 x i32> %vui) {
+; CHECK-LABEL: getui3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    clrldi r3, r3, 32
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getui3:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-LE-NEXT:    mffprwz r3, f0
+; CHECK-LE-NEXT:    clrldi r3, r3, 32
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getui3:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprwz 3, 0
+; CHECK-AIX-NEXT:    clrldi 3, 3, 32
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %vui, i32 3
   ret i32 %vecext
-; CHECK-LABEL: @getui3
-; CHECK: xxswapd vs0, v2
-; CHECK: mffprwz r3, f0
 
-; CHECK-LE-LABEL: @getui3
-; CHECK-LE: xxsldwi vs0, v2, v2, 3
-; CHECK-LE: mffprwz r3, f0
 
-; CHECK-AIX-LABEL: getui3:
-; CHECK-AIX: xxswapd 0, 34
-; CHECK-AIX: mffprwz 3, 0
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) {
+; CHECK-LABEL: getvelsi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andi. r4, r5, 2
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    sldi r4, r4, 2
+; CHECK-NEXT:    andc r3, r3, r5
+; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    sldi r3, r3, 5
+; CHECK-NEXT:    vperm v2, v2, v2, v3
+; CHECK-NEXT:    mfvsrd r4, v2
+; CHECK-NEXT:    srd r3, r4, r3
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getvelsi:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    li r3, 2
+; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    sldi r3, r3, 2
+; CHECK-LE-NEXT:    lvsl v3, 0, r3
+; CHECK-LE-NEXT:    li r3, 1
+; CHECK-LE-NEXT:    and r3, r3, r5
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    sldi r3, r3, 5
+; CHECK-LE-NEXT:    mfvsrd r4, v2
+; CHECK-LE-NEXT:    srd r3, r4, r3
+; CHECK-LE-NEXT:    extsw r3, r3
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getvelsi:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    andi. 5, 3, 2
+; CHECK-AIX-NEXT:    li 4, 1
+; CHECK-AIX-NEXT:    sldi 5, 5, 2
+; CHECK-AIX-NEXT:    andc 3, 4, 3
+; CHECK-AIX-NEXT:    lvsl 3, 0, 5
+; CHECK-AIX-NEXT:    sldi 3, 3, 5
+; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
+; CHECK-AIX-NEXT:    mfvsrd 4, 34
+; CHECK-AIX-NEXT:    srd 3, 4, 3
+; CHECK-AIX-NEXT:    extsw 3, 3
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %vsi, i32 %i
   ret i32 %vecext
-; CHECK-LABEL: @getvelsi
-; CHECK-LE-LABEL: @getvelsi
-; CHECK-AIX-LABEL: getvelsi
 ; FIXME: add check patterns when variable element extraction is implemented
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) {
+; CHECK-LABEL: getvelui:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andi. r4, r5, 2
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    sldi r4, r4, 2
+; CHECK-NEXT:    andc r3, r3, r5
+; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    sldi r3, r3, 5
+; CHECK-NEXT:    vperm v2, v2, v2, v3
+; CHECK-NEXT:    mfvsrd r4, v2
+; CHECK-NEXT:    srd r3, r4, r3
+; CHECK-NEXT:    clrldi r3, r3, 32
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getvelui:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    li r3, 2
+; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    sldi r3, r3, 2
+; CHECK-LE-NEXT:    lvsl v3, 0, r3
+; CHECK-LE-NEXT:    li r3, 1
+; CHECK-LE-NEXT:    and r3, r3, r5
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    sldi r3, r3, 5
+; CHECK-LE-NEXT:    mfvsrd r4, v2
+; CHECK-LE-NEXT:    srd r3, r4, r3
+; CHECK-LE-NEXT:    clrldi r3, r3, 32
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getvelui:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    andi. 5, 3, 2
+; CHECK-AIX-NEXT:    li 4, 1
+; CHECK-AIX-NEXT:    sldi 5, 5, 2
+; CHECK-AIX-NEXT:    andc 3, 4, 3
+; CHECK-AIX-NEXT:    lvsl 3, 0, 5
+; CHECK-AIX-NEXT:    sldi 3, 3, 5
+; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
+; CHECK-AIX-NEXT:    mfvsrd 4, 34
+; CHECK-AIX-NEXT:    srd 3, 4, 3
+; CHECK-AIX-NEXT:    clrldi 3, 3, 32
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %vui, i32 %i
   ret i32 %vecext
-; CHECK-LABEL: @getvelui
-; CHECK-LE-LABEL: @getvelui
-; CHECK-AIX-LABEL: getvelui
 ; FIXME: add check patterns when variable element extraction is implemented
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define i64 @getsl0(<2 x i64> %vsl) {
+; CHECK-LABEL: getsl0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsl0:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsl0:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <2 x i64> %vsl, i32 0
   ret i64 %vecext
-; CHECK-LABEL: @getsl0
-; CHECK: mfvsrd r3, v2
 
-; CHECK-LE-LABEL: @getsl0
-; CHECK-LE: xxswapd vs0, v2
-; CHECK-LE: mffprd r3, f0
 
-; CHECK-AIX-LABEL: getsl0:
-; CHECK-AIX: mfvsrd 3, 34
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define i64 @getsl1(<2 x i64> %vsl) {
+; CHECK-LABEL: getsl1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getsl1:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getsl1:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <2 x i64> %vsl, i32 1
   ret i64 %vecext
-; CHECK-LABEL: @getsl1
-; CHECK: xxswapd vs0, v2
-; CHECK: mffprd r3, f0
 
-; CHECK-LE-LABEL: @getsl1
-; CHECK-LE: mfvsrd r3, v2
 
-; CHECK-AIX-LABEL: getsl1:
-; CHECK-AIX: xxswapd 0, 34
-; CHECK-AIX: mffprd 3, 0
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define i64 @getul0(<2 x i64> %vul) {
+; CHECK-LABEL: getul0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getul0:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    mffprd r3, f0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getul0:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <2 x i64> %vul, i32 0
   ret i64 %vecext
-; CHECK-LABEL: @getul0
-; CHECK: mfvsrd r3, v2
 
-; CHECK-LE-LABEL: @getul0
-; CHECK-LE: xxswapd  vs0, v2
-; CHECK-LE: mffprd r3, f0
 
-; CHECK-AIX-LABEL: getul0:
-; CHECK-AIX: mfvsrd 3, 34
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define i64 @getul1(<2 x i64> %vul) {
+; CHECK-LABEL: getul1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    mffprd r3, f0
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getul1:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getul1:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    mffprd 3, 0
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <2 x i64> %vul, i32 1
   ret i64 %vecext
-; CHECK-LABEL: @getul1
-; CHECK: xxswapd vs0, v2
-; CHECK: mffprd r3, f0
 
-; CHECK-LE-LABEL: @getul1
-; CHECK-LE: mfvsrd r3, v2
 
-; CHECK-AIX-LABEL: getul1:
-; CHECK-AIX: xxswapd 0, 34
-; CHECK-AIX: mffprd 3, 0
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) {
+; CHECK-LABEL: getvelsl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andi. r3, r5, 1
+; CHECK-NEXT:    sldi r3, r3, 3
+; CHECK-NEXT:    lvsl v3, 0, r3
+; CHECK-NEXT:    vperm v2, v2, v2, v3
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getvelsl:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    li r3, 1
+; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    sldi r3, r3, 3
+; CHECK-LE-NEXT:    lvsl v3, 0, r3
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getvelsl:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    andi. 3, 3, 1
+; CHECK-AIX-NEXT:    sldi 3, 3, 3
+; CHECK-AIX-NEXT:    lvsl 3, 0, 3
+; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <2 x i64> %vsl, i32 %i
   ret i64 %vecext
-; CHECK-LABEL: @getvelsl
-; CHECK-LE-LABEL: @getvelsl
-; CHECK-AIX-LABEL: getvelsl:
 ; FIXME: add check patterns when variable element extraction is implemented
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define i64 @getvelul(<2 x i64> %vul, i32 signext %i) {
+; CHECK-LABEL: getvelul:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andi. r3, r5, 1
+; CHECK-NEXT:    sldi r3, r3, 3
+; CHECK-NEXT:    lvsl v3, 0, r3
+; CHECK-NEXT:    vperm v2, v2, v2, v3
+; CHECK-NEXT:    mfvsrd r3, v2
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getvelul:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    li r3, 1
+; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    sldi r3, r3, 3
+; CHECK-LE-NEXT:    lvsl v3, 0, r3
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    mfvsrd r3, v2
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getvelul:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    andi. 3, 3, 1
+; CHECK-AIX-NEXT:    sldi 3, 3, 3
+; CHECK-AIX-NEXT:    lvsl 3, 0, 3
+; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
+; CHECK-AIX-NEXT:    mfvsrd 3, 34
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <2 x i64> %vul, i32 %i
   ret i64 %vecext
-; CHECK-LABEL: @getvelul
-; CHECK-LE-LABEL: @getvelul
-; CHECK-AIX-LABEL: getvelul
 ; FIXME: add check patterns when variable element extraction is implemented
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define float @getf0(<4 x float> %vf) {
+; CHECK-LABEL: getf0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xscvspdpn f1, v2
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getf0:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-LE-NEXT:    xscvspdpn f1, vs0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getf0:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xscvspdpn 1, 34
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <4 x float> %vf, i32 0
   ret float %vecext
-; CHECK-LABEL: @getf0
-; CHECK: xscvspdpn f1, v2
 
-; CHECK-LE-LABEL: @getf0
-; CHECK-LE: xxsldwi vs0, v2, v2, 3
-; CHECK-LE: xscvspdpn f1, vs0
 
-; CHECK-AIX-LABEL: getf0:
-; CHECK-AIX: xscvspdpn 1, 34
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define float @getf1(<4 x float> %vf) {
+; CHECK-LABEL: getf1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-NEXT:    xscvspdpn f1, vs0
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getf1:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    xscvspdpn f1, vs0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getf1:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxsldwi 0, 34, 34, 1
+; CHECK-AIX-NEXT:    xscvspdpn 1, 0
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <4 x float> %vf, i32 1
   ret float %vecext
-; CHECK-LABEL: @getf1
-; CHECK: xxsldwi vs0, v2, v2, 1
-; CHECK: xscvspdpn f1, vs0
 
-; CHECK-LE-LABEL: @getf1
-; CHECK-LE: xxswapd vs0, v2
-; CHECK-LE: xscvspdpn f1, vs0
 
-; CHECK-AIX-LABEL: getf1:
-; CHECK-AIX: xxsldwi 0, 34, 34, 1
-; CHECK-AIX: xscvspdpn 1, 0
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define float @getf2(<4 x float> %vf) {
+; CHECK-LABEL: getf2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    xscvspdpn f1, vs0
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getf2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-LE-NEXT:    xscvspdpn f1, vs0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getf2:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 0, 34
+; CHECK-AIX-NEXT:    xscvspdpn 1, 0
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <4 x float> %vf, i32 2
   ret float %vecext
-; CHECK-LABEL: @getf2
-; CHECK: xxswapd vs0, v2
-; CHECK: xscvspdpn f1, vs0
 
-; CHECK-LE-LABEL: @getf2
-; CHECK-LE: xxsldwi vs0, v2, v2, 1
-; CHECK-LE: xscvspdpn f1, vs0
 
-; CHECK-AIX-LABEL: getf2:
-; CHECK-AIX: xxswapd 0, 34
-; CHECK-AIX: xscvspdpn 1, 0
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define float @getf3(<4 x float> %vf) {
+; CHECK-LABEL: getf3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-NEXT:    xscvspdpn f1, vs0
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getf3:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xscvspdpn f1, v2
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getf3:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxsldwi 0, 34, 34, 3
+; CHECK-AIX-NEXT:    xscvspdpn 1, 0
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <4 x float> %vf, i32 3
   ret float %vecext
-; CHECK-LABEL: @getf3
-; CHECK: xxsldwi vs0, v2, v2, 3
-; CHECK: xscvspdpn f1, vs0
 
-; CHECK-LE-LABEL: @getf3
-; CHECK-LE: xscvspdpn f1, v2
 
-; CHECK-AIX-LABEL: getf3:
-; CHECK-AIX: xxsldwi 0, 34, 34, 3
-; CHECK-AIX: xscvspdpn 1, 0
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define float @getvelf(<4 x float> %vf, i32 signext %i) {
+; CHECK-LABEL: getvelf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r3, r5, 2
+; CHECK-NEXT:    lvsl v3, 0, r3
+; CHECK-NEXT:    vperm v2, v2, v2, v3
+; CHECK-NEXT:    xscvspdpn f1, v2
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getvelf:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xori r3, r5, 3
+; CHECK-LE-NEXT:    sldi r3, r3, 2
+; CHECK-LE-NEXT:    lvsl v3, 0, r3
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    xscvspdpn f1, v2
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getvelf:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    sldi 3, 3, 2
+; CHECK-AIX-NEXT:    lvsl 3, 0, 3
+; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
+; CHECK-AIX-NEXT:    xscvspdpn 1, 34
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <4 x float> %vf, i32 %i
   ret float %vecext
-; CHECK-LABEL: @getvelf
-; CHECK-LE-LABEL: @getvelf
-; CHECK-AIX-LABEL: @getvelf
 ; FIXME: add check patterns when variable element extraction is implemented
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define double @getd0(<2 x double> %vd) {
+; CHECK-LABEL: getd0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxlor f1, v2, v2
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getd0:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxswapd vs1, v2
+; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getd0:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxlor 1, 34, 34
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <2 x double> %vd, i32 0
   ret double %vecext
-; CHECK-LABEL: @getd0
-; CHECK: xxlor f1, v2, v2
 
-; CHECK-LE-LABEL: @getd0
-; CHECK-LE: xxswapd vs1, v2
 
-; CHECK-AIX-LABEL: getd0:
 ; CHECK-AIXT: xxlor 1, 34, 34
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define double @getd1(<2 x double> %vd) {
+; CHECK-LABEL: getd1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs1, v2
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getd1:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxlor f1, v2, v2
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getd1:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    xxswapd 1, 34
+; CHECK-AIX-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <2 x double> %vd, i32 1
   ret double %vecext
-; CHECK-LABEL: @getd1
-; CHECK: xxswapd vs1, v2
 
-; CHECK-LE-LABEL: @getd1
-; CHECK-LE: xxlor f1, v2, v2
 
-; CHECK-AIX-LABEL: getd1:
-; CHECK-AIX: xxswapd 1, 34
 }
 
 ; Function Attrs: norecurse nounwind readnone
 define double @getveld(<2 x double> %vd, i32 signext %i) {
+; CHECK-LABEL: getveld:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andi. r3, r5, 1
+; CHECK-NEXT:    sldi r3, r3, 3
+; CHECK-NEXT:    lvsl v3, 0, r3
+; CHECK-NEXT:    vperm v2, v2, v2, v3
+; CHECK-NEXT:    xxlor vs1, v2, v2
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: getveld:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    li r3, 1
+; CHECK-LE-NEXT:    andc r3, r3, r5
+; CHECK-LE-NEXT:    sldi r3, r3, 3
+; CHECK-LE-NEXT:    lvsl v3, 0, r3
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    xxlor vs1, v2, v2
+; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: getveld:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    andi. 3, 3, 1
+; CHECK-AIX-NEXT:    sldi 3, 3, 3
+; CHECK-AIX-NEXT:    lvsl 3, 0, 3
+; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
+; CHECK-AIX-NEXT:    xxlor 1, 34, 34
+; CHECK-AIX-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <2 x double> %vd, i32 %i
   ret double %vecext
-; CHECK-LABEL: @getveld
-; CHECK-LE-LABEL: @getveld
-; CHECK-AIX-LABEL: @getveld
 ; FIXME: add check patterns when variable element extraction is implemented
 }

diff  --git a/llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll b/llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll
index 0b2910d6b974..85ff60840e0a 100644
--- a/llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll
+++ b/llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll
@@ -247,16 +247,18 @@ define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) {
 ; CHECK-LABEL: test14:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lwz r3, 0(r5)
-; CHECK-NEXT:    mtvsrws v2, r3
+; CHECK-NEXT:    mtfprwz f0, r3
 ; CHECK-NEXT:    addi r3, r3, 5
+; CHECK-NEXT:    xxspltw v2, vs0, 1
 ; CHECK-NEXT:    stw r3, 0(r5)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test14:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lwz r3, 0(r5)
-; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-NEXT:    addi r3, r3, 5
+; CHECK-BE-NEXT:    xxspltw v2, vs0, 1
 ; CHECK-BE-NEXT:    stw r3, 0(r5)
 ; CHECK-BE-NEXT:    blr
 

diff  --git a/llvm/test/CodeGen/PowerPC/pr25080.ll b/llvm/test/CodeGen/PowerPC/pr25080.ll
index f87cb5b940ca..aa2c7ba5d462 100644
--- a/llvm/test/CodeGen/PowerPC/pr25080.ll
+++ b/llvm/test/CodeGen/PowerPC/pr25080.ll
@@ -64,42 +64,37 @@ define <8 x i16> @pr25080(<8 x i32> %a) {
 ; BE-NEXT:    xxswapd 0, 35
 ; BE-NEXT:    mfvsrwz 3, 35
 ; BE-NEXT:    xxsldwi 1, 35, 35, 1
-; BE-NEXT:    sldi 3, 3, 48
-; BE-NEXT:    mffprwz 4, 0
-; BE-NEXT:    xxsldwi 0, 35, 35, 3
-; BE-NEXT:    mtvsrd 36, 3
-; BE-NEXT:    mffprwz 3, 1
-; BE-NEXT:    sldi 4, 4, 48
-; BE-NEXT:    xxswapd 1, 34
-; BE-NEXT:    mtvsrd 35, 4
 ; BE-NEXT:    mfvsrwz 4, 34
-; BE-NEXT:    sldi 3, 3, 48
-; BE-NEXT:    mtvsrd 37, 3
-; BE-NEXT:    mffprwz 3, 0
-; BE-NEXT:    sldi 4, 4, 48
-; BE-NEXT:    xxsldwi 0, 34, 34, 1
-; BE-NEXT:    vmrghh 3, 5, 3
-; BE-NEXT:    mtvsrd 37, 4
-; BE-NEXT:    sldi 3, 3, 48
-; BE-NEXT:    mffprwz 4, 1
-; BE-NEXT:    xxsldwi 1, 34, 34, 3
-; BE-NEXT:    mtvsrd 34, 3
+; BE-NEXT:    mtvsrwz 36, 3
+; BE-NEXT:    xxsldwi 2, 35, 35, 3
 ; BE-NEXT:    mffprwz 3, 0
-; BE-NEXT:    sldi 4, 4, 48
-; BE-NEXT:    mtvsrd 32, 4
+; BE-NEXT:    xxswapd 0, 34
+; BE-NEXT:    mtvsrwz 35, 4
 ; BE-NEXT:    mffprwz 4, 1
-; BE-NEXT:    sldi 3, 3, 48
-; BE-NEXT:    mtvsrd 33, 3
-; BE-NEXT:    sldi 3, 4, 48
-; BE-NEXT:    vmrghh 2, 2, 4
-; BE-NEXT:    mtvsrd 36, 3
+; BE-NEXT:    xxsldwi 1, 34, 34, 1
+; BE-NEXT:    mtvsrwz 37, 3
 ; BE-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
-; BE-NEXT:    vmrghh 0, 1, 0
 ; BE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
-; BE-NEXT:    vmrghh 4, 4, 5
+; BE-NEXT:    mtvsrwz 32, 4
+; BE-NEXT:    mffprwz 4, 0
+; BE-NEXT:    lxvw4x 33, 0, 3
+; BE-NEXT:    xxsldwi 0, 34, 34, 3
+; BE-NEXT:    mffprwz 3, 1
+; BE-NEXT:    mffprwz 5, 2
+; BE-NEXT:    vperm 2, 0, 5, 1
+; BE-NEXT:    mtvsrwz 37, 3
+; BE-NEXT:    mffprwz 3, 0
+; BE-NEXT:    mtvsrwz 38, 5
+; BE-NEXT:    mtvsrwz 39, 4
+; BE-NEXT:    mtvsrwz 32, 3
+; BE-NEXT:    addis 3, 2, .LCPI0_2@toc@ha
+; BE-NEXT:    vperm 4, 6, 4, 1
+; BE-NEXT:    addi 3, 3, .LCPI0_2@toc@l
+; BE-NEXT:    vperm 5, 5, 7, 1
 ; BE-NEXT:    lxvw4x 0, 0, 3
-; BE-NEXT:    vmrghw 2, 2, 3
-; BE-NEXT:    vmrghw 3, 4, 0
+; BE-NEXT:    vperm 3, 0, 3, 1
+; BE-NEXT:    vmrghw 2, 4, 2
+; BE-NEXT:    vmrghw 3, 3, 5
 ; BE-NEXT:    xxmrghd 34, 35, 34
 ; BE-NEXT:    vspltish 3, 15
 ; BE-NEXT:    xxlor 34, 34, 0

diff  --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index 42d9a8bd6f1b..c92809ea6830 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -297,16 +297,14 @@ define void @test32(i8* nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-LABEL: test32:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    add r5, r3, r4
-; P9BE-NEXT:    lfiwzx f0, r3, r4
+; P9BE-NEXT:    lxsiwzx v2, r3, r4
 ; P9BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; P9BE-NEXT:    xxlxor v3, v3, v3
-; P9BE-NEXT:    xxsldwi v2, f0, f0, 1
 ; P9BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
 ; P9BE-NEXT:    lxvx v4, 0, r3
 ; P9BE-NEXT:    li r3, 4
-; P9BE-NEXT:    lfiwzx f0, r5, r3
+; P9BE-NEXT:    lxsiwzx v5, r5, r3
 ; P9BE-NEXT:    vperm v2, v3, v2, v4
-; P9BE-NEXT:    xxsldwi v5, f0, f0, 1
 ; P9BE-NEXT:    vperm v3, v3, v5, v4
 ; P9BE-NEXT:    vspltisw v4, 8
 ; P9BE-NEXT:    vnegw v3, v3
@@ -352,8 +350,6 @@ define void @test16(i16* nocapture readonly %sums, i32 signext %delta, i32 signe
 ; CHECK-NEXT:    li r6, 0
 ; CHECK-NEXT:    addi r3, r3, .LCPI3_0@toc@l
 ; CHECK-NEXT:    mtvsrd v3, r6
-; CHECK-NEXT:    vsplth v4, v4, 3
-; CHECK-NEXT:    vsplth v2, v2, 3
 ; CHECK-NEXT:    vmrghh v4, v3, v4
 ; CHECK-NEXT:    vmrghh v2, v3, v2
 ; CHECK-NEXT:    vsplth v3, v3, 3
@@ -373,19 +369,19 @@ define void @test16(i16* nocapture readonly %sums, i32 signext %delta, i32 signe
 ; P9BE-NEXT:    sldi r4, r4, 1
 ; P9BE-NEXT:    li r7, 16
 ; P9BE-NEXT:    add r6, r3, r4
-; P9BE-NEXT:    lxsihzx v4, r3, r4
-; P9BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; P9BE-NEXT:    lxsihzx v5, r3, r4
+; P9BE-NEXT:    addis r3, r2, .LCPI3_1@toc@ha
 ; P9BE-NEXT:    lxsihzx v2, r6, r7
+; P9BE-NEXT:    addis r6, r2, .LCPI3_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI3_1@toc@l
+; P9BE-NEXT:    addi r6, r6, .LCPI3_0@toc@l
+; P9BE-NEXT:    lxvx v3, 0, r6
 ; P9BE-NEXT:    li r6, 0
-; P9BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; P9BE-NEXT:    sldi r6, r6, 48
+; P9BE-NEXT:    mtvsrwz v4, r6
+; P9BE-NEXT:    vperm v2, v4, v2, v3
+; P9BE-NEXT:    vperm v3, v4, v5, v3
 ; P9BE-NEXT:    vsplth v4, v4, 3
-; P9BE-NEXT:    mtvsrd v3, r6
-; P9BE-NEXT:    vsplth v2, v2, 3
-; P9BE-NEXT:    vmrghh v4, v3, v4
-; P9BE-NEXT:    vmrghh v2, v3, v2
-; P9BE-NEXT:    vsplth v3, v3, 0
-; P9BE-NEXT:    vmrghw v3, v3, v4
+; P9BE-NEXT:    vmrghw v3, v4, v3
 ; P9BE-NEXT:    lxvx v4, 0, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vperm v2, v3, v2, v4
@@ -438,10 +434,8 @@ define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext
 ; CHECK-NEXT:    lxsibzx v5, r6, r3
 ; CHECK-NEXT:    vspltb v4, v3, 7
 ; CHECK-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-NEXT:    vspltb v2, v2, 7
-; CHECK-NEXT:    addi r3, r3, .LCPI4_0@toc@l
 ; CHECK-NEXT:    vmrghb v2, v3, v2
-; CHECK-NEXT:    vspltb v5, v5, 7
+; CHECK-NEXT:    addi r3, r3, .LCPI4_0@toc@l
 ; CHECK-NEXT:    vmrglh v2, v2, v4
 ; CHECK-NEXT:    vmrghb v3, v3, v5
 ; CHECK-NEXT:    vmrglw v2, v2, v4
@@ -461,24 +455,24 @@ define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    add r6, r3, r4
 ; P9BE-NEXT:    li r7, 8
-; P9BE-NEXT:    lxsibzx v4, r3, r4
-; P9BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; P9BE-NEXT:    lxsibzx v5, r3, r4
+; P9BE-NEXT:    addis r3, r2, .LCPI4_1@toc@ha
 ; P9BE-NEXT:    lxsibzx v2, r6, r7
+; P9BE-NEXT:    addis r6, r2, .LCPI4_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI4_1@toc@l
+; P9BE-NEXT:    addi r6, r6, .LCPI4_0@toc@l
+; P9BE-NEXT:    lxvx v3, 0, r6
 ; P9BE-NEXT:    li r6, 0
-; P9BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; P9BE-NEXT:    sldi r6, r6, 56
+; P9BE-NEXT:    mtvsrwz v4, r6
+; P9BE-NEXT:    vperm v2, v4, v2, v3
+; P9BE-NEXT:    vperm v3, v4, v5, v3
 ; P9BE-NEXT:    vspltb v4, v4, 7
-; P9BE-NEXT:    mtvsrd v3, r6
-; P9BE-NEXT:    vspltb v2, v2, 7
-; P9BE-NEXT:    vmrghb v4, v3, v4
-; P9BE-NEXT:    vmrghb v2, v3, v2
-; P9BE-NEXT:    vspltb v3, v3, 0
-; P9BE-NEXT:    vmrghh v4, v4, v3
-; P9BE-NEXT:    xxspltw v3, v3, 0
-; P9BE-NEXT:    vmrghw v2, v4, v2
-; P9BE-NEXT:    lxvx v4, 0, r3
+; P9BE-NEXT:    vmrghh v3, v3, v4
+; P9BE-NEXT:    xxspltw v4, v4, 0
+; P9BE-NEXT:    vmrghw v2, v3, v2
+; P9BE-NEXT:    lxvx v3, 0, r3
 ; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    vperm v2, v3, v2, v4
+; P9BE-NEXT:    vperm v2, v4, v2, v3
 ; P9BE-NEXT:    xxspltw v3, v2, 1
 ; P9BE-NEXT:    vadduwm v2, v2, v3
 ; P9BE-NEXT:    vextuwlx r3, r3, v2

diff  --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
index 4ee8ca335335..39d722adf509 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
@@ -35,10 +35,10 @@ define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec)  {
 ;
 ; P8BE-LABEL: s2v_test1:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    lfiwzx f0, 0, r3
-; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
-; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
-; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; P8BE-NEXT:    lxsiwzx v3, 0, r3
+; P8BE-NEXT:    vmrghw v4, v2, v3
+; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
+; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
 ; P8BE-NEXT:    blr
 entry:
   %0 = load i32, i32* %int32, align 4
@@ -75,10 +75,10 @@ define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec)  {
 ; P8BE-LABEL: s2v_test2:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    addi r3, r3, 4
-; P8BE-NEXT:    lfiwzx f0, 0, r3
-; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
-; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
-; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; P8BE-NEXT:    lxsiwzx v3, 0, r3
+; P8BE-NEXT:    vmrghw v4, v2, v3
+; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
+; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
 ; P8BE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
@@ -118,10 +118,10 @@ define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32
 ; P8BE-LABEL: s2v_test3:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    sldi r4, r7, 2
-; P8BE-NEXT:    lfiwzx f0, r3, r4
-; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
-; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
-; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; P8BE-NEXT:    lxsiwzx v3, r3, r4
+; P8BE-NEXT:    vmrghw v4, v2, v3
+; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
+; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
 ; P8BE-NEXT:    blr
 entry:
   %idxprom = sext i32 %Idx to i64
@@ -160,10 +160,10 @@ define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec)  {
 ; P8BE-LABEL: s2v_test4:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    addi r3, r3, 4
-; P8BE-NEXT:    lfiwzx f0, 0, r3
-; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
-; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
-; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; P8BE-NEXT:    lxsiwzx v3, 0, r3
+; P8BE-NEXT:    vmrghw v4, v2, v3
+; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
+; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
 ; P8BE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
@@ -199,10 +199,10 @@ define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1)  {
 ;
 ; P8BE-LABEL: s2v_test5:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    lfiwzx f0, 0, r5
-; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
-; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
-; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; P8BE-NEXT:    lxsiwzx v3, 0, r5
+; P8BE-NEXT:    vmrghw v4, v2, v3
+; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
+; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
 ; P8BE-NEXT:    blr
 entry:
   %0 = load i32, i32* %ptr1, align 4
@@ -239,10 +239,10 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec
 ;
 ; P8BE-LABEL: s2v_test_f1:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    lfiwzx f0, 0, r3
-; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
-; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
-; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; P8BE-NEXT:    lxsiwzx v3, 0, r3
+; P8BE-NEXT:    vmrghw v4, v2, v3
+; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
+; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
 ; P8BE-NEXT:    blr
 entry:
   %0 = load float, float* %f64, align 4
@@ -263,10 +263,8 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec
 ; P9BE-LABEL: s2v_test_f2:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    addi r3, r3, 4
-; P9BE-NEXT:    xxspltw v2, v2, 1
-; P9BE-NEXT:    lfiwzx f0, 0, r3
-; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
-; P9BE-NEXT:    vmrghw v2, v3, v2
+; P9BE-NEXT:    lxsiwzx v3, 0, r3
+; P9BE-NEXT:    vmrgow v2, v3, v2
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: s2v_test_f2:
@@ -280,10 +278,8 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec
 ; P8BE-LABEL: s2v_test_f2:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    addi r3, r3, 4
-; P8BE-NEXT:    xxspltw v2, v2, 1
-; P8BE-NEXT:    lfiwzx f0, 0, r3
-; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
-; P8BE-NEXT:    vmrghw v2, v3, v2
+; P8BE-NEXT:    lxsiwzx v3, 0, r3
+; P8BE-NEXT:    vmrgow v2, v3, v2
 ; P8BE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds float, float* %f64, i64 1
@@ -305,10 +301,8 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec
 ; P9BE-LABEL: s2v_test_f3:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    sldi r4, r7, 2
-; P9BE-NEXT:    xxspltw v2, v2, 1
-; P9BE-NEXT:    lfiwzx f0, r3, r4
-; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
-; P9BE-NEXT:    vmrghw v2, v3, v2
+; P9BE-NEXT:    lxsiwzx v3, r3, r4
+; P9BE-NEXT:    vmrgow v2, v3, v2
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: s2v_test_f3:
@@ -322,10 +316,8 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec
 ; P8BE-LABEL: s2v_test_f3:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    sldi r4, r7, 2
-; P8BE-NEXT:    xxspltw v2, v2, 1
-; P8BE-NEXT:    lfiwzx f0, r3, r4
-; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
-; P8BE-NEXT:    vmrghw v2, v3, v2
+; P8BE-NEXT:    lxsiwzx v3, r3, r4
+; P8BE-NEXT:    vmrgow v2, v3, v2
 ; P8BE-NEXT:    blr
 entry:
   %idxprom = sext i32 %Idx to i64
@@ -348,10 +340,8 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec
 ; P9BE-LABEL: s2v_test_f4:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    addi r3, r3, 4
-; P9BE-NEXT:    xxspltw v2, v2, 1
-; P9BE-NEXT:    lfiwzx f0, 0, r3
-; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
-; P9BE-NEXT:    vmrghw v2, v3, v2
+; P9BE-NEXT:    lxsiwzx v3, 0, r3
+; P9BE-NEXT:    vmrgow v2, v3, v2
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: s2v_test_f4:
@@ -365,10 +355,8 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec
 ; P8BE-LABEL: s2v_test_f4:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    addi r3, r3, 4
-; P8BE-NEXT:    xxspltw v2, v2, 1
-; P8BE-NEXT:    lfiwzx f0, 0, r3
-; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
-; P8BE-NEXT:    vmrghw v2, v3, v2
+; P8BE-NEXT:    lxsiwzx v3, 0, r3
+; P8BE-NEXT:    vmrgow v2, v3, v2
 ; P8BE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds float, float* %f64, i64 1
@@ -388,10 +376,8 @@ define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr
 ;
 ; P9BE-LABEL: s2v_test_f5:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    lfiwzx f0, 0, r5
-; P9BE-NEXT:    xxspltw v2, v2, 1
-; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
-; P9BE-NEXT:    vmrghw v2, v3, v2
+; P9BE-NEXT:    lxsiwzx v3, 0, r5
+; P9BE-NEXT:    vmrgow v2, v3, v2
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: s2v_test_f5:
@@ -403,10 +389,8 @@ define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr
 ;
 ; P8BE-LABEL: s2v_test_f5:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    lfiwzx f0, 0, r5
-; P8BE-NEXT:    xxspltw v2, v2, 1
-; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
-; P8BE-NEXT:    vmrghw v2, v3, v2
+; P8BE-NEXT:    lxsiwzx v3, 0, r5
+; P8BE-NEXT:    vmrgow v2, v3, v2
 ; P8BE-NEXT:    blr
 entry:
   %0 = load float, float* %ptr1, align 8

diff  --git a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
index b1234d4c1957..5c71c7394960 100644
--- a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
@@ -81,10 +81,9 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, -124
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, -21386
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    ori r4, r4, 37253
-; P9BE-NEXT:    mtvsrd v3, r3
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 0
+; P9BE-NEXT:    ori r4, r4, 37253
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
 ; P9BE-NEXT:    mulhw r4, r3, r4
@@ -95,13 +94,15 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, -16728
-; P9BE-NEXT:    sldi r3, r3, 48
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; P9BE-NEXT:    ori r4, r4, 63249
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; P9BE-NEXT:    lxvx v5, 0, r3
 ; P9BE-NEXT:    li r3, 6
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    vmrghh v3, v4, v3
 ; P9BE-NEXT:    extsh r3, r3
+; P9BE-NEXT:    vperm v3, v4, v3, v5
 ; P9BE-NEXT:    mulhw r4, r3, r4
 ; P9BE-NEXT:    srwi r5, r4, 31
 ; P9BE-NEXT:    srawi r4, r4, 8
@@ -109,10 +110,9 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, -1003
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, 21399
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    ori r4, r4, 33437
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
 ; P9BE-NEXT:    li r3, 4
+; P9BE-NEXT:    ori r4, r4, 33437
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
 ; P9BE-NEXT:    mulhw r4, r3, r4
@@ -121,9 +121,8 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    add r4, r4, r5
 ; P9BE-NEXT:    mulli r4, r4, 98
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v2, r3
-; P9BE-NEXT:    vmrghh v2, v2, v4
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vperm v2, v2, v4, v5
 ; P9BE-NEXT:    vmrghw v2, v3, v2
 ; P9BE-NEXT:    blr
 ;
@@ -224,19 +223,18 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P8BE-NEXT:    mulli r9, r9, -124
 ; P8BE-NEXT:    mulli r10, r10, 95
 ; P8BE-NEXT:    sub r3, r5, r3
-; P8BE-NEXT:    sldi r3, r3, 48
-; P8BE-NEXT:    sub r5, r6, r8
-; P8BE-NEXT:    mtvsrd v2, r3
-; P8BE-NEXT:    sub r6, r7, r9
-; P8BE-NEXT:    sldi r3, r5, 48
+; P8BE-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
+; P8BE-NEXT:    mtvsrwz v2, r3
+; P8BE-NEXT:    addi r3, r5, .LCPI0_0@toc@l
+; P8BE-NEXT:    sub r6, r6, r8
+; P8BE-NEXT:    lxvw4x v3, 0, r3
+; P8BE-NEXT:    sub r3, r7, r9
+; P8BE-NEXT:    mtvsrwz v4, r6
 ; P8BE-NEXT:    sub r4, r4, r10
-; P8BE-NEXT:    mtvsrd v3, r3
-; P8BE-NEXT:    sldi r3, r6, 48
-; P8BE-NEXT:    sldi r4, r4, 48
-; P8BE-NEXT:    mtvsrd v4, r3
-; P8BE-NEXT:    mtvsrd v5, r4
-; P8BE-NEXT:    vmrghh v2, v3, v2
-; P8BE-NEXT:    vmrghh v3, v5, v4
+; P8BE-NEXT:    mtvsrwz v5, r3
+; P8BE-NEXT:    mtvsrwz v0, r4
+; P8BE-NEXT:    vperm v2, v4, v2, v3
+; P8BE-NEXT:    vperm v3, v0, v5, v3
 ; P8BE-NEXT:    vmrghw v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
@@ -311,8 +309,7 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    add r5, r5, r6
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v3, r3
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
@@ -323,12 +320,14 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    add r5, r5, r6
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; P9BE-NEXT:    lxvx v5, 0, r3
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    vmrghh v3, v4, v3
 ; P9BE-NEXT:    extsh r3, r3
+; P9BE-NEXT:    vperm v3, v4, v3, v5
 ; P9BE-NEXT:    mulhw r5, r3, r4
 ; P9BE-NEXT:    add r5, r5, r3
 ; P9BE-NEXT:    srwi r6, r5, 31
@@ -336,8 +335,7 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    add r5, r5, r6
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
@@ -348,9 +346,8 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    add r4, r4, r5
 ; P9BE-NEXT:    mulli r4, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v2, r3
-; P9BE-NEXT:    vmrghh v2, v2, v4
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vperm v2, v2, v4, v5
 ; P9BE-NEXT:    vmrghw v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
@@ -441,21 +438,20 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P8BE-NEXT:    srawi r3, r3, 6
 ; P8BE-NEXT:    mulli r10, r10, 95
 ; P8BE-NEXT:    sub r5, r5, r8
+; P8BE-NEXT:    addis r8, r2, .LCPI1_0@toc@ha
 ; P8BE-NEXT:    add r3, r3, r11
-; P8BE-NEXT:    sldi r5, r5, 48
+; P8BE-NEXT:    mtvsrwz v2, r5
+; P8BE-NEXT:    addi r5, r8, .LCPI1_0@toc@l
 ; P8BE-NEXT:    mulli r3, r3, 95
 ; P8BE-NEXT:    sub r6, r6, r9
-; P8BE-NEXT:    mtvsrd v2, r5
-; P8BE-NEXT:    sldi r6, r6, 48
-; P8BE-NEXT:    sub r7, r7, r10
-; P8BE-NEXT:    mtvsrd v3, r6
+; P8BE-NEXT:    lxvw4x v3, 0, r5
+; P8BE-NEXT:    mtvsrwz v4, r6
+; P8BE-NEXT:    sub r5, r7, r10
+; P8BE-NEXT:    mtvsrwz v5, r5
 ; P8BE-NEXT:    sub r3, r4, r3
-; P8BE-NEXT:    sldi r4, r7, 48
-; P8BE-NEXT:    vmrghh v2, v3, v2
-; P8BE-NEXT:    sldi r3, r3, 48
-; P8BE-NEXT:    mtvsrd v4, r4
-; P8BE-NEXT:    mtvsrd v5, r3
-; P8BE-NEXT:    vmrghh v3, v5, v4
+; P8BE-NEXT:    vperm v2, v4, v2, v3
+; P8BE-NEXT:    mtvsrwz v0, r3
+; P8BE-NEXT:    vperm v3, v0, v5, v3
 ; P8BE-NEXT:    vmrghw v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
@@ -540,8 +536,7 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P9BE-NEXT:    add r4, r4, r6
 ; P9BE-NEXT:    mulli r6, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r6
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v3, r3
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r6, r3
@@ -552,12 +547,14 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P9BE-NEXT:    add r6, r6, r7
 ; P9BE-NEXT:    mulli r7, r6, 95
 ; P9BE-NEXT:    sub r3, r3, r7
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; P9BE-NEXT:    lxvx v5, 0, r3
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    vmrghh v3, v4, v3
 ; P9BE-NEXT:    extsh r7, r3
+; P9BE-NEXT:    vperm v3, v4, v3, v5
 ; P9BE-NEXT:    mulhw r8, r7, r5
 ; P9BE-NEXT:    add r7, r8, r7
 ; P9BE-NEXT:    srwi r8, r7, 31
@@ -565,8 +562,7 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P9BE-NEXT:    add r7, r7, r8
 ; P9BE-NEXT:    mulli r8, r7, 95
 ; P9BE-NEXT:    sub r3, r3, r8
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
@@ -576,21 +572,16 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P9BE-NEXT:    srawi r5, r5, 6
 ; P9BE-NEXT:    add r5, r5, r8
 ; P9BE-NEXT:    mulli r8, r5, 95
+; P9BE-NEXT:    mtvsrwz v0, r5
 ; P9BE-NEXT:    sub r3, r3, r8
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v2, r3
-; P9BE-NEXT:    sldi r3, r4, 48
-; P9BE-NEXT:    vmrghh v2, v2, v4
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vperm v2, v2, v4, v5
+; P9BE-NEXT:    mtvsrwz v4, r6
 ; P9BE-NEXT:    vmrghw v2, v2, v3
-; P9BE-NEXT:    mtvsrd v3, r3
-; P9BE-NEXT:    sldi r3, r6, 48
-; P9BE-NEXT:    mtvsrd v4, r3
-; P9BE-NEXT:    sldi r3, r7, 48
-; P9BE-NEXT:    vmrghh v3, v4, v3
-; P9BE-NEXT:    mtvsrd v4, r3
-; P9BE-NEXT:    sldi r3, r5, 48
-; P9BE-NEXT:    mtvsrd v5, r3
-; P9BE-NEXT:    vmrghh v4, v5, v4
+; P9BE-NEXT:    mtvsrwz v3, r4
+; P9BE-NEXT:    vperm v3, v4, v3, v5
+; P9BE-NEXT:    mtvsrwz v4, r7
+; P9BE-NEXT:    vperm v4, v0, v4, v5
 ; P9BE-NEXT:    vmrghw v3, v4, v3
 ; P9BE-NEXT:    vadduhm v2, v2, v3
 ; P9BE-NEXT:    blr
@@ -656,68 +647,65 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ;
 ; P8BE-LABEL: combine_srem_sdiv:
 ; P8BE:       # %bb.0:
-; P8BE-NEXT:    mfvsrd r5, v2
-; P8BE-NEXT:    lis r4, -21386
-; P8BE-NEXT:    ori r4, r4, 37253
-; P8BE-NEXT:    clrldi r3, r5, 48
-; P8BE-NEXT:    rldicl r6, r5, 48, 48
-; P8BE-NEXT:    extsh r8, r3
-; P8BE-NEXT:    rldicl r7, r5, 32, 48
+; P8BE-NEXT:    mfvsrd r4, v2
+; P8BE-NEXT:    lis r3, -21386
+; P8BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; P8BE-NEXT:    addis r30, r2, .LCPI2_0@toc@ha
+; P8BE-NEXT:    ori r3, r3, 37253
+; P8BE-NEXT:    clrldi r5, r4, 48
+; P8BE-NEXT:    rldicl r6, r4, 48, 48
+; P8BE-NEXT:    rldicl r7, r4, 32, 48
+; P8BE-NEXT:    extsh r8, r5
 ; P8BE-NEXT:    extsh r9, r6
-; P8BE-NEXT:    rldicl r5, r5, 16, 48
-; P8BE-NEXT:    mulhw r11, r8, r4
 ; P8BE-NEXT:    extsh r10, r7
-; P8BE-NEXT:    extsh r5, r5
-; P8BE-NEXT:    mulhw r12, r9, r4
-; P8BE-NEXT:    mulhw r0, r10, r4
-; P8BE-NEXT:    mulhw r4, r5, r4
+; P8BE-NEXT:    mulhw r11, r8, r3
+; P8BE-NEXT:    mulhw r12, r9, r3
+; P8BE-NEXT:    rldicl r4, r4, 16, 48
+; P8BE-NEXT:    mulhw r0, r10, r3
+; P8BE-NEXT:    extsh r4, r4
+; P8BE-NEXT:    mulhw r3, r4, r3
 ; P8BE-NEXT:    add r8, r11, r8
 ; P8BE-NEXT:    add r9, r12, r9
-; P8BE-NEXT:    srawi r11, r8, 6
-; P8BE-NEXT:    srwi r8, r8, 31
+; P8BE-NEXT:    srwi r11, r8, 31
 ; P8BE-NEXT:    add r10, r0, r10
-; P8BE-NEXT:    add r4, r4, r5
-; P8BE-NEXT:    add r8, r11, r8
+; P8BE-NEXT:    srawi r8, r8, 6
+; P8BE-NEXT:    addi r0, r30, .LCPI2_0@toc@l
+; P8BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; P8BE-NEXT:    srawi r12, r9, 6
 ; P8BE-NEXT:    srwi r9, r9, 31
-; P8BE-NEXT:    srawi r0, r10, 6
-; P8BE-NEXT:    srawi r11, r4, 6
+; P8BE-NEXT:    add r8, r8, r11
+; P8BE-NEXT:    add r3, r3, r4
+; P8BE-NEXT:    lxvw4x v2, 0, r0
+; P8BE-NEXT:    srawi r11, r10, 6
 ; P8BE-NEXT:    srwi r10, r10, 31
 ; P8BE-NEXT:    add r9, r12, r9
-; P8BE-NEXT:    srwi r4, r4, 31
+; P8BE-NEXT:    mtvsrwz v3, r8
 ; P8BE-NEXT:    mulli r12, r8, 95
-; P8BE-NEXT:    add r10, r0, r10
-; P8BE-NEXT:    add r4, r11, r4
-; P8BE-NEXT:    mulli r0, r9, 95
-; P8BE-NEXT:    sldi r9, r9, 48
-; P8BE-NEXT:    sldi r8, r8, 48
-; P8BE-NEXT:    mtvsrd v3, r9
-; P8BE-NEXT:    mulli r9, r4, 95
-; P8BE-NEXT:    mtvsrd v2, r8
-; P8BE-NEXT:    mulli r8, r10, 95
-; P8BE-NEXT:    sldi r10, r10, 48
-; P8BE-NEXT:    sub r3, r3, r12
-; P8BE-NEXT:    mtvsrd v4, r10
-; P8BE-NEXT:    sub r6, r6, r0
-; P8BE-NEXT:    sldi r3, r3, 48
-; P8BE-NEXT:    vmrghh v2, v3, v2
-; P8BE-NEXT:    sldi r6, r6, 48
-; P8BE-NEXT:    mtvsrd v3, r3
-; P8BE-NEXT:    sub r3, r5, r9
-; P8BE-NEXT:    sub r7, r7, r8
-; P8BE-NEXT:    mtvsrd v5, r6
-; P8BE-NEXT:    sldi r3, r3, 48
-; P8BE-NEXT:    sldi r5, r7, 48
-; P8BE-NEXT:    mtvsrd v1, r3
-; P8BE-NEXT:    sldi r3, r4, 48
-; P8BE-NEXT:    mtvsrd v0, r5
-; P8BE-NEXT:    vmrghh v3, v5, v3
-; P8BE-NEXT:    mtvsrd v5, r3
-; P8BE-NEXT:    vmrghh v0, v1, v0
-; P8BE-NEXT:    vmrghh v4, v5, v4
-; P8BE-NEXT:    vmrghw v3, v0, v3
-; P8BE-NEXT:    vmrghw v2, v4, v2
-; P8BE-NEXT:    vadduhm v2, v3, v2
+; P8BE-NEXT:    add r10, r11, r10
+; P8BE-NEXT:    srwi r11, r3, 31
+; P8BE-NEXT:    mtvsrwz v4, r9
+; P8BE-NEXT:    srawi r3, r3, 6
+; P8BE-NEXT:    mulli r8, r9, 95
+; P8BE-NEXT:    mtvsrwz v5, r10
+; P8BE-NEXT:    add r3, r3, r11
+; P8BE-NEXT:    mulli r9, r10, 95
+; P8BE-NEXT:    vperm v3, v4, v3, v2
+; P8BE-NEXT:    mulli r10, r3, 95
+; P8BE-NEXT:    sub r5, r5, r12
+; P8BE-NEXT:    sub r6, r6, r8
+; P8BE-NEXT:    mtvsrwz v4, r5
+; P8BE-NEXT:    mtvsrwz v0, r6
+; P8BE-NEXT:    sub r5, r7, r9
+; P8BE-NEXT:    sub r4, r4, r10
+; P8BE-NEXT:    mtvsrwz v1, r5
+; P8BE-NEXT:    mtvsrwz v6, r4
+; P8BE-NEXT:    vperm v4, v0, v4, v2
+; P8BE-NEXT:    mtvsrwz v0, r3
+; P8BE-NEXT:    vperm v1, v6, v1, v2
+; P8BE-NEXT:    vperm v2, v0, v5, v2
+; P8BE-NEXT:    vmrghw v4, v1, v4
+; P8BE-NEXT:    vmrghw v2, v2, v3
+; P8BE-NEXT:    vadduhm v2, v4, v2
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
   %2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
@@ -780,8 +768,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    addze r4, r4
 ; P9BE-NEXT:    slwi r4, r4, 5
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v3, r3
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
@@ -790,13 +777,15 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    slwi r4, r4, 6
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, -21386
-; P9BE-NEXT:    sldi r3, r3, 48
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; P9BE-NEXT:    ori r4, r4, 37253
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
+; P9BE-NEXT:    lxvx v5, 0, r3
 ; P9BE-NEXT:    li r3, 6
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    vmrghh v3, v4, v3
 ; P9BE-NEXT:    extsh r3, r3
+; P9BE-NEXT:    vperm v3, v4, v3, v5
 ; P9BE-NEXT:    mulhw r4, r3, r4
 ; P9BE-NEXT:    add r4, r4, r3
 ; P9BE-NEXT:    srwi r5, r4, 31
@@ -804,8 +793,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    add r4, r4, r5
 ; P9BE-NEXT:    mulli r4, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
 ; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
@@ -813,9 +801,8 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    addze r4, r4
 ; P9BE-NEXT:    slwi r4, r4, 3
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v2, r3
-; P9BE-NEXT:    vmrghh v2, v2, v4
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vperm v2, v2, v4, v5
 ; P9BE-NEXT:    vmrghw v2, v3, v2
 ; P9BE-NEXT:    blr
 ;
@@ -885,24 +872,23 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P8BE-NEXT:    srwi r10, r3, 31
 ; P8BE-NEXT:    srawi r3, r3, 6
 ; P8BE-NEXT:    slwi r8, r9, 6
+; P8BE-NEXT:    mtvsrwz v2, r6
 ; P8BE-NEXT:    add r3, r3, r10
 ; P8BE-NEXT:    srawi r9, r4, 3
-; P8BE-NEXT:    sub r7, r7, r8
+; P8BE-NEXT:    addis r10, r2, .LCPI3_0@toc@ha
+; P8BE-NEXT:    sub r6, r7, r8
 ; P8BE-NEXT:    mulli r3, r3, 95
-; P8BE-NEXT:    sldi r6, r6, 48
 ; P8BE-NEXT:    addze r8, r9
-; P8BE-NEXT:    mtvsrd v2, r6
-; P8BE-NEXT:    slwi r6, r8, 3
-; P8BE-NEXT:    sub r4, r4, r6
-; P8BE-NEXT:    sldi r4, r4, 48
+; P8BE-NEXT:    addi r7, r10, .LCPI3_0@toc@l
+; P8BE-NEXT:    mtvsrwz v4, r6
+; P8BE-NEXT:    lxvw4x v3, 0, r7
 ; P8BE-NEXT:    sub r3, r5, r3
-; P8BE-NEXT:    sldi r5, r7, 48
-; P8BE-NEXT:    mtvsrd v5, r4
-; P8BE-NEXT:    sldi r3, r3, 48
-; P8BE-NEXT:    mtvsrd v3, r5
-; P8BE-NEXT:    mtvsrd v4, r3
-; P8BE-NEXT:    vmrghh v2, v3, v2
-; P8BE-NEXT:    vmrghh v3, v5, v4
+; P8BE-NEXT:    slwi r5, r8, 3
+; P8BE-NEXT:    vperm v2, v4, v2, v3
+; P8BE-NEXT:    sub r4, r4, r5
+; P8BE-NEXT:    mtvsrwz v5, r3
+; P8BE-NEXT:    mtvsrwz v0, r4
+; P8BE-NEXT:    vperm v3, v0, v5, v3
 ; P8BE-NEXT:    vmrghw v2, v2, v3
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
@@ -973,10 +959,9 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, 23
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, 24749
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    ori r4, r4, 47143
-; P9BE-NEXT:    mtvsrd v3, r3
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 6
+; P9BE-NEXT:    ori r4, r4, 47143
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
 ; P9BE-NEXT:    mulhw r4, r3, r4
@@ -986,13 +971,15 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, 5423
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, -14230
-; P9BE-NEXT:    sldi r3, r3, 48
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; P9BE-NEXT:    ori r4, r4, 30865
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
+; P9BE-NEXT:    lxvx v5, 0, r3
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    vmrghh v3, v3, v4
 ; P9BE-NEXT:    extsh r3, r3
+; P9BE-NEXT:    vperm v3, v3, v4, v5
 ; P9BE-NEXT:    mulhw r4, r3, r4
 ; P9BE-NEXT:    add r4, r4, r3
 ; P9BE-NEXT:    srwi r5, r4, 31
@@ -1000,12 +987,10 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
 ; P9BE-NEXT:    add r4, r4, r5
 ; P9BE-NEXT:    mulli r4, r4, 654
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v2, r3
+; P9BE-NEXT:    mtvsrwz v2, r3
 ; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
-; P9BE-NEXT:    vmrghh v2, v4, v2
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    vperm v2, v4, v2, v5
 ; P9BE-NEXT:    vmrghw v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
@@ -1057,51 +1042,50 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
 ;
 ; P8BE-LABEL: dont_fold_srem_one:
 ; P8BE:       # %bb.0:
-; P8BE-NEXT:    mfvsrd r3, v2
-; P8BE-NEXT:    lis r5, 24749
-; P8BE-NEXT:    lis r6, -19946
+; P8BE-NEXT:    mfvsrd r4, v2
+; P8BE-NEXT:    lis r3, 24749
+; P8BE-NEXT:    lis r7, -19946
 ; P8BE-NEXT:    lis r8, -14230
-; P8BE-NEXT:    ori r5, r5, 47143
-; P8BE-NEXT:    ori r6, r6, 17097
+; P8BE-NEXT:    ori r3, r3, 47143
+; P8BE-NEXT:    ori r7, r7, 17097
 ; P8BE-NEXT:    ori r8, r8, 30865
-; P8BE-NEXT:    clrldi r4, r3, 48
-; P8BE-NEXT:    rldicl r7, r3, 48, 48
-; P8BE-NEXT:    rldicl r3, r3, 32, 48
+; P8BE-NEXT:    clrldi r5, r4, 48
+; P8BE-NEXT:    rldicl r6, r4, 48, 48
+; P8BE-NEXT:    rldicl r4, r4, 32, 48
+; P8BE-NEXT:    extsh r5, r5
+; P8BE-NEXT:    extsh r6, r6
 ; P8BE-NEXT:    extsh r4, r4
-; P8BE-NEXT:    extsh r7, r7
-; P8BE-NEXT:    extsh r3, r3
-; P8BE-NEXT:    mulhw r5, r4, r5
-; P8BE-NEXT:    mulhw r6, r7, r6
-; P8BE-NEXT:    mulhw r8, r3, r8
-; P8BE-NEXT:    srwi r9, r5, 31
-; P8BE-NEXT:    srawi r5, r5, 11
-; P8BE-NEXT:    add r6, r6, r7
-; P8BE-NEXT:    add r8, r8, r3
-; P8BE-NEXT:    add r5, r5, r9
-; P8BE-NEXT:    srwi r9, r6, 31
-; P8BE-NEXT:    srawi r6, r6, 4
-; P8BE-NEXT:    add r6, r6, r9
-; P8BE-NEXT:    srwi r9, r8, 31
-; P8BE-NEXT:    srawi r8, r8, 9
-; P8BE-NEXT:    mulli r5, r5, 5423
-; P8BE-NEXT:    add r8, r8, r9
-; P8BE-NEXT:    mulli r6, r6, 23
-; P8BE-NEXT:    li r9, 0
+; P8BE-NEXT:    mulhw r3, r5, r3
+; P8BE-NEXT:    mulhw r7, r6, r7
+; P8BE-NEXT:    mulhw r8, r4, r8
+; P8BE-NEXT:    srawi r9, r3, 11
+; P8BE-NEXT:    srwi r3, r3, 31
+; P8BE-NEXT:    add r7, r7, r6
+; P8BE-NEXT:    add r8, r8, r4
+; P8BE-NEXT:    add r3, r9, r3
+; P8BE-NEXT:    srwi r9, r7, 31
+; P8BE-NEXT:    srawi r7, r7, 4
+; P8BE-NEXT:    srawi r10, r8, 9
+; P8BE-NEXT:    srwi r8, r8, 31
+; P8BE-NEXT:    add r7, r7, r9
+; P8BE-NEXT:    addis r9, r2, .LCPI4_0@toc@ha
+; P8BE-NEXT:    mulli r3, r3, 5423
+; P8BE-NEXT:    add r8, r10, r8
+; P8BE-NEXT:    li r10, 0
+; P8BE-NEXT:    mulli r7, r7, 23
 ; P8BE-NEXT:    mulli r8, r8, 654
-; P8BE-NEXT:    sub r4, r4, r5
-; P8BE-NEXT:    sldi r5, r9, 48
-; P8BE-NEXT:    mtvsrd v2, r5
-; P8BE-NEXT:    sub r5, r7, r6
-; P8BE-NEXT:    sldi r4, r4, 48
-; P8BE-NEXT:    sub r3, r3, r8
-; P8BE-NEXT:    mtvsrd v3, r4
-; P8BE-NEXT:    sldi r4, r5, 48
-; P8BE-NEXT:    sldi r3, r3, 48
-; P8BE-NEXT:    mtvsrd v4, r4
-; P8BE-NEXT:    mtvsrd v5, r3
-; P8BE-NEXT:    vmrghh v3, v4, v3
-; P8BE-NEXT:    vmrghh v2, v2, v5
-; P8BE-NEXT:    vmrghw v2, v2, v3
+; P8BE-NEXT:    mtvsrwz v2, r10
+; P8BE-NEXT:    sub r3, r5, r3
+; P8BE-NEXT:    addi r5, r9, .LCPI4_0@toc@l
+; P8BE-NEXT:    lxvw4x v3, 0, r5
+; P8BE-NEXT:    sub r5, r6, r7
+; P8BE-NEXT:    mtvsrwz v4, r3
+; P8BE-NEXT:    sub r3, r4, r8
+; P8BE-NEXT:    mtvsrwz v5, r5
+; P8BE-NEXT:    mtvsrwz v0, r3
+; P8BE-NEXT:    vperm v4, v5, v4, v3
+; P8BE-NEXT:    vperm v2, v2, v0, v3
+; P8BE-NEXT:    vmrghw v2, v2, v4
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
   ret <4 x i16> %1
@@ -1166,10 +1150,9 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, 23
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, 24749
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    ori r4, r4, 47143
-; P9BE-NEXT:    mtvsrd v3, r3
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 6
+; P9BE-NEXT:    ori r4, r4, 47143
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
 ; P9BE-NEXT:    mulhw r4, r3, r4
@@ -1178,22 +1161,22 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
 ; P9BE-NEXT:    add r4, r4, r5
 ; P9BE-NEXT:    mulli r4, r4, 5423
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
+; P9BE-NEXT:    lxvx v5, 0, r3
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    vmrghh v3, v3, v4
 ; P9BE-NEXT:    extsh r3, r3
+; P9BE-NEXT:    vperm v3, v3, v4, v5
 ; P9BE-NEXT:    srawi r4, r3, 15
 ; P9BE-NEXT:    addze r4, r4
 ; P9BE-NEXT:    slwi r4, r4, 15
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v2, r3
+; P9BE-NEXT:    mtvsrwz v2, r3
 ; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
-; P9BE-NEXT:    vmrghh v2, v4, v2
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    vperm v2, v4, v2, v5
 ; P9BE-NEXT:    vmrghw v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
@@ -1243,8 +1226,10 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
 ; P8BE-NEXT:    mfvsrd r3, v2
 ; P8BE-NEXT:    lis r4, 24749
 ; P8BE-NEXT:    lis r5, -19946
+; P8BE-NEXT:    li r9, 0
 ; P8BE-NEXT:    ori r4, r4, 47143
 ; P8BE-NEXT:    ori r5, r5, 17097
+; P8BE-NEXT:    mtvsrwz v2, r9
 ; P8BE-NEXT:    clrldi r6, r3, 48
 ; P8BE-NEXT:    rldicl r7, r3, 48, 48
 ; P8BE-NEXT:    extsh r6, r6
@@ -1261,25 +1246,22 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
 ; P8BE-NEXT:    srawi r5, r5, 4
 ; P8BE-NEXT:    mulli r4, r4, 5423
 ; P8BE-NEXT:    add r5, r5, r8
-; P8BE-NEXT:    li r8, 0
+; P8BE-NEXT:    addis r8, r2, .LCPI5_0@toc@ha
+; P8BE-NEXT:    srawi r10, r3, 15
 ; P8BE-NEXT:    mulli r5, r5, 23
-; P8BE-NEXT:    srawi r9, r3, 15
 ; P8BE-NEXT:    sub r4, r6, r4
-; P8BE-NEXT:    sldi r6, r8, 48
-; P8BE-NEXT:    addze r8, r9
-; P8BE-NEXT:    mtvsrd v2, r6
+; P8BE-NEXT:    addi r6, r8, .LCPI5_0@toc@l
+; P8BE-NEXT:    addze r8, r10
+; P8BE-NEXT:    lxvw4x v3, 0, r6
 ; P8BE-NEXT:    slwi r6, r8, 15
-; P8BE-NEXT:    sldi r4, r4, 48
+; P8BE-NEXT:    mtvsrwz v4, r4
 ; P8BE-NEXT:    sub r5, r7, r5
 ; P8BE-NEXT:    sub r3, r3, r6
-; P8BE-NEXT:    mtvsrd v3, r4
-; P8BE-NEXT:    sldi r4, r5, 48
-; P8BE-NEXT:    sldi r3, r3, 48
-; P8BE-NEXT:    mtvsrd v4, r4
-; P8BE-NEXT:    mtvsrd v5, r3
-; P8BE-NEXT:    vmrghh v3, v4, v3
-; P8BE-NEXT:    vmrghh v2, v2, v5
-; P8BE-NEXT:    vmrghw v2, v2, v3
+; P8BE-NEXT:    mtvsrwz v5, r5
+; P8BE-NEXT:    mtvsrwz v0, r3
+; P8BE-NEXT:    vperm v4, v5, v4, v3
+; P8BE-NEXT:    vperm v2, v2, v0, v3
+; P8BE-NEXT:    vmrghw v2, v2, v4
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
   ret <4 x i16> %1

diff  --git a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
index b17853f6960a..389e52680732 100644
--- a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
@@ -74,32 +74,32 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, 1003
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, 21399
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    ori r4, r4, 33437
-; P9BE-NEXT:    mtvsrd v3, r3
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 4
+; P9BE-NEXT:    ori r4, r4, 33437
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 16
 ; P9BE-NEXT:    mulhwu r4, r3, r4
 ; P9BE-NEXT:    srwi r4, r4, 5
 ; P9BE-NEXT:    mulli r4, r4, 98
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; P9BE-NEXT:    lxvx v5, 0, r3
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    vmrghh v3, v4, v3
 ; P9BE-NEXT:    clrlwi r4, r3, 16
 ; P9BE-NEXT:    rlwinm r3, r3, 30, 18, 31
+; P9BE-NEXT:    vperm v3, v4, v3, v5
 ; P9BE-NEXT:    mulhwu r3, r3, r5
 ; P9BE-NEXT:    srwi r3, r3, 2
 ; P9BE-NEXT:    mulli r3, r3, 124
 ; P9BE-NEXT:    sub r3, r4, r3
 ; P9BE-NEXT:    lis r4, 22765
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    ori r4, r4, 8969
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
 ; P9BE-NEXT:    li r3, 0
+; P9BE-NEXT:    ori r4, r4, 8969
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 16
 ; P9BE-NEXT:    mulhwu r4, r3, r4
@@ -109,9 +109,8 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r4, r4, 6
 ; P9BE-NEXT:    mulli r4, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v2, r3
-; P9BE-NEXT:    vmrghh v2, v2, v4
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vperm v2, v2, v4, v5
 ; P9BE-NEXT:    vmrghw v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
@@ -178,41 +177,40 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
 ; P8BE-NEXT:    rldicl r6, r4, 16, 48
 ; P8BE-NEXT:    clrldi r5, r4, 48
 ; P8BE-NEXT:    clrlwi r6, r6, 16
-; P8BE-NEXT:    rldicl r8, r4, 48, 48
 ; P8BE-NEXT:    clrlwi r5, r5, 16
 ; P8BE-NEXT:    mulhwu r3, r6, r3
+; P8BE-NEXT:    rldicl r8, r4, 48, 48
+; P8BE-NEXT:    mulhwu r7, r5, r7
 ; P8BE-NEXT:    rldicl r4, r4, 32, 48
 ; P8BE-NEXT:    clrlwi r8, r8, 16
-; P8BE-NEXT:    mulhwu r7, r5, r7
 ; P8BE-NEXT:    rlwinm r11, r4, 30, 18, 31
-; P8BE-NEXT:    clrlwi r4, r4, 16
 ; P8BE-NEXT:    mulhwu r9, r8, r9
+; P8BE-NEXT:    clrlwi r4, r4, 16
 ; P8BE-NEXT:    mulhwu r10, r11, r10
 ; P8BE-NEXT:    sub r11, r6, r3
-; P8BE-NEXT:    srwi r11, r11, 1
 ; P8BE-NEXT:    srwi r7, r7, 8
+; P8BE-NEXT:    srwi r11, r11, 1
 ; P8BE-NEXT:    add r3, r11, r3
-; P8BE-NEXT:    srwi r9, r9, 5
-; P8BE-NEXT:    srwi r10, r10, 2
 ; P8BE-NEXT:    mulli r7, r7, 1003
+; P8BE-NEXT:    srwi r9, r9, 5
 ; P8BE-NEXT:    srwi r3, r3, 6
+; P8BE-NEXT:    srwi r10, r10, 2
 ; P8BE-NEXT:    mulli r9, r9, 98
 ; P8BE-NEXT:    mulli r3, r3, 95
 ; P8BE-NEXT:    mulli r10, r10, 124
 ; P8BE-NEXT:    sub r5, r5, r7
-; P8BE-NEXT:    sub r7, r8, r9
-; P8BE-NEXT:    sldi r5, r5, 48
+; P8BE-NEXT:    addis r7, r2, .LCPI0_0@toc@ha
+; P8BE-NEXT:    mtvsrwz v2, r5
+; P8BE-NEXT:    addi r5, r7, .LCPI0_0@toc@l
+; P8BE-NEXT:    sub r8, r8, r9
+; P8BE-NEXT:    lxvw4x v3, 0, r5
 ; P8BE-NEXT:    sub r3, r6, r3
 ; P8BE-NEXT:    sub r4, r4, r10
-; P8BE-NEXT:    mtvsrd v2, r5
-; P8BE-NEXT:    sldi r5, r7, 48
-; P8BE-NEXT:    sldi r3, r3, 48
-; P8BE-NEXT:    sldi r4, r4, 48
-; P8BE-NEXT:    mtvsrd v3, r5
-; P8BE-NEXT:    mtvsrd v4, r3
-; P8BE-NEXT:    mtvsrd v5, r4
-; P8BE-NEXT:    vmrghh v2, v3, v2
-; P8BE-NEXT:    vmrghh v3, v4, v5
+; P8BE-NEXT:    mtvsrwz v4, r8
+; P8BE-NEXT:    mtvsrwz v5, r3
+; P8BE-NEXT:    mtvsrwz v0, r4
+; P8BE-NEXT:    vperm v2, v4, v2, v3
+; P8BE-NEXT:    vperm v3, v5, v0, v3
 ; P8BE-NEXT:    vmrghw v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
@@ -287,8 +285,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r5, r5, 6
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v3, r3
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 16
@@ -299,12 +296,14 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r5, r5, 6
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; P9BE-NEXT:    lxvx v5, 0, r3
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    vmrghh v3, v4, v3
 ; P9BE-NEXT:    clrlwi r3, r3, 16
+; P9BE-NEXT:    vperm v3, v4, v3, v5
 ; P9BE-NEXT:    mulhwu r5, r3, r4
 ; P9BE-NEXT:    sub r6, r3, r5
 ; P9BE-NEXT:    srwi r6, r6, 1
@@ -312,8 +311,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r5, r5, 6
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 16
@@ -324,9 +322,8 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r4, r4, 6
 ; P9BE-NEXT:    mulli r4, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v2, r3
-; P9BE-NEXT:    vmrghh v2, v2, v4
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vperm v2, v2, v4, v5
 ; P9BE-NEXT:    vmrghw v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
@@ -411,27 +408,26 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P8BE-NEXT:    add r10, r11, r10
 ; P8BE-NEXT:    srwi r11, r12, 1
 ; P8BE-NEXT:    srwi r9, r9, 6
+; P8BE-NEXT:    mulli r8, r8, 95
 ; P8BE-NEXT:    add r3, r11, r3
 ; P8BE-NEXT:    srwi r10, r10, 6
 ; P8BE-NEXT:    srwi r3, r3, 6
-; P8BE-NEXT:    mulli r8, r8, 95
 ; P8BE-NEXT:    mulli r9, r9, 95
 ; P8BE-NEXT:    mulli r10, r10, 95
 ; P8BE-NEXT:    mulli r3, r3, 95
 ; P8BE-NEXT:    sub r5, r5, r8
+; P8BE-NEXT:    addis r8, r2, .LCPI1_0@toc@ha
+; P8BE-NEXT:    mtvsrwz v2, r5
+; P8BE-NEXT:    addi r5, r8, .LCPI1_0@toc@l
 ; P8BE-NEXT:    sub r6, r6, r9
-; P8BE-NEXT:    sub r7, r7, r10
+; P8BE-NEXT:    lxvw4x v3, 0, r5
+; P8BE-NEXT:    sub r5, r7, r10
 ; P8BE-NEXT:    sub r3, r4, r3
-; P8BE-NEXT:    sldi r5, r5, 48
-; P8BE-NEXT:    sldi r6, r6, 48
-; P8BE-NEXT:    sldi r4, r7, 48
-; P8BE-NEXT:    mtvsrd v2, r5
-; P8BE-NEXT:    sldi r3, r3, 48
-; P8BE-NEXT:    mtvsrd v3, r6
-; P8BE-NEXT:    mtvsrd v4, r4
-; P8BE-NEXT:    mtvsrd v5, r3
-; P8BE-NEXT:    vmrghh v2, v3, v2
-; P8BE-NEXT:    vmrghh v3, v5, v4
+; P8BE-NEXT:    mtvsrwz v4, r6
+; P8BE-NEXT:    mtvsrwz v5, r5
+; P8BE-NEXT:    mtvsrwz v0, r3
+; P8BE-NEXT:    vperm v2, v4, v2, v3
+; P8BE-NEXT:    vperm v3, v0, v5, v3
 ; P8BE-NEXT:    vmrghw v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
@@ -516,8 +512,7 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r4, r4, 6
 ; P9BE-NEXT:    mulli r6, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r6
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v3, r3
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r6, r3, 16
@@ -528,12 +523,14 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r6, r6, 6
 ; P9BE-NEXT:    mulli r7, r6, 95
 ; P9BE-NEXT:    sub r3, r3, r7
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; P9BE-NEXT:    lxvx v5, 0, r3
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    vmrghh v3, v4, v3
 ; P9BE-NEXT:    clrlwi r7, r3, 16
+; P9BE-NEXT:    vperm v3, v4, v3, v5
 ; P9BE-NEXT:    mulhwu r8, r7, r5
 ; P9BE-NEXT:    sub r7, r7, r8
 ; P9BE-NEXT:    srwi r7, r7, 1
@@ -541,8 +538,7 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r7, r7, 6
 ; P9BE-NEXT:    mulli r8, r7, 95
 ; P9BE-NEXT:    sub r3, r3, r8
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 16
@@ -552,21 +548,16 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P9BE-NEXT:    add r5, r8, r5
 ; P9BE-NEXT:    srwi r5, r5, 6
 ; P9BE-NEXT:    mulli r8, r5, 95
+; P9BE-NEXT:    mtvsrwz v0, r5
 ; P9BE-NEXT:    sub r3, r3, r8
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v2, r3
-; P9BE-NEXT:    sldi r3, r4, 48
-; P9BE-NEXT:    vmrghh v2, v2, v4
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vperm v2, v2, v4, v5
+; P9BE-NEXT:    mtvsrwz v4, r6
 ; P9BE-NEXT:    vmrghw v2, v2, v3
-; P9BE-NEXT:    mtvsrd v3, r3
-; P9BE-NEXT:    sldi r3, r6, 48
-; P9BE-NEXT:    mtvsrd v4, r3
-; P9BE-NEXT:    sldi r3, r7, 48
-; P9BE-NEXT:    vmrghh v3, v4, v3
-; P9BE-NEXT:    mtvsrd v4, r3
-; P9BE-NEXT:    sldi r3, r5, 48
-; P9BE-NEXT:    mtvsrd v5, r3
-; P9BE-NEXT:    vmrghh v4, v5, v4
+; P9BE-NEXT:    mtvsrwz v3, r4
+; P9BE-NEXT:    vperm v3, v4, v3, v5
+; P9BE-NEXT:    mtvsrwz v4, r7
+; P9BE-NEXT:    vperm v4, v0, v4, v5
 ; P9BE-NEXT:    vmrghw v3, v4, v3
 ; P9BE-NEXT:    vadduhm v2, v2, v3
 ; P9BE-NEXT:    blr
@@ -634,68 +625,63 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ;
 ; P8BE-LABEL: combine_urem_udiv:
 ; P8BE:       # %bb.0:
-; P8BE-NEXT:    mfvsrd r5, v2
-; P8BE-NEXT:    lis r4, 22765
-; P8BE-NEXT:    ori r4, r4, 8969
-; P8BE-NEXT:    clrldi r3, r5, 48
-; P8BE-NEXT:    rldicl r6, r5, 48, 48
-; P8BE-NEXT:    clrlwi r8, r3, 16
-; P8BE-NEXT:    rldicl r7, r5, 32, 48
+; P8BE-NEXT:    mfvsrd r4, v2
+; P8BE-NEXT:    lis r3, 22765
+; P8BE-NEXT:    ori r3, r3, 8969
+; P8BE-NEXT:    clrldi r5, r4, 48
+; P8BE-NEXT:    rldicl r6, r4, 48, 48
+; P8BE-NEXT:    clrlwi r8, r5, 16
 ; P8BE-NEXT:    clrlwi r9, r6, 16
-; P8BE-NEXT:    rldicl r5, r5, 16, 48
-; P8BE-NEXT:    mulhwu r10, r8, r4
+; P8BE-NEXT:    rldicl r7, r4, 32, 48
+; P8BE-NEXT:    rldicl r4, r4, 16, 48
+; P8BE-NEXT:    mulhwu r10, r8, r3
+; P8BE-NEXT:    mulhwu r12, r9, r3
 ; P8BE-NEXT:    clrlwi r11, r7, 16
-; P8BE-NEXT:    mulhwu r12, r9, r4
-; P8BE-NEXT:    clrlwi r5, r5, 16
-; P8BE-NEXT:    mulhwu r0, r11, r4
-; P8BE-NEXT:    mulhwu r4, r5, r4
+; P8BE-NEXT:    clrlwi r4, r4, 16
+; P8BE-NEXT:    mulhwu r0, r11, r3
+; P8BE-NEXT:    mulhwu r3, r4, r3
 ; P8BE-NEXT:    sub r8, r8, r10
 ; P8BE-NEXT:    sub r9, r9, r12
 ; P8BE-NEXT:    srwi r8, r8, 1
-; P8BE-NEXT:    add r8, r8, r10
-; P8BE-NEXT:    sub r10, r11, r0
 ; P8BE-NEXT:    srwi r9, r9, 1
-; P8BE-NEXT:    sub r11, r5, r4
+; P8BE-NEXT:    sub r11, r11, r0
+; P8BE-NEXT:    add r8, r8, r10
 ; P8BE-NEXT:    add r9, r9, r12
-; P8BE-NEXT:    srwi r8, r8, 6
+; P8BE-NEXT:    sub r12, r4, r3
+; P8BE-NEXT:    addis r10, r2, .LCPI2_0@toc@ha
 ; P8BE-NEXT:    srwi r11, r11, 1
-; P8BE-NEXT:    srwi r10, r10, 1
+; P8BE-NEXT:    srwi r8, r8, 6
+; P8BE-NEXT:    srwi r12, r12, 1
 ; P8BE-NEXT:    srwi r9, r9, 6
-; P8BE-NEXT:    mulli r12, r8, 95
-; P8BE-NEXT:    add r4, r11, r4
-; P8BE-NEXT:    add r10, r10, r0
-; P8BE-NEXT:    mulli r11, r9, 95
-; P8BE-NEXT:    srwi r4, r4, 6
-; P8BE-NEXT:    srwi r10, r10, 6
-; P8BE-NEXT:    sldi r9, r9, 48
-; P8BE-NEXT:    sldi r8, r8, 48
-; P8BE-NEXT:    mtvsrd v3, r9
-; P8BE-NEXT:    mulli r9, r4, 95
-; P8BE-NEXT:    mtvsrd v2, r8
-; P8BE-NEXT:    mulli r8, r10, 95
-; P8BE-NEXT:    sub r3, r3, r12
-; P8BE-NEXT:    sub r6, r6, r11
-; P8BE-NEXT:    sldi r3, r3, 48
-; P8BE-NEXT:    vmrghh v2, v3, v2
-; P8BE-NEXT:    sldi r6, r6, 48
-; P8BE-NEXT:    sldi r10, r10, 48
-; P8BE-NEXT:    mtvsrd v3, r3
-; P8BE-NEXT:    sub r3, r5, r9
-; P8BE-NEXT:    sub r7, r7, r8
-; P8BE-NEXT:    mtvsrd v5, r6
-; P8BE-NEXT:    sldi r3, r3, 48
-; P8BE-NEXT:    sldi r5, r7, 48
-; P8BE-NEXT:    mtvsrd v1, r3
-; P8BE-NEXT:    sldi r3, r4, 48
-; P8BE-NEXT:    mtvsrd v4, r10
-; P8BE-NEXT:    mtvsrd v0, r5
-; P8BE-NEXT:    vmrghh v3, v5, v3
-; P8BE-NEXT:    mtvsrd v5, r3
-; P8BE-NEXT:    vmrghh v0, v1, v0
-; P8BE-NEXT:    vmrghh v4, v5, v4
-; P8BE-NEXT:    vmrghw v3, v0, v3
-; P8BE-NEXT:    vmrghw v2, v4, v2
-; P8BE-NEXT:    vadduhm v2, v3, v2
+; P8BE-NEXT:    addi r10, r10, .LCPI2_0@toc@l
+; P8BE-NEXT:    add r11, r11, r0
+; P8BE-NEXT:    mulli r0, r8, 95
+; P8BE-NEXT:    add r3, r12, r3
+; P8BE-NEXT:    mtvsrwz v3, r8
+; P8BE-NEXT:    lxvw4x v2, 0, r10
+; P8BE-NEXT:    srwi r10, r11, 6
+; P8BE-NEXT:    mulli r8, r9, 95
+; P8BE-NEXT:    srwi r3, r3, 6
+; P8BE-NEXT:    mtvsrwz v4, r9
+; P8BE-NEXT:    mulli r9, r10, 95
+; P8BE-NEXT:    mtvsrwz v5, r10
+; P8BE-NEXT:    mulli r10, r3, 95
+; P8BE-NEXT:    vperm v3, v4, v3, v2
+; P8BE-NEXT:    sub r5, r5, r0
+; P8BE-NEXT:    sub r6, r6, r8
+; P8BE-NEXT:    mtvsrwz v4, r5
+; P8BE-NEXT:    mtvsrwz v0, r6
+; P8BE-NEXT:    sub r5, r7, r9
+; P8BE-NEXT:    sub r4, r4, r10
+; P8BE-NEXT:    mtvsrwz v1, r5
+; P8BE-NEXT:    mtvsrwz v6, r4
+; P8BE-NEXT:    vperm v4, v0, v4, v2
+; P8BE-NEXT:    mtvsrwz v0, r3
+; P8BE-NEXT:    vperm v1, v6, v1, v2
+; P8BE-NEXT:    vperm v2, v0, v5, v2
+; P8BE-NEXT:    vmrghw v4, v1, v4
+; P8BE-NEXT:    vmrghw v2, v2, v3
+; P8BE-NEXT:    vadduhm v2, v4, v2
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
   %2 = udiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
@@ -744,17 +730,18 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    ori r4, r4, 8969
 ; P9BE-NEXT:    clrlwi r3, r3, 27
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v3, r3
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 26
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
+; P9BE-NEXT:    lxvx v5, 0, r3
 ; P9BE-NEXT:    li r3, 6
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    vmrghh v3, v4, v3
 ; P9BE-NEXT:    clrlwi r3, r3, 16
+; P9BE-NEXT:    vperm v3, v4, v3, v5
 ; P9BE-NEXT:    mulhwu r4, r3, r4
 ; P9BE-NEXT:    sub r5, r3, r4
 ; P9BE-NEXT:    srwi r5, r5, 1
@@ -762,14 +749,12 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r4, r4, 6
 ; P9BE-NEXT:    mulli r4, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
 ; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 29
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v2, r3
-; P9BE-NEXT:    vmrghh v2, v2, v4
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vperm v2, v2, v4, v5
 ; P9BE-NEXT:    vmrghw v2, v3, v2
 ; P9BE-NEXT:    blr
 ;
@@ -807,32 +792,31 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
 ; P8BE:       # %bb.0:
 ; P8BE-NEXT:    mfvsrd r4, v2
 ; P8BE-NEXT:    lis r3, 22765
+; P8BE-NEXT:    addis r7, r2, .LCPI3_0@toc@ha
 ; P8BE-NEXT:    ori r3, r3, 8969
 ; P8BE-NEXT:    clrldi r5, r4, 48
-; P8BE-NEXT:    rldicl r7, r4, 16, 48
+; P8BE-NEXT:    rldicl r8, r4, 16, 48
 ; P8BE-NEXT:    clrlwi r5, r5, 16
-; P8BE-NEXT:    clrlwi r7, r7, 26
 ; P8BE-NEXT:    mulhwu r3, r5, r3
 ; P8BE-NEXT:    sub r6, r5, r3
 ; P8BE-NEXT:    srwi r6, r6, 1
 ; P8BE-NEXT:    add r3, r6, r3
 ; P8BE-NEXT:    rldicl r6, r4, 32, 48
 ; P8BE-NEXT:    srwi r3, r3, 6
-; P8BE-NEXT:    rldicl r4, r4, 48, 48
 ; P8BE-NEXT:    clrlwi r6, r6, 27
 ; P8BE-NEXT:    mulli r3, r3, 95
-; P8BE-NEXT:    sldi r6, r6, 48
+; P8BE-NEXT:    mtvsrwz v2, r6
+; P8BE-NEXT:    addi r6, r7, .LCPI3_0@toc@l
+; P8BE-NEXT:    rldicl r4, r4, 48, 48
+; P8BE-NEXT:    clrlwi r7, r8, 26
+; P8BE-NEXT:    lxvw4x v3, 0, r6
 ; P8BE-NEXT:    clrlwi r4, r4, 29
-; P8BE-NEXT:    mtvsrd v2, r6
-; P8BE-NEXT:    sldi r6, r7, 48
-; P8BE-NEXT:    sldi r4, r4, 48
-; P8BE-NEXT:    mtvsrd v3, r6
-; P8BE-NEXT:    mtvsrd v5, r4
+; P8BE-NEXT:    mtvsrwz v4, r7
+; P8BE-NEXT:    mtvsrwz v0, r4
 ; P8BE-NEXT:    sub r3, r5, r3
-; P8BE-NEXT:    vmrghh v2, v3, v2
-; P8BE-NEXT:    sldi r3, r3, 48
-; P8BE-NEXT:    mtvsrd v4, r3
-; P8BE-NEXT:    vmrghh v3, v5, v4
+; P8BE-NEXT:    vperm v2, v4, v2, v3
+; P8BE-NEXT:    mtvsrwz v5, r3
+; P8BE-NEXT:    vperm v3, v0, v5, v3
 ; P8BE-NEXT:    vmrghw v2, v2, v3
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
@@ -895,33 +879,32 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, 5423
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, -19946
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    ori r4, r4, 17097
-; P9BE-NEXT:    mtvsrd v3, r3
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 4
+; P9BE-NEXT:    ori r4, r4, 17097
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 16
 ; P9BE-NEXT:    mulhwu r4, r3, r4
 ; P9BE-NEXT:    srwi r4, r4, 4
 ; P9BE-NEXT:    mulli r4, r4, 23
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
+; P9BE-NEXT:    lxvx v5, 0, r3
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    vmrghh v3, v4, v3
 ; P9BE-NEXT:    clrlwi r4, r3, 16
 ; P9BE-NEXT:    rlwinm r3, r3, 31, 17, 31
+; P9BE-NEXT:    vperm v3, v4, v3, v5
 ; P9BE-NEXT:    mulhwu r3, r3, r5
 ; P9BE-NEXT:    srwi r3, r3, 8
 ; P9BE-NEXT:    mulli r3, r3, 654
 ; P9BE-NEXT:    sub r3, r4, r3
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v2, r3
+; P9BE-NEXT:    mtvsrwz v2, r3
 ; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    sldi r3, r3, 48
-; P9BE-NEXT:    mtvsrd v4, r3
-; P9BE-NEXT:    vmrghh v2, v4, v2
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    vperm v2, v4, v2, v5
 ; P9BE-NEXT:    vmrghw v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
@@ -970,40 +953,39 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
 ; P8BE-NEXT:    lis r3, 24749
 ; P8BE-NEXT:    lis r7, -19946
 ; P8BE-NEXT:    lis r8, -14230
+; P8BE-NEXT:    li r10, 0
 ; P8BE-NEXT:    ori r3, r3, 47143
 ; P8BE-NEXT:    ori r7, r7, 17097
 ; P8BE-NEXT:    ori r8, r8, 30865
+; P8BE-NEXT:    mtvsrwz v2, r10
 ; P8BE-NEXT:    clrldi r5, r4, 48
 ; P8BE-NEXT:    rldicl r6, r4, 48, 48
-; P8BE-NEXT:    rldicl r4, r4, 32, 48
 ; P8BE-NEXT:    clrlwi r5, r5, 16
+; P8BE-NEXT:    rldicl r4, r4, 32, 48
 ; P8BE-NEXT:    clrlwi r6, r6, 16
 ; P8BE-NEXT:    mulhwu r3, r5, r3
 ; P8BE-NEXT:    rlwinm r9, r4, 31, 17, 31
-; P8BE-NEXT:    clrlwi r4, r4, 16
 ; P8BE-NEXT:    mulhwu r7, r6, r7
 ; P8BE-NEXT:    mulhwu r8, r9, r8
-; P8BE-NEXT:    li r9, 0
+; P8BE-NEXT:    addis r9, r2, .LCPI4_0@toc@ha
 ; P8BE-NEXT:    srwi r3, r3, 11
-; P8BE-NEXT:    srwi r7, r7, 4
 ; P8BE-NEXT:    mulli r3, r3, 5423
+; P8BE-NEXT:    srwi r7, r7, 4
 ; P8BE-NEXT:    srwi r8, r8, 8
 ; P8BE-NEXT:    mulli r7, r7, 23
 ; P8BE-NEXT:    mulli r8, r8, 654
 ; P8BE-NEXT:    sub r3, r5, r3
-; P8BE-NEXT:    sldi r5, r9, 48
-; P8BE-NEXT:    mtvsrd v2, r5
+; P8BE-NEXT:    addi r5, r9, .LCPI4_0@toc@l
+; P8BE-NEXT:    mtvsrwz v4, r3
+; P8BE-NEXT:    clrlwi r3, r4, 16
+; P8BE-NEXT:    lxvw4x v3, 0, r5
 ; P8BE-NEXT:    sub r5, r6, r7
-; P8BE-NEXT:    sldi r3, r3, 48
-; P8BE-NEXT:    sub r4, r4, r8
-; P8BE-NEXT:    sldi r5, r5, 48
-; P8BE-NEXT:    mtvsrd v3, r3
-; P8BE-NEXT:    sldi r3, r4, 48
-; P8BE-NEXT:    mtvsrd v4, r5
-; P8BE-NEXT:    mtvsrd v5, r3
-; P8BE-NEXT:    vmrghh v3, v4, v3
-; P8BE-NEXT:    vmrghh v2, v2, v5
-; P8BE-NEXT:    vmrghw v2, v2, v3
+; P8BE-NEXT:    sub r3, r3, r8
+; P8BE-NEXT:    mtvsrwz v5, r5
+; P8BE-NEXT:    mtvsrwz v0, r3
+; P8BE-NEXT:    vperm v4, v5, v4, v3
+; P8BE-NEXT:    vperm v2, v2, v0, v3
+; P8BE-NEXT:    vmrghw v2, v2, v4
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
   ret <4 x i16> %1

diff  --git a/llvm/test/CodeGen/PowerPC/vec-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
index 2eee272c71b9..eb41177600d9 100644
--- a/llvm/test/CodeGen/PowerPC/vec-itofp.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
@@ -86,7 +86,7 @@ define void @test8(<8 x double>* nocapture %Sink, <8 x i16>* nocapture readonly
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI0_1@toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI0_1@toc@l
-; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI0_2 at toc@ha
@@ -162,7 +162,7 @@ define void @test4(<4 x double>* nocapture %Sink, <4 x i16>* nocapture readonly
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_1@toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_1@toc@l
-; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
@@ -210,7 +210,7 @@ define void @test2(<2 x double>* nocapture %Sink, <2 x i16>* nocapture readonly
 ; CHECK-BE-NEXT:    xxlxor v4, v4, v4
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
-; CHECK-BE-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-NEXT:    xvcvuxddp vs0, v2
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
index 6a95771a589e..c809382f305c 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
@@ -49,19 +49,20 @@ define i32 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtfprd f0, r3
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -130,31 +131,30 @@ define i64 @test4elt(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    lxvx v3, 0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xscvspdpn f0, v2
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    vmrghh v2, v4, v2
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    vperm v2, v5, v2, v3
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -271,59 +271,54 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrd v2, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v3, v3, v4
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v5, r3
-; CHECK-BE-NEXT:    vmrghh v4, v4, v5
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x float>, <8 x float>* %0, align 32
@@ -537,117 +532,104 @@ define void @test16elt(<16 x i16>* noalias nocapture sret(<16 x i16>) %agg.resul
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs1, 16(r4)
-; CHECK-BE-NEXT:    lxv vs0, 0(r4)
-; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    xscvspdpn f4, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI3_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r5
+; CHECK-BE-NEXT:    xxsldwi vs3, vs2, vs2, 3
+; CHECK-BE-NEXT:    xxswapd vs4, vs2
+; CHECK-BE-NEXT:    xscvspdpn f5, vs2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
+; CHECK-BE-NEXT:    xxsldwi vs6, vs1, vs1, 3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    mtvsrwz v3, r5
+; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f5
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    mtvsrwz v4, r5
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    xscvspdpn f4, vs6
+; CHECK-BE-NEXT:    mtvsrwz v4, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    xxsldwi vs2, vs0, vs0, 3
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrd v2, r5
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f4
-; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mtvsrwz v5, r5
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mtvsrd v3, r5
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
+; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    mtvsrwz v5, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r5
+; CHECK-BE-NEXT:    xxsldwi vs3, vs0, vs0, 3
+; CHECK-BE-NEXT:    mtvsrwz v0, r5
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    xscvspdpn f2, vs3
+; CHECK-BE-NEXT:    vperm v5, v0, v5, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mtvsrwz v1, r5
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    vperm v0, v0, v1, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    vmrghh v3, v3, v4
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    mffprwz r5, f1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    vmrghw v4, v0, v5
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    xscvspdpn f2, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrwz v1, r5
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v5, r5
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    mffprwz r5, f1
-; CHECK-BE-NEXT:    lxv vs1, 48(r4)
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    mtvsrd v5, r5
-; CHECK-BE-NEXT:    mffprwz r5, f0
-; CHECK-BE-NEXT:    lxv vs0, 32(r4)
-; CHECK-BE-NEXT:    xscvspdpn f5, vs1
-; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrd v0, r5
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    vmrghh v5, v5, v0
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghw v3, v5, v4
-; CHECK-BE-NEXT:    mffprwz r4, f5
-; CHECK-BE-NEXT:    xxmrghd vs4, v3, v2
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    mffprwz r5, f1
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrwz v6, r5
+; CHECK-BE-NEXT:    vperm v1, v6, v1, v2
 ; CHECK-BE-NEXT:    mffprwz r4, f2
-; CHECK-BE-NEXT:    stxv vs4, 0(r3)
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v4, r4
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v4, r4
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xxmrghd vs2, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs1, vs1, 3
+; CHECK-BE-NEXT:    stxv vs2, 0(r3)
+; CHECK-BE-NEXT:    mtvsrwz v3, r4
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    vperm v3, v5, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    vmrghw v3, v3, v1
 ; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v5, r4
-; CHECK-BE-NEXT:    vmrghh v4, v4, v5
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghd vs0, v3, v2
+; CHECK-BE-NEXT:    xxswapd vs0, vs1
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    xscvspdpn f0, vs1
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs1, vs1, 1
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    mtvsrwz v0, r4
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghd vs0, v2, v3
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -697,19 +679,20 @@ define i32 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtfprd f0, r3
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -778,31 +761,30 @@ define i64 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    lxvx v3, 0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xscvspdpn f0, v2
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    vmrghh v2, v4, v2
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    vperm v2, v5, v2, v3
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -919,59 +901,54 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrd v2, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v3, v3, v4
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v5, r3
-; CHECK-BE-NEXT:    vmrghh v4, v4, v5
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x float>, <8 x float>* %0, align 32
@@ -1185,117 +1162,104 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret(<16 x i16>) %ag
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs1, 16(r4)
-; CHECK-BE-NEXT:    lxv vs0, 0(r4)
-; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    xscvspdpn f4, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_0@toc@ha
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r5
+; CHECK-BE-NEXT:    xxsldwi vs3, vs2, vs2, 3
+; CHECK-BE-NEXT:    xxswapd vs4, vs2
+; CHECK-BE-NEXT:    xscvspdpn f5, vs2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
+; CHECK-BE-NEXT:    xxsldwi vs6, vs1, vs1, 3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    mtvsrwz v3, r5
+; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f5
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    mtvsrwz v4, r5
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    xscvspdpn f4, vs6
+; CHECK-BE-NEXT:    mtvsrwz v4, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    xxsldwi vs2, vs0, vs0, 3
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrd v2, r5
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f4
-; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mtvsrwz v5, r5
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mtvsrd v3, r5
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
+; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    mtvsrwz v5, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r5
+; CHECK-BE-NEXT:    xxsldwi vs3, vs0, vs0, 3
+; CHECK-BE-NEXT:    mtvsrwz v0, r5
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    xscvspdpn f2, vs3
+; CHECK-BE-NEXT:    vperm v5, v0, v5, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mtvsrwz v1, r5
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    vperm v0, v0, v1, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    vmrghh v3, v3, v4
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    mffprwz r5, f1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    vmrghw v4, v0, v5
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    xscvspdpn f2, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrwz v1, r5
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v5, r5
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    mffprwz r5, f1
-; CHECK-BE-NEXT:    lxv vs1, 48(r4)
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    mtvsrd v5, r5
-; CHECK-BE-NEXT:    mffprwz r5, f0
-; CHECK-BE-NEXT:    lxv vs0, 32(r4)
-; CHECK-BE-NEXT:    xscvspdpn f5, vs1
-; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrd v0, r5
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    vmrghh v5, v5, v0
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghw v3, v5, v4
-; CHECK-BE-NEXT:    mffprwz r4, f5
-; CHECK-BE-NEXT:    xxmrghd vs4, v3, v2
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    mffprwz r5, f1
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrwz v6, r5
+; CHECK-BE-NEXT:    vperm v1, v6, v1, v2
 ; CHECK-BE-NEXT:    mffprwz r4, f2
-; CHECK-BE-NEXT:    stxv vs4, 0(r3)
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v4, r4
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v4, r4
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xxmrghd vs2, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs1, vs1, 3
+; CHECK-BE-NEXT:    stxv vs2, 0(r3)
+; CHECK-BE-NEXT:    mtvsrwz v3, r4
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    vperm v3, v5, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    vmrghw v3, v3, v1
 ; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v5, r4
-; CHECK-BE-NEXT:    vmrghh v4, v4, v5
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghd vs0, v3, v2
+; CHECK-BE-NEXT:    xxswapd vs0, vs1
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    xscvspdpn f0, vs1
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs1, vs1, 1
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    mtvsrwz v0, r4
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghd vs0, v2, v3
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
index c7965d6c3e09..8786f5770847 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
@@ -54,19 +54,20 @@ define i16 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtfprd f0, r3
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
 ; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    stxsihx v2, 0, r3
 ; CHECK-BE-NEXT:    lhz r3, -2(r1)
@@ -138,32 +139,31 @@ define i32 @test4elt(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    lxvx v3, 0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xscvspdpn f0, v2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghb v3, v4, v3
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vmrghb v2, v4, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vperm v2, v5, v2, v3
+; CHECK-BE-NEXT:    vmrghh v2, v2, v4
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -283,59 +283,54 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrd v2, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghb v2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vmrghb v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v5, r3
-; CHECK-BE-NEXT:    vmrghb v4, v4, v5
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v4
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -553,113 +548,100 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
 ; CHECK-BE-NEXT:    xscvspdpn f4, vs4
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    mffprwz r3, f4
 ; CHECK-BE-NEXT:    xxswapd vs4, vs3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    mtvsrd v2, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    mffprwz r3, f4
 ; CHECK-BE-NEXT:    xscvspdpn f4, vs3
 ; CHECK-BE-NEXT:    xxsldwi vs3, vs3, vs3, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vmrghb v2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xxsldwi vs3, vs2, vs2, 3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs2
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vmrghb v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vmrghb v4, v4, v5
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghb v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghb v4, v4, v5
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vperm v5, v0, v5, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vmrghb v4, v5, v4
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v0, r3
-; CHECK-BE-NEXT:    vmrghb v5, v5, v0
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v1, r3
+; CHECK-BE-NEXT:    vperm v2, v0, v1, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v5
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x float>, <16 x float>* %0, align 64
@@ -712,19 +694,20 @@ define i16 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtfprd f0, r3
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
 ; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    stxsihx v2, 0, r3
 ; CHECK-BE-NEXT:    lhz r3, -2(r1)
@@ -796,32 +779,31 @@ define i32 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    lxvx v3, 0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xscvspdpn f0, v2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghb v3, v4, v3
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vmrghb v2, v4, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vperm v2, v5, v2, v3
+; CHECK-BE-NEXT:    vmrghh v2, v2, v4
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -941,59 +923,54 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrd v2, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghb v2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vmrghb v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v5, r3
-; CHECK-BE-NEXT:    vmrghb v4, v4, v5
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v4
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -1211,113 +1188,100 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI7_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
 ; CHECK-BE-NEXT:    xscvspdpn f4, vs4
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    mffprwz r3, f4
 ; CHECK-BE-NEXT:    xxswapd vs4, vs3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    mtvsrd v2, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    mffprwz r3, f4
 ; CHECK-BE-NEXT:    xscvspdpn f4, vs3
 ; CHECK-BE-NEXT:    xxsldwi vs3, vs3, vs3, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vmrghb v2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xxsldwi vs3, vs2, vs2, 3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs2
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vmrghb v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vmrghb v4, v4, v5
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghb v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghb v4, v4, v5
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    mtvsrd v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vperm v5, v0, v5, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vmrghb v4, v5, v4
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v0, r3
-; CHECK-BE-NEXT:    vmrghb v5, v5, v0
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v1, r3
+; CHECK-BE-NEXT:    vperm v2, v0, v1, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v5
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x float>, <16 x float>* %0, align 64

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
index 26db909198d5..2786c2fbe7d8 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
@@ -41,16 +41,17 @@ define i32 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xscvdpsxws f0, v2
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -114,27 +115,26 @@ define i64 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v3, r3
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghh v3, v3, v4
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -237,52 +237,47 @@ define <8 x i16> @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mtvsrd v3, r3
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v5, r3
-; CHECK-BE-NEXT:    vmrghh v4, v4, v5
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x double>, <8 x double>* %0, align 64
@@ -468,101 +463,88 @@ define void @test16elt(<16 x i16>* noalias nocapture sret(<16 x i16>) %agg.resul
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs4, 48(r4)
-; CHECK-BE-NEXT:    lxv vs3, 32(r4)
-; CHECK-BE-NEXT:    lxv vs2, 16(r4)
-; CHECK-BE-NEXT:    lxv vs1, 0(r4)
-; CHECK-BE-NEXT:    xscvdpsxws f5, f4
-; CHECK-BE-NEXT:    xxswapd vs4, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f6, f3
+; CHECK-BE-NEXT:    lxv vs3, 48(r4)
+; CHECK-BE-NEXT:    lxv vs2, 32(r4)
+; CHECK-BE-NEXT:    lxv vs1, 16(r4)
+; CHECK-BE-NEXT:    lxv vs0, 0(r4)
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI3_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r5
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
+; CHECK-BE-NEXT:    xscvdpsxws f5, f2
+; CHECK-BE-NEXT:    xscvdpsxws f6, f1
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f7, f2
-; CHECK-BE-NEXT:    lxv vs0, 112(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f7, f0
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r5, f5
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r5
-; CHECK-BE-NEXT:    mffprwz r5, f4
-; CHECK-BE-NEXT:    xscvdpsxws f4, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    sldi r5, r5, 48
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mtvsrd v3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f6
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
-; CHECK-BE-NEXT:    mtvsrd v3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f0
-; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    sldi r5, r5, 48
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mffprwz r5, f7
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    vmrghh v3, v3, v4
-; CHECK-BE-NEXT:    mtvsrd v4, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f4
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    mtvsrd v5, r5
+; CHECK-BE-NEXT:    mtvsrwz v3, r5
+; CHECK-BE-NEXT:    mffprwz r5, f5
+; CHECK-BE-NEXT:    mtvsrwz v4, r5
+; CHECK-BE-NEXT:    mffprwz r5, f6
+; CHECK-BE-NEXT:    mtvsrwz v5, r5
+; CHECK-BE-NEXT:    mffprwz r5, f7
+; CHECK-BE-NEXT:    mtvsrwz v0, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    mtvsrd v0, r5
+; CHECK-BE-NEXT:    lxv vs3, 112(r4)
+; CHECK-BE-NEXT:    mtvsrwz v1, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f2
 ; CHECK-BE-NEXT:    lxv vs2, 96(r4)
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    mtvsrd v1, r5
+; CHECK-BE-NEXT:    vperm v3, v3, v1, v2
+; CHECK-BE-NEXT:    mtvsrwz v1, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f1
 ; CHECK-BE-NEXT:    lxv vs1, 80(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
+; CHECK-BE-NEXT:    vperm v4, v4, v1, v2
+; CHECK-BE-NEXT:    mtvsrwz v1, r5
+; CHECK-BE-NEXT:    mffprwz r5, f0
+; CHECK-BE-NEXT:    lxv vs0, 64(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    vperm v5, v5, v1, v2
+; CHECK-BE-NEXT:    mtvsrwz v1, r5
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
+; CHECK-BE-NEXT:    vperm v0, v0, v1, v2
+; CHECK-BE-NEXT:    mffprwz r4, f4
+; CHECK-BE-NEXT:    vmrghw v5, v0, v5
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    mffprwz r4, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    vmrghh v4, v4, v1
-; CHECK-BE-NEXT:    mtvsrd v1, r5
+; CHECK-BE-NEXT:    xxmrghd vs4, v5, v3
+; CHECK-BE-NEXT:    mtvsrwz v3, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r5, f0
-; CHECK-BE-NEXT:    lxv vs0, 64(r4)
-; CHECK-BE-NEXT:    vmrghh v5, v5, v1
-; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
+; CHECK-BE-NEXT:    stxv vs4, 0(r3)
 ; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    mtvsrd v1, r5
-; CHECK-BE-NEXT:    vmrghw v3, v5, v4
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    vmrghh v0, v0, v1
-; CHECK-BE-NEXT:    xxmrghd vs3, v3, v2
-; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    stxv vs3, 0(r3)
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r4, f2
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    vmrghw v2, v2, v0
-; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v4, r4
-; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v5, r4
-; CHECK-BE-NEXT:    vmrghh v4, v4, v5
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghd vs0, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r4
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghd vs0, v2, v3
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -604,16 +586,17 @@ define i32 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xscvdpsxws f0, v2
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -677,27 +660,26 @@ define i64 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v3, r3
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghh v3, v3, v4
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -800,52 +782,47 @@ define <8 x i16> @test8elt_signed(<8 x double>* nocapture readonly) local_unname
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mtvsrd v3, r3
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mtvsrd v5, r3
-; CHECK-BE-NEXT:    vmrghh v4, v4, v5
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x double>, <8 x double>* %0, align 64
@@ -1031,101 +1008,88 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret(<16 x i16>) %ag
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs4, 48(r4)
-; CHECK-BE-NEXT:    lxv vs3, 32(r4)
-; CHECK-BE-NEXT:    lxv vs2, 16(r4)
-; CHECK-BE-NEXT:    lxv vs1, 0(r4)
-; CHECK-BE-NEXT:    xscvdpsxws f5, f4
-; CHECK-BE-NEXT:    xxswapd vs4, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f6, f3
+; CHECK-BE-NEXT:    lxv vs3, 48(r4)
+; CHECK-BE-NEXT:    lxv vs2, 32(r4)
+; CHECK-BE-NEXT:    lxv vs1, 16(r4)
+; CHECK-BE-NEXT:    lxv vs0, 0(r4)
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_0@toc@ha
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r5
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
+; CHECK-BE-NEXT:    xscvdpsxws f5, f2
+; CHECK-BE-NEXT:    xscvdpsxws f6, f1
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f7, f2
-; CHECK-BE-NEXT:    lxv vs0, 112(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f7, f0
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r5, f5
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r5
-; CHECK-BE-NEXT:    mffprwz r5, f4
-; CHECK-BE-NEXT:    xscvdpsxws f4, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    sldi r5, r5, 48
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mtvsrd v3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f6
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
-; CHECK-BE-NEXT:    mtvsrd v3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f0
-; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    sldi r5, r5, 48
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mffprwz r5, f7
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    vmrghh v3, v3, v4
-; CHECK-BE-NEXT:    mtvsrd v4, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f4
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    mtvsrd v5, r5
+; CHECK-BE-NEXT:    mtvsrwz v3, r5
+; CHECK-BE-NEXT:    mffprwz r5, f5
+; CHECK-BE-NEXT:    mtvsrwz v4, r5
+; CHECK-BE-NEXT:    mffprwz r5, f6
+; CHECK-BE-NEXT:    mtvsrwz v5, r5
+; CHECK-BE-NEXT:    mffprwz r5, f7
+; CHECK-BE-NEXT:    mtvsrwz v0, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    mtvsrd v0, r5
+; CHECK-BE-NEXT:    lxv vs3, 112(r4)
+; CHECK-BE-NEXT:    mtvsrwz v1, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f2
 ; CHECK-BE-NEXT:    lxv vs2, 96(r4)
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    mtvsrd v1, r5
+; CHECK-BE-NEXT:    vperm v3, v3, v1, v2
+; CHECK-BE-NEXT:    mtvsrwz v1, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f1
 ; CHECK-BE-NEXT:    lxv vs1, 80(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
+; CHECK-BE-NEXT:    vperm v4, v4, v1, v2
+; CHECK-BE-NEXT:    mtvsrwz v1, r5
+; CHECK-BE-NEXT:    mffprwz r5, f0
+; CHECK-BE-NEXT:    lxv vs0, 64(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    vperm v5, v5, v1, v2
+; CHECK-BE-NEXT:    mtvsrwz v1, r5
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
+; CHECK-BE-NEXT:    vperm v0, v0, v1, v2
+; CHECK-BE-NEXT:    mffprwz r4, f4
+; CHECK-BE-NEXT:    vmrghw v5, v0, v5
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    mffprwz r4, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    vmrghh v4, v4, v1
-; CHECK-BE-NEXT:    mtvsrd v1, r5
+; CHECK-BE-NEXT:    xxmrghd vs4, v5, v3
+; CHECK-BE-NEXT:    mtvsrwz v3, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r5, f0
-; CHECK-BE-NEXT:    lxv vs0, 64(r4)
-; CHECK-BE-NEXT:    vmrghh v5, v5, v1
-; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
+; CHECK-BE-NEXT:    stxv vs4, 0(r3)
 ; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    mtvsrd v1, r5
-; CHECK-BE-NEXT:    vmrghw v3, v5, v4
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    vmrghh v0, v0, v1
-; CHECK-BE-NEXT:    xxmrghd vs3, v3, v2
-; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    stxv vs3, 0(r3)
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r4, f2
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    vmrghw v2, v2, v0
-; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v4, r4
-; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v5, r4
-; CHECK-BE-NEXT:    vmrghh v4, v4, v5
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghd vs0, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r4
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghd vs0, v2, v3
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
index 887cee610b8b..caf483e45b60 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -29,10 +29,10 @@ define i64 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    xscvdpuxws f0, v2
 ; CHECK-P9-NEXT:    mffprwz r3, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    mtvsrws v3, r3
+; CHECK-P9-NEXT:    mtvsrwz v3, r3
 ; CHECK-P9-NEXT:    xscvdpuxws f0, f0
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-P9-NEXT:    vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
@@ -42,11 +42,11 @@ define i64 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-BE-NEXT:    xscvdpuxws f0, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    mtvsrws v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvdpuxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrws v2, r3
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -321,10 +321,10 @@ define i64 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    xscvdpsxws f0, v2
 ; CHECK-P9-NEXT:    mffprwz r3, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    mtvsrws v3, r3
+; CHECK-P9-NEXT:    mtvsrwz v3, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-P9-NEXT:    vmrghw v2, v3, v2
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
@@ -334,11 +334,11 @@ define i64 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-BE-NEXT:    xscvdpsxws f0, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    mtvsrws v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrws v2, r3
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    vmrgow v2, v3, v2
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
index 603572b19e1b..da07d1bce1c2 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
@@ -46,16 +46,17 @@ define i16 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xscvdpsxws f0, v2
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    vmrghb v2, v3, v2
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    stxsihx v2, 0, r3
 ; CHECK-BE-NEXT:    lhz r3, -2(r1)
@@ -122,28 +123,27 @@ define i32 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v3, r3
-; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -249,52 +249,47 @@ define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mtvsrd v3, r3
-; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v5, r3
-; CHECK-BE-NEXT:    vmrghb v4, v4, v5
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v4
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -481,100 +476,87 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs7, 112(r3)
-; CHECK-BE-NEXT:    lxv vs6, 96(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    xscvdpsxws f8, f7
 ; CHECK-BE-NEXT:    xxswapd vs7, vs7
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    lxv vs4, 64(r3)
 ; CHECK-BE-NEXT:    lxv vs5, 80(r3)
+; CHECK-BE-NEXT:    lxv vs6, 96(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f7, f7
 ; CHECK-BE-NEXT:    mffprwz r3, f8
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f7
 ; CHECK-BE-NEXT:    xscvdpsxws f7, f6
 ; CHECK-BE-NEXT:    xxswapd vs6, vs6
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    mtvsrd v3, r3
-; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f7
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f6
 ; CHECK-BE-NEXT:    xscvdpsxws f6, f5
 ; CHECK-BE-NEXT:    xxswapd vs5, vs5
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f6
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f5
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f4
 ; CHECK-BE-NEXT:    xxswapd vs4, vs4
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f4
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mtvsrd v5, r3
-; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
 ; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mtvsrd v5, r3
-; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v5, r3
-; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v0, r3
-; CHECK-BE-NEXT:    vmrghb v5, v5, v0
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v1, r3
+; CHECK-BE-NEXT:    vperm v2, v0, v1, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v5
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x double>, <16 x double>* %0, align 128
@@ -619,16 +601,17 @@ define i16 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xscvdpsxws f0, v2
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    vmrghb v2, v3, v2
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    stxsihx v2, 0, r3
 ; CHECK-BE-NEXT:    lhz r3, -2(r1)
@@ -695,28 +678,27 @@ define i32 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v3, r3
-; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -822,52 +804,47 @@ define i64 @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mtvsrd v3, r3
-; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v5, r3
-; CHECK-BE-NEXT:    vmrghb v4, v4, v5
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v4
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -1054,100 +1031,87 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unna
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs7, 112(r3)
-; CHECK-BE-NEXT:    lxv vs6, 96(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    xscvdpsxws f8, f7
 ; CHECK-BE-NEXT:    xxswapd vs7, vs7
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    lxv vs4, 64(r3)
 ; CHECK-BE-NEXT:    lxv vs5, 80(r3)
+; CHECK-BE-NEXT:    lxv vs6, 96(r3)
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI7_0@toc@l
+; CHECK-BE-NEXT:    lxvx v2, 0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f7, f7
 ; CHECK-BE-NEXT:    mffprwz r3, f8
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f7
 ; CHECK-BE-NEXT:    xscvdpsxws f7, f6
 ; CHECK-BE-NEXT:    xxswapd vs6, vs6
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    mtvsrd v3, r3
-; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f7
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f6
 ; CHECK-BE-NEXT:    xscvdpsxws f6, f5
 ; CHECK-BE-NEXT:    xxswapd vs5, vs5
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f6
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f5
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f4
 ; CHECK-BE-NEXT:    xxswapd vs4, vs4
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f4
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mtvsrd v5, r3
-; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
 ; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mtvsrd v4, r3
-; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mtvsrd v5, r3
-; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrd v5, r3
-; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mtvsrd v0, r3
-; CHECK-BE-NEXT:    vmrghb v5, v5, v0
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v1, r3
+; CHECK-BE-NEXT:    vperm v2, v0, v1, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v5
+; CHECK-BE-NEXT:    vmrghw v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x double>, <16 x double>* %0, align 128

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
index ee03a5edf13e..e73fa39dea34 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
@@ -22,10 +22,8 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    mtfprwz f1, r3
 ; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    xscvuxdsp f1, f1
-; CHECK-P8-NEXT:    xscvdpspn vs0, f0
-; CHECK-P8-NEXT:    xscvdpspn vs1, f1
-; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P8-NEXT:    xscvdpspn v2, f0
+; CHECK-P8-NEXT:    xscvdpspn v3, f1
 ; CHECK-P8-NEXT:    vmrghw v2, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -42,12 +40,10 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
 ; CHECK-P9-NEXT:    clrlwi r3, r3, 16
-; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvdpspn v3, f0
 ; CHECK-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvdpspn v2, f0
 ; CHECK-P9-NEXT:    vmrghw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
@@ -67,7 +63,7 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-NEXT:    xscvuxdsp f0, f0
 ; CHECK-BE-NEXT:    xscvdpspn v2, f0
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    vmrgow v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -101,7 +97,7 @@ define <4 x float> @test4elt(i64 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-BE-NEXT:    xxlxor v4, v4, v4
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
 ; CHECK-BE-NEXT:    lxvx v3, 0, r3
-; CHECK-BE-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -136,19 +132,13 @@ define void @test8elt(<8 x float>* noalias nocapture sret(<8 x float>) %agg.resu
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
-; CHECK-BE-NEXT:    xxlxor v4, v4, v4
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
-; CHECK-BE-NEXT:    lxvx v3, 0, r4
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_1@toc@l
-; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
-; CHECK-BE-NEXT:    xvcvuxwsp vs0, v3
-; CHECK-BE-NEXT:    lxvx v3, 0, r4
-; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xxlxor v3, v3, v3
+; CHECK-BE-NEXT:    vmrghh v4, v3, v2
+; CHECK-BE-NEXT:    vmrglh v2, v3, v2
+; CHECK-BE-NEXT:    xvcvuxwsp vs0, v4
 ; CHECK-BE-NEXT:    xvcvuxwsp vs1, v2
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = uitofp <8 x i16> %a to <8 x float>
@@ -218,21 +208,18 @@ define void @test16elt(<16 x float>* noalias nocapture sret(<16 x float>) %agg.r
 ; CHECK-BE-NEXT:    xxlxor v5, v5, v5
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
 ; CHECK-BE-NEXT:    lxvx v4, 0, r4
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_1@toc@l
-; CHECK-BE-NEXT:    vperm v0, v3, v5, v4
+; CHECK-BE-NEXT:    vperm v0, v5, v3, v4
+; CHECK-BE-NEXT:    vperm v4, v5, v2, v4
+; CHECK-BE-NEXT:    vmrglh v3, v5, v3
+; CHECK-BE-NEXT:    vmrglh v2, v5, v2
 ; CHECK-BE-NEXT:    xvcvuxwsp vs0, v0
-; CHECK-BE-NEXT:    lxvx v0, 0, r4
-; CHECK-BE-NEXT:    vperm v3, v5, v3, v0
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
-; CHECK-BE-NEXT:    xvcvuxwsp vs1, v3
-; CHECK-BE-NEXT:    vperm v3, v2, v5, v4
-; CHECK-BE-NEXT:    vperm v2, v5, v2, v0
+; CHECK-BE-NEXT:    xvcvuxwsp vs1, v4
+; CHECK-BE-NEXT:    stxv vs1, 32(r3)
 ; CHECK-BE-NEXT:    xvcvuxwsp vs2, v3
 ; CHECK-BE-NEXT:    xvcvuxwsp vs3, v2
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs2, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i16>, <16 x i16>* %0, align 32
@@ -254,10 +241,8 @@ define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    mtfprwa f1, r3
 ; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    xscvsxdsp f1, f1
-; CHECK-P8-NEXT:    xscvdpspn vs0, f0
-; CHECK-P8-NEXT:    xscvdpspn vs1, f1
-; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P8-NEXT:    xscvdpspn v2, f0
+; CHECK-P8-NEXT:    xscvdpspn v3, f1
 ; CHECK-P8-NEXT:    vmrghw v2, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -274,12 +259,10 @@ define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
 ; CHECK-P9-NEXT:    extsh r3, r3
-; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvdpspn v3, f0
 ; CHECK-P9-NEXT:    mtfprwa f0, r3
 ; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvdpspn v2, f0
 ; CHECK-P9-NEXT:    vmrghw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
@@ -299,7 +282,7 @@ define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE-NEXT:    mtfprwa f0, r3
 ; CHECK-BE-NEXT:    xscvsxdsp f0, f0
 ; CHECK-BE-NEXT:    xscvdpspn v2, f0
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    vmrgow v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -374,18 +357,15 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret(<8 x float>) %a
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
-; CHECK-BE-NEXT:    xxlxor v4, v4, v4
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_0@toc@l
-; CHECK-BE-NEXT:    lxvx v3, 0, r4
-; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    xxlxor v3, v3, v3
+; CHECK-BE-NEXT:    vmrglh v3, v3, v2
 ; CHECK-BE-NEXT:    vmrghh v2, v2, v2
 ; CHECK-BE-NEXT:    vextsh2w v3, v3
 ; CHECK-BE-NEXT:    vextsh2w v2, v2
 ; CHECK-BE-NEXT:    xvcvsxwsp vs0, v3
 ; CHECK-BE-NEXT:    xvcvsxwsp vs1, v2
-; CHECK-BE-NEXT:    stxv vs1, 0(r3)
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = sitofp <8 x i16> %a to <8 x float>
@@ -451,26 +431,23 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret(<16 x float>)
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv v2, 16(r4)
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
-; CHECK-BE-NEXT:    xxlxor v5, v5, v5
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_0@toc@l
-; CHECK-BE-NEXT:    lxvx v4, 0, r4
-; CHECK-BE-NEXT:    vperm v0, v5, v3, v4
-; CHECK-BE-NEXT:    vperm v4, v5, v2, v4
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    vmrglh v5, v4, v3
+; CHECK-BE-NEXT:    vmrglh v4, v4, v2
 ; CHECK-BE-NEXT:    vmrghh v3, v3, v3
 ; CHECK-BE-NEXT:    vmrghh v2, v2, v2
-; CHECK-BE-NEXT:    vextsh2w v0, v0
+; CHECK-BE-NEXT:    vextsh2w v5, v5
 ; CHECK-BE-NEXT:    vextsh2w v4, v4
 ; CHECK-BE-NEXT:    vextsh2w v3, v3
 ; CHECK-BE-NEXT:    vextsh2w v2, v2
-; CHECK-BE-NEXT:    xvcvsxwsp vs0, v0
+; CHECK-BE-NEXT:    xvcvsxwsp vs0, v5
 ; CHECK-BE-NEXT:    xvcvsxwsp vs1, v4
 ; CHECK-BE-NEXT:    xvcvsxwsp vs2, v3
 ; CHECK-BE-NEXT:    xvcvsxwsp vs3, v2
-; CHECK-BE-NEXT:    stxv vs3, 32(r3)
-; CHECK-BE-NEXT:    stxv vs2, 0(r3)
 ; CHECK-BE-NEXT:    stxv vs1, 48(r3)
+; CHECK-BE-NEXT:    stxv vs3, 32(r3)
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs2, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i16>, <16 x i16>* %0, align 32

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
index 99a103103096..c4fdb613d05a 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
@@ -23,7 +23,7 @@ define <2 x double> @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-P9-NEXT:    xxlxor v4, v4, v4
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI0_0@toc@l
@@ -34,12 +34,12 @@ define <2 x double> @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-BE-NEXT:    xxlxor v4, v4, v4
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
 ; CHECK-BE-NEXT:    lxvx v3, 0, r3
-; CHECK-BE-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-NEXT:    xvcvuxddp v2, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -97,7 +97,7 @@ define void @test4elt(<4 x double>* noalias nocapture sret(<4 x double>) %agg.re
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_1@toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_1@toc@l
-; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
@@ -186,7 +186,7 @@ define void @test8elt(<8 x double>* noalias nocapture sret(<8 x double>) %agg.re
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_1@toc@l
-; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_2@toc@ha
@@ -324,7 +324,7 @@ define void @test16elt(<16 x double>* noalias nocapture sret(<16 x double>) %agg
 ; CHECK-BE-NEXT:    lxvx v4, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_1@toc@l
-; CHECK-BE-NEXT:    vperm v0, v3, v5, v4
+; CHECK-BE-NEXT:    vperm v0, v5, v3, v4
 ; CHECK-BE-NEXT:    xvcvuxddp vs0, v0
 ; CHECK-BE-NEXT:    lxvx v0, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
@@ -342,7 +342,7 @@ define void @test16elt(<16 x double>* noalias nocapture sret(<16 x double>) %agg
 ; CHECK-BE-NEXT:    vperm v3, v5, v3, v6
 ; CHECK-BE-NEXT:    stxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    xvcvuxddp vs3, v3
-; CHECK-BE-NEXT:    vperm v3, v2, v5, v4
+; CHECK-BE-NEXT:    vperm v3, v5, v2, v4
 ; CHECK-BE-NEXT:    xvcvuxddp vs4, v3
 ; CHECK-BE-NEXT:    vperm v3, v5, v2, v0
 ; CHECK-BE-NEXT:    xvcvuxddp vs5, v3
@@ -382,7 +382,7 @@ define <2 x double> @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI4_0@toc@l
 ; CHECK-P9-NEXT:    lxvx v3, 0, r3
@@ -393,7 +393,7 @@ define <2 x double> @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
 ; CHECK-BE-NEXT:    lxvx v3, 0, r3

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
index 8dd041aa25dc..6a490737c710 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
@@ -16,10 +16,8 @@ define i64 @test2elt(<2 x i64> %a) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    xxlor vs1, v2, v2
 ; CHECK-P8-NEXT:    xscvuxdsp f1, f1
 ; CHECK-P8-NEXT:    xscvuxdsp f0, f0
-; CHECK-P8-NEXT:    xscvdpspn vs1, f1
-; CHECK-P8-NEXT:    xscvdpspn vs0, f0
-; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P8-NEXT:    xscvdpspn v3, f1
+; CHECK-P8-NEXT:    xscvdpspn v2, f0
 ; CHECK-P8-NEXT:    vmrghw v2, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -29,12 +27,10 @@ define i64 @test2elt(<2 x i64> %a) local_unnamed_addr #0 {
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
 ; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvdpspn v3, f0
 ; CHECK-P9-NEXT:    xxlor vs0, v2, v2
 ; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvdpspn v2, f0
 ; CHECK-P9-NEXT:    vmrghw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
@@ -47,7 +43,7 @@ define i64 @test2elt(<2 x i64> %a) local_unnamed_addr #0 {
 ; CHECK-BE-NEXT:    xscvuxdsp f0, f0
 ; CHECK-BE-NEXT:    xscvdpspn v2, f1
 ; CHECK-BE-NEXT:    xscvdpspn v3, f0
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    vmrgow v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -309,10 +305,8 @@ define i64 @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    xxlor vs1, v2, v2
 ; CHECK-P8-NEXT:    xscvsxdsp f1, f1
 ; CHECK-P8-NEXT:    xscvsxdsp f0, f0
-; CHECK-P8-NEXT:    xscvdpspn vs1, f1
-; CHECK-P8-NEXT:    xscvdpspn vs0, f0
-; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P8-NEXT:    xscvdpspn v3, f1
+; CHECK-P8-NEXT:    xscvdpspn v2, f0
 ; CHECK-P8-NEXT:    vmrghw v2, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -322,12 +316,10 @@ define i64 @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 {
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
 ; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvdpspn v3, f0
 ; CHECK-P9-NEXT:    xxlor vs0, v2, v2
 ; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvdpspn v2, f0
 ; CHECK-P9-NEXT:    vmrghw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
@@ -340,7 +332,7 @@ define i64 @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 {
 ; CHECK-BE-NEXT:    xscvsxdsp f0, f0
 ; CHECK-BE-NEXT:    xscvdpspn v2, f1
 ; CHECK-BE-NEXT:    xscvdpspn v3, f0
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    vmrgow v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
index 6f0f6fd26ed0..f9ab5e1f60bf 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
@@ -22,10 +22,8 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    mtfprwz f1, r3
 ; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    xscvuxdsp f1, f1
-; CHECK-P8-NEXT:    xscvdpspn vs0, f0
-; CHECK-P8-NEXT:    xscvdpspn vs1, f1
-; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P8-NEXT:    xscvdpspn v2, f0
+; CHECK-P8-NEXT:    xscvdpspn v3, f1
 ; CHECK-P8-NEXT:    vmrghw v2, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -42,12 +40,10 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P9-NEXT:    vextubrx r3, r3, v2
 ; CHECK-P9-NEXT:    clrlwi r3, r3, 24
-; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvdpspn v3, f0
 ; CHECK-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvdpspn v2, f0
 ; CHECK-P9-NEXT:    vmrghw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
@@ -67,7 +63,7 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-NEXT:    xscvuxdsp f0, f0
 ; CHECK-BE-NEXT:    xscvdpspn v2, f0
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    vmrgow v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -91,7 +87,7 @@ define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 {
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
 ; CHECK-P9-NEXT:    xxlxor v4, v4, v4
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI1_0@toc@l
@@ -102,12 +98,12 @@ define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 {
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
 ; CHECK-BE-NEXT:    xxlxor v4, v4, v4
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
 ; CHECK-BE-NEXT:    lxvx v3, 0, r3
-; CHECK-BE-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -163,7 +159,7 @@ define void @test8elt(<8 x float>* noalias nocapture sret(<8 x float>) %agg.resu
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_1@toc@l
-; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-BE-NEXT:    xvcvuxwsp vs0, v3
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
@@ -248,7 +244,7 @@ define void @test16elt(<16 x float>* noalias nocapture sret(<16 x float>) %agg.r
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_1@toc@l
-; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-BE-NEXT:    xvcvuxwsp vs0, v3
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
@@ -287,10 +283,8 @@ define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    mtfprwa f1, r3
 ; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    xscvsxdsp f1, f1
-; CHECK-P8-NEXT:    xscvdpspn vs0, f0
-; CHECK-P8-NEXT:    xscvdpspn vs1, f1
-; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P8-NEXT:    xscvdpspn v2, f0
+; CHECK-P8-NEXT:    xscvdpspn v3, f1
 ; CHECK-P8-NEXT:    vmrghw v2, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -307,12 +301,10 @@ define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P9-NEXT:    vextubrx r3, r3, v2
 ; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvdpspn v3, f0
 ; CHECK-P9-NEXT:    mtfprwa f0, r3
 ; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvdpspn v2, f0
 ; CHECK-P9-NEXT:    vmrghw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
@@ -332,7 +324,7 @@ define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE-NEXT:    mtfprwa f0, r3
 ; CHECK-BE-NEXT:    xscvsxdsp f0, f0
 ; CHECK-BE-NEXT:    xscvdpspn v2, f0
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    vmrgow v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -359,7 +351,7 @@ define <4 x float> @test4elt_signed(i32 %a.coerce) local_unnamed_addr #1 {
 ;
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI5_0@toc@l
 ; CHECK-P9-NEXT:    lxvx v3, 0, r3
@@ -370,7 +362,7 @@ define <4 x float> @test4elt_signed(i32 %a.coerce) local_unnamed_addr #1 {
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
 ; CHECK-BE-NEXT:    lxvx v3, 0, r3

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
index 9e4014c8a2db..60cb5877f2b8 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
@@ -23,7 +23,7 @@ define <2 x double> @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-P9-NEXT:    xxlxor v4, v4, v4
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI0_0@toc@l
@@ -34,12 +34,12 @@ define <2 x double> @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-BE-NEXT:    xxlxor v4, v4, v4
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
 ; CHECK-BE-NEXT:    lxvx v3, 0, r3
-; CHECK-BE-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-NEXT:    xvcvuxddp v2, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -72,7 +72,7 @@ define void @test4elt(<4 x double>* noalias nocapture sret(<4 x double>) %agg.re
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrws v2, r4
+; CHECK-P9-NEXT:    mtvsrwz v2, r4
 ; CHECK-P9-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
 ; CHECK-P9-NEXT:    xxlxor v4, v4, v4
 ; CHECK-P9-NEXT:    addi r4, r4, .LCPI1_0@toc@l
@@ -90,14 +90,14 @@ define void @test4elt(<4 x double>* noalias nocapture sret(<4 x double>) %agg.re
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrws v2, r4
+; CHECK-BE-NEXT:    mtvsrwz v2, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
 ; CHECK-BE-NEXT:    xxlxor v4, v4, v4
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_0@toc@l
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_1@toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_1@toc@l
-; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
@@ -189,7 +189,7 @@ define void @test8elt(<8 x double>* noalias nocapture sret(<8 x double>) %agg.re
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_1@toc@l
-; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_2@toc@ha
@@ -346,7 +346,7 @@ define void @test16elt(<16 x double>* noalias nocapture sret(<16 x double>) %agg
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_1@toc@l
-; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
@@ -415,7 +415,7 @@ define <2 x double> @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI4_0@toc@l
 ; CHECK-P9-NEXT:    lxvx v3, 0, r3
@@ -426,7 +426,7 @@ define <2 x double> @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrws v2, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0 at toc@l
 ; CHECK-BE-NEXT:    lxvx v3, 0, r3
@@ -471,7 +471,7 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret(<4 x double>)
 ;
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrws v2, r4
+; CHECK-P9-NEXT:    mtvsrwz v2, r4
 ; CHECK-P9-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
 ; CHECK-P9-NEXT:    addi r4, r4, .LCPI5_0@toc@l
 ; CHECK-P9-NEXT:    lxvx v3, 0, r4
@@ -490,7 +490,7 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret(<4 x double>)
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrws v2, r4
+; CHECK-BE-NEXT:    mtvsrwz v2, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
 ; CHECK-BE-NEXT:    xxlxor v3, v3, v3
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI5_0@toc@l

diff  --git a/llvm/test/CodeGen/PowerPC/vec_int_ext.ll b/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
index e833b2527245..47f79eee5ef7 100644
--- a/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
@@ -345,37 +345,32 @@ define <8 x i16> @testInvalidExtend(<16 x i8> %a) {
 ; CHECK-BE-NEXT:    li 6, 6
 ; CHECK-BE-NEXT:    extsb 9, 9
 ; CHECK-BE-NEXT:    extsb 10, 10
-; CHECK-BE-NEXT:    extsb 7, 7
-; CHECK-BE-NEXT:    extsb 8, 8
 ; CHECK-BE-NEXT:    vextublx 3, 3, 2
 ; CHECK-BE-NEXT:    vextublx 4, 4, 2
+; CHECK-BE-NEXT:    vextublx 5, 5, 2
+; CHECK-BE-NEXT:    extsb 7, 7
+; CHECK-BE-NEXT:    extsb 8, 8
+; CHECK-BE-NEXT:    extsb 5, 5
 ; CHECK-BE-NEXT:    extsb 3, 3
 ; CHECK-BE-NEXT:    extsb 4, 4
-; CHECK-BE-NEXT:    sldi 10, 10, 48
-; CHECK-BE-NEXT:    sldi 9, 9, 48
-; CHECK-BE-NEXT:    vextublx 5, 5, 2
+; CHECK-BE-NEXT:    mtvsrwz 35, 9
+; CHECK-BE-NEXT:    addis 9, 2, .LCPI11_0 at toc@ha
 ; CHECK-BE-NEXT:    vextublx 6, 6, 2
-; CHECK-BE-NEXT:    sldi 8, 8, 48
-; CHECK-BE-NEXT:    sldi 7, 7, 48
-; CHECK-BE-NEXT:    extsb 5, 5
+; CHECK-BE-NEXT:    mtvsrwz 34, 10
+; CHECK-BE-NEXT:    mtvsrwz 37, 7
 ; CHECK-BE-NEXT:    extsb 6, 6
-; CHECK-BE-NEXT:    sldi 6, 6, 48
-; CHECK-BE-NEXT:    sldi 5, 5, 48
-; CHECK-BE-NEXT:    sldi 4, 4, 48
-; CHECK-BE-NEXT:    sldi 3, 3, 48
-; CHECK-BE-NEXT:    mtvsrd 34, 10
-; CHECK-BE-NEXT:    mtvsrd 35, 9
-; CHECK-BE-NEXT:    mtvsrd 36, 7
-; CHECK-BE-NEXT:    mtvsrd 37, 3
-; CHECK-BE-NEXT:    vmrghh 2, 3, 2
-; CHECK-BE-NEXT:    mtvsrd 35, 8
-; CHECK-BE-NEXT:    vmrghh 3, 4, 3
-; CHECK-BE-NEXT:    mtvsrd 36, 5
+; CHECK-BE-NEXT:    mtvsrwz 32, 3
+; CHECK-BE-NEXT:    addi 9, 9, .LCPI11_0 at toc@l
+; CHECK-BE-NEXT:    lxvx 36, 0, 9
+; CHECK-BE-NEXT:    vperm 2, 3, 2, 4
+; CHECK-BE-NEXT:    mtvsrwz 35, 8
+; CHECK-BE-NEXT:    vperm 3, 5, 3, 4
+; CHECK-BE-NEXT:    mtvsrwz 37, 5
 ; CHECK-BE-NEXT:    vmrghw 2, 3, 2
-; CHECK-BE-NEXT:    mtvsrd 35, 6
-; CHECK-BE-NEXT:    vmrghh 3, 4, 3
-; CHECK-BE-NEXT:    mtvsrd 36, 4
-; CHECK-BE-NEXT:    vmrghh 4, 5, 4
+; CHECK-BE-NEXT:    mtvsrwz 35, 6
+; CHECK-BE-NEXT:    vperm 3, 5, 3, 4
+; CHECK-BE-NEXT:    mtvsrwz 37, 4
+; CHECK-BE-NEXT:    vperm 4, 0, 5, 4
 ; CHECK-BE-NEXT:    vmrghw 3, 4, 3
 ; CHECK-BE-NEXT:    xxmrghd 34, 35, 34
 ; CHECK-BE-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index dc543aaa6132..d8b5dfba96fd 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -79,13 +79,10 @@ define <3 x float> @constrained_vector_fdiv_v3f32() #0 {
 ; PC64LE-NEXT:    addi 3, 3, .LCPI2_4 at toc@l
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xsdivsp 0, 3, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 1, 1, 3
-; PC64LE-NEXT:    xxsldwi 35, 2, 2, 3
+; PC64LE-NEXT:    xscvdpspn 34, 1
+; PC64LE-NEXT:    xscvdpspn 35, 2
 ; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE-NEXT:    xscvdpspn 35, 0
 ; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    blr
 ;
@@ -105,13 +102,10 @@ define <3 x float> @constrained_vector_fdiv_v3f32() #0 {
 ; PC64LE9-NEXT:    lxvx 36, 0, 3
 ; PC64LE9-NEXT:    xsdivsp 2, 2, 0
 ; PC64LE9-NEXT:    xsdivsp 0, 3, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 2
+; PC64LE9-NEXT:    xscvdpspn 34, 0
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 1
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    blr
 entry:
@@ -354,16 +348,13 @@ define <3 x float> @constrained_vector_frem_v3f32() #0 {
 ; PC64LE-NEXT:    lfs 1, .LCPI7_3 at toc@l(3)
 ; PC64LE-NEXT:    bl fmodf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 29
+; PC64LE-NEXT:    xscvdpspn 34, 29
 ; PC64LE-NEXT:    addis 3, 2, .LCPI7_4 at toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI7_4 at toc@l
 ; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
 ; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE-NEXT:    xscvdpspn 35, 30
 ; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
@@ -400,16 +391,13 @@ define <3 x float> @constrained_vector_frem_v3f32() #0 {
 ; PC64LE9-NEXT:    lfs 1, .LCPI7_3 at toc@l(3)
 ; PC64LE9-NEXT:    bl fmodf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 29
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI7_4 at toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI7_4 at toc@l
 ; PC64LE9-NEXT:    lxvx 36, 0, 3
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 29
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 30
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 30
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -695,13 +683,10 @@ define <3 x float> @constrained_vector_fmul_v3f32() #0 {
 ; PC64LE-NEXT:    addi 3, 3, .LCPI12_4 at toc@l
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xsmulsp 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 1, 1, 3
-; PC64LE-NEXT:    xxsldwi 35, 2, 2, 3
+; PC64LE-NEXT:    xscvdpspn 34, 1
+; PC64LE-NEXT:    xscvdpspn 35, 2
 ; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE-NEXT:    xscvdpspn 35, 0
 ; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    blr
 ;
@@ -721,13 +706,10 @@ define <3 x float> @constrained_vector_fmul_v3f32() #0 {
 ; PC64LE9-NEXT:    lxvx 36, 0, 3
 ; PC64LE9-NEXT:    xsmulsp 2, 1, 2
 ; PC64LE9-NEXT:    xsmulsp 1, 1, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxsldwi 34, 1, 1, 3
-; PC64LE9-NEXT:    xscvdpspn 1, 2
-; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 2
+; PC64LE9-NEXT:    xscvdpspn 34, 1
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 0
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    blr
 entry:
@@ -910,13 +892,10 @@ define <3 x float> @constrained_vector_fadd_v3f32() #0 {
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xsaddsp 2, 0, 2
 ; PC64LE-NEXT:    xsaddsp 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 1, 1, 3
-; PC64LE-NEXT:    xxsldwi 35, 2, 2, 3
+; PC64LE-NEXT:    xscvdpspn 34, 1
+; PC64LE-NEXT:    xscvdpspn 35, 2
 ; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE-NEXT:    xscvdpspn 35, 0
 ; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    blr
 ;
@@ -935,13 +914,10 @@ define <3 x float> @constrained_vector_fadd_v3f32() #0 {
 ; PC64LE9-NEXT:    lxvx 36, 0, 3
 ; PC64LE9-NEXT:    xsaddsp 2, 0, 2
 ; PC64LE9-NEXT:    xsaddsp 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 2
+; PC64LE9-NEXT:    xscvdpspn 34, 0
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 1
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    blr
 entry:
@@ -1122,13 +1098,10 @@ define <3 x float> @constrained_vector_fsub_v3f32() #0 {
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xssubsp 2, 0, 2
 ; PC64LE-NEXT:    xssubsp 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 1, 1, 3
-; PC64LE-NEXT:    xxsldwi 35, 2, 2, 3
+; PC64LE-NEXT:    xscvdpspn 34, 1
+; PC64LE-NEXT:    xscvdpspn 35, 2
 ; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE-NEXT:    xscvdpspn 35, 0
 ; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    blr
 ;
@@ -1147,13 +1120,10 @@ define <3 x float> @constrained_vector_fsub_v3f32() #0 {
 ; PC64LE9-NEXT:    lxvx 36, 0, 3
 ; PC64LE9-NEXT:    xssubsp 2, 0, 2
 ; PC64LE9-NEXT:    xssubsp 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 2
+; PC64LE9-NEXT:    xscvdpspn 34, 0
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 1
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    blr
 entry:
@@ -1318,16 +1288,13 @@ define <3 x float> @constrained_vector_sqrt_v3f32() #0 {
 ; PC64LE-NEXT:    addis 3, 2, .LCPI27_3 at toc@ha
 ; PC64LE-NEXT:    xssqrtsp 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI27_3 at toc@l
+; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xssqrtsp 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 2
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    xscvdpspn 34, 0
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    xscvdpspn 35, 2
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sqrt_v3f32:
@@ -1343,12 +1310,9 @@ define <3 x float> @constrained_vector_sqrt_v3f32() #0 {
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI27_3 at toc@l
 ; PC64LE9-NEXT:    xssqrtsp 1, 1
 ; PC64LE9-NEXT:    xssqrtsp 2, 2
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 2, 2
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 3
-; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 3
+; PC64LE9-NEXT:    xscvdpspn 36, 0
+; PC64LE9-NEXT:    xscvdpspn 35, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 2
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxvx 35, 0, 3
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
@@ -1571,16 +1535,13 @@ define <3 x float> @constrained_vector_pow_v3f32() #0 {
 ; PC64LE-NEXT:    lfs 1, .LCPI32_3 at toc@l(3)
 ; PC64LE-NEXT:    bl powf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 29
+; PC64LE-NEXT:    xscvdpspn 34, 29
 ; PC64LE-NEXT:    addis 3, 2, .LCPI32_4 at toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI32_4 at toc@l
 ; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
 ; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE-NEXT:    xscvdpspn 35, 30
 ; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
@@ -1617,16 +1578,13 @@ define <3 x float> @constrained_vector_pow_v3f32() #0 {
 ; PC64LE9-NEXT:    lfs 1, .LCPI32_3 at toc@l(3)
 ; PC64LE9-NEXT:    bl powf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 29
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI32_4 at toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI32_4 at toc@l
 ; PC64LE9-NEXT:    lxvx 36, 0, 3
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 29
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 30
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 30
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -1963,16 +1921,13 @@ define <3 x float> @constrained_vector_powi_v3f32() #0 {
 ; PC64LE-NEXT:    lfs 1, .LCPI37_2 at toc@l(3)
 ; PC64LE-NEXT:    bl __powisf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 34, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI37_3 at toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI37_3 at toc@l
 ; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
 ; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE-NEXT:    xscvdpspn 35, 31
 ; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    addi 1, 1, 48
 ; PC64LE-NEXT:    ld 0, 16(1)
@@ -2005,16 +1960,13 @@ define <3 x float> @constrained_vector_powi_v3f32() #0 {
 ; PC64LE9-NEXT:    lfs 1, .LCPI37_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl __powisf2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI37_3 at toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI37_3 at toc@l
 ; PC64LE9-NEXT:    lxvx 36, 0, 3
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 31
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 31
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -2320,17 +2272,14 @@ define <3 x float> @constrained_vector_sin_v3f32() #0 {
 ; PC64LE-NEXT:    lfs 1, .LCPI42_2 at toc@l(3)
 ; PC64LE-NEXT:    bl sinf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 34, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI42_3 at toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI42_3 at toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    xscvdpspn 35, 31
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    addi 1, 1, 48
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
@@ -2359,16 +2308,13 @@ define <3 x float> @constrained_vector_sin_v3f32() #0 {
 ; PC64LE9-NEXT:    lfs 1, .LCPI42_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl sinf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI42_3 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI42_3 at toc@l
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 31
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 3
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -2657,17 +2603,14 @@ define <3 x float> @constrained_vector_cos_v3f32() #0 {
 ; PC64LE-NEXT:    lfs 1, .LCPI47_2 at toc@l(3)
 ; PC64LE-NEXT:    bl cosf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 34, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI47_3 at toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI47_3 at toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    xscvdpspn 35, 31
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    addi 1, 1, 48
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
@@ -2696,16 +2639,13 @@ define <3 x float> @constrained_vector_cos_v3f32() #0 {
 ; PC64LE9-NEXT:    lfs 1, .LCPI47_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl cosf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI47_3 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI47_3 at toc@l
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 31
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 3
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -2994,17 +2934,14 @@ define <3 x float> @constrained_vector_exp_v3f32() #0 {
 ; PC64LE-NEXT:    lfs 1, .LCPI52_2 at toc@l(3)
 ; PC64LE-NEXT:    bl expf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 34, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI52_3 at toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI52_3 at toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    xscvdpspn 35, 31
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    addi 1, 1, 48
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
@@ -3033,16 +2970,13 @@ define <3 x float> @constrained_vector_exp_v3f32() #0 {
 ; PC64LE9-NEXT:    lfs 1, .LCPI52_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl expf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI52_3 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI52_3 at toc@l
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 31
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 3
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -3331,17 +3265,14 @@ define <3 x float> @constrained_vector_exp2_v3f32() #0 {
 ; PC64LE-NEXT:    lfs 1, .LCPI57_2 at toc@l(3)
 ; PC64LE-NEXT:    bl exp2f
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 34, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI57_3 at toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI57_3 at toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    xscvdpspn 35, 31
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    addi 1, 1, 48
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
@@ -3370,16 +3301,13 @@ define <3 x float> @constrained_vector_exp2_v3f32() #0 {
 ; PC64LE9-NEXT:    lfs 1, .LCPI57_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp2f
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI57_3 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI57_3 at toc@l
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 31
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 3
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -3668,17 +3596,14 @@ define <3 x float> @constrained_vector_log_v3f32() #0 {
 ; PC64LE-NEXT:    lfs 1, .LCPI62_2 at toc@l(3)
 ; PC64LE-NEXT:    bl logf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 34, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI62_3 at toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI62_3 at toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    xscvdpspn 35, 31
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    addi 1, 1, 48
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
@@ -3707,16 +3632,13 @@ define <3 x float> @constrained_vector_log_v3f32() #0 {
 ; PC64LE9-NEXT:    lfs 1, .LCPI62_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl logf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI62_3 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI62_3 at toc@l
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 31
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 3
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -4005,17 +3927,14 @@ define <3 x float> @constrained_vector_log10_v3f32() #0 {
 ; PC64LE-NEXT:    lfs 1, .LCPI67_2 at toc@l(3)
 ; PC64LE-NEXT:    bl log10f
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 34, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI67_3 at toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI67_3 at toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    xscvdpspn 35, 31
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    addi 1, 1, 48
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
@@ -4044,16 +3963,13 @@ define <3 x float> @constrained_vector_log10_v3f32() #0 {
 ; PC64LE9-NEXT:    lfs 1, .LCPI67_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl log10f
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI67_3 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI67_3 at toc@l
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 31
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 3
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -4342,17 +4258,14 @@ define <3 x float> @constrained_vector_log2_v3f32() #0 {
 ; PC64LE-NEXT:    lfs 1, .LCPI72_2 at toc@l(3)
 ; PC64LE-NEXT:    bl log2f
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 34, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI72_3 at toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI72_3 at toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    xscvdpspn 35, 31
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    addi 1, 1, 48
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
@@ -4381,16 +4294,13 @@ define <3 x float> @constrained_vector_log2_v3f32() #0 {
 ; PC64LE9-NEXT:    lfs 1, .LCPI72_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl log2f
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI72_3 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI72_3 at toc@l
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 31
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 3
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -4625,16 +4535,13 @@ define <3 x float> @constrained_vector_rint_v3f32() #0 {
 ; PC64LE-NEXT:    addis 3, 2, .LCPI77_3 at toc@ha
 ; PC64LE-NEXT:    xsrdpic 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI77_3 at toc@l
-; PC64LE-NEXT:    xsrdpic 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 2
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xscvdpspn 34, 0
+; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xsrdpic 0, 2
 ; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    xscvdpspn 35, 0
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_rint_v3f32:
@@ -4650,12 +4557,9 @@ define <3 x float> @constrained_vector_rint_v3f32() #0 {
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI77_3 at toc@l
 ; PC64LE9-NEXT:    xsrdpic 1, 1
 ; PC64LE9-NEXT:    xsrdpic 2, 2
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 2, 2
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 3
-; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 3
+; PC64LE9-NEXT:    xscvdpspn 36, 0
+; PC64LE9-NEXT:    xscvdpspn 35, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 2
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxvx 35, 0, 3
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
@@ -4854,17 +4758,14 @@ define <3 x float> @constrained_vector_nearbyint_v3f32() #0 {
 ; PC64LE-NEXT:    lfs 1, .LCPI82_2 at toc@l(3)
 ; PC64LE-NEXT:    bl nearbyintf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 34, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI82_3 at toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI82_3 at toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    xscvdpspn 35, 31
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    addi 1, 1, 48
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
@@ -4893,16 +4794,13 @@ define <3 x float> @constrained_vector_nearbyint_v3f32() #0 {
 ; PC64LE9-NEXT:    lfs 1, .LCPI82_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl nearbyintf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI82_3 at toc@ha
+; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI82_3 at toc@l
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 31
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 3
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -5177,16 +5075,13 @@ define <3 x float> @constrained_vector_maxnum_v3f32() #0 {
 ; PC64LE-NEXT:    lfs 2, .LCPI87_4 at toc@l(3)
 ; PC64LE-NEXT:    bl fmaxf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 29
+; PC64LE-NEXT:    xscvdpspn 34, 29
 ; PC64LE-NEXT:    addis 3, 2, .LCPI87_5 at toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI87_5 at toc@l
 ; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
 ; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE-NEXT:    xscvdpspn 35, 30
 ; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
@@ -5224,16 +5119,13 @@ define <3 x float> @constrained_vector_maxnum_v3f32() #0 {
 ; PC64LE9-NEXT:    lfs 2, .LCPI87_4 at toc@l(3)
 ; PC64LE9-NEXT:    bl fmaxf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 29
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI87_5 at toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI87_5 at toc@l
 ; PC64LE9-NEXT:    lxvx 36, 0, 3
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 29
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 30
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 30
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -5464,16 +5356,13 @@ define <3 x float> @constrained_vector_minnum_v3f32() #0 {
 ; PC64LE-NEXT:    lfs 2, .LCPI92_4 at toc@l(3)
 ; PC64LE-NEXT:    bl fminf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 29
+; PC64LE-NEXT:    xscvdpspn 34, 29
 ; PC64LE-NEXT:    addis 3, 2, .LCPI92_5 at toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI92_5 at toc@l
 ; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
 ; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE-NEXT:    xscvdpspn 35, 30
 ; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
@@ -5511,16 +5400,13 @@ define <3 x float> @constrained_vector_minnum_v3f32() #0 {
 ; PC64LE9-NEXT:    lfs 2, .LCPI92_4 at toc@l(3)
 ; PC64LE9-NEXT:    bl fminf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 34, 1
+; PC64LE9-NEXT:    xscvdpspn 35, 29
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI92_5 at toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI92_5 at toc@l
 ; PC64LE9-NEXT:    lxvx 36, 0, 3
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 29
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 30
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 30
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -5696,12 +5582,12 @@ define <2 x i32> @constrained_vector_fptosi_v2i32_v2f32() #0 {
 ; PC64LE9-NEXT:    lfs 0, .LCPI96_0 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpsxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 34, 3
+; PC64LE9-NEXT:    mtvsrwz 34, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI96_1 at toc@ha
 ; PC64LE9-NEXT:    lfs 0, .LCPI96_1 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpsxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 35, 3
+; PC64LE9-NEXT:    mtvsrwz 35, 3
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    blr
 entry:
@@ -5745,9 +5631,9 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32() #0 {
 ; PC64LE9-NEXT:    xscvdpsxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
 ; PC64LE9-NEXT:    xscvdpsxws 0, 1
-; PC64LE9-NEXT:    mtvsrws 34, 3
+; PC64LE9-NEXT:    mtvsrwz 34, 3
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 35, 3
+; PC64LE9-NEXT:    mtvsrwz 35, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI97_2 at toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI97_2 at toc@l
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
@@ -5756,7 +5642,7 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32() #0 {
 ; PC64LE9-NEXT:    lfs 0, .LCPI97_3 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpsxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 36, 3
+; PC64LE9-NEXT:    mtvsrwz 36, 3
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -5991,12 +5877,12 @@ define <2 x i32> @constrained_vector_fptosi_v2i32_v2f64() #0 {
 ; PC64LE9-NEXT:    lfd 0, .LCPI104_0 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpsxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 34, 3
+; PC64LE9-NEXT:    mtvsrwz 34, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI104_1 at toc@ha
 ; PC64LE9-NEXT:    lfd 0, .LCPI104_1 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpsxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 35, 3
+; PC64LE9-NEXT:    mtvsrwz 35, 3
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    blr
 entry:
@@ -6037,12 +5923,12 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64() #0 {
 ; PC64LE9-NEXT:    lfd 0, .LCPI105_0 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpsxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 34, 3
+; PC64LE9-NEXT:    mtvsrwz 34, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI105_1 at toc@ha
 ; PC64LE9-NEXT:    lfd 0, .LCPI105_1 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpsxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 35, 3
+; PC64LE9-NEXT:    mtvsrwz 35, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI105_2 at toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI105_2 at toc@l
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
@@ -6051,7 +5937,7 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64() #0 {
 ; PC64LE9-NEXT:    lfd 0, .LCPI105_3 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpsxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 36, 3
+; PC64LE9-NEXT:    mtvsrwz 36, 3
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -6284,12 +6170,12 @@ define <2 x i32> @constrained_vector_fptoui_v2i32_v2f32() #0 {
 ; PC64LE9-NEXT:    lfs 0, .LCPI112_0 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpuxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 34, 3
+; PC64LE9-NEXT:    mtvsrwz 34, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI112_1 at toc@ha
 ; PC64LE9-NEXT:    lfs 0, .LCPI112_1 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpuxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 35, 3
+; PC64LE9-NEXT:    mtvsrwz 35, 3
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    blr
 entry:
@@ -6333,9 +6219,9 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32() #0 {
 ; PC64LE9-NEXT:    xscvdpuxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
 ; PC64LE9-NEXT:    xscvdpuxws 0, 1
-; PC64LE9-NEXT:    mtvsrws 34, 3
+; PC64LE9-NEXT:    mtvsrwz 34, 3
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 35, 3
+; PC64LE9-NEXT:    mtvsrwz 35, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI113_2 at toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI113_2 at toc@l
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
@@ -6344,7 +6230,7 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32() #0 {
 ; PC64LE9-NEXT:    lfs 0, .LCPI113_3 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpuxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 36, 3
+; PC64LE9-NEXT:    mtvsrwz 36, 3
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -6578,12 +6464,12 @@ define <2 x i32> @constrained_vector_fptoui_v2i32_v2f64() #0 {
 ; PC64LE9-NEXT:    lfd 0, .LCPI120_0 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpuxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 34, 3
+; PC64LE9-NEXT:    mtvsrwz 34, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI120_1 at toc@ha
 ; PC64LE9-NEXT:    lfd 0, .LCPI120_1 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpuxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 35, 3
+; PC64LE9-NEXT:    mtvsrwz 35, 3
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    blr
 entry:
@@ -6624,12 +6510,12 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64() #0 {
 ; PC64LE9-NEXT:    lfd 0, .LCPI121_0 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpuxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 34, 3
+; PC64LE9-NEXT:    mtvsrwz 34, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI121_1 at toc@ha
 ; PC64LE9-NEXT:    lfd 0, .LCPI121_1 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpuxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 35, 3
+; PC64LE9-NEXT:    mtvsrwz 35, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI121_2 at toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI121_2 at toc@l
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
@@ -6638,7 +6524,7 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64() #0 {
 ; PC64LE9-NEXT:    lfd 0, .LCPI121_3 at toc@l(3)
 ; PC64LE9-NEXT:    xscvdpuxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrws 36, 3
+; PC64LE9-NEXT:    mtvsrwz 36, 3
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -6857,10 +6743,8 @@ define <2 x float> @constrained_vector_fptrunc_v2f64() #0 {
 ; PC64LE-NEXT:    lfd 1, .LCPI128_1 at toc@l(4)
 ; PC64LE-NEXT:    xsrsp 0, 0
 ; PC64LE-NEXT:    xsrsp 1, 1
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    xscvdpspn 34, 0
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    vmrghw 2, 3, 2
 ; PC64LE-NEXT:    blr
 ;
@@ -6870,12 +6754,10 @@ define <2 x float> @constrained_vector_fptrunc_v2f64() #0 {
 ; PC64LE9-NEXT:    lfd 0, .LCPI128_0 at toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI128_1 at toc@ha
 ; PC64LE9-NEXT:    xsrsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 34, 0
 ; PC64LE9-NEXT:    lfd 0, .LCPI128_1 at toc@l(3)
 ; PC64LE9-NEXT:    xsrsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 0
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    blr
 entry:
@@ -6899,16 +6781,13 @@ define <3 x float> @constrained_vector_fptrunc_v3f64() #0 {
 ; PC64LE-NEXT:    addis 3, 2, .LCPI129_2 at toc@ha
 ; PC64LE-NEXT:    xsrsp 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI129_2 at toc@l
-; PC64LE-NEXT:    xsrsp 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 2
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xscvdpspn 34, 0
+; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xsrsp 0, 2
 ; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    xscvdpspn 35, 0
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptrunc_v3f64:
@@ -6917,21 +6796,18 @@ define <3 x float> @constrained_vector_fptrunc_v3f64() #0 {
 ; PC64LE9-NEXT:    lfd 0, .LCPI129_0 at toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI129_1 at toc@ha
 ; PC64LE9-NEXT:    xsrsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 34, 0
 ; PC64LE9-NEXT:    lfd 0, .LCPI129_1 at toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI129_2 at toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI129_2 at toc@l
 ; PC64LE9-NEXT:    xsrsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 0
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxvx 35, 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI129_3 at toc@ha
 ; PC64LE9-NEXT:    lfd 0, .LCPI129_3 at toc@l(3)
 ; PC64LE9-NEXT:    xsrsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 36, 0
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -7780,10 +7656,8 @@ define <2 x float> @constrained_vector_sitofp_v2f32_v2i32(<2 x i32> %x) #0 {
 ; PC64LE-NEXT:    mtfprwa 1, 4
 ; PC64LE-NEXT:    xscvsxdsp 0, 0
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    xscvdpspn 34, 0
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    vmrghw 2, 3, 2
 ; PC64LE-NEXT:    blr
 ;
@@ -7795,12 +7669,10 @@ define <2 x float> @constrained_vector_sitofp_v2f32_v2i32(<2 x i32> %x) #0 {
 ; PC64LE9-NEXT:    li 3, 4
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 0
 ; PC64LE9-NEXT:    mtfprwa 0, 3
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 34, 0
 ; PC64LE9-NEXT:    vmrghw 2, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -7839,10 +7711,8 @@ define <2 x float> @constrained_vector_sitofp_v2f32_v2i64(<2 x i64> %x) #0 {
 ; PC64LE-NEXT:    mtfprd 1, 4
 ; PC64LE-NEXT:    xscvsxdsp 0, 0
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    xscvdpspn 34, 0
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    vmrghw 2, 2, 3
 ; PC64LE-NEXT:    blr
 ;
@@ -7852,12 +7722,10 @@ define <2 x float> @constrained_vector_sitofp_v2f32_v2i64(<2 x i64> %x) #0 {
 ; PC64LE9-NEXT:    mtfprd 0, 3
 ; PC64LE9-NEXT:    mfvsrd 3, 34
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 0
 ; PC64LE9-NEXT:    mtfprd 0, 3
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 34, 0
 ; PC64LE9-NEXT:    vmrghw 2, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -7911,26 +7779,23 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
-; PC64LE-NEXT:    addis 3, 2, .LCPI161_0 at toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI161_0 at toc@l
-; PC64LE-NEXT:    mffprwz 4, 0
-; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtfprwa 0, 4
-; PC64LE-NEXT:    mtfprwa 1, 5
+; PC64LE-NEXT:    mffprwz 3, 0
+; PC64LE-NEXT:    mffprwz 4, 1
+; PC64LE-NEXT:    mtfprwa 0, 3
+; PC64LE-NEXT:    mtfprwa 1, 4
 ; PC64LE-NEXT:    xscvsxdsp 0, 0
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
-; PC64LE-NEXT:    mfvsrwz 4, 34
-; PC64LE-NEXT:    mtfprwa 2, 4
-; PC64LE-NEXT:    xscvsxdsp 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 2
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    mfvsrwz 3, 34
+; PC64LE-NEXT:    mtfprwa 2, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI161_0 at toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI161_0 at toc@l
+; PC64LE-NEXT:    xscvdpspn 34, 0
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvsxdsp 0, 2
 ; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    xscvdpspn 35, 0
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i32:
@@ -7941,21 +7806,18 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE9-NEXT:    li 3, 4
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 0
 ; PC64LE9-NEXT:    mtfprwa 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI161_0 at toc@ha
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI161_0 at toc@l
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 36, 0
 ; PC64LE9-NEXT:    vmrghw 3, 4, 3
 ; PC64LE9-NEXT:    lxvx 36, 0, 3
 ; PC64LE9-NEXT:    mfvsrwz 3, 34
 ; PC64LE9-NEXT:    mtfprwa 0, 3
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 34, 0
 ; PC64LE9-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE9-NEXT:    blr
 entry:
@@ -7998,22 +7860,19 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    addis 6, 2, .LCPI163_0 at toc@ha
+; PC64LE-NEXT:    addis 3, 2, .LCPI163_0 at toc@ha
 ; PC64LE-NEXT:    mtfprd 1, 4
-; PC64LE-NEXT:    addi 3, 6, .LCPI163_0 at toc@l
+; PC64LE-NEXT:    addi 3, 3, .LCPI163_0 at toc@l
 ; PC64LE-NEXT:    xscvsxdsp 0, 0
+; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
 ; PC64LE-NEXT:    mtfprd 2, 5
-; PC64LE-NEXT:    xscvsxdsp 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 2
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    xscvdpspn 34, 0
+; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvsxdsp 0, 2
 ; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    xscvdpspn 35, 0
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i64:
@@ -8022,18 +7881,15 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI163_0 at toc@ha
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI163_0 at toc@l
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 34, 0
 ; PC64LE9-NEXT:    mtfprd 0, 4
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 0
 ; PC64LE9-NEXT:    mtfprd 0, 5
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 36, 0
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -8363,10 +8219,8 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i32(<2 x i32> %x) #0 {
 ; PC64LE-NEXT:    mtfprwz 1, 4
 ; PC64LE-NEXT:    xscvuxdsp 0, 0
 ; PC64LE-NEXT:    xscvuxdsp 1, 1
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    xscvdpspn 34, 0
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    vmrghw 2, 3, 2
 ; PC64LE-NEXT:    blr
 ;
@@ -8378,12 +8232,10 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i32(<2 x i32> %x) #0 {
 ; PC64LE9-NEXT:    li 3, 4
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 0
 ; PC64LE9-NEXT:    mtfprwz 0, 3
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 34, 0
 ; PC64LE9-NEXT:    vmrghw 2, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -8422,10 +8274,8 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
 ; PC64LE-NEXT:    mtfprd 1, 4
 ; PC64LE-NEXT:    xscvuxdsp 0, 0
 ; PC64LE-NEXT:    xscvuxdsp 1, 1
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    xscvdpspn 34, 0
+; PC64LE-NEXT:    xscvdpspn 35, 1
 ; PC64LE-NEXT:    vmrghw 2, 2, 3
 ; PC64LE-NEXT:    blr
 ;
@@ -8435,12 +8285,10 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
 ; PC64LE9-NEXT:    mtfprd 0, 3
 ; PC64LE9-NEXT:    mfvsrd 3, 34
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 0
 ; PC64LE9-NEXT:    mtfprd 0, 3
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 34, 0
 ; PC64LE9-NEXT:    vmrghw 2, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -8494,26 +8342,23 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
-; PC64LE-NEXT:    addis 3, 2, .LCPI179_0 at toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI179_0 at toc@l
-; PC64LE-NEXT:    mffprwz 4, 0
-; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtfprwz 0, 4
-; PC64LE-NEXT:    mtfprwz 1, 5
+; PC64LE-NEXT:    mffprwz 3, 0
+; PC64LE-NEXT:    mffprwz 4, 1
+; PC64LE-NEXT:    mtfprwz 0, 3
+; PC64LE-NEXT:    mtfprwz 1, 4
 ; PC64LE-NEXT:    xscvuxdsp 0, 0
 ; PC64LE-NEXT:    xscvuxdsp 1, 1
-; PC64LE-NEXT:    mfvsrwz 4, 34
-; PC64LE-NEXT:    mtfprwz 2, 4
-; PC64LE-NEXT:    xscvuxdsp 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 2
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    mfvsrwz 3, 34
+; PC64LE-NEXT:    mtfprwz 2, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI179_0 at toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI179_0 at toc@l
+; PC64LE-NEXT:    xscvdpspn 34, 0
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvuxdsp 0, 2
 ; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    xscvdpspn 35, 0
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_uitofp_v3f32_v3i32:
@@ -8524,21 +8369,18 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE9-NEXT:    li 3, 4
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 0
 ; PC64LE9-NEXT:    mtfprwz 0, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI179_0 at toc@ha
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI179_0 at toc@l
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 36, 0
 ; PC64LE9-NEXT:    vmrghw 3, 4, 3
 ; PC64LE9-NEXT:    lxvx 36, 0, 3
 ; PC64LE9-NEXT:    mfvsrwz 3, 34
 ; PC64LE9-NEXT:    mtfprwz 0, 3
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 34, 0
 ; PC64LE9-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE9-NEXT:    blr
 entry:
@@ -8581,22 +8423,19 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_uitofp_v3f32_v3i64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    addis 6, 2, .LCPI181_0 at toc@ha
+; PC64LE-NEXT:    addis 3, 2, .LCPI181_0 at toc@ha
 ; PC64LE-NEXT:    mtfprd 1, 4
-; PC64LE-NEXT:    addi 3, 6, .LCPI181_0 at toc@l
+; PC64LE-NEXT:    addi 3, 3, .LCPI181_0 at toc@l
 ; PC64LE-NEXT:    xscvuxdsp 0, 0
+; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xscvuxdsp 1, 1
 ; PC64LE-NEXT:    mtfprd 2, 5
-; PC64LE-NEXT:    xscvuxdsp 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 0, 2
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 3
+; PC64LE-NEXT:    xscvdpspn 34, 0
+; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvuxdsp 0, 2
 ; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    xscvdpspn 35, 0
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_uitofp_v3f32_v3i64:
@@ -8605,18 +8444,15 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI181_0 at toc@ha
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI181_0 at toc@l
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 34, 0
 ; PC64LE9-NEXT:    mtfprd 0, 4
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 0
 ; PC64LE9-NEXT:    mtfprd 0, 5
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
 ; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 36, 0
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 43bb0469cc91..23532e2dd772 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -1369,10 +1369,8 @@ define <2 x float> @test44(<2 x i64> %a) {
 ; CHECK-LE-NEXT:    xxlor vs1, v2, v2
 ; CHECK-LE-NEXT:    xscvuxdsp f1, f1
 ; CHECK-LE-NEXT:    xscvuxdsp f0, f0
-; CHECK-LE-NEXT:    xscvdpspn vs1, f1
-; CHECK-LE-NEXT:    xscvdpspn vs0, f0
-; CHECK-LE-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-LE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-LE-NEXT:    xscvdpspn v3, f1
+; CHECK-LE-NEXT:    xscvdpspn v2, f0
 ; CHECK-LE-NEXT:    vmrghw v2, v3, v2
 ; CHECK-LE-NEXT:    blr
   %v = uitofp <2 x i64> %a to <2 x float>
@@ -1451,10 +1449,8 @@ define <2 x float> @test45(<2 x i64> %a) {
 ; CHECK-LE-NEXT:    xxlor vs1, v2, v2
 ; CHECK-LE-NEXT:    xscvsxdsp f1, f1
 ; CHECK-LE-NEXT:    xscvsxdsp f0, f0
-; CHECK-LE-NEXT:    xscvdpspn vs1, f1
-; CHECK-LE-NEXT:    xscvdpspn vs0, f0
-; CHECK-LE-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-LE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-LE-NEXT:    xscvdpspn v3, f1
+; CHECK-LE-NEXT:    xscvdpspn v2, f0
 ; CHECK-LE-NEXT:    vmrghw v2, v3, v2
 ; CHECK-LE-NEXT:    blr
   %v = sitofp <2 x i64> %a to <2 x float>


        


More information about the llvm-commits mailing list