[llvm] 934d5fa - [PowerPC] Exploit xxperm, check for dead vectors and substitute vperm with xxperm

Maryam Moghadas via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 23 11:28:18 PST 2022


Author: Maryam Moghadas
Date: 2022-11-23T13:28:12-06:00
New Revision: 934d5fa2b8672695c335deed0e19d0e777c98403

URL: https://github.com/llvm/llvm-project/commit/934d5fa2b8672695c335deed0e19d0e777c98403
DIFF: https://github.com/llvm/llvm-project/commit/934d5fa2b8672695c335deed0e19d0e777c98403.diff

LOG: [PowerPC] Exploit xxperm, check for dead vectors and substitute vperm with xxperm

The vperm instruction requires its data to be in the Altivec registers. If one
of the vector operands is not used after the vperm instruction, then vperm can
be substituted by xxperm, which doubles the number of available registers.

Reviewed By: stefanp

Differential Revision: https://reviews.llvm.org/D133700

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.h
    llvm/lib/Target/PowerPC/PPCInstrAltivec.td
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/aix-p9-insert-extract.ll
    llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
    llvm/test/CodeGen/PowerPC/build-vector-tests.ll
    llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
    llvm/test/CodeGen/PowerPC/extract-and-store.ll
    llvm/test/CodeGen/PowerPC/load-and-splat.ll
    llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll
    llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
    llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll
    llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
    llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
    llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
    llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
    llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
    llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
    llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
    llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
    llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
    llvm/test/CodeGen/PowerPC/vec-itofp.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
    llvm/test/CodeGen/PowerPC/vec_int_ext.ll
    llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 98db9073f06a1..3c56d550c2e1e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -138,7 +138,8 @@ cl::opt<bool> DisableAutoPairedVecSt(
 
 STATISTIC(NumTailCalls, "Number of tail calls");
 STATISTIC(NumSiblingCalls, "Number of sibling calls");
-STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
+STATISTIC(ShufflesHandledWithVPERM,
+          "Number of shuffles lowered to a VPERM or XXPERM");
 STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
 
 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
@@ -1640,6 +1641,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
     return "PPCISD::XXSPLTI32DX";
   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
+  case PPCISD::XXPERM:
+    return "PPCISD::XXPERM";
   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
   case PPCISD::CMPB:            return "PPCISD::CMPB";
   case PPCISD::Hi:              return "PPCISD::Hi";
@@ -10152,42 +10155,133 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
   // vector that will get spilled to the constant pool.
   if (V2.isUndef()) V2 = V1;
 
+  return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
+}
+
+SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
+                                      ArrayRef<int> PermMask, EVT VT,
+                                      SDValue V1, SDValue V2) const {
+  unsigned Opcode = PPCISD::VPERM;
+  EVT ValType = V1.getValueType();
+  SDLoc dl(Op);
+  bool NeedSwap = false;
+  bool isLittleEndian = Subtarget.isLittleEndian();
+  bool isPPC64 = Subtarget.isPPC64();
+
+  // Only need to place items backwards in LE,
+  // the mask will be properly calculated.
+  if (isLittleEndian)
+    std::swap(V1, V2);
+
+  if (Subtarget.isISA3_0() && (V1->hasOneUse() || V2->hasOneUse())) {
+    LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
+                         "XXPERM instead\n");
+    Opcode = PPCISD::XXPERM;
+
+    // if V2 is dead, then we swap V1 and V2 so we can
+    // use V2 as the destination instead.
+    if (!V1->hasOneUse() && V2->hasOneUse()) {
+      std::swap(V1, V2);
+      NeedSwap = !NeedSwap;
+    }
+  }
+
+  bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
+  bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
+
   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
   // that it is in input element units, not in bytes.  Convert now.
 
   // For little endian, the order of the input vectors is reversed, and
   // the permutation mask is complemented with respect to 31.  This is
-  // necessary to produce proper semantics with the big-endian-biased vperm
+  // necessary to produce proper semantics with the big-endian-based vperm
   // instruction.
   EVT EltVT = V1.getValueType().getVectorElementType();
-  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+  unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
+
+  /*
+  Vectors will be appended like so: [ V1 | v2 ]
+  XXSWAPD on V1:
+  [   A   |   B   |   C   |   D   ] -> [   C   |   D   |   A   |   B   ]
+     0-3     4-7     8-11   12-15         0-3     4-7     8-11   12-15
+  i.e.  index of A, B += 8, and index of C, D -= 8.
+  XXSWAPD on V2:
+  [   E   |   F   |   G   |   H   ] -> [   G   |   H   |   E   |   F   ]
+    16-19   20-23   24-27   28-31        16-19   20-23   24-27   28-31
+  i.e.  index of E, F += 8, index of G, H -= 8
+  Swap V1 and V2:
+  [   V1   |   V2  ] -> [   V2   |   V1   ]
+     0-15     16-31        0-15     16-31
+  i.e.  index of V1 += 16, index of V2 -= 16
+  */
 
   SmallVector<SDValue, 16> ResultMask;
   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
 
+    if (V1HasXXSWAPD) {
+      if (SrcElt < 8)
+        SrcElt += 8;
+      else if (SrcElt < 16)
+        SrcElt -= 8;
+    }
+    if (V2HasXXSWAPD) {
+      if (SrcElt > 23)
+        SrcElt -= 8;
+      else if (SrcElt > 15)
+        SrcElt += 8;
+    }
+    if (NeedSwap) {
+      if (SrcElt < 16)
+        SrcElt += 16;
+      else
+        SrcElt -= 16;
+    }
     for (unsigned j = 0; j != BytesPerElement; ++j)
       if (isLittleEndian)
-        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
-                                             dl, MVT::i32));
+        ResultMask.push_back(
+            DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
       else
-        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
-                                             MVT::i32));
+        ResultMask.push_back(
+            DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
+  }
+
+  if (V1HasXXSWAPD) {
+    dl = SDLoc(V1->getOperand(0));
+    V1 = V1->getOperand(0)->getOperand(1);
+  }
+  if (V2HasXXSWAPD) {
+    dl = SDLoc(V2->getOperand(0));
+    V2 = V2->getOperand(0)->getOperand(1);
+  }
+
+  if (V1HasXXSWAPD || V2HasXXSWAPD || Opcode == PPCISD::XXPERM) {
+    if (isPPC64 && ValType != MVT::v2f64)
+      V1 = DAG.getBitcast(MVT::v2f64, V1);
+    if (isPPC64 && V2.getValueType() != MVT::v2f64)
+      V2 = DAG.getBitcast(MVT::v2f64, V2);
   }
 
   ShufflesHandledWithVPERM++;
   SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
-  LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  if (Opcode == PPCISD::XXPERM) {
+    LLVM_DEBUG(dbgs() << "Emitting a XXPERM for the following shuffle:\n");
+  } else {
+    LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
+  }
   LLVM_DEBUG(SVOp->dump());
   LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
   LLVM_DEBUG(VPermMask.dump());
 
-  if (isLittleEndian)
-    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
-                       V2, V1, VPermMask);
-  else
-    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
-                       V1, V2, VPermMask);
+  if (Opcode == PPCISD::XXPERM)
+    VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
+
+  SDValue VPERMNode =
+      DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
+
+  VPERMNode = DAG.getBitcast(ValType, VPERMNode);
+  return VPERMNode;
 }
 
 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 6b5b9a75a2850..411350805819f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -123,6 +123,7 @@ namespace llvm {
     /// XXPERMDI - The PPC XXPERMDI instruction
     ///
     XXPERMDI,
+    XXPERM,
 
     /// The CMPB instruction (takes two operands of i32 or i64).
     CMPB,
@@ -1290,6 +1291,8 @@ namespace llvm {
     SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerVPERM(SDValue Op, SelectionDAG &DAG, ArrayRef<int> PermMask,
+                       EVT VT, SDValue V1, SDValue V2) const;
     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 86cb528a537fd..9236b8fea773b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1057,6 +1057,8 @@ def : Pat<(int_ppc_altivec_vnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
 
 def : Pat<(PPCvperm v16i8:$vA, v16i8:$vB, v16i8:$vC),
           (VPERM $vA, $vB, $vC)>;
+def : Pat<(PPCvperm v2f64:$vA, v2f64:$vB, v16i8:$vC),
+          (VPERM $vA, $vB, $vC)>;
 
 def : Pat<(PPCfre v4f32:$A), (VREFP $A)>;
 def : Pat<(PPCfrsqrte v4f32:$A), (VRSQRTEFP $A)>;

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index aa97e800804b8..43912f8cefba7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -86,6 +86,9 @@ def SDT_PPCst_vec_be : SDTypeProfile<0, 2, [
   SDTCisVec<0>, SDTCisPtrTy<1>
 ]>;
 
+def SDT_PPCxxperm : SDTypeProfile<1, 3, [
+  SDTCisVT<0, v2f64>, SDTCisVT<1, v2f64>,
+  SDTCisVT<2, v2f64>, SDTCisVT<3, v4i32>]>;
 //--------------------------- Custom PPC nodes -------------------------------//
 def PPClxvd2x  : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -116,6 +119,7 @@ def PPCsextldsplat : SDNode<"PPCISD::SEXT_LD_SPLAT", SDT_PPCldsplat,
 def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED",
                      SDTypeProfile<1, 1, []>, []>;
 
+def PPCxxperm : SDNode<"PPCISD::XXPERM", SDT_PPCxxperm, []>;
 //-------------------------- Predicate definitions ---------------------------//
 def HasVSX : Predicate<"Subtarget->hasVSX()">;
 def IsLittleEndian : Predicate<"Subtarget->isLittleEndian()">;
@@ -1643,13 +1647,14 @@ let Predicates = [HasVSX, HasP9Vector] in {
   def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>;
 
   // Vector Permute
-  // FIXME: Setting the hasSideEffects flag here to match current behaviour.
-  let hasSideEffects = 1 in {
-    def XXPERM  : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc,
-                                  IIC_VecPerm, []>;
-    def XXPERMR : XX3_XT5_XA5_XB5<60, 58, "xxpermr", vsrc, vsrc, vsrc,
-                                  IIC_VecPerm, []>;
-  }
+  def XXPERM  : XX3Form<60, 26, (outs vsrc:$XT),
+                                (ins vsrc:$XA, vsrc:$XTi, vsrc:$XB),
+                        "xxperm $XT, $XA, $XB", IIC_VecPerm, []>,
+                        RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+  def XXPERMR : XX3Form<60, 58, (outs vsrc:$XT),
+                                (ins vsrc:$XA, vsrc:$XTi, vsrc:$XB),
+                        "xxpermr $XT, $XA, $XB", IIC_VecPerm, []>,
+                        RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
 
   // Vector Splat Immediate Byte
   // FIXME: Setting the hasSideEffects flag here to match current behaviour.
@@ -4143,6 +4148,8 @@ def : Pat<(v8i16 (PPCldsplat ForceXForm:$A)),
           (v8i16 (VSPLTHs 3, (LXSIHZX ForceXForm:$A)))>;
 def : Pat<(v16i8 (PPCldsplat ForceXForm:$A)),
           (v16i8 (VSPLTBs 7, (LXSIBZX ForceXForm:$A)))>;
+def : Pat<(v2f64 (PPCxxperm v2f64:$XT, v2f64:$XB, v4i32:$C)),
+          (XXPERM v2f64:$XT, v2f64:$XB, v4i32:$C)>;
 } // HasVSX, HasP9Vector
 
 // Any Power9 VSX subtarget with equivalent length but better Power10 VSX

diff  --git a/llvm/test/CodeGen/PowerPC/aix-p9-insert-extract.ll b/llvm/test/CodeGen/PowerPC/aix-p9-insert-extract.ll
index f124b0db5c648..f48f8a5c60e0b 100644
--- a/llvm/test/CodeGen/PowerPC/aix-p9-insert-extract.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-p9-insert-extract.ll
@@ -444,16 +444,16 @@ entry:
 define <8 x i16> @shuffle_vector_halfword_0_4(<8 x i16> %a) {
 ; CHECK-64-LABEL: shuffle_vector_halfword_0_4:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    ld 3, L..C0(2)
-; CHECK-64-NEXT:    lxv 35, 0(3)
-; CHECK-64-NEXT:    vperm 2, 2, 2, 3
+; CHECK-64-NEXT:    ld 3, L..C0(2) # %const.0
+; CHECK-64-NEXT:    lxv 0, 0(3)
+; CHECK-64-NEXT:    xxperm 34, 34, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: shuffle_vector_halfword_0_4:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    lwz 3, L..C0(2)
-; CHECK-32-NEXT:    lxv 35, 0(3)
-; CHECK-32-NEXT:    vperm 2, 2, 2, 3
+; CHECK-32-NEXT:    lwz 3, L..C0(2) # %const.0
+; CHECK-32-NEXT:    lxv 0, 0(3)
+; CHECK-32-NEXT:    xxperm 34, 34, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 4, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -493,16 +493,16 @@ entry:
 define <8 x i16> @shuffle_vector_halfword_3_4(<8 x i16> %a) {
 ; CHECK-64-LABEL: shuffle_vector_halfword_3_4:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    ld 3, L..C1(2)
-; CHECK-64-NEXT:    lxv 35, 0(3)
-; CHECK-64-NEXT:    vperm 2, 2, 2, 3
+; CHECK-64-NEXT:    ld 3, L..C1(2) # %const.0
+; CHECK-64-NEXT:    lxv 0, 0(3)
+; CHECK-64-NEXT:    xxperm 34, 34, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: shuffle_vector_halfword_3_4:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    lwz 3, L..C1(2)
-; CHECK-32-NEXT:    lxv 35, 0(3)
-; CHECK-32-NEXT:    vperm 2, 2, 2, 3
+; CHECK-32-NEXT:    lwz 3, L..C1(2) # %const.0
+; CHECK-32-NEXT:    lxv 0, 0(3)
+; CHECK-32-NEXT:    xxperm 34, 34, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 7>
@@ -542,16 +542,16 @@ entry:
 define <8 x i16> @shuffle_vector_halfword_6_4(<8 x i16> %a) {
 ; CHECK-64-LABEL: shuffle_vector_halfword_6_4:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    ld 3, L..C2(2)
-; CHECK-64-NEXT:    lxv 35, 0(3)
-; CHECK-64-NEXT:    vperm 2, 2, 2, 3
+; CHECK-64-NEXT:    ld 3, L..C2(2) # %const.0
+; CHECK-64-NEXT:    lxv 0, 0(3)
+; CHECK-64-NEXT:    xxperm 34, 34, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: shuffle_vector_halfword_6_4:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    lwz 3, L..C2(2)
-; CHECK-32-NEXT:    lxv 35, 0(3)
-; CHECK-32-NEXT:    vperm 2, 2, 2, 3
+; CHECK-32-NEXT:    lwz 3, L..C2(2) # %const.0
+; CHECK-32-NEXT:    lxv 0, 0(3)
+; CHECK-32-NEXT:    xxperm 34, 34, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 4, i32 7>
@@ -561,16 +561,16 @@ entry:
 define <8 x i16> @shuffle_vector_halfword_7_4(<8 x i16> %a) {
 ; CHECK-64-LABEL: shuffle_vector_halfword_7_4:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    ld 3, L..C3(2)
-; CHECK-64-NEXT:    lxv 35, 0(3)
-; CHECK-64-NEXT:    vperm 2, 2, 2, 3
+; CHECK-64-NEXT:    ld 3, L..C3(2) # %const.0
+; CHECK-64-NEXT:    lxv 0, 0(3)
+; CHECK-64-NEXT:    xxperm 34, 34, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: shuffle_vector_halfword_7_4:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    lwz 3, L..C3(2)
-; CHECK-32-NEXT:    lxv 35, 0(3)
-; CHECK-32-NEXT:    vperm 2, 2, 2, 3
+; CHECK-32-NEXT:    lwz 3, L..C3(2) # %const.0
+; CHECK-32-NEXT:    lxv 0, 0(3)
+; CHECK-32-NEXT:    xxperm 34, 34, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>
@@ -1460,16 +1460,16 @@ entry:
 define <16 x i8> @shuffle_vector_byte_1_8(<16 x i8> %a) {
 ; CHECK-64-LABEL: shuffle_vector_byte_1_8:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    ld 3, L..C4(2)
-; CHECK-64-NEXT:    lxv 35, 0(3)
-; CHECK-64-NEXT:    vperm 2, 2, 2, 3
+; CHECK-64-NEXT:    ld 3, L..C4(2) # %const.0
+; CHECK-64-NEXT:    lxv 0, 0(3)
+; CHECK-64-NEXT:    xxperm 34, 34, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: shuffle_vector_byte_1_8:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    lwz 3, L..C4(2)
-; CHECK-32-NEXT:    lxv 35, 0(3)
-; CHECK-32-NEXT:    vperm 2, 2, 2, 3
+; CHECK-32-NEXT:    lwz 3, L..C4(2) # %const.0
+; CHECK-32-NEXT:    lxv 0, 0(3)
+; CHECK-32-NEXT:    xxperm 34, 34, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 8, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1479,16 +1479,16 @@ entry:
 define <16 x i8> @shuffle_vector_byte_2_8(<16 x i8> %a) {
 ; CHECK-64-LABEL: shuffle_vector_byte_2_8:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    ld 3, L..C5(2)
-; CHECK-64-NEXT:    lxv 35, 0(3)
-; CHECK-64-NEXT:    vperm 2, 2, 2, 3
+; CHECK-64-NEXT:    ld 3, L..C5(2) # %const.0
+; CHECK-64-NEXT:    lxv 0, 0(3)
+; CHECK-64-NEXT:    xxperm 34, 34, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: shuffle_vector_byte_2_8:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    lwz 3, L..C5(2)
-; CHECK-32-NEXT:    lxv 35, 0(3)
-; CHECK-32-NEXT:    vperm 2, 2, 2, 3
+; CHECK-32-NEXT:    lwz 3, L..C5(2) # %const.0
+; CHECK-32-NEXT:    lxv 0, 0(3)
+; CHECK-32-NEXT:    xxperm 34, 34, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 8, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1528,16 +1528,16 @@ entry:
 define <16 x i8> @shuffle_vector_byte_5_8(<16 x i8> %a) {
 ; CHECK-64-LABEL: shuffle_vector_byte_5_8:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    ld 3, L..C6(2)
-; CHECK-64-NEXT:    lxv 35, 0(3)
-; CHECK-64-NEXT:    vperm 2, 2, 2, 3
+; CHECK-64-NEXT:    ld 3, L..C6(2) # %const.0
+; CHECK-64-NEXT:    lxv 0, 0(3)
+; CHECK-64-NEXT:    xxperm 34, 34, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: shuffle_vector_byte_5_8:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    lwz 3, L..C6(2)
-; CHECK-32-NEXT:    lxv 35, 0(3)
-; CHECK-32-NEXT:    vperm 2, 2, 2, 3
+; CHECK-32-NEXT:    lwz 3, L..C6(2) # %const.0
+; CHECK-32-NEXT:    lxv 0, 0(3)
+; CHECK-32-NEXT:    xxperm 34, 34, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 8, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1547,16 +1547,16 @@ entry:
 define <16 x i8> @shuffle_vector_byte_6_8(<16 x i8> %a) {
 ; CHECK-64-LABEL: shuffle_vector_byte_6_8:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    ld 3, L..C7(2)
-; CHECK-64-NEXT:    lxv 35, 0(3)
-; CHECK-64-NEXT:    vperm 2, 2, 2, 3
+; CHECK-64-NEXT:    ld 3, L..C7(2) # %const.0
+; CHECK-64-NEXT:    lxv 0, 0(3)
+; CHECK-64-NEXT:    xxperm 34, 34, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: shuffle_vector_byte_6_8:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    lwz 3, L..C7(2)
-; CHECK-32-NEXT:    lxv 35, 0(3)
-; CHECK-32-NEXT:    vperm 2, 2, 2, 3
+; CHECK-32-NEXT:    lwz 3, L..C7(2) # %const.0
+; CHECK-32-NEXT:    lxv 0, 0(3)
+; CHECK-32-NEXT:    xxperm 34, 34, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1566,16 +1566,16 @@ entry:
 define <16 x i8> @shuffle_vector_byte_7_8(<16 x i8> %a) {
 ; CHECK-64-LABEL: shuffle_vector_byte_7_8:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    ld 3, L..C8(2)
-; CHECK-64-NEXT:    lxv 35, 0(3)
-; CHECK-64-NEXT:    vperm 2, 2, 2, 3
+; CHECK-64-NEXT:    ld 3, L..C8(2) # %const.0
+; CHECK-64-NEXT:    lxv 0, 0(3)
+; CHECK-64-NEXT:    xxperm 34, 34, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: shuffle_vector_byte_7_8:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    lwz 3, L..C8(2)
-; CHECK-32-NEXT:    lxv 35, 0(3)
-; CHECK-32-NEXT:    vperm 2, 2, 2, 3
+; CHECK-32-NEXT:    lwz 3, L..C8(2) # %const.0
+; CHECK-32-NEXT:    lxv 0, 0(3)
+; CHECK-32-NEXT:    xxperm 34, 34, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1630,16 +1630,16 @@ entry:
 define <16 x i8> @shuffle_vector_byte_11_8(<16 x i8> %a) {
 ; CHECK-64-LABEL: shuffle_vector_byte_11_8:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    ld 3, L..C9(2)
-; CHECK-64-NEXT:    lxv 35, 0(3)
-; CHECK-64-NEXT:    vperm 2, 2, 2, 3
+; CHECK-64-NEXT:    ld 3, L..C9(2) # %const.0
+; CHECK-64-NEXT:    lxv 0, 0(3)
+; CHECK-64-NEXT:    xxperm 34, 34, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: shuffle_vector_byte_11_8:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    lwz 3, L..C9(2)
-; CHECK-32-NEXT:    lxv 35, 0(3)
-; CHECK-32-NEXT:    vperm 2, 2, 2, 3
+; CHECK-32-NEXT:    lwz 3, L..C9(2) # %const.0
+; CHECK-32-NEXT:    lxv 0, 0(3)
+; CHECK-32-NEXT:    xxperm 34, 34, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 8, i32 12, i32 13, i32 14, i32 15>
@@ -1649,16 +1649,16 @@ entry:
 define <16 x i8> @shuffle_vector_byte_12_8(<16 x i8> %a) {
 ; CHECK-64-LABEL: shuffle_vector_byte_12_8:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    ld 3, L..C10(2)
-; CHECK-64-NEXT:    lxv 35, 0(3)
-; CHECK-64-NEXT:    vperm 2, 2, 2, 3
+; CHECK-64-NEXT:    ld 3, L..C10(2) # %const.0
+; CHECK-64-NEXT:    lxv 0, 0(3)
+; CHECK-64-NEXT:    xxperm 34, 34, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: shuffle_vector_byte_12_8:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    lwz 3, L..C10(2)
-; CHECK-32-NEXT:    lxv 35, 0(3)
-; CHECK-32-NEXT:    vperm 2, 2, 2, 3
+; CHECK-32-NEXT:    lwz 3, L..C10(2) # %const.0
+; CHECK-32-NEXT:    lxv 0, 0(3)
+; CHECK-32-NEXT:    xxperm 34, 34, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 8, i32 13, i32 14, i32 15>
@@ -1698,16 +1698,16 @@ entry:
 define <16 x i8> @shuffle_vector_byte_15_8(<16 x i8> %a) {
 ; CHECK-64-LABEL: shuffle_vector_byte_15_8:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    ld 3, L..C11(2)
-; CHECK-64-NEXT:    lxv 35, 0(3)
-; CHECK-64-NEXT:    vperm 2, 2, 2, 3
+; CHECK-64-NEXT:    ld 3, L..C11(2) # %const.0
+; CHECK-64-NEXT:    lxv 0, 0(3)
+; CHECK-64-NEXT:    xxperm 34, 34, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: shuffle_vector_byte_15_8:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    lwz 3, L..C11(2)
-; CHECK-32-NEXT:    lxv 35, 0(3)
-; CHECK-32-NEXT:    vperm 2, 2, 2, 3
+; CHECK-32-NEXT:    lwz 3, L..C11(2) # %const.0
+; CHECK-32-NEXT:    lxv 0, 0(3)
+; CHECK-32-NEXT:    xxperm 34, 34, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>

diff  --git a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
index 46ff2280118e1..2e0427e9cb362 100644
--- a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
@@ -1447,15 +1447,15 @@ define <4 x float> @testSameVecEl0LE(<4 x float> %a) {
 ; CHECK-64-LABEL: testSameVecEl0LE:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    ld 3, L..C0(2) # %const.0
-; CHECK-64-NEXT:    lxv 35, 0(3)
-; CHECK-64-NEXT:    vperm 2, 2, 2, 3
+; CHECK-64-NEXT:    lxv 0, 0(3)
+; CHECK-64-NEXT:    xxperm 34, 34, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: testSameVecEl0LE:
 ; CHECK-32:       # %bb.0: # %entry
 ; CHECK-32-NEXT:    lwz 3, L..C0(2) # %const.0
-; CHECK-32-NEXT:    lxv 35, 0(3)
-; CHECK-32-NEXT:    vperm 2, 2, 2, 3
+; CHECK-32-NEXT:    lxv 0, 0(3)
+; CHECK-32-NEXT:    xxperm 34, 34, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 6, i32 1, i32 2, i32 3>
@@ -1465,15 +1465,15 @@ define <4 x float> @testSameVecEl1LE(<4 x float> %a) {
 ; CHECK-64-LABEL: testSameVecEl1LE:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    ld 3, L..C1(2) # %const.0
-; CHECK-64-NEXT:    lxv 35, 0(3)
-; CHECK-64-NEXT:    vperm 2, 2, 2, 3
+; CHECK-64-NEXT:    lxv 0, 0(3)
+; CHECK-64-NEXT:    xxperm 34, 34, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: testSameVecEl1LE:
 ; CHECK-32:       # %bb.0: # %entry
 ; CHECK-32-NEXT:    lwz 3, L..C1(2) # %const.0
-; CHECK-32-NEXT:    lxv 35, 0(3)
-; CHECK-32-NEXT:    vperm 2, 2, 2, 3
+; CHECK-32-NEXT:    lxv 0, 0(3)
+; CHECK-32-NEXT:    xxperm 34, 34, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
@@ -1483,15 +1483,15 @@ define <4 x float> @testSameVecEl3LE(<4 x float> %a) {
 ; CHECK-64-LABEL: testSameVecEl3LE:
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    ld 3, L..C2(2) # %const.0
-; CHECK-64-NEXT:    lxv 35, 0(3)
-; CHECK-64-NEXT:    vperm 2, 2, 2, 3
+; CHECK-64-NEXT:    lxv 0, 0(3)
+; CHECK-64-NEXT:    xxperm 34, 34, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: testSameVecEl3LE:
 ; CHECK-32:       # %bb.0: # %entry
 ; CHECK-32-NEXT:    lwz 3, L..C2(2) # %const.0
-; CHECK-32-NEXT:    lxv 35, 0(3)
-; CHECK-32-NEXT:    vperm 2, 2, 2, 3
+; CHECK-32-NEXT:    lxv 0, 0(3)
+; CHECK-32-NEXT:    xxperm 34, 34, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6>

diff  --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index 35bca69b1b8de..411b991dc29c1 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -928,8 +928,8 @@ define <4 x i32> @fromDiffMemConsDi(ptr nocapture readonly %arr) {
 ; P9BE-NEXT:    lxv v2, 0(r3)
; P9BE-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
; P9BE-NEXT:    addi r3, r3, .LCPI7_0@toc@l
-; P9BE-NEXT:    lxv v3, 0(r3)
-; P9BE-NEXT:    vperm v2, v2, v2, v3
+; P9BE-NEXT:    lxv vs0, 0(r3)
+; P9BE-NEXT:    xxperm v2, v2, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: fromDiffMemConsDi:
@@ -1028,8 +1028,8 @@ define <4 x i32> @fromDiffMemVarDi(ptr nocapture readonly %arr, i32 signext %ele
 ; P9BE-NEXT:    lxvx v2, r3, r4
; P9BE-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
; P9BE-NEXT:    addi r3, r3, .LCPI9_0@toc@l
-; P9BE-NEXT:    lxv v3, 0(r3)
-; P9BE-NEXT:    vperm v2, v2, v2, v3
+; P9BE-NEXT:    lxv vs0, 0(r3)
+; P9BE-NEXT:    xxperm v2, v2, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: fromDiffMemVarDi:
@@ -1040,8 +1040,8 @@ define <4 x i32> @fromDiffMemVarDi(ptr nocapture readonly %arr, i32 signext %ele
 ; P9LE-NEXT:    lxvx v2, r3, r4
; P9LE-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
; P9LE-NEXT:    addi r3, r3, .LCPI9_0@toc@l
-; P9LE-NEXT:    lxv v3, 0(r3)
-; P9LE-NEXT:    vperm v2, v2, v2, v3
+; P9LE-NEXT:    lxv vs0, 0(r3)
+; P9LE-NEXT:    xxperm v2, v2, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fromDiffMemVarDi:
@@ -1058,15 +1058,14 @@ define <4 x i32> @fromDiffMemVarDi(ptr nocapture readonly %arr, i32 signext %ele
 ;
 ; P8LE-LABEL: fromDiffMemVarDi:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    sldi r4, r4, 2
 ; P8LE-NEXT:    addis r5, r2, .LCPI9_0@toc@ha
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    addi r5, r5, .LCPI9_0@toc@l
 ; P8LE-NEXT:    add r3, r3, r4
-; P8LE-NEXT:    addi r4, r5, .LCPI9_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r5
 ; P8LE-NEXT:    addi r3, r3, -12
-; P8LE-NEXT:    lxvd2x vs1, 0, r4
-; P8LE-NEXT:    lxvd2x vs0, 0, r3
-; P8LE-NEXT:    xxswapd v3, vs1
-; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    lxvd2x v2, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs0
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    blr
 entry:
@@ -1448,22 +1447,22 @@ entry:
 define <4 x i32> @fromDiffMemConsDConvftoi(ptr nocapture readonly %ptr) {
 ; P9BE-LABEL: fromDiffMemConsDConvftoi:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    lxv vs0, 0(r3)
; P9BE-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
; P9BE-NEXT:    addi r3, r3, .LCPI18_0@toc@l
-; P9BE-NEXT:    lxv v3, 0(r3)
-; P9BE-NEXT:    vperm v2, v2, v2, v3
-; P9BE-NEXT:    xvcvspsxws v2, v2
+; P9BE-NEXT:    lxv vs1, 0(r3)
+; P9BE-NEXT:    xxperm vs0, vs0, vs1
+; P9BE-NEXT:    xvcvspsxws v2, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: fromDiffMemConsDConvftoi:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    lxv vs0, 0(r3)
; P9LE-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
; P9LE-NEXT:    addi r3, r3, .LCPI18_0@toc@l
-; P9LE-NEXT:    lxv v3, 0(r3)
-; P9LE-NEXT:    vperm v2, v2, v2, v3
-; P9LE-NEXT:    xvcvspsxws v2, v2
+; P9LE-NEXT:    lxv vs1, 0(r3)
+; P9LE-NEXT:    xxperm vs0, vs0, vs1
+; P9LE-NEXT:    xvcvspsxws v2, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fromDiffMemConsDConvftoi:
@@ -1479,11 +1478,10 @@ define <4 x i32> @fromDiffMemConsDConvftoi(ptr nocapture readonly %ptr) {
 ; P8LE-LABEL: fromDiffMemConsDConvftoi:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r4, r2, .LCPI18_0@toc@ha
-; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    lxvd2x v2, 0, r3
 ; P8LE-NEXT:    addi r4, r4, .LCPI18_0@toc@l
-; P8LE-NEXT:    lxvd2x vs1, 0, r4
-; P8LE-NEXT:    xxswapd v2, vs0
-; P8LE-NEXT:    xxswapd v3, vs1
+; P8LE-NEXT:    lxvd2x vs0, 0, r4
+; P8LE-NEXT:    xxswapd v3, vs0
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    xvcvspsxws v2, v2
 ; P8LE-NEXT:    blr
@@ -2450,8 +2448,8 @@ define <4 x i32> @fromDiffMemConsDui(ptr nocapture readonly %arr) {
 ; P9BE-NEXT:    lxv v2, 0(r3)
 ; P9BE-NEXT:    addis r3, r2, .LCPI39_0@toc@ha
 ; P9BE-NEXT:    addi r3, r3, .LCPI39_0@toc@l
-; P9BE-NEXT:    lxv v3, 0(r3)
-; P9BE-NEXT:    vperm v2, v2, v2, v3
+; P9BE-NEXT:    lxv vs0, 0(r3)
+; P9BE-NEXT:    xxperm v2, v2, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: fromDiffMemConsDui:
@@ -2550,8 +2548,8 @@ define <4 x i32> @fromDiffMemVarDui(ptr nocapture readonly %arr, i32 signext %el
 ; P9BE-NEXT:    lxvx v2, r3, r4
 ; P9BE-NEXT:    addis r3, r2, .LCPI41_0@toc@ha
 ; P9BE-NEXT:    addi r3, r3, .LCPI41_0@toc@l
-; P9BE-NEXT:    lxv v3, 0(r3)
-; P9BE-NEXT:    vperm v2, v2, v2, v3
+; P9BE-NEXT:    lxv vs0, 0(r3)
+; P9BE-NEXT:    xxperm v2, v2, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: fromDiffMemVarDui:
@@ -2562,8 +2560,8 @@ define <4 x i32> @fromDiffMemVarDui(ptr nocapture readonly %arr, i32 signext %el
 ; P9LE-NEXT:    lxvx v2, r3, r4
 ; P9LE-NEXT:    addis r3, r2, .LCPI41_0@toc@ha
 ; P9LE-NEXT:    addi r3, r3, .LCPI41_0@toc@l
-; P9LE-NEXT:    lxv v3, 0(r3)
-; P9LE-NEXT:    vperm v2, v2, v2, v3
+; P9LE-NEXT:    lxv vs0, 0(r3)
+; P9LE-NEXT:    xxperm v2, v2, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fromDiffMemVarDui:
@@ -2580,15 +2578,14 @@ define <4 x i32> @fromDiffMemVarDui(ptr nocapture readonly %arr, i32 signext %el
 ;
 ; P8LE-LABEL: fromDiffMemVarDui:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    sldi r4, r4, 2
 ; P8LE-NEXT:    addis r5, r2, .LCPI41_0@toc@ha
+; P8LE-NEXT:    sldi r4, r4, 2
+; P8LE-NEXT:    addi r5, r5, .LCPI41_0@toc@l
 ; P8LE-NEXT:    add r3, r3, r4
-; P8LE-NEXT:    addi r4, r5, .LCPI41_0@toc@l
+; P8LE-NEXT:    lxvd2x vs0, 0, r5
 ; P8LE-NEXT:    addi r3, r3, -12
-; P8LE-NEXT:    lxvd2x vs1, 0, r4
-; P8LE-NEXT:    lxvd2x vs0, 0, r3
-; P8LE-NEXT:    xxswapd v3, vs1
-; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    lxvd2x v2, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs0
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    blr
 entry:
@@ -2970,22 +2967,22 @@ entry:
 define <4 x i32> @fromDiffMemConsDConvftoui(ptr nocapture readonly %ptr) {
 ; P9BE-LABEL: fromDiffMemConsDConvftoui:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    lxv vs0, 0(r3)
 ; P9BE-NEXT:    addis r3, r2, .LCPI50_0@toc@ha
 ; P9BE-NEXT:    addi r3, r3, .LCPI50_0@toc@l
-; P9BE-NEXT:    lxv v3, 0(r3)
-; P9BE-NEXT:    vperm v2, v2, v2, v3
-; P9BE-NEXT:    xvcvspuxws v2, v2
+; P9BE-NEXT:    lxv vs1, 0(r3)
+; P9BE-NEXT:    xxperm vs0, vs0, vs1
+; P9BE-NEXT:    xvcvspuxws v2, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: fromDiffMemConsDConvftoui:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    lxv vs0, 0(r3)
 ; P9LE-NEXT:    addis r3, r2, .LCPI50_0@toc@ha
 ; P9LE-NEXT:    addi r3, r3, .LCPI50_0@toc@l
-; P9LE-NEXT:    lxv v3, 0(r3)
-; P9LE-NEXT:    vperm v2, v2, v2, v3
-; P9LE-NEXT:    xvcvspuxws v2, v2
+; P9LE-NEXT:    lxv vs1, 0(r3)
+; P9LE-NEXT:    xxperm vs0, vs0, vs1
+; P9LE-NEXT:    xvcvspuxws v2, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fromDiffMemConsDConvftoui:
@@ -3001,11 +2998,10 @@ define <4 x i32> @fromDiffMemConsDConvftoui(ptr nocapture readonly %ptr) {
 ; P8LE-LABEL: fromDiffMemConsDConvftoui:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r4, r2, .LCPI50_0@toc@ha
-; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    lxvd2x v2, 0, r3
 ; P8LE-NEXT:    addi r4, r4, .LCPI50_0@toc@l
-; P8LE-NEXT:    lxvd2x vs1, 0, r4
-; P8LE-NEXT:    xxswapd v2, vs0
-; P8LE-NEXT:    xxswapd v3, vs1
+; P8LE-NEXT:    lxvd2x vs0, 0, r4
+; P8LE-NEXT:    xxswapd v3, vs0
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    xvcvspuxws v2, v2
 ; P8LE-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 7ea300c6de965..5d48a873295fd 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -491,10 +491,10 @@ define dso_local <8 x i16> @testmrglb3(ptr nocapture readonly %a) local_unnamed_
 ; CHECK-P9-BE:       # %bb.0: # %entry
 ; CHECK-P9-BE-NEXT:    lxsd v2, 0(r3)
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI12_0@toc@ha
-; CHECK-P9-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-BE-NEXT:    xxlxor vs0, vs0, vs0
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI12_0@toc@l
-; CHECK-P9-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-BE-NEXT:    vperm v2, v4, v2, v3
+; CHECK-P9-BE-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-BE-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-P9-BE-NEXT:    blr
 ;
 ; CHECK-NOVSX-LABEL: testmrglb3:
@@ -748,11 +748,12 @@ define dso_local <4 x i32> @replace_undefs_in_splat(<4 x i32> %a) local_unnamed_
 ; CHECK-P9-BE:       # %bb.0: # %entry
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI15_0@toc@l
-; CHECK-P9-BE-NEXT:    lxv v3, 0(r3)
+; CHECK-P9-BE-NEXT:    lxv vs1, 0(r3)
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI15_1@toc@ha
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI15_1@toc@l
-; CHECK-P9-BE-NEXT:    lxv v4, 0(r3)
-; CHECK-P9-BE-NEXT:    vperm v2, v2, v4, v3
+; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-BE-NEXT:    xxperm vs0, v2, vs1
+; CHECK-P9-BE-NEXT:    xxlor v2, vs0, vs0
 ; CHECK-P9-BE-NEXT:    blr
 ;
 ; CHECK-NOVSX-LABEL: replace_undefs_in_splat:

diff  --git a/llvm/test/CodeGen/PowerPC/extract-and-store.ll b/llvm/test/CodeGen/PowerPC/extract-and-store.ll
index 98333fb78ca23..9840de29b5386 100644
--- a/llvm/test/CodeGen/PowerPC/extract-and-store.ll
+++ b/llvm/test/CodeGen/PowerPC/extract-and-store.ll
@@ -600,29 +600,29 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, ptr nocapture %
 ; CHECK-P9-LABEL: test_stores_exceed_vec_size:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
-; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs34, vs34, 1
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI16_0@toc@l
-; CHECK-P9-NEXT:    lxv vs35, 0(r3)
-; CHECK-P9-NEXT:    li r3, 16
-; CHECK-P9-NEXT:    stfiwx f0, r5, r3
+; CHECK-P9-NEXT:    lxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    li r3, 20
 ; CHECK-P9-NEXT:    stxsiwx vs34, r5, r3
-; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
-; CHECK-P9-NEXT:    stxv vs35, 0(r5)
+; CHECK-P9-NEXT:    li r3, 16
+; CHECK-P9-NEXT:    stfiwx f1, r5, r3
+; CHECK-P9-NEXT:    xxperm vs34, vs34, vs0
+; CHECK-P9-NEXT:    stxv vs34, 0(r5)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P9-BE-LABEL: test_stores_exceed_vec_size:
 ; CHECK-P9-BE:       # %bb.0: # %entry
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
-; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-BE-NEXT:    xxsldwi vs1, vs34, vs34, 1
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI16_0@toc@l
-; CHECK-P9-BE-NEXT:    lxv vs35, 0(r3)
+; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-P9-BE-NEXT:    li r3, 16
 ; CHECK-P9-BE-NEXT:    stxsiwx vs34, r5, r3
 ; CHECK-P9-BE-NEXT:    li r3, 20
-; CHECK-P9-BE-NEXT:    stfiwx f0, r5, r3
-; CHECK-P9-BE-NEXT:    vperm v3, v2, v2, v3
-; CHECK-P9-BE-NEXT:    stxv vs35, 0(r5)
+; CHECK-P9-BE-NEXT:    stfiwx f1, r5, r3
+; CHECK-P9-BE-NEXT:    xxperm vs34, vs34, vs0
+; CHECK-P9-BE-NEXT:    stxv vs34, 0(r5)
 ; CHECK-P9-BE-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %a, i32 2

diff  --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index b0708ff90356c..bed9bc0a62987 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -215,11 +215,11 @@ define dso_local void @test4(ptr nocapture %c, ptr nocapture readonly %a) local_
 ; P9-AIX32-NEXT:    stw r4, -16(r1)
 ; P9-AIX32-NEXT:    lwz r4, L..C0(r2) # %const.0
 ; P9-AIX32-NEXT:    stw r5, -32(r1)
-; P9-AIX32-NEXT:    lxv v3, -16(r1)
-; P9-AIX32-NEXT:    lxv v4, -32(r1)
-; P9-AIX32-NEXT:    lxv v2, 0(r4)
-; P9-AIX32-NEXT:    vperm v2, v4, v3, v2
-; P9-AIX32-NEXT:    stxv v2, 0(r3)
+; P9-AIX32-NEXT:    lxv vs1, -16(r1)
+; P9-AIX32-NEXT:    lxv vs2, -32(r1)
+; P9-AIX32-NEXT:    lxv vs0, 0(r4)
+; P9-AIX32-NEXT:    xxperm vs1, vs2, vs0
+; P9-AIX32-NEXT:    stxv vs1, 0(r3)
 ; P9-AIX32-NEXT:    blr
 ;
 ; P8-AIX32-LABEL: test4:
@@ -291,12 +291,12 @@ define void @test5(ptr %a, ptr %in) {
 ; P9-AIX32-NEXT:    srawi r5, r4, 31
 ; P9-AIX32-NEXT:    stw r4, -16(r1)
 ; P9-AIX32-NEXT:    lwz r4, L..C1(r2) # %const.0
-; P9-AIX32-NEXT:    lxv v3, -16(r1)
+; P9-AIX32-NEXT:    lxv vs1, -16(r1)
 ; P9-AIX32-NEXT:    stw r5, -32(r1)
-; P9-AIX32-NEXT:    lxv v4, -32(r1)
-; P9-AIX32-NEXT:    lxv v2, 0(r4)
-; P9-AIX32-NEXT:    vperm v2, v4, v3, v2
-; P9-AIX32-NEXT:    stxv v2, 0(r3)
+; P9-AIX32-NEXT:    lxv vs2, -32(r1)
+; P9-AIX32-NEXT:    lxv vs0, 0(r4)
+; P9-AIX32-NEXT:    xxperm vs1, vs2, vs0
+; P9-AIX32-NEXT:    stxv vs1, 0(r3)
 ; P9-AIX32-NEXT:    blr
 ;
 ; P8-AIX32-LABEL: test5:
@@ -367,13 +367,13 @@ define void @test6(ptr %a, ptr %in) {
 ; P9-AIX32-NEXT:    lwz r4, 0(r4)
 ; P9-AIX32-NEXT:    li r5, 0
 ; P9-AIX32-NEXT:    stw r5, -32(r1)
-; P9-AIX32-NEXT:    lxv v3, -32(r1)
+; P9-AIX32-NEXT:    lxv vs1, -32(r1)
 ; P9-AIX32-NEXT:    stw r4, -16(r1)
 ; P9-AIX32-NEXT:    lwz r4, L..C2(r2) # %const.0
-; P9-AIX32-NEXT:    lxv v4, -16(r1)
-; P9-AIX32-NEXT:    lxv v2, 0(r4)
-; P9-AIX32-NEXT:    vperm v2, v3, v4, v2
-; P9-AIX32-NEXT:    stxv v2, 0(r3)
+; P9-AIX32-NEXT:    lxv vs2, -16(r1)
+; P9-AIX32-NEXT:    lxv vs0, 0(r4)
+; P9-AIX32-NEXT:    xxperm vs2, vs1, vs0
+; P9-AIX32-NEXT:    stxv vs2, 0(r3)
 ; P9-AIX32-NEXT:    blr
 ;
 ; P8-AIX32-LABEL: test6:

diff  --git a/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll b/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll
index a6e7c631919fe..8da3a33278f67 100644
--- a/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll
+++ b/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll
@@ -98,8 +98,8 @@ define <4 x i32> @load_swap10(ptr %vp1, ptr %vp2) {
 ; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-P9-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
 ; CHECK-P9-BE-NEXT:    blr
   %v1 = load <4 x i32>, ptr %vp1
   %v2 = load <4 x i32>, ptr %vp2
@@ -138,8 +138,8 @@ define <4 x i32> @load_swap11(ptr %vp1, ptr %vp2) {
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; CHECK-P9-BE-NEXT:    lxv v2, 0(r4)
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-P9-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
 ; CHECK-P9-BE-NEXT:    blr
   %v1 = load <4 x i32>, ptr %vp1
   %v2 = load <4 x i32>, ptr %vp2
@@ -178,8 +178,8 @@ define <8 x i16> @load_swap20(ptr %vp1, ptr %vp2){
 ; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-P9-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
 ; CHECK-P9-BE-NEXT:    blr
   %v1 = load <8 x i16>, ptr %vp1
   %v2 = load <8 x i16>, ptr %vp2
@@ -218,8 +218,8 @@ define <8 x i16> @load_swap21(ptr %vp1, ptr %vp2){
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
 ; CHECK-P9-BE-NEXT:    lxv v2, 0(r4)
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; CHECK-P9-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
 ; CHECK-P9-BE-NEXT:    blr
   %v1 = load <8 x i16>, ptr %vp1
   %v2 = load <8 x i16>, ptr %vp2
@@ -360,8 +360,8 @@ define <4 x float> @load_swap50(ptr %vp1, ptr %vp2) {
 ; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI9_0@toc@l
-; CHECK-P9-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
 ; CHECK-P9-BE-NEXT:    blr
   %v1 = load <4 x float>, ptr %vp1
   %v2 = load <4 x float>, ptr %vp2
@@ -400,8 +400,8 @@ define <4 x float> @load_swap51(ptr %vp1, ptr %vp2) {
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
 ; CHECK-P9-BE-NEXT:    lxv v2, 0(r4)
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI10_0@toc@l
-; CHECK-P9-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
 ; CHECK-P9-BE-NEXT:    blr
   %v1 = load <4 x float>, ptr %vp1
   %v2 = load <4 x float>, ptr %vp2
@@ -493,8 +493,8 @@ define void @swap_store10(<4 x i32> %v1, <4 x i32> %v2, ptr %vp) {
 ; CHECK-P9-BE:       # %bb.0:
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI13_0@toc@ha
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI13_0@toc@l
-; CHECK-P9-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
 ; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
 ; CHECK-P9-BE-NEXT:    blr
   %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -532,9 +532,9 @@ define void @swap_store11(<4 x i32> %v1, <4 x i32> %v2, ptr %vp) {
 ; CHECK-P9-BE:       # %bb.0:
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI14_0@toc@ha
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI14_0@toc@l
-; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-P9-BE-NEXT:    vperm v2, v3, v3, v2
-; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
+; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-BE-NEXT:    xxperm v3, v3, vs0
+; CHECK-P9-BE-NEXT:    stxv v3, 0(r7)
 ; CHECK-P9-BE-NEXT:    blr
   %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
   store <4 x i32> %v3, ptr %vp
@@ -571,8 +571,8 @@ define void @swap_store20(<8 x i16> %v1, <8 x i16> %v2, ptr %vp) {
 ; CHECK-P9-BE:       # %bb.0:
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI15_0@toc@l
-; CHECK-P9-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
 ; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
 ; CHECK-P9-BE-NEXT:    blr
   %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -610,9 +610,9 @@ define void @swap_store21(<8 x i16> %v1, <8 x i16> %v2, ptr %vp) {
 ; CHECK-P9-BE:       # %bb.0:
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI16_0@toc@l
-; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-P9-BE-NEXT:    vperm v2, v3, v3, v2
-; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
+; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-BE-NEXT:    xxperm v3, v3, vs0
+; CHECK-P9-BE-NEXT:    stxv v3, 0(r7)
 ; CHECK-P9-BE-NEXT:    blr
   %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
   store <8 x i16> %v3, ptr %vp
@@ -775,8 +775,8 @@ define void @swap_store50(<4 x float> %v1, <4 x float> %v2, ptr %vp) {
 ; CHECK-P9-BE:       # %bb.0:
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI21_0@toc@ha
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI21_0@toc@l
-; CHECK-P9-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
 ; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
 ; CHECK-P9-BE-NEXT:    blr
   %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -814,9 +814,9 @@ define void @swap_store51(<4 x float> %v1, <4 x float> %v2, ptr %vp) {
 ; CHECK-P9-BE:       # %bb.0:
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI22_0@toc@ha
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI22_0@toc@l
-; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-P9-BE-NEXT:    vperm v2, v3, v3, v2
-; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
+; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-BE-NEXT:    xxperm v3, v3, vs0
+; CHECK-P9-BE-NEXT:    stxv v3, 0(r7)
 ; CHECK-P9-BE-NEXT:    blr
   %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
   store <4 x float> %v3, ptr %vp

diff  --git a/llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll b/llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
index 2b65ed6483566..606155de7020b 100644
--- a/llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
@@ -81,8 +81,8 @@ entry:
 define <1 x i128> @test_vrlqnm(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
 ; CHECK-LE-LABEL: test_vrlqnm:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    plxv v5, .LCPI4_0@PCREL(0), 1
-; CHECK-LE-NEXT:    vperm v3, v4, v3, v5
+; CHECK-LE-NEXT:    plxv vs0, .LCPI4_0@PCREL(0), 1
+; CHECK-LE-NEXT:    xxperm v3, v4, vs0
 ; CHECK-LE-NEXT:    vrlqnm v2, v2, v3
 ; CHECK-LE-NEXT:    blr
 ;
@@ -90,9 +90,9 @@ define <1 x i128> @test_vrlqnm(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-BE-NEXT:    lxv v5, 0(r3)
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v5
-; CHECK-BE-NEXT:    vrlqnm v2, v2, v3
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xxperm v4, v3, vs0
+; CHECK-BE-NEXT:    vrlqnm v2, v2, v4
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast <1 x i128> %b to <16 x i8>

diff  --git a/llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll b/llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll
index 12a83da66a83b..69d52fda79bae 100644
--- a/llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll
+++ b/llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll
@@ -451,8 +451,8 @@ define <8 x i16> @shuffle_vector_halfword_0_4(<8 x i16> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI16_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    blr
 entry:
   %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 4, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -464,8 +464,8 @@ define <8 x i16> @shuffle_vector_halfword_1_3(<8 x i16> %a) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI17_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI17_0@toc@l
-; CHECK-NEXT:    lxv 35, 0(3)
-; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    lxv 0, 0(3)
+; CHECK-NEXT:    xxperm 34, 34, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: shuffle_vector_halfword_1_3:
@@ -482,8 +482,8 @@ define <8 x i16> @shuffle_vector_halfword_2_3(<8 x i16> %a) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI18_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI18_0@toc@l
-; CHECK-NEXT:    lxv 35, 0(3)
-; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    lxv 0, 0(3)
+; CHECK-NEXT:    xxperm 34, 34, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: shuffle_vector_halfword_2_3:
@@ -505,8 +505,8 @@ define <8 x i16> @shuffle_vector_halfword_3_4(<8 x i16> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI19_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI19_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    blr
 entry:
   %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 7>
@@ -518,8 +518,8 @@ define <8 x i16> @shuffle_vector_halfword_4_3(<8 x i16> %a) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI20_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI20_0@toc@l
-; CHECK-NEXT:    lxv 35, 0(3)
-; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    lxv 0, 0(3)
+; CHECK-NEXT:    xxperm 34, 34, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: shuffle_vector_halfword_4_3:
@@ -536,8 +536,8 @@ define <8 x i16> @shuffle_vector_halfword_5_3(<8 x i16> %a) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI21_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI21_0@toc@l
-; CHECK-NEXT:    lxv 35, 0(3)
-; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    lxv 0, 0(3)
+; CHECK-NEXT:    xxperm 34, 34, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: shuffle_vector_halfword_5_3:
@@ -559,8 +559,8 @@ define <8 x i16> @shuffle_vector_halfword_6_4(<8 x i16> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI22_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI22_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    blr
 entry:
   %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 4, i32 7>
@@ -577,8 +577,8 @@ define <8 x i16> @shuffle_vector_halfword_7_4(<8 x i16> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI23_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI23_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    blr
 entry:
   %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>
@@ -1455,8 +1455,8 @@ define <16 x i8> @shuffle_vector_byte_0_7(<16 x i8> %a) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI56_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI56_0@toc@l
-; CHECK-NEXT:    lxv 35, 0(3)
-; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    lxv 0, 0(3)
+; CHECK-NEXT:    xxperm 34, 34, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: shuffle_vector_byte_0_7:
@@ -1478,8 +1478,8 @@ define <16 x i8> @shuffle_vector_byte_1_8(<16 x i8> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI57_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI57_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 8, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1496,8 +1496,8 @@ define <16 x i8> @shuffle_vector_byte_2_8(<16 x i8> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI58_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI58_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 8, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1509,8 +1509,8 @@ define <16 x i8> @shuffle_vector_byte_3_7(<16 x i8> %a) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI59_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI59_0@toc@l
-; CHECK-NEXT:    lxv 35, 0(3)
-; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    lxv 0, 0(3)
+; CHECK-NEXT:    xxperm 34, 34, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: shuffle_vector_byte_3_7:
@@ -1527,8 +1527,8 @@ define <16 x i8> @shuffle_vector_byte_4_7(<16 x i8> %a) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI60_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI60_0@toc@l
-; CHECK-NEXT:    lxv 35, 0(3)
-; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    lxv 0, 0(3)
+; CHECK-NEXT:    xxperm 34, 34, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: shuffle_vector_byte_4_7:
@@ -1550,8 +1550,8 @@ define <16 x i8> @shuffle_vector_byte_5_8(<16 x i8> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI61_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI61_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 8, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1568,8 +1568,8 @@ define <16 x i8> @shuffle_vector_byte_6_8(<16 x i8> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI62_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI62_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1586,8 +1586,8 @@ define <16 x i8> @shuffle_vector_byte_7_8(<16 x i8> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI63_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI63_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1599,8 +1599,8 @@ define <16 x i8> @shuffle_vector_byte_8_7(<16 x i8> %a) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI64_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI64_0@toc@l
-; CHECK-NEXT:    lxv 35, 0(3)
-; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    lxv 0, 0(3)
+; CHECK-NEXT:    xxperm 34, 34, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: shuffle_vector_byte_8_7:
@@ -1617,8 +1617,8 @@ define <16 x i8> @shuffle_vector_byte_9_7(<16 x i8> %a) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI65_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI65_0@toc@l
-; CHECK-NEXT:    lxv 35, 0(3)
-; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    lxv 0, 0(3)
+; CHECK-NEXT:    xxperm 34, 34, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: shuffle_vector_byte_9_7:
@@ -1635,8 +1635,8 @@ define <16 x i8> @shuffle_vector_byte_10_7(<16 x i8> %a) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI66_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI66_0@toc@l
-; CHECK-NEXT:    lxv 35, 0(3)
-; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    lxv 0, 0(3)
+; CHECK-NEXT:    xxperm 34, 34, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: shuffle_vector_byte_10_7:
@@ -1658,8 +1658,8 @@ define <16 x i8> @shuffle_vector_byte_11_8(<16 x i8> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI67_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI67_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 8, i32 12, i32 13, i32 14, i32 15>
@@ -1676,8 +1676,8 @@ define <16 x i8> @shuffle_vector_byte_12_8(<16 x i8> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI68_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI68_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 8, i32 13, i32 14, i32 15>
@@ -1689,8 +1689,8 @@ define <16 x i8> @shuffle_vector_byte_13_7(<16 x i8> %a) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI69_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI69_0@toc@l
-; CHECK-NEXT:    lxv 35, 0(3)
-; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    lxv 0, 0(3)
+; CHECK-NEXT:    xxperm 34, 34, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: shuffle_vector_byte_13_7:
@@ -1707,8 +1707,8 @@ define <16 x i8> @shuffle_vector_byte_14_7(<16 x i8> %a) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI70_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI70_0@toc@l
-; CHECK-NEXT:    lxv 35, 0(3)
-; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    lxv 0, 0(3)
+; CHECK-NEXT:    xxperm 34, 34, 0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: shuffle_vector_byte_14_7:
@@ -1730,8 +1730,8 @@ define <16 x i8> @shuffle_vector_byte_15_8(<16 x i8> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI71_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI71_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    blr
 entry:
   %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>

diff  --git a/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll b/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
index 544683d7aa229..9cef5c37a2b85 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
@@ -22,10 +22,10 @@ define dso_local <4 x i16> @shufflevector_combine(<4 x i32> %0) #0 {
 ; AIX-LABEL: shufflevector_combine:
 ; AIX:       # %bb.0: # %newFuncRoot
 ; AIX-NEXT:    ld 3, L..C0(2) # %const.0
-; AIX-NEXT:    xxlxor 36, 36, 36
-; AIX-NEXT:    lxv 35, 0(3)
+; AIX-NEXT:    xxlxor 1, 1, 1
+; AIX-NEXT:    lxv 0, 0(3)
 ; AIX-NEXT:    li 3, 0
-; AIX-NEXT:    vperm 2, 4, 2, 3
+; AIX-NEXT:    xxperm 34, 1, 0
 ; AIX-NEXT:    vinsw 2, 3, 8
 ; AIX-NEXT:    vpkuwum 2, 2, 2
 ; AIX-NEXT:    blr
@@ -33,44 +33,44 @@ define dso_local <4 x i16> @shufflevector_combine(<4 x i32> %0) #0 {
 ; AIX-32-LABEL: shufflevector_combine:
 ; AIX-32:       # %bb.0: # %newFuncRoot
 ; AIX-32-NEXT:    lwz 3, L..C0(2) # %const.0
-; AIX-32-NEXT:    xxlxor 36, 36, 36
-; AIX-32-NEXT:    lxv 35, 0(3)
+; AIX-32-NEXT:    xxlxor 1, 1, 1
+; AIX-32-NEXT:    lxv 0, 0(3)
 ; AIX-32-NEXT:    li 3, 0
-; AIX-32-NEXT:    vperm 2, 4, 2, 3
+; AIX-32-NEXT:    xxperm 34, 1, 0
 ; AIX-32-NEXT:    vinsw 2, 3, 8
 ; AIX-32-NEXT:    vpkuwum 2, 2, 2
 ; AIX-32-NEXT:    blr
 ;
 ; LE-LABEL: shufflevector_combine:
 ; LE:       # %bb.0: # %newFuncRoot
-; LE-NEXT:    plxv v3, .LCPI0_0@PCREL(0), 1
-; LE-NEXT:    xxlxor v4, v4, v4
+; LE-NEXT:    plxv vs0, .LCPI0_0@PCREL(0), 1
+; LE-NEXT:    xxlxor v3, v3, v3
 ; LE-NEXT:    li r3, 0
-; LE-NEXT:    vperm v2, v2, v4, v3
-; LE-NEXT:    vinsw v2, r3, 4
-; LE-NEXT:    vpkuwum v2, v2, v2
+; LE-NEXT:    xxperm v3, v2, vs0
+; LE-NEXT:    vinsw v3, r3, 4
+; LE-NEXT:    vpkuwum v2, v3, v3
 ; LE-NEXT:    blr
 ;
 ; LE-32-LABEL: shufflevector_combine:
 ; LE-32:       # %bb.0: # %newFuncRoot
 ; LE-32-NEXT:    li r3, .LCPI0_0@l
 ; LE-32-NEXT:    lis r4, .LCPI0_0@ha
-; LE-32-NEXT:    xxlxor v4, v4, v4
-; LE-32-NEXT:    lxvx v3, r4, r3
+; LE-32-NEXT:    xxlxor v3, v3, v3
+; LE-32-NEXT:    lxvx vs0, r4, r3
 ; LE-32-NEXT:    li r3, 0
-; LE-32-NEXT:    vperm v2, v2, v4, v3
-; LE-32-NEXT:    vinsw v2, r3, 4
-; LE-32-NEXT:    vpkuwum v2, v2, v2
+; LE-32-NEXT:    xxperm v3, v2, vs0
+; LE-32-NEXT:    vinsw v3, r3, 4
+; LE-32-NEXT:    vpkuwum v2, v3, v3
 ; LE-32-NEXT:    blr
 ;
 ; BE-LABEL: shufflevector_combine:
 ; BE:       # %bb.0: # %newFuncRoot
 ; BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; BE-NEXT:    xxlxor v4, v4, v4
+; BE-NEXT:    xxlxor vs0, vs0, vs0
 ; BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; BE-NEXT:    lxv v3, 0(r3)
+; BE-NEXT:    lxv vs1, 0(r3)
 ; BE-NEXT:    li r3, 0
-; BE-NEXT:    vperm v2, v4, v2, v3
+; BE-NEXT:    xxperm v2, vs0, vs1
 ; BE-NEXT:    vinsw v2, r3, 8
 ; BE-NEXT:    vpkuwum v2, v2, v2
 ; BE-NEXT:    blr
@@ -79,10 +79,10 @@ define dso_local <4 x i16> @shufflevector_combine(<4 x i32> %0) #0 {
 ; BE-32:       # %bb.0: # %newFuncRoot
 ; BE-32-NEXT:    li r3, .LCPI0_0@l
 ; BE-32-NEXT:    lis r4, .LCPI0_0@ha
-; BE-32-NEXT:    xxlxor v4, v4, v4
-; BE-32-NEXT:    lxvx v3, r4, r3
+; BE-32-NEXT:    xxlxor vs1, vs1, vs1
+; BE-32-NEXT:    lxvx vs0, r4, r3
 ; BE-32-NEXT:    li r3, 0
-; BE-32-NEXT:    vperm v2, v4, v2, v3
+; BE-32-NEXT:    xxperm v2, vs1, vs0
 ; BE-32-NEXT:    vinsw v2, r3, 8
 ; BE-32-NEXT:    vpkuwum v2, v2, v2
 ; BE-32-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
index 94407cd4f8ed8..7c39584b8d096 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -544,6 +544,7 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    li r10, 7
 ; CHECK-PWR9-BE-NEXT:    vextublx r11, r10, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r10, r10, v3
+; CHECK-PWR9-BE-NEXT:    mtfprwz f2, r9
 ; CHECK-PWR9-BE-NEXT:    clrlwi r11, r11, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r10, r10, 24
 ; CHECK-PWR9-BE-NEXT:    sub r10, r11, r10
@@ -562,7 +563,6 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    li r12, 9
 ; CHECK-PWR9-BE-NEXT:    vextublx r0, r12, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r12, r12, v3
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v0, r11
 ; CHECK-PWR9-BE-NEXT:    clrlwi r0, r0, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r12, r12, 24
 ; CHECK-PWR9-BE-NEXT:    sub r12, r0, r12
@@ -572,6 +572,7 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    li r0, 10
 ; CHECK-PWR9-BE-NEXT:    vextublx r30, r0, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r0, r0, v3
+; CHECK-PWR9-BE-NEXT:    mtvsrwz v4, r12
 ; CHECK-PWR9-BE-NEXT:    clrlwi r30, r30, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r0, r0, 24
 ; CHECK-PWR9-BE-NEXT:    sub r0, r30, r0
@@ -599,8 +600,6 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    li r28, 13
 ; CHECK-PWR9-BE-NEXT:    vextublx r27, r28, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r28, r28, v3
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v5, r29
-; CHECK-PWR9-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-BE-NEXT:    clrlwi r27, r27, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r28, r28, 24
 ; CHECK-PWR9-BE-NEXT:    sub r28, r27, r28
@@ -619,12 +618,14 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    li r26, 15
 ; CHECK-PWR9-BE-NEXT:    vextublx r25, r26, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r26, r26, v3
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v3, r27
+; CHECK-PWR9-BE-NEXT:    mtfprwz f0, r27
 ; CHECK-PWR9-BE-NEXT:    addis r27, r2, .LCPI9_0@toc@ha
+; CHECK-PWR9-BE-NEXT:    mtvsrwz v3, r28
+; CHECK-PWR9-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-BE-NEXT:    addi r27, r27, .LCPI9_0@toc@l
 ; CHECK-PWR9-BE-NEXT:    clrlwi r25, r25, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r26, r26, 24
-; CHECK-PWR9-BE-NEXT:    lxv v4, 0(r27)
+; CHECK-PWR9-BE-NEXT:    lxv vs1, 0(r27)
 ; CHECK-PWR9-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-BE-NEXT:    sub r26, r25, r26
 ; CHECK-PWR9-BE-NEXT:    srawi r25, r26, 31
@@ -633,32 +634,31 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-BE-NEXT:    mtvsrwz v2, r26
 ; CHECK-PWR9-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-PWR9-BE-NEXT:    vperm v2, v3, v2, v4
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v3, r28
-; CHECK-PWR9-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-PWR9-BE-NEXT:    vperm v3, v5, v3, v4
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v5, r0
+; CHECK-PWR9-BE-NEXT:    xxperm v2, vs0, vs1
+; CHECK-PWR9-BE-NEXT:    mtfprwz f0, r29
+; CHECK-PWR9-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-PWR9-BE-NEXT:    xxperm v3, vs0, vs1
+; CHECK-PWR9-BE-NEXT:    mtfprwz f0, r0
 ; CHECK-PWR9-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-PWR9-BE-NEXT:    mtvsrwz v3, r30
 ; CHECK-PWR9-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-PWR9-BE-NEXT:    vperm v3, v5, v3, v4
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v5, r12
-; CHECK-PWR9-BE-NEXT:    vperm v5, v0, v5, v4
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-PWR9-BE-NEXT:    vmrghh v3, v5, v3
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v5, r7
+; CHECK-PWR9-BE-NEXT:    xxperm v3, vs0, vs1
+; CHECK-PWR9-BE-NEXT:    mtfprwz f0, r11
+; CHECK-PWR9-BE-NEXT:    xxperm v4, vs0, vs1
+; CHECK-PWR9-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-PWR9-BE-NEXT:    mtvsrwz v4, r4
 ; CHECK-PWR9-BE-NEXT:    xxmrghw vs0, v3, v2
 ; CHECK-PWR9-BE-NEXT:    mtvsrwz v2, r10
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v3, r9
-; CHECK-PWR9-BE-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-PWR9-BE-NEXT:    mtvsrwz v3, r8
-; CHECK-PWR9-BE-NEXT:    vperm v3, v5, v3, v4
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v5, r5
+; CHECK-PWR9-BE-NEXT:    xxperm v2, vs2, vs1
+; CHECK-PWR9-BE-NEXT:    mtfprwz f2, r7
+; CHECK-PWR9-BE-NEXT:    xxperm v3, vs2, vs1
+; CHECK-PWR9-BE-NEXT:    mtfprwz f2, r5
 ; CHECK-PWR9-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-PWR9-BE-NEXT:    mtvsrwz v3, r6
-; CHECK-PWR9-BE-NEXT:    vperm v3, v5, v3, v4
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v5, r4
-; CHECK-PWR9-BE-NEXT:    vperm v4, v0, v5, v4
+; CHECK-PWR9-BE-NEXT:    xxperm v3, vs2, vs1
+; CHECK-PWR9-BE-NEXT:    mtfprwz f2, r3
+; CHECK-PWR9-BE-NEXT:    xxperm v4, vs2, vs1
 ; CHECK-PWR9-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-PWR9-BE-NEXT:    xxmrghw vs1, v3, v2
 ; CHECK-PWR9-BE-NEXT:    xxmrghd v2, vs1, vs0

diff  --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index c33ab66cd98ca..204b3f1bde8fe 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -19,20 +19,20 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9LE-LABEL: test64:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    add 5, 3, 4
-; P9LE-NEXT:    lxsdx 2, 3, 4
+; P9LE-NEXT:    lfdx 0, 3, 4
 ; P9LE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; P9LE-NEXT:    xxlxor 4, 4, 4
+; P9LE-NEXT:    xxlxor 2, 2, 2
+; P9LE-NEXT:    vspltisw 4, 8
+; P9LE-NEXT:    lxsd 3, 4(5)
 ; P9LE-NEXT:    addi 3, 3, .LCPI0_0@toc@l
-; P9LE-NEXT:    lxv 3, 0(3)
+; P9LE-NEXT:    vadduwm 4, 4, 4
+; P9LE-NEXT:    lxv 1, 0(3)
 ; P9LE-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
 ; P9LE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
-; P9LE-NEXT:    vperm 2, 2, 4, 3
-; P9LE-NEXT:    lxsd 3, 4(5)
-; P9LE-NEXT:    lxv 4, 0(3)
-; P9LE-NEXT:    vperm 3, 3, 3, 4
-; P9LE-NEXT:    vspltisw 4, 8
+; P9LE-NEXT:    xxperm 2, 0, 1
+; P9LE-NEXT:    lxv 0, 0(3)
+; P9LE-NEXT:    xxperm 3, 3, 0
 ; P9LE-NEXT:    vnegw 3, 3
-; P9LE-NEXT:    vadduwm 4, 4, 4
 ; P9LE-NEXT:    vslw 3, 3, 4
 ; P9LE-NEXT:    vsubuwm 2, 3, 2
 ; P9LE-NEXT:    xxswapd 0, 2
@@ -44,18 +44,18 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-NEXT:    add 5, 3, 4
 ; P9BE-NEXT:    lxsdx 2, 3, 4
 ; P9BE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; P9BE-NEXT:    xxlxor 4, 4, 4
+; P9BE-NEXT:    xxlxor 0, 0, 0
+; P9BE-NEXT:    vspltisw 4, 8
+; P9BE-NEXT:    lxsd 3, 4(5)
 ; P9BE-NEXT:    addi 3, 3, .LCPI0_0@toc@l
-; P9BE-NEXT:    lxv 3, 0(3)
+; P9BE-NEXT:    vadduwm 4, 4, 4
+; P9BE-NEXT:    lxv 1, 0(3)
 ; P9BE-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
 ; P9BE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
-; P9BE-NEXT:    vperm 2, 4, 2, 3
-; P9BE-NEXT:    lxsd 3, 4(5)
-; P9BE-NEXT:    lxv 4, 0(3)
-; P9BE-NEXT:    vperm 3, 3, 3, 4
-; P9BE-NEXT:    vspltisw 4, 8
+; P9BE-NEXT:    xxperm 2, 0, 1
+; P9BE-NEXT:    lxv 0, 0(3)
+; P9BE-NEXT:    xxperm 3, 3, 0
 ; P9BE-NEXT:    vnegw 3, 3
-; P9BE-NEXT:    vadduwm 4, 4, 4
 ; P9BE-NEXT:    vslw 3, 3, 4
 ; P9BE-NEXT:    vsubuwm 2, 3, 2
 ; P9BE-NEXT:    xxswapd 0, 2
@@ -67,16 +67,16 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-AIX-NEXT:    add 5, 3, 4
 ; P9BE-AIX-NEXT:    lxsdx 2, 3, 4
 ; P9BE-AIX-NEXT:    ld 3, L..C0(2) # %const.0
-; P9BE-AIX-NEXT:    xxlxor 4, 4, 4
-; P9BE-AIX-NEXT:    lxv 3, 0(3)
-; P9BE-AIX-NEXT:    ld 3, L..C1(2) # %const.1
-; P9BE-AIX-NEXT:    vperm 2, 4, 2, 3
-; P9BE-AIX-NEXT:    lxsd 3, 4(5)
-; P9BE-AIX-NEXT:    lxv 4, 0(3)
-; P9BE-AIX-NEXT:    vperm 3, 3, 3, 4
+; P9BE-AIX-NEXT:    xxlxor 1, 1, 1
 ; P9BE-AIX-NEXT:    vspltisw 4, 8
-; P9BE-AIX-NEXT:    vnegw 3, 3
+; P9BE-AIX-NEXT:    lxsd 3, 4(5)
+; P9BE-AIX-NEXT:    lxv 0, 0(3)
+; P9BE-AIX-NEXT:    ld 3, L..C1(2) # %const.1
 ; P9BE-AIX-NEXT:    vadduwm 4, 4, 4
+; P9BE-AIX-NEXT:    xxperm 2, 1, 0
+; P9BE-AIX-NEXT:    lxv 0, 0(3)
+; P9BE-AIX-NEXT:    xxperm 3, 3, 0
+; P9BE-AIX-NEXT:    vnegw 3, 3
 ; P9BE-AIX-NEXT:    vslw 3, 3, 4
 ; P9BE-AIX-NEXT:    vsubuwm 2, 3, 2
 ; P9BE-AIX-NEXT:    xxswapd 0, 2
@@ -86,26 +86,26 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-AIX32-LABEL: test64:
 ; P9BE-AIX32:       # %bb.0: # %entry
 ; P9BE-AIX32-NEXT:    lwzux 4, 3, 4
-; P9BE-AIX32-NEXT:    xxlxor 4, 4, 4
+; P9BE-AIX32-NEXT:    xxlxor 2, 2, 2
+; P9BE-AIX32-NEXT:    vspltisw 4, 8
 ; P9BE-AIX32-NEXT:    stw 4, -48(1)
+; P9BE-AIX32-NEXT:    vadduwm 4, 4, 4
 ; P9BE-AIX32-NEXT:    lwz 4, 4(3)
 ; P9BE-AIX32-NEXT:    lxv 0, -48(1)
 ; P9BE-AIX32-NEXT:    stw 4, -32(1)
 ; P9BE-AIX32-NEXT:    lwz 4, L..C0(2) # %const.0
-; P9BE-AIX32-NEXT:    lwz 3, 8(3)
 ; P9BE-AIX32-NEXT:    lxv 1, -32(1)
-; P9BE-AIX32-NEXT:    lxv 3, 0(4)
+; P9BE-AIX32-NEXT:    lwz 3, 8(3)
 ; P9BE-AIX32-NEXT:    stw 3, -16(1)
 ; P9BE-AIX32-NEXT:    lwz 3, L..C1(2) # %const.1
 ; P9BE-AIX32-NEXT:    xxmrghw 2, 0, 1
+; P9BE-AIX32-NEXT:    lxv 0, 0(4)
+; P9BE-AIX32-NEXT:    xxperm 2, 2, 0
 ; P9BE-AIX32-NEXT:    lxv 0, -16(1)
-; P9BE-AIX32-NEXT:    vperm 2, 4, 2, 3
-; P9BE-AIX32-NEXT:    lxv 4, 0(3)
 ; P9BE-AIX32-NEXT:    xxmrghw 3, 1, 0
-; P9BE-AIX32-NEXT:    vperm 3, 3, 3, 4
-; P9BE-AIX32-NEXT:    vspltisw 4, 8
+; P9BE-AIX32-NEXT:    lxv 0, 0(3)
+; P9BE-AIX32-NEXT:    xxperm 3, 3, 0
 ; P9BE-AIX32-NEXT:    vnegw 3, 3
-; P9BE-AIX32-NEXT:    vadduwm 4, 4, 4
 ; P9BE-AIX32-NEXT:    vslw 3, 3, 4
 ; P9BE-AIX32-NEXT:    vsubuwm 2, 3, 2
 ; P9BE-AIX32-NEXT:    xxswapd 0, 2
@@ -137,20 +137,21 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9LE-LABEL: test32:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    add 5, 3, 4
-; P9LE-NEXT:    lxsiwzx 2, 3, 4
+; P9LE-NEXT:    lfiwzx 0, 3, 4
 ; P9LE-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
 ; P9LE-NEXT:    xxlxor 3, 3, 3
+; P9LE-NEXT:    vspltisw 4, 8
 ; P9LE-NEXT:    addi 3, 3, .LCPI1_0@toc@l
-; P9LE-NEXT:    lxv 4, 0(3)
+; P9LE-NEXT:    lxv 1, 0(3)
 ; P9LE-NEXT:    li 3, 4
-; P9LE-NEXT:    lxsiwzx 5, 5, 3
-; P9LE-NEXT:    vperm 2, 2, 3, 4
-; P9LE-NEXT:    vperm 3, 5, 3, 4
-; P9LE-NEXT:    vspltisw 4, 8
-; P9LE-NEXT:    vnegw 3, 3
+; P9LE-NEXT:    xxlxor 2, 2, 2
 ; P9LE-NEXT:    vadduwm 4, 4, 4
-; P9LE-NEXT:    vslw 3, 3, 4
-; P9LE-NEXT:    vsubuwm 2, 3, 2
+; P9LE-NEXT:    xxperm 3, 0, 1
+; P9LE-NEXT:    lfiwzx 0, 5, 3
+; P9LE-NEXT:    xxperm 2, 0, 1
+; P9LE-NEXT:    vnegw 2, 2
+; P9LE-NEXT:    vslw 2, 2, 4
+; P9LE-NEXT:    vsubuwm 2, 2, 3
 ; P9LE-NEXT:    xxswapd 0, 2
 ; P9LE-NEXT:    stxv 0, 0(3)
 ; P9LE-NEXT:    blr
@@ -158,20 +159,21 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-LABEL: test32:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    add 5, 3, 4
-; P9BE-NEXT:    lxsiwzx 2, 3, 4
+; P9BE-NEXT:    lfiwzx 0, 3, 4
 ; P9BE-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
 ; P9BE-NEXT:    xxlxor 3, 3, 3
+; P9BE-NEXT:    vspltisw 4, 8
 ; P9BE-NEXT:    addi 3, 3, .LCPI1_0@toc@l
-; P9BE-NEXT:    lxv 4, 0(3)
+; P9BE-NEXT:    lxv 1, 0(3)
 ; P9BE-NEXT:    li 3, 4
-; P9BE-NEXT:    lxsiwzx 5, 5, 3
-; P9BE-NEXT:    vperm 2, 3, 2, 4
-; P9BE-NEXT:    vperm 3, 3, 5, 4
-; P9BE-NEXT:    vspltisw 4, 8
-; P9BE-NEXT:    vnegw 3, 3
+; P9BE-NEXT:    xxlxor 2, 2, 2
 ; P9BE-NEXT:    vadduwm 4, 4, 4
-; P9BE-NEXT:    vslw 3, 3, 4
-; P9BE-NEXT:    vsubuwm 2, 3, 2
+; P9BE-NEXT:    xxperm 3, 0, 1
+; P9BE-NEXT:    lfiwzx 0, 5, 3
+; P9BE-NEXT:    xxperm 2, 0, 1
+; P9BE-NEXT:    vnegw 2, 2
+; P9BE-NEXT:    vslw 2, 2, 4
+; P9BE-NEXT:    vsubuwm 2, 2, 3
 ; P9BE-NEXT:    xxswapd 0, 2
 ; P9BE-NEXT:    stxv 0, 0(3)
 ; P9BE-NEXT:    blr
@@ -179,19 +181,20 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-AIX-LABEL: test32:
 ; P9BE-AIX:       # %bb.0: # %entry
 ; P9BE-AIX-NEXT:    add 5, 3, 4
-; P9BE-AIX-NEXT:    lxsiwzx 2, 3, 4
+; P9BE-AIX-NEXT:    lfiwzx 0, 3, 4
 ; P9BE-AIX-NEXT:    ld 3, L..C2(2) # %const.0
 ; P9BE-AIX-NEXT:    xxlxor 3, 3, 3
-; P9BE-AIX-NEXT:    lxv 4, 0(3)
-; P9BE-AIX-NEXT:    li 3, 4
-; P9BE-AIX-NEXT:    lxsiwzx 5, 5, 3
-; P9BE-AIX-NEXT:    vperm 2, 3, 2, 4
-; P9BE-AIX-NEXT:    vperm 3, 3, 5, 4
+; P9BE-AIX-NEXT:    xxlxor 2, 2, 2
 ; P9BE-AIX-NEXT:    vspltisw 4, 8
-; P9BE-AIX-NEXT:    vnegw 3, 3
 ; P9BE-AIX-NEXT:    vadduwm 4, 4, 4
-; P9BE-AIX-NEXT:    vslw 3, 3, 4
-; P9BE-AIX-NEXT:    vsubuwm 2, 3, 2
+; P9BE-AIX-NEXT:    lxv 1, 0(3)
+; P9BE-AIX-NEXT:    li 3, 4
+; P9BE-AIX-NEXT:    xxperm 3, 0, 1
+; P9BE-AIX-NEXT:    lfiwzx 0, 5, 3
+; P9BE-AIX-NEXT:    xxperm 2, 0, 1
+; P9BE-AIX-NEXT:    vnegw 2, 2
+; P9BE-AIX-NEXT:    vslw 2, 2, 4
+; P9BE-AIX-NEXT:    vsubuwm 2, 2, 3
 ; P9BE-AIX-NEXT:    xxswapd 0, 2
 ; P9BE-AIX-NEXT:    stxv 0, 0(3)
 ; P9BE-AIX-NEXT:    blr
@@ -199,19 +202,20 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-AIX32-LABEL: test32:
 ; P9BE-AIX32:       # %bb.0: # %entry
 ; P9BE-AIX32-NEXT:    add 5, 3, 4
-; P9BE-AIX32-NEXT:    lxsiwzx 2, 3, 4
+; P9BE-AIX32-NEXT:    lfiwzx 0, 3, 4
 ; P9BE-AIX32-NEXT:    lwz 3, L..C2(2) # %const.0
 ; P9BE-AIX32-NEXT:    xxlxor 3, 3, 3
-; P9BE-AIX32-NEXT:    lxv 4, 0(3)
-; P9BE-AIX32-NEXT:    li 3, 4
-; P9BE-AIX32-NEXT:    lxsiwzx 5, 5, 3
-; P9BE-AIX32-NEXT:    vperm 2, 3, 2, 4
-; P9BE-AIX32-NEXT:    vperm 3, 3, 5, 4
+; P9BE-AIX32-NEXT:    xxlxor 2, 2, 2
 ; P9BE-AIX32-NEXT:    vspltisw 4, 8
-; P9BE-AIX32-NEXT:    vnegw 3, 3
 ; P9BE-AIX32-NEXT:    vadduwm 4, 4, 4
-; P9BE-AIX32-NEXT:    vslw 3, 3, 4
-; P9BE-AIX32-NEXT:    vsubuwm 2, 3, 2
+; P9BE-AIX32-NEXT:    lxv 1, 0(3)
+; P9BE-AIX32-NEXT:    li 3, 4
+; P9BE-AIX32-NEXT:    xxperm 3, 0, 1
+; P9BE-AIX32-NEXT:    lfiwzx 0, 5, 3
+; P9BE-AIX32-NEXT:    xxperm 2, 0, 1
+; P9BE-AIX32-NEXT:    vnegw 2, 2
+; P9BE-AIX32-NEXT:    vslw 2, 2, 4
+; P9BE-AIX32-NEXT:    vsubuwm 2, 2, 3
 ; P9BE-AIX32-NEXT:    xxswapd 0, 2
 ; P9BE-AIX32-NEXT:    stxv 0, 0(3)
 ; P9BE-AIX32-NEXT:    blr
@@ -249,15 +253,15 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
 ; P9LE-NEXT:    li 6, 0
 ; P9LE-NEXT:    addi 3, 3, .LCPI2_0@toc@l
 ; P9LE-NEXT:    mtvsrd 3, 6
+; P9LE-NEXT:    lxv 0, 0(3)
+; P9LE-NEXT:    li 3, 0
 ; P9LE-NEXT:    vmrghh 4, 3, 4
 ; P9LE-NEXT:    vmrghh 2, 3, 2
 ; P9LE-NEXT:    vsplth 3, 3, 3
 ; P9LE-NEXT:    xxmrglw 3, 4, 3
-; P9LE-NEXT:    lxv 4, 0(3)
-; P9LE-NEXT:    li 3, 0
-; P9LE-NEXT:    vperm 2, 2, 3, 4
-; P9LE-NEXT:    xxspltw 3, 2, 2
-; P9LE-NEXT:    vadduwm 2, 2, 3
+; P9LE-NEXT:    xxperm 3, 2, 0
+; P9LE-NEXT:    xxspltw 2, 3, 2
+; P9LE-NEXT:    vadduwm 2, 3, 2
 ; P9LE-NEXT:    vextuwrx 3, 3, 2
 ; P9LE-NEXT:    cmpw 3, 5
 ; P9LE-NEXT:    bgelr+ 0
@@ -268,24 +272,25 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
 ; P9BE-NEXT:    sldi 4, 4, 1
 ; P9BE-NEXT:    li 7, 16
 ; P9BE-NEXT:    add 6, 3, 4
-; P9BE-NEXT:    lxsihzx 5, 3, 4
-; P9BE-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
-; P9BE-NEXT:    lxsihzx 2, 6, 7
+; P9BE-NEXT:    lxsihzx 0, 6, 7
 ; P9BE-NEXT:    addis 6, 2, .LCPI2_0@toc@ha
-; P9BE-NEXT:    addi 3, 3, .LCPI2_1@toc@l
 ; P9BE-NEXT:    addi 6, 6, .LCPI2_0@toc@l
-; P9BE-NEXT:    lxv 3, 0(6)
+; P9BE-NEXT:    lxv 1, 0(6)
 ; P9BE-NEXT:    li 6, 0
-; P9BE-NEXT:    mtvsrwz 4, 6
-; P9BE-NEXT:    vperm 2, 4, 2, 3
-; P9BE-NEXT:    vperm 3, 4, 5, 3
-; P9BE-NEXT:    vsplth 4, 4, 3
-; P9BE-NEXT:    xxmrghw 3, 4, 3
-; P9BE-NEXT:    lxv 4, 0(3)
+; P9BE-NEXT:    mtvsrwz 2, 6
+; P9BE-NEXT:    vmr 3, 2
+; P9BE-NEXT:    vsplth 4, 2, 3
+; P9BE-NEXT:    xxperm 3, 0, 1
+; P9BE-NEXT:    lxsihzx 0, 3, 4
+; P9BE-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
+; P9BE-NEXT:    addi 3, 3, .LCPI2_1@toc@l
+; P9BE-NEXT:    xxperm 2, 0, 1
+; P9BE-NEXT:    lxv 1, 0(3)
 ; P9BE-NEXT:    li 3, 0
-; P9BE-NEXT:    vperm 2, 3, 2, 4
-; P9BE-NEXT:    xxspltw 3, 2, 1
-; P9BE-NEXT:    vadduwm 2, 2, 3
+; P9BE-NEXT:    xxmrghw 0, 4, 2
+; P9BE-NEXT:    xxperm 3, 0, 1
+; P9BE-NEXT:    xxspltw 2, 3, 1
+; P9BE-NEXT:    vadduwm 2, 3, 2
 ; P9BE-NEXT:    vextuwlx 3, 3, 2
 ; P9BE-NEXT:    cmpw 3, 5
 ; P9BE-NEXT:    bgelr+ 0
@@ -296,22 +301,23 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
 ; P9BE-AIX-NEXT:    sldi 4, 4, 1
 ; P9BE-AIX-NEXT:    li 7, 16
 ; P9BE-AIX-NEXT:    add 6, 3, 4
-; P9BE-AIX-NEXT:    lxsihzx 5, 3, 4
-; P9BE-AIX-NEXT:    ld 3, L..C3(2) # %const.1
-; P9BE-AIX-NEXT:    lxsihzx 2, 6, 7
-; P9BE-AIX-NEXT:    ld 6, L..C4(2) # %const.0
-; P9BE-AIX-NEXT:    lxv 3, 0(6)
+; P9BE-AIX-NEXT:    lxsihzx 0, 6, 7
+; P9BE-AIX-NEXT:    ld 6, L..C3(2) # %const.0
+; P9BE-AIX-NEXT:    lxv 1, 0(6)
 ; P9BE-AIX-NEXT:    li 6, 0
-; P9BE-AIX-NEXT:    mtvsrwz 4, 6
-; P9BE-AIX-NEXT:    vperm 2, 4, 2, 3
-; P9BE-AIX-NEXT:    vperm 3, 4, 5, 3
-; P9BE-AIX-NEXT:    vsplth 4, 4, 3
-; P9BE-AIX-NEXT:    xxmrghw 3, 4, 3
-; P9BE-AIX-NEXT:    lxv 4, 0(3)
+; P9BE-AIX-NEXT:    mtvsrwz 2, 6
+; P9BE-AIX-NEXT:    vmr 3, 2
+; P9BE-AIX-NEXT:    vsplth 4, 2, 3
+; P9BE-AIX-NEXT:    xxperm 3, 0, 1
+; P9BE-AIX-NEXT:    lxsihzx 0, 3, 4
+; P9BE-AIX-NEXT:    ld 3, L..C4(2) # %const.1
+; P9BE-AIX-NEXT:    xxperm 2, 0, 1
+; P9BE-AIX-NEXT:    lxv 1, 0(3)
 ; P9BE-AIX-NEXT:    li 3, 0
-; P9BE-AIX-NEXT:    vperm 2, 3, 2, 4
-; P9BE-AIX-NEXT:    xxspltw 3, 2, 1
-; P9BE-AIX-NEXT:    vadduwm 2, 2, 3
+; P9BE-AIX-NEXT:    xxmrghw 0, 4, 2
+; P9BE-AIX-NEXT:    xxperm 3, 0, 1
+; P9BE-AIX-NEXT:    xxspltw 2, 3, 1
+; P9BE-AIX-NEXT:    vadduwm 2, 3, 2
 ; P9BE-AIX-NEXT:    vextuwlx 3, 3, 2
 ; P9BE-AIX-NEXT:    cmpw 3, 5
 ; P9BE-AIX-NEXT:    bgelr+ 0
@@ -331,13 +337,13 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
 ; P9BE-AIX32-NEXT:    lwz 3, L..C3(2) # %const.0
 ; P9BE-AIX32-NEXT:    lxv 3, -32(1)
 ; P9BE-AIX32-NEXT:    vmrghh 4, 2, 4
+; P9BE-AIX32-NEXT:    lxv 0, 0(3)
 ; P9BE-AIX32-NEXT:    vmrghh 3, 2, 3
 ; P9BE-AIX32-NEXT:    vsplth 2, 2, 0
 ; P9BE-AIX32-NEXT:    xxmrghw 2, 2, 4
-; P9BE-AIX32-NEXT:    lxv 4, 0(3)
-; P9BE-AIX32-NEXT:    vperm 2, 2, 3, 4
-; P9BE-AIX32-NEXT:    xxspltw 3, 2, 1
-; P9BE-AIX32-NEXT:    vadduwm 2, 2, 3
+; P9BE-AIX32-NEXT:    xxperm 3, 2, 0
+; P9BE-AIX32-NEXT:    xxspltw 2, 3, 1
+; P9BE-AIX32-NEXT:    vadduwm 2, 3, 2
 ; P9BE-AIX32-NEXT:    stxv 2, -16(1)
 ; P9BE-AIX32-NEXT:    lwz 3, -16(1)
 ; P9BE-AIX32-NEXT:    cmpw 3, 5
@@ -389,13 +395,13 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
 ; P9LE-NEXT:    vmrghb 2, 3, 2
 ; P9LE-NEXT:    addi 3, 3, .LCPI3_0@toc@l
 ; P9LE-NEXT:    vmrglh 2, 2, 4
+; P9LE-NEXT:    lxv 1, 0(3)
+; P9LE-NEXT:    li 3, 0
 ; P9LE-NEXT:    vmrghb 3, 3, 5
 ; P9LE-NEXT:    xxmrglw 2, 2, 4
 ; P9LE-NEXT:    vmrglh 3, 3, 4
-; P9LE-NEXT:    xxmrglw 3, 4, 3
-; P9LE-NEXT:    lxv 4, 0(3)
-; P9LE-NEXT:    li 3, 0
-; P9LE-NEXT:    vperm 2, 3, 2, 4
+; P9LE-NEXT:    xxmrglw 0, 4, 3
+; P9LE-NEXT:    xxperm 2, 0, 1
 ; P9LE-NEXT:    xxspltw 3, 2, 2
 ; P9LE-NEXT:    vadduwm 2, 2, 3
 ; P9LE-NEXT:    vextuwrx 3, 3, 2
@@ -407,24 +413,27 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    add 6, 3, 4
 ; P9BE-NEXT:    li 7, 8
-; P9BE-NEXT:    lxsibzx 5, 3, 4
-; P9BE-NEXT:    addis 3, 2, .LCPI3_1@toc@ha
-; P9BE-NEXT:    lxsibzx 2, 6, 7
+; P9BE-NEXT:    lxsibzx 0, 6, 7
 ; P9BE-NEXT:    addis 6, 2, .LCPI3_0@toc@ha
-; P9BE-NEXT:    addi 3, 3, .LCPI3_1@toc@l
 ; P9BE-NEXT:    addi 6, 6, .LCPI3_0@toc@l
-; P9BE-NEXT:    lxv 3, 0(6)
+; P9BE-NEXT:    lxv 1, 0(6)
 ; P9BE-NEXT:    li 6, 0
-; P9BE-NEXT:    mtvsrwz 4, 6
-; P9BE-NEXT:    vperm 2, 4, 2, 3
-; P9BE-NEXT:    vperm 3, 4, 5, 3
-; P9BE-NEXT:    vspltb 4, 4, 7
-; P9BE-NEXT:    vmrghh 3, 3, 4
-; P9BE-NEXT:    xxspltw 4, 4, 0
-; P9BE-NEXT:    xxmrghw 2, 3, 2
-; P9BE-NEXT:    lxv 3, 0(3)
+; P9BE-NEXT:    mtvsrwz 2, 6
+; P9BE-NEXT:    vspltb 3, 2, 7
+; P9BE-NEXT:    xxperm 0, 2, 1
+; P9BE-NEXT:    lxsibzx 1, 3, 4
+; P9BE-NEXT:    addis 3, 2, .LCPI3_1@toc@ha
+; P9BE-NEXT:    addi 3, 3, .LCPI3_1@toc@l
+; P9BE-NEXT:    lxv 2, 0(3)
+; P9BE-NEXT:    addis 3, 2, .LCPI3_2@toc@ha
+; P9BE-NEXT:    addi 3, 3, .LCPI3_2@toc@l
+; P9BE-NEXT:    xxperm 2, 1, 2
+; P9BE-NEXT:    xxspltw 1, 3, 0
+; P9BE-NEXT:    vmrghh 2, 2, 3
+; P9BE-NEXT:    xxmrghw 2, 2, 0
+; P9BE-NEXT:    lxv 0, 0(3)
 ; P9BE-NEXT:    li 3, 0
-; P9BE-NEXT:    vperm 2, 4, 2, 3
+; P9BE-NEXT:    xxperm 2, 1, 0
 ; P9BE-NEXT:    xxspltw 3, 2, 1
 ; P9BE-NEXT:    vadduwm 2, 2, 3
 ; P9BE-NEXT:    vextuwlx 3, 3, 2
@@ -436,22 +445,24 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
 ; P9BE-AIX:       # %bb.0: # %entry
 ; P9BE-AIX-NEXT:    add 6, 3, 4
 ; P9BE-AIX-NEXT:    li 7, 8
-; P9BE-AIX-NEXT:    lxsibzx 5, 3, 4
-; P9BE-AIX-NEXT:    ld 3, L..C5(2) # %const.1
-; P9BE-AIX-NEXT:    lxsibzx 2, 6, 7
-; P9BE-AIX-NEXT:    ld 6, L..C6(2) # %const.0
-; P9BE-AIX-NEXT:    lxv 3, 0(6)
+; P9BE-AIX-NEXT:    lxsibzx 0, 6, 7
+; P9BE-AIX-NEXT:    ld 6, L..C5(2) # %const.0
+; P9BE-AIX-NEXT:    lxv 1, 0(6)
 ; P9BE-AIX-NEXT:    li 6, 0
-; P9BE-AIX-NEXT:    mtvsrwz 4, 6
-; P9BE-AIX-NEXT:    vperm 2, 4, 2, 3
-; P9BE-AIX-NEXT:    vperm 3, 4, 5, 3
-; P9BE-AIX-NEXT:    vspltb 4, 4, 7
-; P9BE-AIX-NEXT:    vmrghh 3, 3, 4
-; P9BE-AIX-NEXT:    xxspltw 4, 4, 0
-; P9BE-AIX-NEXT:    xxmrghw 2, 3, 2
-; P9BE-AIX-NEXT:    lxv 3, 0(3)
+; P9BE-AIX-NEXT:    mtvsrwz 2, 6
+; P9BE-AIX-NEXT:    vspltb 3, 2, 7
+; P9BE-AIX-NEXT:    xxperm 0, 2, 1
+; P9BE-AIX-NEXT:    lxsibzx 1, 3, 4
+; P9BE-AIX-NEXT:    ld 3, L..C6(2) # %const.1
+; P9BE-AIX-NEXT:    lxv 2, 0(3)
+; P9BE-AIX-NEXT:    ld 3, L..C7(2) # %const.2
+; P9BE-AIX-NEXT:    xxperm 2, 1, 2
+; P9BE-AIX-NEXT:    xxspltw 1, 3, 0
+; P9BE-AIX-NEXT:    vmrghh 2, 2, 3
+; P9BE-AIX-NEXT:    xxmrghw 2, 2, 0
+; P9BE-AIX-NEXT:    lxv 0, 0(3)
 ; P9BE-AIX-NEXT:    li 3, 0
-; P9BE-AIX-NEXT:    vperm 2, 4, 2, 3
+; P9BE-AIX-NEXT:    xxperm 2, 1, 0
 ; P9BE-AIX-NEXT:    xxspltw 3, 2, 1
 ; P9BE-AIX-NEXT:    vadduwm 2, 2, 3
 ; P9BE-AIX-NEXT:    vextuwlx 3, 3, 2
@@ -463,21 +474,23 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
 ; P9BE-AIX32:       # %bb.0: # %entry
 ; P9BE-AIX32-NEXT:    add 6, 3, 4
 ; P9BE-AIX32-NEXT:    li 7, 8
-; P9BE-AIX32-NEXT:    lxsibzx 5, 3, 4
-; P9BE-AIX32-NEXT:    lwz 3, L..C4(2) # %const.1
-; P9BE-AIX32-NEXT:    lxsibzx 2, 6, 7
-; P9BE-AIX32-NEXT:    lwz 6, L..C5(2) # %const.0
-; P9BE-AIX32-NEXT:    lxv 3, 0(6)
+; P9BE-AIX32-NEXT:    lxsibzx 0, 6, 7
+; P9BE-AIX32-NEXT:    lwz 6, L..C4(2) # %const.0
+; P9BE-AIX32-NEXT:    lxv 1, 0(6)
 ; P9BE-AIX32-NEXT:    li 6, 0
-; P9BE-AIX32-NEXT:    mtvsrwz 4, 6
-; P9BE-AIX32-NEXT:    vperm 2, 4, 2, 3
-; P9BE-AIX32-NEXT:    vperm 3, 4, 5, 3
-; P9BE-AIX32-NEXT:    vspltb 4, 4, 7
-; P9BE-AIX32-NEXT:    vmrghh 3, 3, 4
-; P9BE-AIX32-NEXT:    xxspltw 4, 4, 0
-; P9BE-AIX32-NEXT:    xxmrghw 2, 3, 2
-; P9BE-AIX32-NEXT:    lxv 3, 0(3)
-; P9BE-AIX32-NEXT:    vperm 2, 4, 2, 3
+; P9BE-AIX32-NEXT:    mtvsrwz 2, 6
+; P9BE-AIX32-NEXT:    vspltb 3, 2, 7
+; P9BE-AIX32-NEXT:    xxperm 0, 2, 1
+; P9BE-AIX32-NEXT:    lxsibzx 1, 3, 4
+; P9BE-AIX32-NEXT:    lwz 3, L..C5(2) # %const.1
+; P9BE-AIX32-NEXT:    lxv 2, 0(3)
+; P9BE-AIX32-NEXT:    lwz 3, L..C6(2) # %const.2
+; P9BE-AIX32-NEXT:    xxperm 2, 1, 2
+; P9BE-AIX32-NEXT:    xxspltw 1, 3, 0
+; P9BE-AIX32-NEXT:    vmrghh 2, 2, 3
+; P9BE-AIX32-NEXT:    xxmrghw 2, 2, 0
+; P9BE-AIX32-NEXT:    lxv 0, 0(3)
+; P9BE-AIX32-NEXT:    xxperm 2, 1, 0
 ; P9BE-AIX32-NEXT:    xxspltw 3, 2, 1
 ; P9BE-AIX32-NEXT:    vadduwm 2, 2, 3
 ; P9BE-AIX32-NEXT:    stxv 2, -16(1)

diff  --git a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
index a3719331c589b..558ab57e1ecb0 100644
--- a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
@@ -81,7 +81,7 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, -124
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, -21386
-; P9BE-NEXT:    mtvsrwz v3, r3
+; P9BE-NEXT:    mtfprwz f0, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    ori r4, r4, 37253
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
@@ -94,15 +94,15 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, -16728
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; P9BE-NEXT:    ori r4, r4, 63249
 ; P9BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; P9BE-NEXT:    lxv v5, 0(r3)
+; P9BE-NEXT:    lxv vs2, 0(r3)
 ; P9BE-NEXT:    li r3, 6
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    vperm v3, v4, v3, v5
+; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhw r4, r3, r4
 ; P9BE-NEXT:    srwi r5, r4, 31
 ; P9BE-NEXT:    srawi r4, r4, 8
@@ -110,7 +110,7 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, -1003
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, 21399
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    ori r4, r4, 33437
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
@@ -121,9 +121,9 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    add r4, r4, r5
 ; P9BE-NEXT:    mulli r4, r4, 98
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtvsrwz v2, r3
-; P9BE-NEXT:    vperm v2, v2, v4, v5
-; P9BE-NEXT:    xxmrghw v2, v3, v2
+; P9BE-NEXT:    mtfprwz f3, r3
+; P9BE-NEXT:    xxperm vs1, vs3, vs2
+; P9BE-NEXT:    xxmrghw v2, vs0, vs1
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fold_srem_vec_1:
@@ -309,7 +309,7 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    add r5, r5, r6
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    mtvsrwz v3, r3
+; P9BE-NEXT:    mtfprwz f0, r3
 ; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
@@ -320,14 +320,14 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    add r5, r5, r6
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
 ; P9BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; P9BE-NEXT:    lxv v5, 0(r3)
+; P9BE-NEXT:    lxv vs2, 0(r3)
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    vperm v3, v4, v3, v5
+; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhw r5, r3, r4
 ; P9BE-NEXT:    add r5, r5, r3
 ; P9BE-NEXT:    srwi r6, r5, 31
@@ -335,7 +335,7 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    add r5, r5, r6
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
@@ -346,9 +346,9 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    add r4, r4, r5
 ; P9BE-NEXT:    mulli r4, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtvsrwz v2, r3
-; P9BE-NEXT:    vperm v2, v2, v4, v5
-; P9BE-NEXT:    xxmrghw v2, v2, v3
+; P9BE-NEXT:    mtfprwz f3, r3
+; P9BE-NEXT:    xxperm vs1, vs3, vs2
+; P9BE-NEXT:    xxmrghw v2, vs1, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fold_srem_vec_2:
@@ -536,7 +536,7 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P9BE-NEXT:    add r4, r4, r6
 ; P9BE-NEXT:    mulli r6, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r6
-; P9BE-NEXT:    mtvsrwz v3, r3
+; P9BE-NEXT:    mtfprwz f0, r3
 ; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r6, r3
@@ -547,14 +547,14 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P9BE-NEXT:    add r6, r6, r7
 ; P9BE-NEXT:    mulli r7, r6, 95
 ; P9BE-NEXT:    sub r3, r3, r7
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; P9BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; P9BE-NEXT:    lxv v5, 0(r3)
+; P9BE-NEXT:    lxv vs2, 0(r3)
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r7, r3
-; P9BE-NEXT:    vperm v3, v4, v3, v5
+; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhw r8, r7, r5
 ; P9BE-NEXT:    add r7, r8, r7
 ; P9BE-NEXT:    srwi r8, r7, 31
@@ -562,7 +562,7 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P9BE-NEXT:    add r7, r7, r8
 ; P9BE-NEXT:    mulli r8, r7, 95
 ; P9BE-NEXT:    sub r3, r3, r8
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
@@ -572,17 +572,17 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P9BE-NEXT:    srawi r5, r5, 6
 ; P9BE-NEXT:    add r5, r5, r8
 ; P9BE-NEXT:    mulli r8, r5, 95
-; P9BE-NEXT:    mtvsrwz v0, r5
 ; P9BE-NEXT:    sub r3, r3, r8
-; P9BE-NEXT:    mtvsrwz v2, r3
-; P9BE-NEXT:    vperm v2, v2, v4, v5
-; P9BE-NEXT:    mtvsrwz v4, r6
-; P9BE-NEXT:    xxmrghw v2, v2, v3
-; P9BE-NEXT:    mtvsrwz v3, r4
-; P9BE-NEXT:    vperm v3, v4, v3, v5
-; P9BE-NEXT:    mtvsrwz v4, r7
-; P9BE-NEXT:    vperm v4, v0, v4, v5
-; P9BE-NEXT:    xxmrghw v3, v4, v3
+; P9BE-NEXT:    mtfprwz f3, r3
+; P9BE-NEXT:    xxperm vs1, vs3, vs2
+; P9BE-NEXT:    mtfprwz f3, r5
+; P9BE-NEXT:    xxmrghw v2, vs1, vs0
+; P9BE-NEXT:    mtfprwz f0, r4
+; P9BE-NEXT:    mtfprwz f1, r6
+; P9BE-NEXT:    xxperm vs0, vs1, vs2
+; P9BE-NEXT:    mtfprwz f1, r7
+; P9BE-NEXT:    xxperm vs1, vs3, vs2
+; P9BE-NEXT:    xxmrghw v3, vs1, vs0
 ; P9BE-NEXT:    vadduhm v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
@@ -768,7 +768,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    addze r4, r4
 ; P9BE-NEXT:    slwi r4, r4, 5
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtvsrwz v3, r3
+; P9BE-NEXT:    mtfprwz f0, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
@@ -777,15 +777,15 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    slwi r4, r4, 6
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, -21386
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; P9BE-NEXT:    ori r4, r4, 37253
 ; P9BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; P9BE-NEXT:    lxv v5, 0(r3)
+; P9BE-NEXT:    lxv vs2, 0(r3)
 ; P9BE-NEXT:    li r3, 6
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    vperm v3, v4, v3, v5
+; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhw r4, r3, r4
 ; P9BE-NEXT:    add r4, r4, r3
 ; P9BE-NEXT:    srwi r5, r4, 31
@@ -793,7 +793,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    add r4, r4, r5
 ; P9BE-NEXT:    mulli r4, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
@@ -801,9 +801,9 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    addze r4, r4
 ; P9BE-NEXT:    slwi r4, r4, 3
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtvsrwz v2, r3
-; P9BE-NEXT:    vperm v2, v2, v4, v5
-; P9BE-NEXT:    xxmrghw v2, v3, v2
+; P9BE-NEXT:    mtfprwz f3, r3
+; P9BE-NEXT:    xxperm vs1, vs3, vs2
+; P9BE-NEXT:    xxmrghw v2, vs0, vs1
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: dont_fold_srem_power_of_two:
@@ -959,7 +959,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, 23
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, 24749
-; P9BE-NEXT:    mtvsrwz v3, r3
+; P9BE-NEXT:    mtfprwz f0, r3
 ; P9BE-NEXT:    li r3, 6
 ; P9BE-NEXT:    ori r4, r4, 47143
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
@@ -971,15 +971,15 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, 5423
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, -14230
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; P9BE-NEXT:    ori r4, r4, 30865
 ; P9BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; P9BE-NEXT:    lxv v5, 0(r3)
+; P9BE-NEXT:    lxv vs2, 0(r3)
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    vperm v3, v3, v4, v5
+; P9BE-NEXT:    xxperm vs1, vs0, vs2
 ; P9BE-NEXT:    mulhw r4, r3, r4
 ; P9BE-NEXT:    add r4, r4, r3
 ; P9BE-NEXT:    srwi r5, r4, 31
@@ -987,11 +987,11 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
 ; P9BE-NEXT:    add r4, r4, r5
 ; P9BE-NEXT:    mulli r4, r4, 654
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    mtfprwz f0, r3
 ; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    mtvsrwz v4, r3
-; P9BE-NEXT:    vperm v2, v4, v2, v5
-; P9BE-NEXT:    xxmrghw v2, v2, v3
+; P9BE-NEXT:    mtfprwz f3, r3
+; P9BE-NEXT:    xxperm vs0, vs3, vs2
+; P9BE-NEXT:    xxmrghw v2, vs0, vs1
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: dont_fold_srem_one:
@@ -1150,7 +1150,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, 23
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, 24749
-; P9BE-NEXT:    mtvsrwz v3, r3
+; P9BE-NEXT:    mtfprwz f0, r3
 ; P9BE-NEXT:    li r3, 6
 ; P9BE-NEXT:    ori r4, r4, 47143
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
@@ -1161,23 +1161,23 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
 ; P9BE-NEXT:    add r4, r4, r5
 ; P9BE-NEXT:    mulli r4, r4, 5423
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
 ; P9BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; P9BE-NEXT:    lxv v5, 0(r3)
+; P9BE-NEXT:    lxv vs2, 0(r3)
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    vperm v3, v3, v4, v5
+; P9BE-NEXT:    xxperm vs1, vs0, vs2
 ; P9BE-NEXT:    srawi r4, r3, 15
 ; P9BE-NEXT:    addze r4, r4
 ; P9BE-NEXT:    slwi r4, r4, 15
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    mtfprwz f0, r3
 ; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    mtvsrwz v4, r3
-; P9BE-NEXT:    vperm v2, v4, v2, v5
-; P9BE-NEXT:    xxmrghw v2, v2, v3
+; P9BE-NEXT:    mtfprwz f3, r3
+; P9BE-NEXT:    xxperm vs0, vs3, vs2
+; P9BE-NEXT:    xxmrghw v2, vs0, vs1
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: dont_fold_urem_i16_smax:

diff  --git a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
index 87e6ae20ff2bc..0dd9fc6bd5b1b 100644
--- a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
@@ -74,7 +74,7 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, 1003
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, 21399
-; P9BE-NEXT:    mtvsrwz v3, r3
+; P9BE-NEXT:    mtfprwz f0, r3
 ; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    ori r4, r4, 33437
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
@@ -83,21 +83,21 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r4, r4, 5
 ; P9BE-NEXT:    mulli r4, r4, 98
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; P9BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; P9BE-NEXT:    lxv v5, 0(r3)
+; P9BE-NEXT:    lxv vs2, 0(r3)
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r4, r3, 16
 ; P9BE-NEXT:    rlwinm r3, r3, 30, 18, 31
-; P9BE-NEXT:    vperm v3, v4, v3, v5
+; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhwu r3, r3, r5
 ; P9BE-NEXT:    srwi r3, r3, 2
 ; P9BE-NEXT:    mulli r3, r3, 124
 ; P9BE-NEXT:    sub r3, r4, r3
 ; P9BE-NEXT:    lis r4, 22765
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    ori r4, r4, 8969
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
@@ -109,9 +109,9 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r4, r4, 6
 ; P9BE-NEXT:    mulli r4, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtvsrwz v2, r3
-; P9BE-NEXT:    vperm v2, v2, v4, v5
-; P9BE-NEXT:    xxmrghw v2, v2, v3
+; P9BE-NEXT:    mtfprwz f3, r3
+; P9BE-NEXT:    xxperm vs1, vs3, vs2
+; P9BE-NEXT:    xxmrghw v2, vs1, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fold_urem_vec_1:
@@ -285,7 +285,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r5, r5, 6
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    mtvsrwz v3, r3
+; P9BE-NEXT:    mtfprwz f0, r3
 ; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 16
@@ -296,14 +296,14 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r5, r5, 6
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
 ; P9BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; P9BE-NEXT:    lxv v5, 0(r3)
+; P9BE-NEXT:    lxv vs2, 0(r3)
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 16
-; P9BE-NEXT:    vperm v3, v4, v3, v5
+; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhwu r5, r3, r4
 ; P9BE-NEXT:    sub r6, r3, r5
 ; P9BE-NEXT:    srwi r6, r6, 1
@@ -311,7 +311,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r5, r5, 6
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 16
@@ -322,9 +322,9 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r4, r4, 6
 ; P9BE-NEXT:    mulli r4, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtvsrwz v2, r3
-; P9BE-NEXT:    vperm v2, v2, v4, v5
-; P9BE-NEXT:    xxmrghw v2, v2, v3
+; P9BE-NEXT:    mtfprwz f3, r3
+; P9BE-NEXT:    xxperm vs1, vs3, vs2
+; P9BE-NEXT:    xxmrghw v2, vs1, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fold_urem_vec_2:
@@ -512,7 +512,7 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r4, r4, 6
 ; P9BE-NEXT:    mulli r6, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r6
-; P9BE-NEXT:    mtvsrwz v3, r3
+; P9BE-NEXT:    mtfprwz f0, r3
 ; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r6, r3, 16
@@ -523,14 +523,14 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r6, r6, 6
 ; P9BE-NEXT:    mulli r7, r6, 95
 ; P9BE-NEXT:    sub r3, r3, r7
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; P9BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; P9BE-NEXT:    lxv v5, 0(r3)
+; P9BE-NEXT:    lxv vs2, 0(r3)
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r7, r3, 16
-; P9BE-NEXT:    vperm v3, v4, v3, v5
+; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhwu r8, r7, r5
 ; P9BE-NEXT:    sub r7, r7, r8
 ; P9BE-NEXT:    srwi r7, r7, 1
@@ -538,7 +538,7 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r7, r7, 6
 ; P9BE-NEXT:    mulli r8, r7, 95
 ; P9BE-NEXT:    sub r3, r3, r8
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 16
@@ -548,17 +548,17 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P9BE-NEXT:    add r5, r8, r5
 ; P9BE-NEXT:    srwi r5, r5, 6
 ; P9BE-NEXT:    mulli r8, r5, 95
-; P9BE-NEXT:    mtvsrwz v0, r5
 ; P9BE-NEXT:    sub r3, r3, r8
-; P9BE-NEXT:    mtvsrwz v2, r3
-; P9BE-NEXT:    vperm v2, v2, v4, v5
-; P9BE-NEXT:    mtvsrwz v4, r6
-; P9BE-NEXT:    xxmrghw v2, v2, v3
-; P9BE-NEXT:    mtvsrwz v3, r4
-; P9BE-NEXT:    vperm v3, v4, v3, v5
-; P9BE-NEXT:    mtvsrwz v4, r7
-; P9BE-NEXT:    vperm v4, v0, v4, v5
-; P9BE-NEXT:    xxmrghw v3, v4, v3
+; P9BE-NEXT:    mtfprwz f3, r3
+; P9BE-NEXT:    xxperm vs1, vs3, vs2
+; P9BE-NEXT:    mtfprwz f3, r5
+; P9BE-NEXT:    xxmrghw v2, vs1, vs0
+; P9BE-NEXT:    mtfprwz f0, r4
+; P9BE-NEXT:    mtfprwz f1, r6
+; P9BE-NEXT:    xxperm vs0, vs1, vs2
+; P9BE-NEXT:    mtfprwz f1, r7
+; P9BE-NEXT:    xxperm vs1, vs3, vs2
+; P9BE-NEXT:    xxmrghw v3, vs1, vs0
 ; P9BE-NEXT:    vadduhm v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
@@ -730,18 +730,18 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    ori r4, r4, 8969
 ; P9BE-NEXT:    clrlwi r3, r3, 27
-; P9BE-NEXT:    mtvsrwz v3, r3
+; P9BE-NEXT:    mtfprwz f0, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 26
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; P9BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; P9BE-NEXT:    lxv v5, 0(r3)
+; P9BE-NEXT:    lxv vs2, 0(r3)
 ; P9BE-NEXT:    li r3, 6
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 16
-; P9BE-NEXT:    vperm v3, v4, v3, v5
+; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhwu r4, r3, r4
 ; P9BE-NEXT:    sub r5, r3, r4
 ; P9BE-NEXT:    srwi r5, r5, 1
@@ -749,13 +749,13 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r4, r4, 6
 ; P9BE-NEXT:    mulli r4, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 29
-; P9BE-NEXT:    mtvsrwz v2, r3
-; P9BE-NEXT:    vperm v2, v2, v4, v5
-; P9BE-NEXT:    xxmrghw v2, v3, v2
+; P9BE-NEXT:    mtfprwz f3, r3
+; P9BE-NEXT:    xxperm vs1, vs3, vs2
+; P9BE-NEXT:    xxmrghw v2, vs0, vs1
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: dont_fold_urem_power_of_two:
@@ -879,7 +879,7 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
 ; P9BE-NEXT:    mulli r4, r4, 5423
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    lis r4, -19946
-; P9BE-NEXT:    mtvsrwz v3, r3
+; P9BE-NEXT:    mtfprwz f0, r3
 ; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    ori r4, r4, 17097
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
@@ -888,24 +888,24 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
 ; P9BE-NEXT:    srwi r4, r4, 4
 ; P9BE-NEXT:    mulli r4, r4, 23
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; P9BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; P9BE-NEXT:    lxv v5, 0(r3)
+; P9BE-NEXT:    lxv vs2, 0(r3)
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r4, r3, 16
 ; P9BE-NEXT:    rlwinm r3, r3, 31, 17, 31
-; P9BE-NEXT:    vperm v3, v4, v3, v5
+; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhwu r3, r3, r5
 ; P9BE-NEXT:    srwi r3, r3, 8
 ; P9BE-NEXT:    mulli r3, r3, 654
 ; P9BE-NEXT:    sub r3, r4, r3
-; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    mtfprwz f1, r3
 ; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    mtvsrwz v4, r3
-; P9BE-NEXT:    vperm v2, v4, v2, v5
-; P9BE-NEXT:    xxmrghw v2, v2, v3
+; P9BE-NEXT:    mtfprwz f3, r3
+; P9BE-NEXT:    xxperm vs1, vs3, vs2
+; P9BE-NEXT:    xxmrghw v2, vs1, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: dont_fold_urem_one:

diff  --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
index 8db66f37a97b9..50b4a079d151d 100644
--- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
@@ -57,11 +57,11 @@ define <16 x i8> @test_v16i8_v16i8(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-BE-P9-LABEL: test_v16i8_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
-; CHECK-BE-P9-NEXT:    lxsibzx v3, 0, r4
-; CHECK-BE-P9-NEXT:    lxsibzx v4, 0, r3
+; CHECK-BE-P9-NEXT:    lxsibzx v2, 0, r4
+; CHECK-BE-P9-NEXT:    lxsibzx f1, 0, r3
 ; CHECK-BE-P9-NEXT:    addi r5, r5, .LCPI0_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v2, 0(r5)
-; CHECK-BE-P9-NEXT:    vperm v2, v4, v3, v2
+; CHECK-BE-P9-NEXT:    lxv vs0, 0(r5)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs1, vs0
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v16i8:
@@ -78,10 +78,10 @@ define <16 x i8> @test_v16i8_v16i8(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-64-P9-LABEL: test_v16i8_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    ld r5, L..C0(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    lxsibzx v3, 0, r4
-; CHECK-AIX-64-P9-NEXT:    lxsibzx v4, 0, r3
-; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r5)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v4, v3, v2
+; CHECK-AIX-64-P9-NEXT:    lxsibzx v2, 0, r4
+; CHECK-AIX-64-P9-NEXT:    lxsibzx f1, 0, r3
+; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r5)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs1, vs0
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8:
@@ -98,10 +98,10 @@ define <16 x i8> @test_v16i8_v16i8(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-32-P9-LABEL: test_v16i8_v16i8:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lwz r5, L..C0(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxsibzx v3, 0, r4
-; CHECK-AIX-32-P9-NEXT:    lxsibzx v4, 0, r3
-; CHECK-AIX-32-P9-NEXT:    lxv v2, 0(r5)
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v4, v3, v2
+; CHECK-AIX-32-P9-NEXT:    lxsibzx v2, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxsibzx f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, 0(r5)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs1, vs0
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <1 x i8>, ptr %a, align 4
@@ -184,23 +184,22 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
 ; CHECK-LE-P8-LABEL: test_none_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
-; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
 ; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_none_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtvsrd v3, r3
+; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_none_v16i8:
@@ -215,12 +214,12 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-BE-P9-LABEL: test_none_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v16i8:
@@ -234,11 +233,11 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-AIX-64-P9-LABEL: test_none_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C1(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
-; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v16i8:
@@ -432,23 +431,22 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
 ; CHECK-LE-P8-LABEL: test_none_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
-; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
 ; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI5_0@toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_none_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtvsrd v3, r3
+; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
 ; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_none_v8i16:
@@ -463,12 +461,12 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-BE-P9-LABEL: test_none_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
 ; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
@@ -482,11 +480,11 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C2(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
-; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v8i16:
@@ -845,9 +843,9 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
 ; CHECK-LE-P9-NEXT:    lxsibzx v3, 0, r4
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI10_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT:    lxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    vspltb v3, v3, 7
-; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT:    xxperm v2, v3, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_none:
@@ -864,13 +862,13 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_none:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
-; CHECK-BE-P9-NEXT:    lxsibzx v3, 0, r4
+; CHECK-BE-P9-NEXT:    lxsibzx v2, 0, r4
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI10_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT:    vspltb v3, v3, 7
-; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    vspltb v2, v2, 7
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
@@ -886,12 +884,12 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    lxsibzx v3, 0, r4
-; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    vspltb v3, v3, 7
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT:    lxsibzx v2, 0, r4
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    vspltb v2, v2, 7
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
@@ -907,12 +905,12 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_none:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C1(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxsibzx v3, 0, r4
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    vspltb v3, v3, 7
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT:    lxsibzx v2, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    vspltb v2, v2, 7
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <4 x i8>, ptr %a, align 4
@@ -1102,11 +1100,11 @@ define dso_local <16 x i8> @test_1_2(ptr nocapture noundef readonly %a, ptr noca
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxsibzx v2, 0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI13_0@toc@ha
-; CHECK-LE-P9-NEXT:    lxsd v3, 0(r4)
+; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI13_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-LE-P9-NEXT:    vspltb v2, v2, 7
-; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_1_2:
@@ -1158,10 +1156,10 @@ define dso_local <16 x i8> @test_1_2(ptr nocapture noundef readonly %a, ptr noca
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lxsibzx v2, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C2(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    vspltb v2, v2, 7
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT:    vspltb v3, v2, 7
+; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, v3, vs0
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <1 x i8>, ptr %a, align 4
@@ -1194,11 +1192,11 @@ define <16 x i8> @test_none_v2i64(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxsibzx v2, 0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI14_0@toc@ha
-; CHECK-LE-P9-NEXT:    lxsd v3, 0(r4)
+; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI14_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-LE-P9-NEXT:    vspltb v2, v2, 7
-; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_none_v2i64:
@@ -1250,10 +1248,10 @@ define <16 x i8> @test_none_v2i64(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lxsibzx v2, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C3(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    vspltb v2, v2, 7
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT:    vspltb v3, v2, 7
+; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, v3, vs0
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <1 x i8>, ptr %a, align 4
@@ -1288,9 +1286,9 @@ define <16 x i8> @test_v2i64_none(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
 ; CHECK-LE-P9-NEXT:    lxsibzx v3, 0, r4
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI15_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT:    lxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    vspltb v3, v3, 7
-; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT:    xxperm v2, v3, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_none:
@@ -1385,11 +1383,11 @@ define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) {
 ; CHECK-BE-P9-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    addis r5, r2, .LCPI16_0@toc@ha
-; CHECK-BE-P9-NEXT:    mtvsrwz v3, r4
-; CHECK-BE-P9-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P9-NEXT:    mtfprwz f1, r3
 ; CHECK-BE-P9-NEXT:    addi r5, r5, .LCPI16_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v2, 0(r5)
-; CHECK-BE-P9-NEXT:    vperm v2, v4, v3, v2
+; CHECK-BE-P9-NEXT:    lxv vs0, 0(r5)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs1, vs0
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16rhs:
@@ -1404,10 +1402,10 @@ define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) {
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    ld r5, L..C4(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r4
-; CHECK-AIX-64-P9-NEXT:    mtvsrwz v4, r3
-; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r5)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v4, v3, v2
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r3
+; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r5)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs1, vs0
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16rhs:
@@ -1803,11 +1801,11 @@ define <16 x i8> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lxvwsx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxvwsx vs0, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C4(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <8 x i8>, ptr %a, align 4
@@ -1998,9 +1996,9 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI24_0@toc@l
 ; CHECK-LE-P9-NEXT:    xxswapd v2, f0
 ; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
 ; CHECK-LE-P9-NEXT:    xxswapd v3, f0
-; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm v2, v3, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
@@ -2018,11 +2016,11 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI24_0@toc@ha
-; CHECK-BE-P9-NEXT:    lxsd v3, 0(r4)
+; CHECK-BE-P9-NEXT:    lxsd v2, 0(r4)
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI24_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT:    xxsldwi v2, f0, f0, 1
-; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxsldwi vs0, f0, f0, 1
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64:
@@ -2039,10 +2037,10 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C5(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    lxsd v3, 0(r4)
-; CHECK-AIX-64-P9-NEXT:    xxsldwi v2, f0, f0, 1
-; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT:    lxsd v2, 0(r4)
+; CHECK-AIX-64-P9-NEXT:    xxsldwi vs0, f0, f0, 1
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
@@ -2064,17 +2062,17 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, 4(r4)
 ; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
 ; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C5(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    xxmrghw v3, vs1, vs0
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT:    lxv vs2, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    xxmrghw v2, vs2, vs1
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <4 x i8>, ptr %a, align 4

diff  --git a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
index 4623d34c27ac0..d96cf3eb72277 100644
--- a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
@@ -1611,16 +1611,16 @@ define <2 x i64> @test_v4i32_v2i64(i32 %arg1, i64 %arg) {
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P9-NEXT:    stw r3, -48(r1)
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C0(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, -48(r1)
 ; CHECK-AIX-32-P9-NEXT:    stw r5, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v4, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    xxmrghw v2, vs1, vs0
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v2, v4, v3
+; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    xxmrghw v3, vs1, vs0
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, v3, vs0
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0

diff  --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
index a8ca0f69f2cc8..a22eb055630e1 100644
--- a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
@@ -42,13 +42,13 @@ define void @test_none_v8i16(ptr %a) {
 ;
 ; CHECK-LE-P9-LABEL: test_none_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT:    lxsd v3, 0(r3)
+; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-LE-P9-NEXT:    lfd f1, 0(r3)
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-LE-P9-NEXT:    vperm v2, v2, v3, v4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, v2
+; CHECK-LE-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-LE-P9-NEXT:    xxswapd vs0, vs1
 ; CHECK-LE-P9-NEXT:    stfd f0, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -227,11 +227,11 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    li r3, 0
 ; CHECK-LE-P9-NEXT:    vextuwrx r3, r3, v2
-; CHECK-LE-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    stxv v2, 0(r5)
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -251,12 +251,12 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    li r3, 0
 ; CHECK-BE-P9-NEXT:    vextuwlx r3, r3, v2
-; CHECK-BE-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
-; CHECK-BE-P9-NEXT:    stxv v2, 0(r5)
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm vs0, v2, vs1
+; CHECK-BE-P9-NEXT:    stxv vs0, 0(r5)
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
@@ -274,11 +274,11 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    li r4, 0
 ; CHECK-AIX-64-P9-NEXT:    vextuwlx r4, r4, v2
-; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r4
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r4
 ; CHECK-AIX-64-P9-NEXT:    ld r4, L..C0(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r4)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
-; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-AIX-64-P9-NEXT:    xxperm vs0, v2, vs1
+; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
@@ -296,11 +296,11 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    addi r4, r1, -16
 ; CHECK-AIX-32-P9-NEXT:    stxv v2, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    lwz r4, L..C0(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v2, v3, v4
-; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-AIX-32-P9-NEXT:    xxperm vs0, v2, vs1
+; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = extractelement <2 x i32> %vec, i64 0
@@ -332,12 +332,12 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    li r3, 0
 ; CHECK-LE-P9-NEXT:    vextuwrx r3, r3, v2
-; CHECK-LE-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-LE-P9-NEXT:    vperm v2, v2, v3, v4
-; CHECK-LE-P9-NEXT:    stxv v2, 0(r5)
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm vs0, v2, vs1
+; CHECK-LE-P9-NEXT:    stxv vs0, 0(r5)
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_none:
@@ -356,11 +356,11 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    li r3, 0
 ; CHECK-BE-P9-NEXT:    vextuwlx r3, r3, v2
-; CHECK-BE-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    stxv v2, 0(r5)
 ; CHECK-BE-P9-NEXT:    blr
 ;
@@ -379,10 +379,10 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    li r4, 0
 ; CHECK-AIX-64-P9-NEXT:    vextuwlx r4, r4, v2
-; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r4
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r4
 ; CHECK-AIX-64-P9-NEXT:    ld r4, L..C1(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r4)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
@@ -401,10 +401,10 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    addi r4, r1, -16
 ; CHECK-AIX-32-P9-NEXT:    stxv v2, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    lwz r4, L..C1(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
@@ -438,13 +438,13 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
 ;
 ; CHECK-LE-P9-LABEL: test_none_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lxsd v3, 0(r3)
+; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-LE-P9-NEXT:    mtfprwz f0, r4
+; CHECK-LE-P9-NEXT:    mtfprwz f1, r4
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-LE-P9-NEXT:    xxinsertw v2, vs0, 12
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT:    xxinsertw v2, vs1, 12
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    stxv v2, 0(r3)
 ;
 ; CHECK-BE-P8-LABEL: test_none_v2i64:
@@ -463,14 +463,14 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
 ;
 ; CHECK-BE-P9-LABEL: test_none_v2i64:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    lxsd v3, 0(r3)
+; CHECK-BE-P9-NEXT:    lfd f0, 0(r3)
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-BE-P9-NEXT:    mtfprwz f0, r4
+; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-BE-P9-NEXT:    xxinsertw v2, vs0, 0
-; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
-; CHECK-BE-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-P9-NEXT:    xxinsertw v2, vs1, 0
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm vs0, v2, vs1
+; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v2i64:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
@@ -486,13 +486,13 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
 ;
 ; CHECK-AIX-64-P9-LABEL: test_none_v2i64:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    lxsd v3, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C2(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r4
-; CHECK-AIX-64-P9-NEXT:    xxinsertw v2, vs0, 0
-; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
-; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT:    xxinsertw v2, vs1, 0
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm vs0, v2, vs1
+; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
@@ -510,13 +510,13 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
 ;
 ; CHECK-AIX-32-P9-LABEL: test_none_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C2(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    mtfprwz f0, r4
-; CHECK-AIX-32-P9-NEXT:    xxinsertw v2, vs0, 0
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v2, v3, v4
-; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-32-P9-NEXT:    xxinsertw v2, vs1, 0
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm vs0, v2, vs1
+; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 entry:
   %0 = load <2 x i32>, ptr %ptr, align 4
   %tmp = insertelement <2 x i32> %vec, i32 %v1, i32 0
@@ -895,10 +895,10 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxsiwzx v2, 0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
-; CHECK-LE-P9-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI9_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v4i32:
@@ -913,12 +913,12 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
-; CHECK-BE-P9-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-BE-P9-NEXT:    lxsiwzx v2, 0, r4
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI9_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32:
@@ -932,11 +932,11 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT:    lxsiwzx v2, 0, r4
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
@@ -950,11 +950,11 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C3(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %load1 = load <4 x i8>, ptr %a

diff  --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
index 933b7860de5c8..10131f8f6931d 100644
--- a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
@@ -41,11 +41,11 @@ define void @test_none_v8i16(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ;
 ; CHECK-LE-P9-LABEL: test_none_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lxsihzx v3, 0, r3
+; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -62,12 +62,12 @@ define void @test_none_v8i16(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ;
 ; CHECK-BE-P9-LABEL: test_none_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    lxsihzx v3, 0, r3
+; CHECK-BE-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
-; CHECK-BE-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm vs0, v2, vs1
+; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
@@ -82,11 +82,11 @@ define void @test_none_v8i16(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ;
 ; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    lxsihzx v3, 0, r3
+; CHECK-AIX-64-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C0(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
-; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm vs0, v2, vs1
+; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v8i16:
@@ -101,11 +101,11 @@ define void @test_none_v8i16(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ;
 ; CHECK-AIX-32-P9-LABEL: test_none_v8i16:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lxsihzx v3, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C0(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v2, v3, v4
-; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm vs0, v2, vs1
+; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %load0.tmp = load <2 x i8>, ptr %a0
@@ -140,14 +140,14 @@ define void @test_v8i16_none(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_none:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lxsihzx v3, 0, r3
+; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-LE-P9-NEXT:    mtvsrwz v4, r9
+; CHECK-LE-P9-NEXT:    mtvsrwz v3, r9
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-LE-P9-NEXT:    vinsertb v2, v4, 15
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-LE-P9-NEXT:    vperm v2, v2, v3, v4
-; CHECK-LE-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-LE-P9-NEXT:    vinsertb v2, v3, 15
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm vs0, v2, vs1
+; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_none:
@@ -168,13 +168,13 @@ define void @test_v8i16_none(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_none:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    lxsihzx v3, 0, r3
+; CHECK-BE-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-BE-P9-NEXT:    mtvsrwz v4, r9
+; CHECK-BE-P9-NEXT:    mtvsrwz v3, r9
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-P9-NEXT:    vinsertb v2, v4, 0
-; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-BE-P9-NEXT:    vinsertb v2, v3, 0
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-BE-P9-NEXT:    blr
 ;
@@ -194,12 +194,12 @@ define void @test_v8i16_none(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    lxsihzx v3, 0, r3
+; CHECK-AIX-64-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C1(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    mtvsrwz v4, r5
-; CHECK-AIX-64-P9-NEXT:    vinsertb v2, v4, 0
-; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r5
+; CHECK-AIX-64-P9-NEXT:    vinsertb v2, v3, 0
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
@@ -219,12 +219,12 @@ define void @test_v8i16_none(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_none:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lxsihzx v3, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C1(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    mtvsrwz v4, r5
-; CHECK-AIX-32-P9-NEXT:    vinsertb v2, v4, 0
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-AIX-32-P9-NEXT:    mtvsrwz v3, r5
+; CHECK-AIX-32-P9-NEXT:    vinsertb v2, v3, 0
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
@@ -263,11 +263,11 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-LE-P9-NEXT:    mtvsrd v3, r5
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT:    lxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI2_1@toc@ha
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI2_1@toc@l
-; CHECK-LE-P9-NEXT:    vperm v3, v3, v3, v4
 ; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm v3, v3, vs0
 ; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P9-NEXT:    xxswapd vs0, v2
 ; CHECK-LE-P9-NEXT:    stfd f0, 0(r3)
@@ -294,11 +294,11 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-BE-P9-NEXT:    mtvsrwz v3, r5
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI2_1@toc@ha
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI2_1@toc@l
-; CHECK-BE-P9-NEXT:    vperm v3, v3, v3, v4
 ; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v3, v3, vs0
 ; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P9-NEXT:    stxsd v2, 0(r3)
 ; CHECK-BE-P9-NEXT:    blr
@@ -321,10 +321,10 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
 ; CHECK-AIX-64-P9-NEXT:    lxsiwzx v2, 0, r3
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C2(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r5
-; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.1
-; CHECK-AIX-64-P9-NEXT:    vperm v3, v3, v3, v4
 ; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v3, v3, vs0
 ; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P9-NEXT:    stxsd v2, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
@@ -387,13 +387,13 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_none:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-LE-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-LE-P9-NEXT:    xxlxor vs1, vs1, vs1
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-LE-P9-NEXT:    vperm v2, v4, v2, v3
-; CHECK-LE-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-LE-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm vs0, vs1, vs2
+; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_none:
@@ -409,13 +409,13 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_none:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-BE-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-P9-NEXT:    xxlxor vs1, vs1, vs1
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-P9-NEXT:    vperm v2, v4, v2, v3
-; CHECK-BE-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm vs0, vs1, vs2
+; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
@@ -430,12 +430,12 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C4(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    xxlxor v4, v4, v4
-; CHECK-AIX-64-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v4, v2, v3
-; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxlxor vs2, vs2, vs2
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm vs0, vs2, vs1
+; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
@@ -450,12 +450,12 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_none:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C3(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    xxlxor v4, v4, v4
-; CHECK-AIX-32-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v4, v2, v3
-; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxlxor vs2, vs2, vs2
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm vs0, vs2, vs1
+; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <2 x i16>, ptr %ptr1, align 1
@@ -470,15 +470,14 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ; CHECK-LE-P8-LABEL: test_none_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI4_0@toc@ha
-; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    lxsdx v2, 0, r3
+; CHECK-LE-P8-NEXT:    lxvd2x v3, 0, r4
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI4_1@toc@ha
 ; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI4_0@toc@l
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI4_1@toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs1
 ; CHECK-LE-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    xxlxor v4, v4, v4
@@ -489,16 +488,16 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ;
 ; CHECK-LE-P9-LABEL: test_none_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lxsd v2, 0(r3)
+; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-LE-P9-NEXT:    lxv v3, 0(r4)
+; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
+; CHECK-LE-P9-NEXT:    xxlxor v4, v4, v4
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI4_1@toc@ha
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI4_1@toc@l
-; CHECK-LE-P9-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-LE-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-LE-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
@@ -520,11 +519,11 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxsd v2, 0(r3)
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-BE-P9-NEXT:    lxv v3, 0(r4)
-; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-BE-P9-NEXT:    lxv vs0, 0(r4)
 ; CHECK-BE-P9-NEXT:    xxlxor v3, v3, v3
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI4_0@toc@l
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-BE-P9-NEXT:    blr
@@ -545,10 +544,10 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxsd v2, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C5(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    lxv v3, 0(r4)
-; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    xxlxor v3, v3, v3
+; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs1, vs0
 ; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
@@ -569,10 +568,10 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C4(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxv v3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, 0(r4)
 ; CHECK-AIX-32-P9-NEXT:    xxlxor v3, v3, v3
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -602,13 +601,13 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_none:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lxsd v2, 0(r3)
+; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-LE-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-LE-P9-NEXT:    xxlxor vs1, vs1, vs1
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-LE-P9-NEXT:    vperm v2, v4, v2, v3
-; CHECK-LE-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-LE-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm vs0, vs1, vs2
+; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_none:
@@ -624,13 +623,13 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ;
 ; CHECK-BE-P9-LABEL: test_v2i64_none:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    lxsd v2, 0(r3)
+; CHECK-BE-P9-NEXT:    lfd f0, 0(r3)
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-BE-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-P9-NEXT:    xxlxor vs1, vs1, vs1
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-P9-NEXT:    vperm v2, v4, v2, v3
-; CHECK-BE-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm vs0, vs1, vs2
+; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v2i64_none:
@@ -645,12 +644,12 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v2i64_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    lxsd v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C6(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    xxlxor v4, v4, v4
-; CHECK-AIX-64-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v4, v2, v3
-; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxlxor vs2, vs2, vs2
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm vs0, vs2, vs1
+; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_none:
@@ -674,17 +673,17 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_none:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lwz r4, 4(r3)
-; CHECK-AIX-32-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-AIX-32-P9-NEXT:    xxlxor vs2, vs2, vs2
 ; CHECK-AIX-32-P9-NEXT:    stw r4, -16(r1)
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
 ; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C5(r2) # %const.0
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    xxmrghw v2, vs1, vs0
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v4, v2, v3
-; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm vs0, vs2, vs1
+; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <4 x i16>, ptr %ptr1, align 1
@@ -712,10 +711,10 @@ define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) {
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
-; CHECK-LE-P9-NEXT:    lxsihzx v3, 0, r4
+; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r4
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI6_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v8i16:
@@ -732,12 +731,12 @@ define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) {
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r3
+; CHECK-BE-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
-; CHECK-BE-P9-NEXT:    lxsihzx v3, 0, r4
+; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r4
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI6_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16:
@@ -753,11 +752,11 @@ define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) {
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C7(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    lxsihzx v3, 0, r4
-; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r4
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16:
@@ -773,11 +772,11 @@ define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) {
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lxsihzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C6(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxsihzx v3, 0, r4
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT:    lxsihzx v2, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %load1 = load <2 x i8>, ptr %a
@@ -993,16 +992,16 @@ define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readon
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
-; CHECK-LE-P9-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-LE-P9-NEXT:    lxsiwzx v2, 0, r4
+; CHECK-LE-P9-NEXT:    xxlxor v4, v4, v4
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI9_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI9_1@toc@ha
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI9_1@toc@l
-; CHECK-LE-P9-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-LE-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-LE-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
@@ -1024,11 +1023,11 @@ define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readon
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxsiwzx v2, 0, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
-; CHECK-BE-P9-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI9_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-BE-P9-NEXT:    xxlxor v3, v3, v3
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI9_0@toc@l
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-BE-P9-NEXT:    blr
@@ -1049,10 +1048,10 @@ define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readon
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxsiwzx v2, 0, r3
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C8(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-64-P9-NEXT:    xxlxor v3, v3, v3
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
@@ -1073,10 +1072,10 @@ define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readon
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C7(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    xxlxor v3, v3, v3
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -1245,11 +1244,11 @@ define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C8(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <2 x i8>, ptr %a, align 4
@@ -1286,16 +1285,16 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lxsd v2, 0(r3)
+; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI12_0@toc@ha
-; CHECK-LE-P9-NEXT:    lxsd v3, 0(r4)
+; CHECK-LE-P9-NEXT:    lxsd v2, 0(r4)
+; CHECK-LE-P9-NEXT:    xxlxor v4, v4, v4
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI12_0@toc@l
-; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI12_1@toc@ha
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI12_1@toc@l
-; CHECK-LE-P9-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-LE-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-LE-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
@@ -1317,11 +1316,11 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxsd v2, 0(r3)
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI12_0@toc@ha
-; CHECK-BE-P9-NEXT:    lxsd v3, 0(r4)
-; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI12_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-BE-P9-NEXT:    lfd f0, 0(r4)
 ; CHECK-BE-P9-NEXT:    xxlxor v3, v3, v3
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI12_0@toc@l
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-BE-P9-NEXT:    blr
@@ -1342,10 +1341,10 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxsd v2, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C9(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    lxsd v3, 0(r4)
-; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    xxlxor v3, v3, v3
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
@@ -1366,10 +1365,10 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C9(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    xxlxor v3, v3, v3
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
@@ -1446,9 +1445,9 @@ define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C10(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <2 x i8>, ptr %a, align 4

diff  --git a/llvm/test/CodeGen/PowerPC/vec-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
index 9fcc57ad78d5f..a54c704aa040c 100644
--- a/llvm/test/CodeGen/PowerPC/vec-itofp.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
@@ -203,13 +203,13 @@ define void @test2(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
 ;
 ; CHECK-P9-LABEL: test2:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv v2, 0(r4)
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
 ; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
-; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    xxlxor vs0, vs0, vs0
 ; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_0@toc@l
-; CHECK-P9-NEXT:    lxv v3, 0(r4)
-; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
-; CHECK-P9-NEXT:    xvcvuxddp vs0, v2
+; CHECK-P9-NEXT:    lxv vs2, 0(r4)
+; CHECK-P9-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-P9-NEXT:    xvcvuxddp vs0, vs1
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
@@ -459,8 +459,8 @@ define void @stest2(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
 ; CHECK-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-P9-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
 ; CHECK-P9-NEXT:    addi r4, r4, .LCPI5_0@toc@l
-; CHECK-P9-NEXT:    lxv v3, 0(r4)
-; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-NEXT:    lxv vs0, 0(r4)
+; CHECK-P9-NEXT:    xxperm v2, v2, vs0
 ; CHECK-P9-NEXT:    vextsh2d v2, v2
 ; CHECK-P9-NEXT:    xvcvsxddp vs0, v2
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
@@ -471,8 +471,8 @@ define void @stest2(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
 ; CHECK-BE-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI5_0@toc@l
-; CHECK-BE-NEXT:    lxv v3, 0(r4)
-; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-BE-NEXT:    lxv vs0, 0(r4)
+; CHECK-BE-NEXT:    xxperm v2, v2, vs0
 ; CHECK-BE-NEXT:    vextsh2d v2, v2
 ; CHECK-BE-NEXT:    xvcvsxddp vs0, v2
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
index 3b96286ad0f41..eadb1c4371c5b 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
@@ -50,19 +50,19 @@ define i32 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtfprd f0, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xscvspdpn f2, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mtfprwz f2, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
+; CHECK-BE-NEXT:    xxperm v2, vs2, vs1
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -130,31 +130,31 @@ define i64 @test4elt(<4 x float> %a) local_unnamed_addr #1 {
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-BE-NEXT:    xxswapd vs2, v2
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xscvspdpn f0, v2
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    vperm v2, v5, v2, v3
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v4
+; CHECK-BE-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mtfprwz f2, r3
+; CHECK-BE-NEXT:    xxperm vs1, vs2, vs0
+; CHECK-BE-NEXT:    xscvspdpn f2, v2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mtfprwz f2, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    xxperm vs3, vs2, vs0
+; CHECK-BE-NEXT:    xxmrghw vs0, vs3, vs1
 ; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -273,51 +273,51 @@ define <8 x i16> @test8elt(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs4, vs1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    mtfprwz f4, r3
+; CHECK-BE-NEXT:    xxperm vs3, vs4, vs2
+; CHECK-BE-NEXT:    xscvspdpn f4, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xxsldwi vs2, vs0, vs0, 3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    mtfprwz f4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
-; CHECK-BE-NEXT:    xxmrghw vs1, v4, v3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xxswapd vs2, vs0
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xscvspdpn f2, vs0
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    xxperm vs1, vs4, vs2
+; CHECK-BE-NEXT:    xxswapd vs4, vs0
+; CHECK-BE-NEXT:    xxmrghw vs1, vs1, vs3
+; CHECK-BE-NEXT:    xxsldwi vs3, vs0, vs0, 3
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    mtfprwz f4, r3
+; CHECK-BE-NEXT:    xxperm vs3, vs4, vs2
+; CHECK-BE-NEXT:    xscvspdpn f4, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    mtfprwz f4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtfprwz f0, r3
+; CHECK-BE-NEXT:    xxperm vs0, vs4, vs2
+; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs3
 ; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -534,104 +534,104 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr n
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs1, 16(r4)
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
 ; CHECK-BE-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
-; CHECK-BE-NEXT:    lxv vs0, 0(r4)
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
 ; CHECK-BE-NEXT:    addi r5, r5, .LCPI3_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r5)
-; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    xscvspdpn f4, vs1
+; CHECK-BE-NEXT:    lxv vs3, 0(r5)
+; CHECK-BE-NEXT:    xscvspdpn f6, vs2
+; CHECK-BE-NEXT:    xxsldwi vs4, vs2, vs2, 3
+; CHECK-BE-NEXT:    xscvspdpn f9, vs1
+; CHECK-BE-NEXT:    xxswapd vs5, vs2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
+; CHECK-BE-NEXT:    xxsldwi vs7, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs8, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxsldwi vs10, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs11, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f9, f9
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xscvspdpn f8, vs8
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f10, vs10
+; CHECK-BE-NEXT:    xscvspdpn f11, vs11
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    xscvdpsxws f8, f8
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f10, f10
+; CHECK-BE-NEXT:    xscvdpsxws f11, f11
+; CHECK-BE-NEXT:    mffprwz r5, f6
+; CHECK-BE-NEXT:    mtfprwz f6, r5
+; CHECK-BE-NEXT:    mffprwz r5, f9
+; CHECK-BE-NEXT:    mtfprwz f9, r5
+; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    mtfprwz f4, r5
+; CHECK-BE-NEXT:    mffprwz r5, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    xxswapd vs2, vs0
-; CHECK-BE-NEXT:    mtvsrwz v3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f4
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrwz v4, r5
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xscvspdpn f3, vs5
-; CHECK-BE-NEXT:    mtvsrwz v4, r5
-; CHECK-BE-NEXT:    mffprwz r5, f1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxperm vs4, vs5, vs3
+; CHECK-BE-NEXT:    xscvspdpn f5, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mtvsrwz v5, r5
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtfprwz f2, r5
+; CHECK-BE-NEXT:    mffprwz r5, f7
+; CHECK-BE-NEXT:    mtfprwz f7, r5
+; CHECK-BE-NEXT:    mffprwz r5, f8
+; CHECK-BE-NEXT:    xxperm vs2, vs6, vs3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    mtfprwz f8, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r5
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    mtvsrwz v0, r5
-; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v1, r5
-; CHECK-BE-NEXT:    mffprwz r5, f0
-; CHECK-BE-NEXT:    lxv vs0, 48(r4)
-; CHECK-BE-NEXT:    vperm v0, v1, v0, v2
-; CHECK-BE-NEXT:    mtvsrwz v1, r5
-; CHECK-BE-NEXT:    vperm v5, v5, v1, v2
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxswapd vs3, vs0
-; CHECK-BE-NEXT:    xscvspdpn f4, vs0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xxmrghw vs2, vs2, vs4
+; CHECK-BE-NEXT:    lxv vs4, 32(r4)
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r5, f1
-; CHECK-BE-NEXT:    lxv vs1, 32(r4)
-; CHECK-BE-NEXT:    mffprwz r4, f4
-; CHECK-BE-NEXT:    mtvsrwz v3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xxmrghw vs3, v5, v0
-; CHECK-BE-NEXT:    mtvsrwz v4, r5
-; CHECK-BE-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    mtfprwz f1, r5
+; CHECK-BE-NEXT:    xxperm vs7, vs8, vs3
+; CHECK-BE-NEXT:    mffprwz r5, f10
+; CHECK-BE-NEXT:    xxperm vs1, vs9, vs3
+; CHECK-BE-NEXT:    mtfprwz f10, r5
+; CHECK-BE-NEXT:    mffprwz r5, f11
+; CHECK-BE-NEXT:    mffprwz r4, f5
+; CHECK-BE-NEXT:    mtfprwz f11, r5
+; CHECK-BE-NEXT:    xxmrghw vs1, vs1, vs7
+; CHECK-BE-NEXT:    mtfprwz f5, r4
+; CHECK-BE-NEXT:    xxperm vs10, vs11, vs3
 ; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
-; CHECK-BE-NEXT:    stxv vs2, 0(r3)
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xxmrghw vs0, v4, v3
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    mtvsrwz v3, r4
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    xscvspdpn f3, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
-; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    xxmrghw vs1, v2, v3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs1, vs2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs4, vs4, 3
+; CHECK-BE-NEXT:    mtfprwz f0, r4
+; CHECK-BE-NEXT:    xxperm vs0, vs5, vs3
+; CHECK-BE-NEXT:    xxswapd vs5, vs4
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs10
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    mtfprwz f2, r4
+; CHECK-BE-NEXT:    mffprwz r4, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r4
+; CHECK-BE-NEXT:    xxperm vs2, vs5, vs3
+; CHECK-BE-NEXT:    xscvspdpn f5, vs4
+; CHECK-BE-NEXT:    xxsldwi vs4, vs4, vs4, 1
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r4, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r4
+; CHECK-BE-NEXT:    mffprwz r4, f4
+; CHECK-BE-NEXT:    mtfprwz f4, r4
+; CHECK-BE-NEXT:    xxperm vs4, vs5, vs3
+; CHECK-BE-NEXT:    xxmrghw vs2, vs4, vs2
+; CHECK-BE-NEXT:    xxmrghd vs0, vs2, vs0
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -682,19 +682,19 @@ define i32 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtfprd f0, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xscvspdpn f2, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mtfprwz f2, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
+; CHECK-BE-NEXT:    xxperm v2, vs2, vs1
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -762,31 +762,31 @@ define i64 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 {
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-BE-NEXT:    xxswapd vs2, v2
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xscvspdpn f0, v2
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    vperm v2, v5, v2, v3
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v4
+; CHECK-BE-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mtfprwz f2, r3
+; CHECK-BE-NEXT:    xxperm vs1, vs2, vs0
+; CHECK-BE-NEXT:    xscvspdpn f2, v2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mtfprwz f2, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    xxperm vs3, vs2, vs0
+; CHECK-BE-NEXT:    xxmrghw vs0, vs3, vs1
 ; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -905,51 +905,51 @@ define <8 x i16> @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #2
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs4, vs1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    mtfprwz f4, r3
+; CHECK-BE-NEXT:    xxperm vs3, vs4, vs2
+; CHECK-BE-NEXT:    xscvspdpn f4, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xxsldwi vs2, vs0, vs0, 3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    mtfprwz f4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
-; CHECK-BE-NEXT:    xxmrghw vs1, v4, v3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xxswapd vs2, vs0
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xscvspdpn f2, vs0
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    xxperm vs1, vs4, vs2
+; CHECK-BE-NEXT:    xxswapd vs4, vs0
+; CHECK-BE-NEXT:    xxmrghw vs1, vs1, vs3
+; CHECK-BE-NEXT:    xxsldwi vs3, vs0, vs0, 3
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    mtfprwz f4, r3
+; CHECK-BE-NEXT:    xxperm vs3, vs4, vs2
+; CHECK-BE-NEXT:    xscvspdpn f4, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    mtfprwz f4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtfprwz f0, r3
+; CHECK-BE-NEXT:    xxperm vs0, vs4, vs2
+; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs3
 ; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -1166,104 +1166,104 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i16>) %agg.result
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs1, 16(r4)
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
 ; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_0@toc@ha
-; CHECK-BE-NEXT:    lxv vs0, 0(r4)
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
 ; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r5)
-; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    xscvspdpn f4, vs1
+; CHECK-BE-NEXT:    lxv vs3, 0(r5)
+; CHECK-BE-NEXT:    xscvspdpn f6, vs2
+; CHECK-BE-NEXT:    xxsldwi vs4, vs2, vs2, 3
+; CHECK-BE-NEXT:    xscvspdpn f9, vs1
+; CHECK-BE-NEXT:    xxswapd vs5, vs2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
+; CHECK-BE-NEXT:    xxsldwi vs7, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs8, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxsldwi vs10, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs11, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f9, f9
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xscvspdpn f8, vs8
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f10, vs10
+; CHECK-BE-NEXT:    xscvspdpn f11, vs11
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    xscvdpsxws f8, f8
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f10, f10
+; CHECK-BE-NEXT:    xscvdpsxws f11, f11
+; CHECK-BE-NEXT:    mffprwz r5, f6
+; CHECK-BE-NEXT:    mtfprwz f6, r5
+; CHECK-BE-NEXT:    mffprwz r5, f9
+; CHECK-BE-NEXT:    mtfprwz f9, r5
+; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    mtfprwz f4, r5
+; CHECK-BE-NEXT:    mffprwz r5, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    xxswapd vs2, vs0
-; CHECK-BE-NEXT:    mtvsrwz v3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f4
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    mtvsrwz v4, r5
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xscvspdpn f3, vs5
-; CHECK-BE-NEXT:    mtvsrwz v4, r5
-; CHECK-BE-NEXT:    mffprwz r5, f1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxperm vs4, vs5, vs3
+; CHECK-BE-NEXT:    xscvspdpn f5, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mtvsrwz v5, r5
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtfprwz f2, r5
+; CHECK-BE-NEXT:    mffprwz r5, f7
+; CHECK-BE-NEXT:    mtfprwz f7, r5
+; CHECK-BE-NEXT:    mffprwz r5, f8
+; CHECK-BE-NEXT:    xxperm vs2, vs6, vs3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    mtfprwz f8, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r5
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    mtvsrwz v0, r5
-; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v1, r5
-; CHECK-BE-NEXT:    mffprwz r5, f0
-; CHECK-BE-NEXT:    lxv vs0, 48(r4)
-; CHECK-BE-NEXT:    vperm v0, v1, v0, v2
-; CHECK-BE-NEXT:    mtvsrwz v1, r5
-; CHECK-BE-NEXT:    vperm v5, v5, v1, v2
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxswapd vs3, vs0
-; CHECK-BE-NEXT:    xscvspdpn f4, vs0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xxmrghw vs2, vs2, vs4
+; CHECK-BE-NEXT:    lxv vs4, 32(r4)
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r5, f1
-; CHECK-BE-NEXT:    lxv vs1, 32(r4)
-; CHECK-BE-NEXT:    mffprwz r4, f4
-; CHECK-BE-NEXT:    mtvsrwz v3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xxmrghw vs3, v5, v0
-; CHECK-BE-NEXT:    mtvsrwz v4, r5
-; CHECK-BE-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    mtfprwz f1, r5
+; CHECK-BE-NEXT:    xxperm vs7, vs8, vs3
+; CHECK-BE-NEXT:    mffprwz r5, f10
+; CHECK-BE-NEXT:    xxperm vs1, vs9, vs3
+; CHECK-BE-NEXT:    mtfprwz f10, r5
+; CHECK-BE-NEXT:    mffprwz r5, f11
+; CHECK-BE-NEXT:    mffprwz r4, f5
+; CHECK-BE-NEXT:    mtfprwz f11, r5
+; CHECK-BE-NEXT:    xxmrghw vs1, vs1, vs7
+; CHECK-BE-NEXT:    mtfprwz f5, r4
+; CHECK-BE-NEXT:    xxperm vs10, vs11, vs3
 ; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
-; CHECK-BE-NEXT:    stxv vs2, 0(r3)
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xxmrghw vs0, v4, v3
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    mtvsrwz v3, r4
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    xscvspdpn f3, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
-; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    xxmrghw vs1, v2, v3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs1, vs2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs4, vs4, 3
+; CHECK-BE-NEXT:    mtfprwz f0, r4
+; CHECK-BE-NEXT:    xxperm vs0, vs5, vs3
+; CHECK-BE-NEXT:    xxswapd vs5, vs4
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs10
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    mtfprwz f2, r4
+; CHECK-BE-NEXT:    mffprwz r4, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r4
+; CHECK-BE-NEXT:    xxperm vs2, vs5, vs3
+; CHECK-BE-NEXT:    xscvspdpn f5, vs4
+; CHECK-BE-NEXT:    xxsldwi vs4, vs4, vs4, 1
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r4, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r4
+; CHECK-BE-NEXT:    mffprwz r4, f4
+; CHECK-BE-NEXT:    mtfprwz f4, r4
+; CHECK-BE-NEXT:    xxperm vs4, vs5, vs3
+; CHECK-BE-NEXT:    xxmrghw vs2, vs4, vs2
+; CHECK-BE-NEXT:    xxmrghd vs0, vs2, vs0
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
index 851ce3271f7e3..908a711195ee2 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
@@ -55,19 +55,19 @@ define i16 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtfprd f0, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xscvspdpn f2, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mtfprwz f2, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
+; CHECK-BE-NEXT:    xxperm v2, vs2, vs1
 ; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    stxsihx v2, 0, r3
 ; CHECK-BE-NEXT:    lhz r3, -2(r1)
@@ -138,32 +138,32 @@ define i32 @test4elt(<4 x float> %a) local_unnamed_addr #1 {
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-BE-NEXT:    xxsldwi vs2, v2, v2, 1
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xscvspdpn f0, v2
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    xxswapd vs1, v2
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    xxperm v3, vs1, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vperm v2, v5, v2, v3
-; CHECK-BE-NEXT:    vmrghh v2, v2, v4
+; CHECK-BE-NEXT:    xxperm v2, vs1, vs0
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -285,52 +285,52 @@ define i64 @test8elt(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    xxperm v2, vs3, vs2
+; CHECK-BE-NEXT:    xscvspdpn f3, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs3, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    xxperm v3, vs1, vs2
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtfprwz f1, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xxperm v4, vs1, vs2
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
 ; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -550,97 +550,97 @@ define <16 x i8> @test16elt(ptr nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    xxswapd vs4, vs3
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    xscvspdpn f4, vs3
+; CHECK-BE-NEXT:    lxv vs4, 0(r3)
+; CHECK-BE-NEXT:    xxsldwi vs5, vs3, vs3, 3
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    xxswapd vs5, vs3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r3
+; CHECK-BE-NEXT:    xxperm v2, vs5, vs4
+; CHECK-BE-NEXT:    xscvspdpn f5, vs3
 ; CHECK-BE-NEXT:    xxsldwi vs3, vs3, vs3, 1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xxsldwi vs3, vs2, vs2, 3
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs5, vs4
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    xxperm v3, vs3, vs4
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs2
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xxperm v4, vs3, vs4
 ; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    xxmrghw vs2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    xxperm v2, vs3, vs4
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs3, vs4
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    xxperm v3, vs1, vs4
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtfprwz f1, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xxperm v4, vs1, vs4
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
 ; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -695,19 +695,19 @@ define i16 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtfprd f0, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xscvspdpn f2, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mtfprwz f2, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
+; CHECK-BE-NEXT:    xxperm v2, vs2, vs1
 ; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    stxsihx v2, 0, r3
 ; CHECK-BE-NEXT:    lhz r3, -2(r1)
@@ -778,32 +778,32 @@ define i32 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 {
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-BE-NEXT:    xxsldwi vs2, v2, v2, 1
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xscvspdpn f0, v2
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    xxswapd vs1, v2
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    xxperm v3, vs1, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vperm v2, v5, v2, v3
-; CHECK-BE-NEXT:    vmrghh v2, v2, v4
+; CHECK-BE-NEXT:    xxperm v2, vs1, vs0
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -925,52 +925,52 @@ define i64 @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    xxperm v2, vs3, vs2
+; CHECK-BE-NEXT:    xscvspdpn f3, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs3, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    xxperm v3, vs1, vs2
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtfprwz f1, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xxperm v4, vs1, vs2
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
 ; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -1190,97 +1190,97 @@ define <16 x i8> @test16elt_signed(ptr nocapture readonly) local_unnamed_addr #3
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI7_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    xxswapd vs4, vs3
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    xscvspdpn f4, vs3
+; CHECK-BE-NEXT:    lxv vs4, 0(r3)
+; CHECK-BE-NEXT:    xxsldwi vs5, vs3, vs3, 3
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    xxswapd vs5, vs3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r3
+; CHECK-BE-NEXT:    xxperm v2, vs5, vs4
+; CHECK-BE-NEXT:    xscvspdpn f5, vs3
 ; CHECK-BE-NEXT:    xxsldwi vs3, vs3, vs3, 1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xxsldwi vs3, vs2, vs2, 3
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs5, vs4
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    xxperm v3, vs3, vs4
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs2
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xxperm v4, vs3, vs4
 ; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    xxmrghw vs2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    xxperm v2, vs3, vs4
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs3, vs4
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    xxperm v3, vs1, vs4
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtfprwz f1, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xxperm v4, vs1, vs4
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
 ; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs2
 ; CHECK-BE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
index 3b422dfc7a4fb..256ad2769587e 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
@@ -40,18 +40,18 @@ define i32 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xscvdpsxws f0, v2
+; CHECK-BE-NEXT:    xxswapd vs2, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, v2
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
+; CHECK-BE-NEXT:    xxperm v2, vs1, vs0
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -117,24 +117,24 @@ define i64 @test4elt(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    xxperm vs1, vs3, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtfprwz f0, r3
+; CHECK-BE-NEXT:    xxperm vs0, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -242,41 +242,41 @@ define <8 x i16> @test8elt(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f4, f3
+; CHECK-BE-NEXT:    lxv vs4, 0(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f5, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f2
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    xxperm vs3, vs5, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
-; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mtfprwz f2, r3
+; CHECK-BE-NEXT:    xxperm vs2, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghw vs2, vs2, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    xxperm vs1, vs3, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtfprwz f0, r3
+; CHECK-BE-NEXT:    xxperm vs0, vs3, vs4
+; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -465,87 +465,87 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr n
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs2, 48(r4)
-; CHECK-BE-NEXT:    lxv vs1, 32(r4)
-; CHECK-BE-NEXT:    lxv vs0, 16(r4)
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI3_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r5)
-; CHECK-BE-NEXT:    xscvdpsxws f3, f2
-; CHECK-BE-NEXT:    xscvdpsxws f4, f1
-; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f5, f0
-; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    lxv vs7, 48(r4)
+; CHECK-BE-NEXT:    lxv vs0, 64(r4)
+; CHECK-BE-NEXT:    lxv vs1, 80(r4)
+; CHECK-BE-NEXT:    lxv vs2, 96(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f9, f7
+; CHECK-BE-NEXT:    xxswapd vs7, vs7
 ; CHECK-BE-NEXT:    lxv vs3, 112(r4)
-; CHECK-BE-NEXT:    mtvsrwz v3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f4
 ; CHECK-BE-NEXT:    lxv vs4, 0(r4)
-; CHECK-BE-NEXT:    mtvsrwz v4, r5
-; CHECK-BE-NEXT:    mffprwz r5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f7, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs3
-; CHECK-BE-NEXT:    mtvsrwz v5, r5
-; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    lxv vs2, 96(r4)
-; CHECK-BE-NEXT:    xscvdpsxws f5, f4
+; CHECK-BE-NEXT:    lxv vs5, 16(r4)
+; CHECK-BE-NEXT:    lxv vs6, 32(r4)
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
+; CHECK-BE-NEXT:    lxv vs8, 0(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    mffprwz r4, f9
+; CHECK-BE-NEXT:    mtfprwz f9, r4
+; CHECK-BE-NEXT:    mffprwz r4, f7
+; CHECK-BE-NEXT:    mtfprwz f7, r4
+; CHECK-BE-NEXT:    xxperm vs7, vs9, vs8
+; CHECK-BE-NEXT:    xscvdpsxws f9, f6
+; CHECK-BE-NEXT:    xxswapd vs6, vs6
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    mffprwz r4, f9
+; CHECK-BE-NEXT:    mtfprwz f9, r4
+; CHECK-BE-NEXT:    mffprwz r4, f6
+; CHECK-BE-NEXT:    mtfprwz f6, r4
+; CHECK-BE-NEXT:    xxperm vs6, vs9, vs8
+; CHECK-BE-NEXT:    xscvdpsxws f9, f5
+; CHECK-BE-NEXT:    xxswapd vs5, vs5
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xxmrghw vs6, vs6, vs7
+; CHECK-BE-NEXT:    mffprwz r4, f9
+; CHECK-BE-NEXT:    mtfprwz f9, r4
+; CHECK-BE-NEXT:    mffprwz r4, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r4
+; CHECK-BE-NEXT:    xxperm vs5, vs9, vs8
+; CHECK-BE-NEXT:    xscvdpsxws f9, f4
 ; CHECK-BE-NEXT:    xxswapd vs4, vs4
-; CHECK-BE-NEXT:    mtvsrwz v0, r5
-; CHECK-BE-NEXT:    mffprwz r5, f1
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    lxv vs1, 80(r4)
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    vperm v3, v3, v0, v2
-; CHECK-BE-NEXT:    mtvsrwz v0, r5
-; CHECK-BE-NEXT:    mffprwz r5, f0
-; CHECK-BE-NEXT:    lxv vs0, 64(r4)
-; CHECK-BE-NEXT:    vperm v4, v4, v0, v2
-; CHECK-BE-NEXT:    mtvsrwz v0, r5
-; CHECK-BE-NEXT:    mffprwz r4, f5
-; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
-; CHECK-BE-NEXT:    xxmrghw vs6, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v3, r4
+; CHECK-BE-NEXT:    mffprwz r4, f9
+; CHECK-BE-NEXT:    mtfprwz f9, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f4
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-NEXT:    mffprwz r4, f7
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f2
+; CHECK-BE-NEXT:    mtfprwz f4, r4
+; CHECK-BE-NEXT:    xxperm vs4, vs9, vs8
+; CHECK-BE-NEXT:    xscvdpsxws f9, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xxmrghw vs4, vs4, vs5
+; CHECK-BE-NEXT:    xscvdpsxws f5, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xxmrghw vs4, v3, v5
-; CHECK-BE-NEXT:    mtvsrwz v3, r4
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xxmrghd vs4, vs4, vs6
+; CHECK-BE-NEXT:    mffprwz r4, f9
+; CHECK-BE-NEXT:    mtfprwz f9, r4
+; CHECK-BE-NEXT:    stxv vs4, 0(r3)
 ; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r4
+; CHECK-BE-NEXT:    mffprwz r4, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r4
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    xxperm vs3, vs9, vs8
+; CHECK-BE-NEXT:    mtfprwz f2, r4
+; CHECK-BE-NEXT:    xxperm vs2, vs5, vs8
+; CHECK-BE-NEXT:    xxmrghw vs2, vs2, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-NEXT:    stxv vs4, 0(r3)
-; CHECK-BE-NEXT:    mffprwz r4, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v3, r4
+; CHECK-BE-NEXT:    mtfprwz f3, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    mtfprwz f1, r4
+; CHECK-BE-NEXT:    xxperm vs1, vs3, vs8
+; CHECK-BE-NEXT:    xscvdpsxws f3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
-; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtfprwz f0, r4
+; CHECK-BE-NEXT:    xxperm vs0, vs3, vs8
+; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
@@ -587,18 +587,18 @@ define i32 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xscvdpsxws f0, v2
+; CHECK-BE-NEXT:    xxswapd vs2, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, v2
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
+; CHECK-BE-NEXT:    xxperm v2, vs1, vs0
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -664,24 +664,24 @@ define i64 @test4elt_signed(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    xxperm vs1, vs3, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtfprwz f0, r3
+; CHECK-BE-NEXT:    xxperm vs0, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -789,41 +789,41 @@ define <8 x i16> @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #2
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f4, f3
+; CHECK-BE-NEXT:    lxv vs4, 0(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f5, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f2
+; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    xxperm vs3, vs5, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f5, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
-; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mtfprwz f2, r3
+; CHECK-BE-NEXT:    xxperm vs2, vs5, vs4
+; CHECK-BE-NEXT:    xxmrghw vs2, vs2, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    xxperm vs1, vs3, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtfprwz f0, r3
+; CHECK-BE-NEXT:    xxperm vs0, vs3, vs4
+; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -1012,87 +1012,87 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i16>) %agg.result
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs2, 48(r4)
-; CHECK-BE-NEXT:    lxv vs1, 32(r4)
-; CHECK-BE-NEXT:    lxv vs0, 16(r4)
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_0@toc@ha
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r5)
-; CHECK-BE-NEXT:    xscvdpsxws f3, f2
-; CHECK-BE-NEXT:    xscvdpsxws f4, f1
-; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f5, f0
-; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    lxv vs7, 48(r4)
+; CHECK-BE-NEXT:    lxv vs0, 64(r4)
+; CHECK-BE-NEXT:    lxv vs1, 80(r4)
+; CHECK-BE-NEXT:    lxv vs2, 96(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f9, f7
+; CHECK-BE-NEXT:    xxswapd vs7, vs7
 ; CHECK-BE-NEXT:    lxv vs3, 112(r4)
-; CHECK-BE-NEXT:    mtvsrwz v3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f4
 ; CHECK-BE-NEXT:    lxv vs4, 0(r4)
-; CHECK-BE-NEXT:    mtvsrwz v4, r5
-; CHECK-BE-NEXT:    mffprwz r5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f7, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs3
-; CHECK-BE-NEXT:    mtvsrwz v5, r5
-; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    lxv vs2, 96(r4)
-; CHECK-BE-NEXT:    xscvdpsxws f5, f4
+; CHECK-BE-NEXT:    lxv vs5, 16(r4)
+; CHECK-BE-NEXT:    lxv vs6, 32(r4)
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_0@toc@l
+; CHECK-BE-NEXT:    lxv vs8, 0(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    mffprwz r4, f9
+; CHECK-BE-NEXT:    mtfprwz f9, r4
+; CHECK-BE-NEXT:    mffprwz r4, f7
+; CHECK-BE-NEXT:    mtfprwz f7, r4
+; CHECK-BE-NEXT:    xxperm vs7, vs9, vs8
+; CHECK-BE-NEXT:    xscvdpsxws f9, f6
+; CHECK-BE-NEXT:    xxswapd vs6, vs6
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    mffprwz r4, f9
+; CHECK-BE-NEXT:    mtfprwz f9, r4
+; CHECK-BE-NEXT:    mffprwz r4, f6
+; CHECK-BE-NEXT:    mtfprwz f6, r4
+; CHECK-BE-NEXT:    xxperm vs6, vs9, vs8
+; CHECK-BE-NEXT:    xscvdpsxws f9, f5
+; CHECK-BE-NEXT:    xxswapd vs5, vs5
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    xxmrghw vs6, vs6, vs7
+; CHECK-BE-NEXT:    mffprwz r4, f9
+; CHECK-BE-NEXT:    mtfprwz f9, r4
+; CHECK-BE-NEXT:    mffprwz r4, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r4
+; CHECK-BE-NEXT:    xxperm vs5, vs9, vs8
+; CHECK-BE-NEXT:    xscvdpsxws f9, f4
 ; CHECK-BE-NEXT:    xxswapd vs4, vs4
-; CHECK-BE-NEXT:    mtvsrwz v0, r5
-; CHECK-BE-NEXT:    mffprwz r5, f1
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    lxv vs1, 80(r4)
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    vperm v3, v3, v0, v2
-; CHECK-BE-NEXT:    mtvsrwz v0, r5
-; CHECK-BE-NEXT:    mffprwz r5, f0
-; CHECK-BE-NEXT:    lxv vs0, 64(r4)
-; CHECK-BE-NEXT:    vperm v4, v4, v0, v2
-; CHECK-BE-NEXT:    mtvsrwz v0, r5
-; CHECK-BE-NEXT:    mffprwz r4, f5
-; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
-; CHECK-BE-NEXT:    xxmrghw vs6, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v3, r4
+; CHECK-BE-NEXT:    mffprwz r4, f9
+; CHECK-BE-NEXT:    mtfprwz f9, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f4
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-NEXT:    mffprwz r4, f7
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f2
+; CHECK-BE-NEXT:    mtfprwz f4, r4
+; CHECK-BE-NEXT:    xxperm vs4, vs9, vs8
+; CHECK-BE-NEXT:    xscvdpsxws f9, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xxmrghw vs4, vs4, vs5
+; CHECK-BE-NEXT:    xscvdpsxws f5, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xxmrghw vs4, v3, v5
-; CHECK-BE-NEXT:    mtvsrwz v3, r4
-; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xxmrghd vs4, vs4, vs6
+; CHECK-BE-NEXT:    mffprwz r4, f9
+; CHECK-BE-NEXT:    mtfprwz f9, r4
+; CHECK-BE-NEXT:    stxv vs4, 0(r3)
 ; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r4
+; CHECK-BE-NEXT:    mffprwz r4, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r4
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    xxperm vs3, vs9, vs8
+; CHECK-BE-NEXT:    mtfprwz f2, r4
+; CHECK-BE-NEXT:    xxperm vs2, vs5, vs8
+; CHECK-BE-NEXT:    xxmrghw vs2, vs2, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-NEXT:    stxv vs4, 0(r3)
-; CHECK-BE-NEXT:    mffprwz r4, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v3, r4
+; CHECK-BE-NEXT:    mtfprwz f3, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    mtfprwz f1, r4
+; CHECK-BE-NEXT:    xxperm vs1, vs3, vs8
+; CHECK-BE-NEXT:    xscvdpsxws f3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
-; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtfprwz f0, r4
+; CHECK-BE-NEXT:    xxperm vs0, vs3, vs8
+; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
index 8870ccc2fc55d..56b47c0634f68 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
@@ -45,18 +45,18 @@ define i16 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xscvdpsxws f0, v2
+; CHECK-BE-NEXT:    xxswapd vs2, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, v2
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
+; CHECK-BE-NEXT:    xxperm v2, vs1, vs0
 ; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    stxsihx v2, 0, r3
 ; CHECK-BE-NEXT:    lhz r3, -2(r1)
@@ -125,25 +125,25 @@ define i32 @test4elt(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
+; CHECK-BE-NEXT:    xxperm v2, vs3, vs2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtfprwz f1, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    xxperm v3, vs1, vs2
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -254,42 +254,42 @@ define i64 @test8elt(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f4, f3
+; CHECK-BE-NEXT:    lxv vs4, 0(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f5, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
+; CHECK-BE-NEXT:    xxperm v2, vs5, vs4
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs3, vs4
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    mtfprwz f2, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs2, vs4
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtfprwz f1, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xxperm v4, vs1, vs4
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
 ; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -479,7 +479,7 @@ define <16 x i8> @test16elt(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f8, f7
+; CHECK-BE-NEXT:    xscvdpsxws f9, f7
 ; CHECK-BE-NEXT:    xxswapd vs7, vs7
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    lxv vs4, 64(r3)
@@ -487,75 +487,75 @@ define <16 x i8> @test16elt(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-BE-NEXT:    lxv vs6, 96(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxv vs8, 0(r3)
 ; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    mffprwz r3, f8
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f9
+; CHECK-BE-NEXT:    mtfprwz f9, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f7
 ; CHECK-BE-NEXT:    xscvdpsxws f7, f6
 ; CHECK-BE-NEXT:    xxswapd vs6, vs6
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
+; CHECK-BE-NEXT:    xxperm v2, vs9, vs8
 ; CHECK-BE-NEXT:    mffprwz r3, f7
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtfprwz f7, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f6
 ; CHECK-BE-NEXT:    xscvdpsxws f6, f5
 ; CHECK-BE-NEXT:    xxswapd vs5, vs5
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs7, vs8
 ; CHECK-BE-NEXT:    mffprwz r3, f6
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    mtfprwz f6, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f5
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f4
 ; CHECK-BE-NEXT:    xxswapd vs4, vs4
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs6, vs8
 ; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r3
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xxperm v4, vs5, vs8
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    mffprwz r3, f4
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    xxmrghw vs4, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    xxmrghw vs4, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mtfprwz f5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
+; CHECK-BE-NEXT:    xxperm v2, vs5, vs8
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs3, vs8
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    mtfprwz f2, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs2, vs8
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtfprwz f1, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xxperm v4, vs1, vs8
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
 ; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs4
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -600,18 +600,18 @@ define i16 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xscvdpsxws f0, v2
+; CHECK-BE-NEXT:    xxswapd vs2, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, v2
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
+; CHECK-BE-NEXT:    xxperm v2, vs1, vs0
 ; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    stxsihx v2, 0, r3
 ; CHECK-BE-NEXT:    lhz r3, -2(r1)
@@ -680,25 +680,25 @@ define i32 @test4elt_signed(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
+; CHECK-BE-NEXT:    xxperm v2, vs3, vs2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtfprwz f1, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    xxperm v3, vs1, vs2
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -809,42 +809,42 @@ define i64 @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f4, f3
+; CHECK-BE-NEXT:    lxv vs4, 0(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f5, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
+; CHECK-BE-NEXT:    xxperm v2, vs5, vs4
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs3, vs4
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    mtfprwz f2, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs2, vs4
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtfprwz f1, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xxperm v4, vs1, vs4
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
 ; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -1034,7 +1034,7 @@ define <16 x i8> @test16elt_signed(ptr nocapture readonly) local_unnamed_addr #2
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f8, f7
+; CHECK-BE-NEXT:    xscvdpsxws f9, f7
 ; CHECK-BE-NEXT:    xxswapd vs7, vs7
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    lxv vs4, 64(r3)
@@ -1042,75 +1042,75 @@ define <16 x i8> @test16elt_signed(ptr nocapture readonly) local_unnamed_addr #2
 ; CHECK-BE-NEXT:    lxv vs6, 96(r3)
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI7_0@toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxv vs8, 0(r3)
 ; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    mffprwz r3, f8
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f9
+; CHECK-BE-NEXT:    mtfprwz f9, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f7
 ; CHECK-BE-NEXT:    xscvdpsxws f7, f6
 ; CHECK-BE-NEXT:    xxswapd vs6, vs6
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
+; CHECK-BE-NEXT:    xxperm v2, vs9, vs8
 ; CHECK-BE-NEXT:    mffprwz r3, f7
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtfprwz f7, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f6
 ; CHECK-BE-NEXT:    xscvdpsxws f6, f5
 ; CHECK-BE-NEXT:    xxswapd vs5, vs5
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs7, vs8
 ; CHECK-BE-NEXT:    mffprwz r3, f6
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    mtfprwz f6, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f5
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f4
 ; CHECK-BE-NEXT:    xxswapd vs4, vs4
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs6, vs8
 ; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    mtfprwz f5, r3
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xxperm v4, vs5, vs8
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    mffprwz r3, f4
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    xxmrghw vs4, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    xxmrghw vs4, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mtfprwz f5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
+; CHECK-BE-NEXT:    xxperm v2, vs5, vs8
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mtfprwz f3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs3, vs8
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    mtfprwz f2, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xxperm v3, vs2, vs8
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtfprwz f1, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xxperm v4, vs1, vs8
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
 ; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs4
 ; CHECK-BE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
index f521e45ef0f6d..64022b5503e01 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
@@ -80,13 +80,13 @@ define <4 x float> @test4elt(i64 %a.coerce) local_unnamed_addr #1 {
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mtfprd f1, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    xxlxor vs0, vs0, vs0
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
-; CHECK-BE-NEXT:    xvcvuxwsp v2, v2
+; CHECK-BE-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-BE-NEXT:    xvcvuxwsp v2, vs1
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <4 x i16>

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
index 210f0edc0b96b..408953ad519e1 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
@@ -24,24 +24,24 @@ define <2 x double> @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrwz v2, r3
+; CHECK-P9-NEXT:    mtfprwz f1, r3
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    xxlxor vs0, vs0, vs0
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
-; CHECK-P9-NEXT:    xvcvuxddp v2, v2
+; CHECK-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-P9-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-P9-NEXT:    xvcvuxddp v2, vs1
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    mtfprwz f1, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    xxlxor vs0, vs0, vs0
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
-; CHECK-BE-NEXT:    xvcvuxddp v2, v2
+; CHECK-BE-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-BE-NEXT:    xvcvuxddp v2, vs1
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i32 %a.coerce to <2 x i16>
@@ -399,8 +399,8 @@ define <2 x double> @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-NEXT:    xxperm v2, v2, vs0
 ; CHECK-P9-NEXT:    vextsh2d v2, v2
 ; CHECK-P9-NEXT:    xvcvsxddp v2, v2
 ; CHECK-P9-NEXT:    blr
@@ -410,8 +410,8 @@ define <2 x double> @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xxperm v2, v2, vs0
 ; CHECK-BE-NEXT:    vextsh2d v2, v2
 ; CHECK-BE-NEXT:    xvcvsxddp v2, v2
 ; CHECK-BE-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
index 1bf167a1f415d..31436c5baa50e 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
@@ -76,24 +76,24 @@ define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 {
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrwz v2, r3
+; CHECK-P9-NEXT:    mtfprwz f1, r3
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    xxlxor vs0, vs0, vs0
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
-; CHECK-P9-NEXT:    xvcvuxwsp v2, v2
+; CHECK-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-P9-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-P9-NEXT:    xvcvuxwsp v2, vs1
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    mtfprwz f1, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    xxlxor vs0, vs0, vs0
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
-; CHECK-BE-NEXT:    xvcvuxwsp v2, v2
+; CHECK-BE-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-BE-NEXT:    xvcvuxwsp v2, vs1
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i32 %a.coerce to <4 x i8>
@@ -348,8 +348,8 @@ define <4 x float> @test4elt_signed(i32 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; CHECK-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-NEXT:    xxperm v2, v2, vs0
 ; CHECK-P9-NEXT:    vextsb2w v2, v2
 ; CHECK-P9-NEXT:    xvcvsxwsp v2, v2
 ; CHECK-P9-NEXT:    blr
@@ -359,8 +359,8 @@ define <4 x float> @test4elt_signed(i32 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; CHECK-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xxperm v2, v2, vs0
 ; CHECK-BE-NEXT:    vextsb2w v2, v2
 ; CHECK-BE-NEXT:    xvcvsxwsp v2, v2
 ; CHECK-BE-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
index 78229f35df82d..024c85cd7bcf0 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
@@ -24,24 +24,24 @@ define <2 x double> @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtvsrwz v2, r3
+; CHECK-P9-NEXT:    mtfprwz f1, r3
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    xxlxor vs0, vs0, vs0
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
-; CHECK-P9-NEXT:    xvcvuxddp v2, v2
+; CHECK-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-P9-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-P9-NEXT:    xvcvuxddp v2, vs1
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    mtfprwz f1, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    xxlxor vs0, vs0, vs0
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
-; CHECK-BE-NEXT:    xvcvuxddp v2, v2
+; CHECK-BE-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-BE-NEXT:    xvcvuxddp v2, vs1
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i16 %a.coerce to <2 x i8>
@@ -434,8 +434,8 @@ define <2 x double> @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    mtvsrwz v2, r3
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-P9-NEXT:    lxv v3, 0(r3)
-; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-NEXT:    xxperm v2, v2, vs0
 ; CHECK-P9-NEXT:    vextsb2d v2, v2
 ; CHECK-P9-NEXT:    xvcvsxddp v2, v2
 ; CHECK-P9-NEXT:    blr
@@ -445,8 +445,8 @@ define <2 x double> @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-BE-NEXT:    lxv v3, 0(r3)
-; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xxperm v2, v2, vs0
 ; CHECK-BE-NEXT:    vextsb2d v2, v2
 ; CHECK-BE-NEXT:    xvcvsxddp v2, v2
 ; CHECK-BE-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
index 3b4fce3f58eea..2c969fe76e18d 100644
--- a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
@@ -192,11 +192,11 @@ define double @test10(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
 ; CHECK-LE-NEXT:    addi 3, 3, .LCPI9_0@toc@l
-; CHECK-LE-NEXT:    lxv 36, 0(3)
+; CHECK-LE-NEXT:    lxv 0, 0(3)
 ; CHECK-LE-NEXT:    addis 3, 2, .LCPI9_1@toc@ha
 ; CHECK-LE-NEXT:    lfs 1, .LCPI9_1@toc@l(3)
-; CHECK-LE-NEXT:    vperm 2, 2, 3, 4
-; CHECK-LE-NEXT:    xxswapd 0, 34
+; CHECK-LE-NEXT:    xxperm 35, 34, 0
+; CHECK-LE-NEXT:    xxswapd 0, 35
 ; CHECK-LE-NEXT:    xsadddp 1, 0, 1
 ; CHECK-LE-NEXT:    blr
 ;
@@ -204,10 +204,10 @@ define double @test10(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI9_0@toc@l
-; CHECK-BE-NEXT:    lxv 36, 0(3)
+; CHECK-BE-NEXT:    lxv 0, 0(3)
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI9_1@toc@ha
+; CHECK-BE-NEXT:    xxperm 34, 35, 0
 ; CHECK-BE-NEXT:    lfs 0, .LCPI9_1@toc@l(3)
-; CHECK-BE-NEXT:    vperm 2, 3, 2, 4
 ; CHECK-BE-NEXT:    xsadddp 1, 34, 0
 ; CHECK-BE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_int_ext.ll b/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
index 8e5e5d95fbec4..be1dc57bbf1ff 100644
--- a/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
@@ -12,8 +12,8 @@ define <4 x i32> @vextsb2wLE(<16 x i8> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI0_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    vextsb2w 2, 2
 ; CHECK-BE-NEXT:    blr
 
@@ -43,8 +43,8 @@ define <2 x i64> @vextsb2dLE(<16 x i8> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI1_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    vextsb2d 2, 2
 ; CHECK-BE-NEXT:    blr
 
@@ -68,8 +68,8 @@ define <4 x i32> @vextsh2wLE(<8 x i16> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI2_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    vextsh2w 2, 2
 ; CHECK-BE-NEXT:    blr
 
@@ -99,8 +99,8 @@ define <2 x i64> @vextsh2dLE(<8 x i16> %a) {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
 ; CHECK-BE-NEXT:    addi 3, 3, .LCPI3_0@toc@l
-; CHECK-BE-NEXT:    lxv 35, 0(3)
-; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
+; CHECK-BE-NEXT:    lxv 0, 0(3)
+; CHECK-BE-NEXT:    xxperm 34, 34, 0
 ; CHECK-BE-NEXT:    vextsh2d 2, 2
 ; CHECK-BE-NEXT:    blr
 
@@ -339,39 +339,39 @@ define <8 x i16> @testInvalidExtend(<16 x i8> %a) {
 ; CHECK-BE-NEXT:    vextublx 10, 10, 2
 ; CHECK-BE-NEXT:    vextublx 7, 7, 2
 ; CHECK-BE-NEXT:    vextublx 8, 8, 2
-; CHECK-BE-NEXT:    li 3, 0
-; CHECK-BE-NEXT:    li 4, 2
 ; CHECK-BE-NEXT:    li 5, 4
 ; CHECK-BE-NEXT:    li 6, 6
+; CHECK-BE-NEXT:    li 3, 0
+; CHECK-BE-NEXT:    li 4, 2
 ; CHECK-BE-NEXT:    extsb 9, 9
 ; CHECK-BE-NEXT:    extsb 10, 10
-; CHECK-BE-NEXT:    vextublx 3, 3, 2
-; CHECK-BE-NEXT:    vextublx 4, 4, 2
-; CHECK-BE-NEXT:    vextublx 5, 5, 2
 ; CHECK-BE-NEXT:    extsb 7, 7
 ; CHECK-BE-NEXT:    extsb 8, 8
-; CHECK-BE-NEXT:    extsb 5, 5
-; CHECK-BE-NEXT:    extsb 3, 3
-; CHECK-BE-NEXT:    extsb 4, 4
-; CHECK-BE-NEXT:    mtvsrwz 35, 9
-; CHECK-BE-NEXT:    addis 9, 2, .LCPI11_0@toc@ha
+; CHECK-BE-NEXT:    vextublx 5, 5, 2
 ; CHECK-BE-NEXT:    vextublx 6, 6, 2
-; CHECK-BE-NEXT:    mtvsrwz 34, 10
-; CHECK-BE-NEXT:    mtvsrwz 37, 7
+; CHECK-BE-NEXT:    extsb 5, 5
 ; CHECK-BE-NEXT:    extsb 6, 6
+; CHECK-BE-NEXT:    mtfprwz 1, 9
+; CHECK-BE-NEXT:    addis 9, 2, .LCPI11_0@toc@ha
+; CHECK-BE-NEXT:    mtfprwz 0, 10
+; CHECK-BE-NEXT:    mtfprwz 3, 7
+; CHECK-BE-NEXT:    vextublx 3, 3, 2
+; CHECK-BE-NEXT:    extsb 3, 3
+; CHECK-BE-NEXT:    mtfprwz 4, 3
 ; CHECK-BE-NEXT:    addi 9, 9, .LCPI11_0@toc@l
-; CHECK-BE-NEXT:    lxv 36, 0(9)
-; CHECK-BE-NEXT:    vperm 2, 3, 2, 4
-; CHECK-BE-NEXT:    mtvsrwz 35, 8
-; CHECK-BE-NEXT:    vperm 3, 5, 3, 4
-; CHECK-BE-NEXT:    mtvsrwz 37, 3
-; CHECK-BE-NEXT:    xxmrghw 0, 35, 34
-; CHECK-BE-NEXT:    mtvsrwz 34, 6
-; CHECK-BE-NEXT:    mtvsrwz 35, 5
-; CHECK-BE-NEXT:    vperm 2, 3, 2, 4
-; CHECK-BE-NEXT:    mtvsrwz 35, 4
-; CHECK-BE-NEXT:    vperm 3, 5, 3, 4
-; CHECK-BE-NEXT:    xxmrghw 1, 35, 34
+; CHECK-BE-NEXT:    vextublx 4, 4, 2
+; CHECK-BE-NEXT:    extsb 4, 4
+; CHECK-BE-NEXT:    lxv 2, 0(9)
+; CHECK-BE-NEXT:    xxperm 0, 1, 2
+; CHECK-BE-NEXT:    mtfprwz 1, 8
+; CHECK-BE-NEXT:    xxperm 1, 3, 2
+; CHECK-BE-NEXT:    mtfprwz 3, 5
+; CHECK-BE-NEXT:    xxmrghw 0, 1, 0
+; CHECK-BE-NEXT:    mtfprwz 1, 6
+; CHECK-BE-NEXT:    xxperm 1, 3, 2
+; CHECK-BE-NEXT:    mtfprwz 3, 4
+; CHECK-BE-NEXT:    xxperm 3, 4, 2
+; CHECK-BE-NEXT:    xxmrghw 1, 3, 1
 ; CHECK-BE-NEXT:    xxmrghd 34, 1, 0
 ; CHECK-BE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index f2ea2f3a3b4ea..9721231174f1f 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -81,7 +81,6 @@ define <3 x float> @constrained_vector_fdiv_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 3, 3
-; PC64LE9-NEXT:    lxv 36, 0(3)
 ; PC64LE9-NEXT:    xsdivsp 0, 1, 0
 ; PC64LE9-NEXT:    xxswapd 1, 35
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
@@ -89,11 +88,12 @@ define <3 x float> @constrained_vector_fdiv_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xxsldwi 2, 35, 35, 3
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xsdivsp 2, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 2
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32(
@@ -362,15 +362,15 @@ define <3 x float> @constrained_vector_frem_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
-; PC64LE9-NEXT:    xscvdpspn 35, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI7_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lxv 36, 0(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI7_0@toc@l
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -678,19 +678,19 @@ define <3 x float> @constrained_vector_fmul_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 3, 3
-; PC64LE9-NEXT:    lxv 36, 0(3)
 ; PC64LE9-NEXT:    xsmulsp 0, 1, 0
 ; PC64LE9-NEXT:    xxswapd 1, 35
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xsmulsp 1, 2, 1
 ; PC64LE9-NEXT:    xxsldwi 2, 35, 35, 3
-; PC64LE9-NEXT:    xscvdpspn 35, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xsmulsp 2, 3, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 2
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(
@@ -840,19 +840,19 @@ define <3 x float> @constrained_vector_fadd_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 3, 3
-; PC64LE9-NEXT:    lxv 36, 0(3)
 ; PC64LE9-NEXT:    xsaddsp 0, 1, 0
 ; PC64LE9-NEXT:    xxswapd 1, 35
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xsaddsp 1, 2, 1
 ; PC64LE9-NEXT:    xxsldwi 2, 35, 35, 3
-; PC64LE9-NEXT:    xscvdpspn 35, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xsaddsp 2, 3, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 2
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(
@@ -1002,19 +1002,19 @@ define <3 x float> @constrained_vector_fsub_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 3, 3
-; PC64LE9-NEXT:    lxv 36, 0(3)
 ; PC64LE9-NEXT:    xssubsp 0, 1, 0
 ; PC64LE9-NEXT:    xxswapd 1, 35
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xssubsp 1, 2, 1
 ; PC64LE9-NEXT:    xxsldwi 2, 35, 35, 3
-; PC64LE9-NEXT:    xscvdpspn 35, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xssubsp 2, 3, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 2
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(
@@ -1154,15 +1154,15 @@ define <3 x float> @constrained_vector_sqrt_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xssqrtsp 1, 1
 ; PC64LE9-NEXT:    xssqrtsp 2, 2
 ; PC64LE9-NEXT:    xssqrtsp 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 36, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 2
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32(
@@ -1422,15 +1422,15 @@ define <3 x float> @constrained_vector_pow_v3f32(<3 x float> %x, <3 x float> %y)
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI32_0@toc@ha
-; PC64LE9-NEXT:    xscvdpspn 35, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI32_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lxv 36, 0(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI32_0@toc@l
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -1850,15 +1850,15 @@ define <3 x float> @constrained_vector_powi_v3f32(<3 x float> %x, i32 %y) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI37_0@toc@ha
-; PC64LE9-NEXT:    xscvdpspn 35, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    ld 30, 48(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI37_0@toc@l
-; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lxv 36, 0(3)
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -2231,14 +2231,14 @@ define <3 x float> @constrained_vector_sin_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI42_0@toc@ha
-; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI42_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI42_0@toc@l
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxperm 34, 1, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -2582,14 +2582,14 @@ define <3 x float> @constrained_vector_cos_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI47_0@toc@ha
-; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI47_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI47_0@toc@l
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxperm 34, 1, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -2933,14 +2933,14 @@ define <3 x float> @constrained_vector_exp_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI52_0@toc@ha
-; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI52_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI52_0@toc@l
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxperm 34, 1, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -3284,14 +3284,14 @@ define <3 x float> @constrained_vector_exp2_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI57_0@toc@ha
-; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI57_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI57_0@toc@l
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxperm 34, 1, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -3635,14 +3635,14 @@ define <3 x float> @constrained_vector_log_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI62_0@toc@ha
-; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI62_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI62_0@toc@l
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxperm 34, 1, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -3986,14 +3986,14 @@ define <3 x float> @constrained_vector_log10_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI67_0@toc@ha
-; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI67_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI67_0@toc@l
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxperm 34, 1, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -4337,14 +4337,14 @@ define <3 x float> @constrained_vector_log2_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI72_0@toc@ha
-; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI72_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI72_0@toc@l
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxperm 34, 1, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -4594,15 +4594,15 @@ define <3 x float> @constrained_vector_rint_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xsrdpic 1, 1
 ; PC64LE9-NEXT:    xsrdpic 2, 2
 ; PC64LE9-NEXT:    xsrdpic 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 36, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 2
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    blr
  entry:
   %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
@@ -4827,14 +4827,14 @@ define <3 x float> @constrained_vector_nearbyint_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI82_0@toc@ha
-; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI82_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI82_0@toc@l
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    xscvdpspn 1, 31
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxperm 34, 1, 0
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -5152,15 +5152,15 @@ define <3 x float> @constrained_vector_maxnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI87_0@toc@ha
-; PC64LE9-NEXT:    xscvdpspn 35, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI87_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lxv 36, 0(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI87_0@toc@l
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -5394,15 +5394,15 @@ define <3 x float> @constrained_vector_minnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI92_0@toc@ha
-; PC64LE9-NEXT:    xscvdpspn 35, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 3, 3, .LCPI92_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lxv 36, 0(3)
+; PC64LE9-NEXT:    addi 3, 3, .LCPI92_0@toc@l
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -5587,22 +5587,23 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 34
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
+; PC64LE9-NEXT:    xxsldwi 2, 34, 34, 1
+; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvdpsxws 0, 0
 ; PC64LE9-NEXT:    xscvdpsxws 1, 1
+; PC64LE9-NEXT:    xscvdpsxws 2, 2
 ; PC64LE9-NEXT:    mffprwz 3, 0
 ; PC64LE9-NEXT:    mtfprwz 0, 3
 ; PC64LE9-NEXT:    mffprwz 3, 1
 ; PC64LE9-NEXT:    mtfprwz 1, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI97_0@toc@l
-; PC64LE9-NEXT:    lxv 36, 0(3)
-; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    xscvdpsxws 0, 0
-; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrwz 34, 3
-; PC64LE9-NEXT:    vperm 2, 2, 3, 4
+; PC64LE9-NEXT:    xxmrghw 0, 1, 0
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    mffprwz 3, 2
+; PC64LE9-NEXT:    mtfprwz 2, 3
+; PC64LE9-NEXT:    xxperm 0, 2, 1
+; PC64LE9-NEXT:    xxlor 34, 0, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32(
@@ -5856,12 +5857,12 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    mtfprwz 1, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xscvdpsxws 0, 3
+; PC64LE9-NEXT:    xscvdpsxws 1, 3
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI105_0@toc@l
-; PC64LE9-NEXT:    lxv 35, 0(3)
-; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrwz 36, 3
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    mtfprwz 1, 3
+; PC64LE9-NEXT:    xxperm 34, 1, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64(
@@ -6083,22 +6084,23 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xxswapd 1, 34
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
+; PC64LE9-NEXT:    xxsldwi 2, 34, 34, 1
+; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvdpuxws 0, 0
 ; PC64LE9-NEXT:    xscvdpuxws 1, 1
+; PC64LE9-NEXT:    xscvdpuxws 2, 2
 ; PC64LE9-NEXT:    mffprwz 3, 0
 ; PC64LE9-NEXT:    mtfprwz 0, 3
 ; PC64LE9-NEXT:    mffprwz 3, 1
 ; PC64LE9-NEXT:    mtfprwz 1, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI113_0@toc@l
-; PC64LE9-NEXT:    lxv 36, 0(3)
-; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    xscvdpuxws 0, 0
-; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrwz 34, 3
-; PC64LE9-NEXT:    vperm 2, 2, 3, 4
+; PC64LE9-NEXT:    xxmrghw 0, 1, 0
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    mffprwz 3, 2
+; PC64LE9-NEXT:    mtfprwz 2, 3
+; PC64LE9-NEXT:    xxperm 0, 2, 1
+; PC64LE9-NEXT:    xxlor 34, 0, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32(
@@ -6351,12 +6353,12 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    mtfprwz 1, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI121_0@toc@ha
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xscvdpuxws 0, 3
+; PC64LE9-NEXT:    xscvdpuxws 1, 3
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI121_0@toc@l
-; PC64LE9-NEXT:    lxv 35, 0(3)
-; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrwz 36, 3
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    mtfprwz 1, 3
+; PC64LE9-NEXT:    xxperm 34, 1, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64(
@@ -6559,13 +6561,13 @@ define <3 x float> @constrained_vector_fptrunc_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    xsrsp 1, 2
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI129_0@toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI129_0@toc@l
-; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    xsrsp 0, 3
-; PC64LE9-NEXT:    xscvdpspn 36, 0
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    xsrsp 1, 3
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xxperm 34, 1, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
@@ -6769,15 +6771,15 @@ define <3 x float> @constrained_vector_ceil_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xsrdpip 1, 1
 ; PC64LE9-NEXT:    xsrdpip 2, 2
 ; PC64LE9-NEXT:    xsrdpip 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 36, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 2
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
@@ -6885,15 +6887,15 @@ define <3 x float> @constrained_vector_floor_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xsrdpim 1, 1
 ; PC64LE9-NEXT:    xsrdpim 2, 2
 ; PC64LE9-NEXT:    xsrdpim 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 36, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 2
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
@@ -7000,15 +7002,15 @@ define <3 x float> @constrained_vector_round_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xsrdpi 1, 1
 ; PC64LE9-NEXT:    xsrdpi 2, 2
 ; PC64LE9-NEXT:    xsrdpi 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 36, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 2
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
@@ -7116,15 +7118,15 @@ define <3 x float> @constrained_vector_trunc_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xsrdpiz 1, 1
 ; PC64LE9-NEXT:    xsrdpiz 2, 2
 ; PC64LE9-NEXT:    xsrdpiz 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 2, 2
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 36, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 2
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
@@ -7265,8 +7267,8 @@ define <2 x double> @constrained_vector_sitofp_v2f64_v2i16(<2 x i16> %x) #0 {
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI155_0@toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI155_0@toc@l
-; PC64LE9-NEXT:    lxv 35, 0(3)
-; PC64LE9-NEXT:    vperm 2, 2, 2, 3
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 34, 0
 ; PC64LE9-NEXT:    vextsh2d 2, 2
 ; PC64LE9-NEXT:    xvcvsxddp 34, 34
 ; PC64LE9-NEXT:    blr
@@ -7479,14 +7481,15 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE9-NEXT:    xscvsxdsp 1, 1
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI161_0@toc@l
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    lxv 36, 0(3)
-; PC64LE9-NEXT:    mfvsrwz 3, 34
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    mtfprwa 0, 3
-; PC64LE9-NEXT:    xscvsxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 34, 0
-; PC64LE9-NEXT:    vperm 2, 2, 3, 4
+; PC64LE9-NEXT:    xxmrghw 0, 1, 0
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    mfvsrwz 3, 34
+; PC64LE9-NEXT:    mtfprwa 2, 3
+; PC64LE9-NEXT:    xscvsxdsp 2, 2
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxperm 0, 2, 1
+; PC64LE9-NEXT:    xxlor 34, 0, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x float>
@@ -7552,14 +7555,14 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
 ; PC64LE9-NEXT:    xscvsxdsp 1, 1
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI163_0@toc@l
-; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    mtfprd 0, 5
-; PC64LE9-NEXT:    xscvsxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 36, 0
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    mtfprd 1, 5
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    xscvsxdsp 1, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xxperm 34, 1, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x float>
@@ -7830,10 +7833,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i16(<2 x i16> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_uitofp_v2f64_v2i16:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI173_0@toc@ha
-; PC64LE9-NEXT:    xxlxor 36, 36, 36
+; PC64LE9-NEXT:    xxlxor 0, 0, 0
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI173_0@toc@l
-; PC64LE9-NEXT:    lxv 35, 0(3)
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 0, 1
 ; PC64LE9-NEXT:    xvcvuxddp 34, 34
 ; PC64LE9-NEXT:    blr
 entry:
@@ -8045,14 +8048,15 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE9-NEXT:    xscvuxdsp 1, 1
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI179_0@toc@l
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    lxv 36, 0(3)
-; PC64LE9-NEXT:    mfvsrwz 3, 34
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxmrghw 35, 1, 0
-; PC64LE9-NEXT:    mtfprwz 0, 3
-; PC64LE9-NEXT:    xscvuxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 34, 0
-; PC64LE9-NEXT:    vperm 2, 2, 3, 4
+; PC64LE9-NEXT:    xxmrghw 0, 1, 0
+; PC64LE9-NEXT:    lxv 1, 0(3)
+; PC64LE9-NEXT:    mfvsrwz 3, 34
+; PC64LE9-NEXT:    mtfprwz 2, 3
+; PC64LE9-NEXT:    xscvuxdsp 2, 2
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxperm 0, 2, 1
+; PC64LE9-NEXT:    xxlor 34, 0, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x float>
@@ -8118,14 +8122,14 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
 ; PC64LE9-NEXT:    xscvuxdsp 1, 1
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI181_0@toc@l
-; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xxmrghw 34, 1, 0
-; PC64LE9-NEXT:    mtfprd 0, 5
-; PC64LE9-NEXT:    xscvuxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 36, 0
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    mtfprd 1, 5
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    xscvuxdsp 1, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xxperm 34, 1, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <3 x float>


        


More information about the llvm-commits mailing list