[llvm] [PowerPC] Fix vector_shuffle combines when inputs are scalar_to_vector of differing types. (PR #80784)

Amy Kwan via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 5 08:51:03 PDT 2024


https://github.com/amy-kwan updated https://github.com/llvm/llvm-project/pull/80784

>From 4ac7274c83b54110c9c4bcc4600ec262c72fdc34 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Fri, 2 Feb 2024 10:27:28 -0600
Subject: [PATCH 1/2] [PowerPC] Fix vector_shuffle combines when inputs are
 scalar_to_vector of differing types.

This patch fixes the combines for vector_shuffles when either or both of its
left and right hand side inputs are scalar_to_vector nodes.

Previously, when both left and right side inputs are scalar_to_vector nodes,
the current combine could not handle this situation, as the shuffle mask was
updated incorrectly. To temporarily solve this solution, this combine was
simply disabled and not performed.

Now, not only does this patch aim to resolve the previous issue of the incorrect
shuffle mask adjustments respectively, but it also updates any test cases
that are affected by this change.

Patch migrated from https://reviews.llvm.org/D130487.
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 109 +++--
 .../PowerPC/p8-scalar_vector_conversions.ll   |   6 +-
 .../PowerPC/v16i8_scalar_to_vector_shuffle.ll | 459 +++++++++---------
 .../PowerPC/v2i64_scalar_to_vector_shuffle.ll | 434 +++++++----------
 .../PowerPC/v4i32_scalar_to_vector_shuffle.ll | 418 ++++++++--------
 .../PowerPC/v8i16_scalar_to_vector_shuffle.ll | 351 ++++++--------
 6 files changed, 811 insertions(+), 966 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 51becf1d5b8584..5df905a5f03b14 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15238,16 +15238,18 @@ static SDValue isScalarToVec(SDValue Op) {
 // On little endian, that's just the corresponding element in the other
 // half of the vector. On big endian, it is in the same half but right
 // justified rather than left justified in that half.
-static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
-                                            int LHSMaxIdx, int RHSMinIdx,
-                                            int RHSMaxIdx, int HalfVec,
-                                            unsigned ValidLaneWidth,
-                                            const PPCSubtarget &Subtarget) {
+static void fixupShuffleMaskForPermutedSToV(
+    SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
+    int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
+    unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
   for (int i = 0, e = ShuffV.size(); i < e; i++) {
     int Idx = ShuffV[i];
-    if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
+    if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
       ShuffV[i] +=
-          Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
+          Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
+    if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
+      ShuffV[i] +=
+          Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
   }
 }
 
@@ -15286,6 +15288,25 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
                      OrigSToV.getOperand(0));
 }
 
+static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
+                                 int HalfVec, int LHSLastElementDefined,
+                                 int RHSLastElementDefined) {
+  for (int i : seq<int>(0, ShuffV.size())) {
+    int Index = ShuffV[i];
+    if (Index < 0) // Skip explicitly undefined mask indices.
+      continue;
+    // Handle first input vector of the vector_shuffle.
+    if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
+        (Index > LHSLastElementDefined))
+      return false;
+    // Handle second input vector of the vector_shuffle.
+    if ((RHSLastElementDefined >= 0) &&
+        (Index > HalfVec + RHSLastElementDefined))
+      return false;
+  }
+  return true;
+}
+
 // On little endian subtargets, combine shuffles such as:
 // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
 // into:
@@ -15333,36 +15354,25 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
   SDValue SToVLHS = isScalarToVec(LHS);
   SDValue SToVRHS = isScalarToVec(RHS);
   if (SToVLHS || SToVRHS) {
-    // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
-    // same type and have differing element sizes, then do not perform
-    // the following transformation. The current transformation for
-    // SCALAR_TO_VECTOR assumes that both input vectors have the same
-    // element size. This will be updated in the future to account for
-    // differing sizes of the LHS and RHS.
-    if (SToVLHS && SToVRHS &&
-        (SToVLHS.getValueType().getScalarSizeInBits() !=
-         SToVRHS.getValueType().getScalarSizeInBits()))
-      return Res;
-
-    int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
-                            : SToVRHS.getValueType().getVectorNumElements();
-    int NumEltsOut = ShuffV.size();
+    EVT VT = SVN->getValueType(0);
+    uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
+    int ShuffleNumElts = ShuffV.size();
+    int HalfVec = ShuffleNumElts / 2;
     // The width of the "valid lane" (i.e. the lane that contains the value that
     // is vectorized) needs to be expressed in terms of the number of elements
     // of the shuffle. It is thereby the ratio of the values before and after
-    // any bitcast.
-    unsigned ValidLaneWidth =
-        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
-                      LHS.getValueType().getScalarSizeInBits()
-                : SToVRHS.getValueType().getScalarSizeInBits() /
-                      RHS.getValueType().getScalarSizeInBits();
+    // any bitcast, which will be set later on if the LHS or RHS are
+    // SCALAR_TO_VECTOR nodes.
+    unsigned LHSNumValidElts = HalfVec;
+    unsigned RHSNumValidElts = HalfVec;
 
     // Initially assume that neither input is permuted. These will be adjusted
-    // accordingly if either input is.
-    int LHSMaxIdx = -1;
-    int RHSMinIdx = -1;
-    int RHSMaxIdx = -1;
-    int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
+    // accordingly if either input is. Note, that -1 means that all elements
+    // are undefined.
+    int LHSFirstElt = 0;
+    int RHSFirstElt = ShuffleNumElts;
+    int LHSLastElt = -1;
+    int RHSLastElt = -1;
 
     // Get the permuted scalar to vector nodes for the source(s) that come from
     // ISD::SCALAR_TO_VECTOR.
@@ -15371,33 +15381,52 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
     // the value into element zero. Since scalar size of LHS and RHS may differ
     // after isScalarToVec, this should be checked using their own sizes.
     if (SToVLHS) {
-      if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
+      int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
+      if (!IsLittleEndian && LHSScalarSize >= 64)
         return Res;
       // Set up the values for the shuffle vector fixup.
-      LHSMaxIdx = NumEltsOut / NumEltsIn;
+      LHSNumValidElts =
+          LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
+      // The last element that comes from the LHS. For example:
+      // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
+      // The last element that comes from the LHS is actually 0, not 3
+      // because elements 1 and higher of a scalar_to_vector are undefined.
+      LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
       SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
       if (SToVLHS.getValueType() != LHS.getValueType())
         SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
       LHS = SToVLHS;
     }
     if (SToVRHS) {
-      if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
+      int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
+      if (!IsLittleEndian && RHSScalarSize >= 64)
         return Res;
-      RHSMinIdx = NumEltsOut;
-      RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
+      RHSNumValidElts =
+          RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
+      // The last element that comes from the RHS. For example:
+      // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
+      // The last element that comes from the RHS is actually 5, not 7
+      // because elements 1 and higher of a scalar_to_vector are undefined.
+      // It is also not 4 because the original scalar_to_vector is wider and
+      // actually contains two i32 elements.
+      RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
       SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
       if (SToVRHS.getValueType() != RHS.getValueType())
         SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
       RHS = SToVRHS;
     }
 
+    if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
+      return Res;
+
     // Fix up the shuffle mask to reflect where the desired element actually is.
     // The minimum and maximum indices that correspond to element zero for both
     // the LHS and RHS are computed and will control which shuffle mask entries
     // are to be changed. For example, if the RHS is permuted, any shuffle mask
-    // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
-    fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
-                                    HalfVec, ValidLaneWidth, Subtarget);
+    // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
+    fixupShuffleMaskForPermutedSToV(
+        ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
+        LHSNumValidElts, RHSNumValidElts, Subtarget);
     Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
 
     // We may have simplified away the shuffle. We won't be able to do anything
diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
index 87b8a64cc67bdf..506645669928ae 100644
--- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -2505,11 +2505,9 @@ define <2 x i64> @buildi2(i64 %arg, i32 %arg1) {
 ;
 ; CHECK-LE-LABEL: buildi2:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    mtfprd f0, r4
+; CHECK-LE-NEXT:    mtfprwz f0, r4
 ; CHECK-LE-NEXT:    mtfprd f1, r3
-; CHECK-LE-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-NEXT:    xxswapd v2, vs1
-; CHECK-LE-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-AIX-LABEL: buildi2:
diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
index 11cc8abd2c7fa3..aab754d4d655d9 100644
--- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
@@ -266,56 +266,54 @@ entry:
 define <16 x i8> @test_v16i8_v8i16(i16 %arg, i8 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v16i8_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r3
+; CHECK-LE-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P9-NEXT:    mtvsrd v3, r3
+; CHECK-LE-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 56
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P8-NEXT:    mtvsrd v2, r4
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI3_0 at toc@ha
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI3_0 at toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v16i8_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r4, r4, 56
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P9-NEXT:    mtvsrd v2, r4
-; CHECK-BE-P9-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P9-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI3_0 at toc@ha
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r4
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI3_0 at toc@l
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r4
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v16i8_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r4, r4, 56
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r4
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C2(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r4
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16:
@@ -348,56 +346,54 @@ entry:
 define <16 x i8> @test_v8i16_v16i8(i16 %arg, i8 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v8i16_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r3
+; CHECK-LE-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P9-NEXT:    mtvsrd v3, r3
+; CHECK-LE-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 56
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P8-NEXT:    mtvsrd v2, r4
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI4_0 at toc@ha
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI4_0 at toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r4, r4, 56
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P9-NEXT:    mtvsrd v2, r4
-; CHECK-BE-P9-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI4_0 at toc@ha
+; CHECK-BE-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI4_0 at toc@l
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r4
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C4(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r4, r4, 56
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r4
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8:
@@ -472,7 +468,7 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
 ; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C5(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
@@ -481,7 +477,7 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
 ; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r3
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C2(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C4(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
@@ -545,7 +541,7 @@ define <8 x i16> @test_v8i16_none(<8 x i16> %a, i16 %b) {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C4(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r4, L..C6(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
@@ -580,53 +576,54 @@ entry:
 define <16 x i8> @test_v16i8_v4i32(i8 %arg, i32 %arg1, <16 x i8> %a, <4 x i32> %b) {
 ; CHECK-LE-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P8-NEXT:    mtvsrwz v3, r4
+; CHECK-LE-P8-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    mtvsrws v3, r4
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P9-NEXT:    mtvsrwz v3, r4
+; CHECK-LE-P9-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
-; CHECK-BE-P8-NEXT:    sldi r3, r4, 32
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P8-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI7_0 at toc@ha
+; CHECK-BE-P8-NEXT:    mtvsrwz v3, r4
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI7_0 at toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v16i8_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P9-NEXT:    mtvsrws v3, r4
-; CHECK-BE-P9-NEXT:    mtvsrd v2, r3
-; CHECK-BE-P9-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI7_0 at toc@ha
+; CHECK-BE-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI7_0 at toc@l
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 32
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT:    vmrghb v2, v2, v3
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C7(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r4
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v16i8_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P9-NEXT:    mtvsrws v3, r4
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r3
-; CHECK-AIX-64-P9-NEXT:    vmrghb v2, v2, v3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C5(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32:
@@ -660,53 +657,54 @@ entry:
 define <16 x i8> @test_v4i32_v16i8(i32 %arg, i8 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-LE-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    mtvsrws v3, r3
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-LE-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 56
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P8-NEXT:    mtvsrd v2, r4
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI8_0 at toc@ha
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI8_0 at toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r4, r4, 56
-; CHECK-BE-P9-NEXT:    mtvsrws v3, r3
-; CHECK-BE-P9-NEXT:    mtvsrd v2, r4
-; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI8_0 at toc@ha
+; CHECK-BE-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI8_0 at toc@l
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 32
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r4
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C8(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r4, r4, 56
-; CHECK-AIX-64-P9-NEXT:    mtvsrws v3, r3
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r4
-; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C6(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8:
@@ -781,9 +779,9 @@ define <4 x i32> @test_none_v4i32(<4 x i32> %a, i64 %b) {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C5(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r4, L..C9(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C6(r2) # %const.1
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C10(r2) # %const.1
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r3
@@ -875,7 +873,7 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lbzx r4, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C7(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C11(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vspltb v2, v2, 7
@@ -885,7 +883,7 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C7(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxsibzx v2, 0, r4
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    vspltb v2, v2, 7
@@ -928,20 +926,16 @@ entry:
 define <16 x i8> @test_v16i8_v2i64(i8 %arg, i64 %arg1, <16 x i8> %a, <2 x i64> %b) {
 ; CHECK-LE-P8-LABEL: test_v16i8_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r4
+; CHECK-LE-P8-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P9-NEXT:    mtvsrd v3, r4
+; CHECK-LE-P9-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_v2i64:
@@ -1007,20 +1001,16 @@ entry:
 define <16 x i8> @test_v2i64_v16i8(i64 %arg, i8 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v2i64_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r3
+; CHECK-LE-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P9-NEXT:    mtvsrd v3, r3
+; CHECK-LE-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v16i8:
@@ -1392,7 +1382,7 @@ define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C8(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r5, L..C12(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r4
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v2, 0, r5
@@ -1401,7 +1391,7 @@ define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) {
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    ld r5, L..C4(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r5, L..C8(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r4
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r3
 ; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r5)
@@ -1439,53 +1429,54 @@ entry:
 define <16 x i8> @test_v8i16_v4i32(<8 x i16> %a, <4 x i32> %b, i16 %arg, i32 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r7
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    mtfprd f0, r8
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r7
+; CHECK-LE-P8-NEXT:    mtvsrwz v3, r8
+; CHECK-LE-P8-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r7
-; CHECK-LE-P9-NEXT:    mtvsrws v3, r8
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r7
+; CHECK-LE-P9-NEXT:    mtvsrwz v3, r8
+; CHECK-LE-P9-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r7, 48
-; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
-; CHECK-BE-P8-NEXT:    sldi r3, r8, 32
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P8-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI17_0 at toc@ha
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r7
+; CHECK-BE-P8-NEXT:    mtvsrwz v3, r8
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI17_0 at toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r3, r7, 48
-; CHECK-BE-P9-NEXT:    mtvsrws v3, r8
-; CHECK-BE-P9-NEXT:    mtvsrd v2, r3
-; CHECK-BE-P9-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI17_0 at toc@ha
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r7
+; CHECK-BE-P9-NEXT:    mtvsrwz v2, r8
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI17_0 at toc@l
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 32
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT:    vmrghb v2, v2, v3
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C13(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r4
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P9-NEXT:    mtvsrws v3, r4
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r3
-; CHECK-AIX-64-P9-NEXT:    vmrghb v2, v2, v3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C9(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
@@ -1519,20 +1510,16 @@ entry:
 define <16 x i8> @test_v8i16_v2i64(<8 x i16> %a, <2 x i64> %b, i16 %arg, i64 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r7
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    mtfprd f0, r8
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r7
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r8
+; CHECK-LE-P8-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r7
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtfprd f0, r8
-; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r7
+; CHECK-LE-P9-NEXT:    mtvsrd v3, r8
+; CHECK-LE-P9-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
@@ -1669,53 +1656,54 @@ entry:
 define <16 x i8> @test_v4i32_v8i16(i32 %arg, i16 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r4
+; CHECK-LE-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    mtvsrws v2, r3
-; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT:    mtvsrwz v2, r3
+; CHECK-LE-P9-NEXT:    mtvsrd v3, r4
+; CHECK-LE-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
-; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI20_0 at toc@ha
+; CHECK-BE-P8-NEXT:    mtvsrwz v3, r4
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI20_0 at toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    mtvsrws v2, r3
-; CHECK-BE-P9-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P9-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P9-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI20_0 at toc@ha
+; CHECK-BE-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI20_0 at toc@l
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 32
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C14(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r4
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    mtvsrws v2, r3
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C10(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
@@ -1824,18 +1812,15 @@ define <16 x i8> @test_v2i64_v4i32(i64 %arg, i32 %arg1, <2 x i64> %a, <4 x i32>
 ; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrglw v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P8-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxmrglw v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P9-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
@@ -1899,20 +1884,16 @@ entry:
 define <16 x i8> @test_v2i64_v8i16(i64 %arg, i16 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r4
+; CHECK-LE-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P9-NEXT:    mtvsrd v3, r4
+; CHECK-LE-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
@@ -1978,27 +1959,23 @@ entry:
 define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
 ; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI24_0 at toc@ha
-; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI24_0 at toc@l
-; CHECK-LE-P8-NEXT:    xxswapd v2, f0
-; CHECK-LE-P8-NEXT:    lfdx f0, 0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v3, f0
-; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI24_0 at toc@ha
+; CHECK-LE-P8-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-LE-P8-NEXT:    lxsdx v4, 0, r4
+; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI24_0 at toc@l
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v4, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
+; CHECK-LE-P9-NEXT:    lxsiwzx v2, 0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI24_0 at toc@ha
-; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI24_0 at toc@l
-; CHECK-LE-P9-NEXT:    xxswapd v2, f0
 ; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
-; CHECK-LE-P9-NEXT:    xxswapd v3, f0
-; CHECK-LE-P9-NEXT:    lxv vs0, 0(r3)
-; CHECK-LE-P9-NEXT:    xxperm v2, v3, vs0
+; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI24_0 at toc@l
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
@@ -2026,7 +2003,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C9(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C15(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
@@ -2036,7 +2013,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C5(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C11(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxsd v2, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    xxsldwi vs0, f0, f0, 1
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
diff --git a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
index 8bb71e073e8146..7388d4a5672098 100644
--- a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
@@ -30,42 +30,42 @@ define <2 x i64> @test_v16i8_v16i8(i8 %arg1, i8 %arg) {
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v16i8_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v16i8_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8:
@@ -102,42 +102,42 @@ define <2 x i64> @test_none_v16i8(i8 %arg1, ptr nocapture noundef readonly %b) {
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT:    xxpermdi v2, vs0, v2, 1
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_none_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT:    xxpermdi v2, vs0, v2, 1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_none_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_none_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_none_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v16i8:
@@ -170,42 +170,42 @@ define <2 x i64> @test_v16i8_none(i8 %arg1, ptr nocapture noundef readonly %b) {
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT:    xxpermdi v2, v2, vs0, 2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_none:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT:    xxpermdi v2, v2, vs0, 2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-BE-P8-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v16i8_none:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-BE-P9-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P8-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v16i8_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P9-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_none:
@@ -237,54 +237,42 @@ define <2 x i64> @test_v16i8_v8i16(i8 %arg1, i16 %arg) {
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P8-NEXT:    mtfprd f1, r3
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v16i8_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P9-NEXT:    mtfprd f0, r3
-; CHECK-BE-P9-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P9-NEXT:    mtfprd f1, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v16i8_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P9-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P9-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16:
@@ -320,54 +308,42 @@ define <2 x i64> @test_v8i16_v16i8(i8 %arg1, i16 %arg) {
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, vs1
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, vs1
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P8-NEXT:    mtfprd f1, r3
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P9-NEXT:    mtfprd f0, r3
-; CHECK-BE-P9-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P9-NEXT:    mtfprd f1, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P9-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P9-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8:
@@ -404,42 +380,42 @@ define <2 x i64> @test_v8i16_none(i16 %arg1, ptr nocapture noundef readonly %b)
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT:    xxpermdi v2, v2, vs0, 2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_none:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT:    xxpermdi v2, v2, vs0, 2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-BE-P8-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_none:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-BE-P9-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P8-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P9-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
@@ -472,42 +448,42 @@ define <2 x i64> @test_none_v8i16(i16 %arg1, ptr nocapture noundef readonly %b)
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT:    xxpermdi v2, vs0, v2, 1
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_none_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT:    xxpermdi v2, vs0, v2, 1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_none_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_none_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v8i16:
@@ -538,52 +514,43 @@ define <2 x i64> @test_v16i8_v4i32(i8 %arg1, i32 %arg) {
 ; CHECK-LE-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    sldi r3, r4, 32
-; CHECK-BE-P8-NEXT:    mtfprd f1, r3
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v16i8_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-BE-P9-NEXT:    mtfprd f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 32
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v16i8_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-AIX-64-P9-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32:
@@ -618,52 +585,43 @@ define <2 x i64> @test_v4i32_v16i8(i8 %arg1, i32 %arg) {
 ; CHECK-LE-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, vs1
+; CHECK-LE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, vs1
+; CHECK-LE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    sldi r3, r4, 32
-; CHECK-BE-P8-NEXT:    mtfprd f1, r3
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-BE-P9-NEXT:    mtfprd f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 32
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-AIX-64-P9-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8:
@@ -700,42 +658,42 @@ define <2 x i64> @test_none_v4i32(i32 %arg1, ptr nocapture noundef readonly %b)
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    mtfprwz f0, r3
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT:    xxpermdi v2, vs0, v2, 1
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_none_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT:    xxpermdi v2, vs0, v2, 1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_none_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_none_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
@@ -768,42 +726,42 @@ define <2 x i64> @test_v4i32_none(i32 %arg1, ptr nocapture noundef readonly %b)
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    mtfprwz f0, r3
-; CHECK-LE-P8-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT:    xxpermdi v2, v2, vs0, 2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_none:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-LE-P9-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT:    xxpermdi v2, v2, vs0, 2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-BE-P8-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_none:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-BE-P9-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P8-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P9-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
@@ -835,18 +793,14 @@ define <2 x i64> @test_v16i8_v2i64(i8 %arg1, i64 %arg) {
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P9-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_v2i64:
@@ -923,18 +877,14 @@ define <2 x i64> @test_v2i64_v16i8(i8 %arg1, i64 %arg) {
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v16i8:
@@ -942,14 +892,14 @@ define <2 x i64> @test_v2i64_v16i8(i8 %arg1, i64 %arg) {
 ; CHECK-BE-P8-NEXT:    mtfprd f0, r4
 ; CHECK-BE-P8-NEXT:    xxspltd v2, vs0, 0
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v2i64_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    mtvsrdd v2, r4, r4
-; CHECK-BE-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v2i64_v16i8:
@@ -957,14 +907,14 @@ define <2 x i64> @test_v2i64_v16i8(i8 %arg1, i64 %arg) {
 ; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r4
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs0
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v2i64_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P9-NEXT:    mtvsrdd v2, r4, r4
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v16i8:
@@ -1159,42 +1109,42 @@ define <2 x i64> @test_v8i16_v8i16(i16 %arg1, i16 %arg) {
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16:
@@ -1229,52 +1179,43 @@ define <2 x i64> @test_v8i16_v4i32(i16 %arg1, i32 %arg) {
 ; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    sldi r3, r4, 32
-; CHECK-BE-P8-NEXT:    mtfprd f1, r3
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-BE-P9-NEXT:    mtfprd f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 32
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-AIX-64-P9-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
@@ -1310,18 +1251,14 @@ define <2 x i64> @test_v8i16_v2i64(i16 %arg1, i64 %arg) {
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P9-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
@@ -1398,42 +1335,42 @@ define <2 x i64> @test_v4i32_v4i32(i32 %arg1, i32 %arg) {
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprwz f1, r4
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprwz f1, r4
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
@@ -1467,53 +1404,44 @@ entry:
 define <2 x i64> @test_v4i32_v8i16(i32 %arg1, i16 %arg) {
 ; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
+; CHECK-LE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
+; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    mtvsrws vs0, r3
-; CHECK-LE-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P8-NEXT:    mtfprd f1, r3
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    mtvsrws vs0, r3
-; CHECK-BE-P9-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P9-NEXT:    mtfprd f1, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 32
-; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    mtvsrws vs0, r3
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P9-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
@@ -1547,19 +1475,16 @@ entry:
 define <2 x i64> @test_v4i32_v2i64(i32 %arg1, i64 %arg) {
 ; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
+; CHECK-LE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
+; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    mtvsrws vs0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P9-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
@@ -1744,18 +1669,15 @@ define <2 x i64> @test_v2i64_v4i32(i64 %arg1, i32 %arg) {
 ; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtvsrws vs0, r4
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
@@ -1763,14 +1685,14 @@ define <2 x i64> @test_v2i64_v4i32(i64 %arg1, i32 %arg) {
 ; CHECK-BE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-BE-P8-NEXT:    xxspltd v2, vs0, 0
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v2i64_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r4
 ; CHECK-BE-P9-NEXT:    mtvsrdd v2, r3, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32:
@@ -1778,14 +1700,14 @@ define <2 x i64> @test_v2i64_v4i32(i64 %arg1, i32 %arg) {
 ; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs0
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r4
 ; CHECK-AIX-64-P9-NEXT:    mtvsrdd v2, r3, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
@@ -1827,19 +1749,15 @@ define <2 x i64> @test_v2i64_v8i16(i64 %arg1, i16 %arg) {
 ; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT:    mtfprd f1, r4
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT:    mtfprd f1, r4
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
@@ -1847,14 +1765,14 @@ define <2 x i64> @test_v2i64_v8i16(i64 %arg1, i16 %arg) {
 ; CHECK-BE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-BE-P8-NEXT:    xxspltd v2, vs0, 0
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r4
 ; CHECK-BE-P9-NEXT:    mtvsrdd v2, r3, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16:
@@ -1862,14 +1780,14 @@ define <2 x i64> @test_v2i64_v8i16(i64 %arg1, i16 %arg) {
 ; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs0
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r4
 ; CHECK-AIX-64-P9-NEXT:    mtvsrdd v2, r3, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
diff --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
index 4ca55d276647bf..424a6dc7446a44 100644
--- a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
@@ -28,15 +28,11 @@
 define void @test_none_v8i16(ptr %a) {
 ; CHECK-LE-P8-LABEL: test_none_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI0_0 at toc@ha
-; CHECK-LE-P8-NEXT:    lxsdx v4, 0, r3
-; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI0_0 at toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-LE-P8-NEXT:    mtvsrd v3, r4
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v3, v4, v2
-; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
+; CHECK-LE-P8-NEXT:    lfdx f1, 0, r3
+; CHECK-LE-P8-NEXT:    mtfprd f0, r4
+; CHECK-LE-P8-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-LE-P8-NEXT:    stfdx f0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -44,11 +40,8 @@ define void @test_none_v8i16(ptr %a) {
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-LE-P9-NEXT:    lfd f1, 0(r3)
-; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
-; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
-; CHECK-LE-P9-NEXT:    lxv vs2, 0(r3)
-; CHECK-LE-P9-NEXT:    xxperm vs1, vs0, vs2
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs1
+; CHECK-LE-P9-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
 ; CHECK-LE-P9-NEXT:    stfd f0, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -131,11 +124,15 @@ define void @test_v8i16_none(ptr %a) {
 ; CHECK-LE-P8-LABEL: test_v8i16_none:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
+; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-LE-P8-NEXT:    mtfprd f1, r3
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs0, vs1
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
+; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -143,59 +140,76 @@ define void @test_v8i16_none(ptr %a) {
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
-; CHECK-LE-P9-NEXT:    xxmrglw vs0, vs1, vs0
+; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
+; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
+; CHECK-LE-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm vs0, vs1, vs2
 ; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-BE-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-BE-P8-NEXT:    mtfprwz f0, r4
-; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_none:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
-; CHECK-BE-P9-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
+; CHECK-BE-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-BE-P9-NEXT:    stxv vs1, 0(r3)
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C0(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C0(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-AIX-64-P9-NEXT:    stxv vs1, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    mtfprwz f0, r4
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C0(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_none:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C0(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-AIX-32-P9-NEXT:    stxv vs1, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <2 x i8>, ptr undef, align 1
@@ -264,7 +278,7 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, v2, v2, 3
 ; CHECK-AIX-64-P8-NEXT:    mffprwz r4, f0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r4
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C0(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r4, L..C1(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
@@ -275,7 +289,7 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-AIX-64-P9-NEXT:    li r4, 0
 ; CHECK-AIX-64-P9-NEXT:    vextuwlx r4, r4, v2
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r4
-; CHECK-AIX-64-P9-NEXT:    ld r4, L..C0(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r4, L..C1(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    xxperm vs0, v2, vs1
 ; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
@@ -286,7 +300,7 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C0(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C1(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
@@ -297,7 +311,7 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-AIX-32-P9-NEXT:    addi r4, r1, -16
 ; CHECK-AIX-32-P9-NEXT:    stxv v2, -16(r1)
 ; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
-; CHECK-AIX-32-P9-NEXT:    lwz r4, L..C0(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lwz r4, L..C1(r2) # %const.0
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r4)
 ; CHECK-AIX-32-P9-NEXT:    xxperm vs0, v2, vs1
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
@@ -369,7 +383,7 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, v2, v2, 3
 ; CHECK-AIX-64-P8-NEXT:    mffprwz r4, f0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r4
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C1(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r4, L..C2(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
@@ -380,7 +394,7 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-AIX-64-P9-NEXT:    li r4, 0
 ; CHECK-AIX-64-P9-NEXT:    vextuwlx r4, r4, v2
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r4
-; CHECK-AIX-64-P9-NEXT:    ld r4, L..C1(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r4, L..C2(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
@@ -391,7 +405,7 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C1(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C2(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
@@ -402,7 +416,7 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-AIX-32-P9-NEXT:    addi r4, r1, -16
 ; CHECK-AIX-32-P9-NEXT:    stxv v2, -16(r1)
 ; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
-; CHECK-AIX-32-P9-NEXT:    lwz r4, L..C1(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lwz r4, L..C2(r2) # %const.0
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r4)
 ; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
@@ -474,12 +488,12 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v2i64:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C2(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r5, L..C3(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r4
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C3(r2) # %const.1
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C4(r2) # %const.1
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
@@ -487,7 +501,7 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
 ; CHECK-AIX-64-P9-LABEL: test_none_v2i64:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C2(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
 ; CHECK-AIX-64-P9-NEXT:    xxinsertw v2, vs1, 0
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
@@ -497,12 +511,12 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
 ; CHECK-AIX-32-P8-LABEL: test_none_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C2(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C3(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v5, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C3(r2) # %const.1
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C4(r2) # %const.1
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v5, v2, v4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
@@ -511,7 +525,7 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
 ; CHECK-AIX-32-P9-LABEL: test_none_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C2(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C3(r2) # %const.0
 ; CHECK-AIX-32-P9-NEXT:    mtfprwz f1, r4
 ; CHECK-AIX-32-P9-NEXT:    xxinsertw v2, vs1, 0
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
@@ -595,7 +609,7 @@ define void @test_v8i16_v8i16(ptr %a) {
 ; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r4
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r3
-; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs1, vs0
+; CHECK-LE-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
@@ -604,7 +618,7 @@ define void @test_v8i16_v8i16(ptr %a) {
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-LE-P9-NEXT:    lxsihzx f1, 0, r3
-; CHECK-LE-P9-NEXT:    xxmrglw vs0, vs1, vs0
+; CHECK-LE-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -614,7 +628,7 @@ define void @test_v8i16_v8i16(ptr %a) {
 ; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r4
 ; CHECK-BE-P8-NEXT:    mtfprwz f1, r3
-; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-BE-P8-NEXT:    xxmrglw vs0, vs0, vs1
 ; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -622,7 +636,7 @@ define void @test_v8i16_v8i16(ptr %a) {
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-BE-P9-NEXT:    lxsihzx f1, 0, r3
-; CHECK-BE-P9-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-BE-P9-NEXT:    xxmrglw vs0, vs0, vs1
 ; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-P9-NEXT:    blr
 ;
@@ -632,7 +646,7 @@ define void @test_v8i16_v8i16(ptr %a) {
 ; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r4
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    xxmrglw vs0, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -640,7 +654,7 @@ define void @test_v8i16_v8i16(ptr %a) {
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-AIX-64-P9-NEXT:    lxsihzx f1, 0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P9-NEXT:    xxmrglw vs0, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
@@ -650,7 +664,7 @@ define void @test_v8i16_v8i16(ptr %a) {
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    mtfprwz f0, r4
 ; CHECK-AIX-32-P8-NEXT:    mtfprwz f1, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P8-NEXT:    xxmrglw vs0, vs0, vs1
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -658,7 +672,7 @@ define void @test_v8i16_v8i16(ptr %a) {
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-AIX-32-P9-NEXT:    lxsihzx f1, 0, r3
-; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P9-NEXT:    xxmrglw vs0, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
@@ -681,89 +695,82 @@ define void @test_v8i16_v4i32(ptr %a) {
 ; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
 ; CHECK-LE-P8-NEXT:    lfiwzx f1, 0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs1, f1
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs1, vs0
+; CHECK-LE-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-LE-P9-NEXT:    xxswapd vs0, f0
-; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT:    xxmrglw vs0, vs0, v2
+; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-LE-P9-NEXT:    lfiwzx f1, 0, r3
+; CHECK-LE-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P8-NEXT:    mtfprd f1, r3
-; CHECK-BE-P8-NEXT:    xxsldwi vs0, f0, f0, 1
-; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs1, vs0
-; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
+; CHECK-BE-P8-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI7_0 at toc@ha
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI7_0 at toc@l
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r3
-; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-BE-P9-NEXT:    xxsldwi vs0, f0, f0, 1
-; CHECK-BE-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-BE-P9-NEXT:    xxmrghw vs0, v2, vs0
-; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-BE-P9-NEXT:    lfiwzx f1, 0, r3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI7_0 at toc@ha
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI7_0 at toc@l
+; CHECK-BE-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-BE-P9-NEXT:    stxv vs1, 0(r3)
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, f0, f0, 1
-; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs1, vs0
-; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
+; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C5(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r3
-; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT:    xxsldwi vs0, f0, f0, 1
-; CHECK-AIX-64-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-AIX-64-P9-NEXT:    xxmrghw vs0, v2, vs0
-; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-AIX-64-P9-NEXT:    lfiwzx f1, 0, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C4(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-AIX-64-P9-NEXT:    stxv vs1, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C5(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C4(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-AIX-32-P9-NEXT:    stxv vs1, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <2 x i8>, ptr undef, align 1
@@ -784,20 +791,16 @@ define void @test_v8i16_v2i64(ptr %a) {
 ; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
 ; CHECK-LE-P8-NEXT:    lfdx f1, 0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs1, f1
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs1, vs0
+; CHECK-LE-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
-; CHECK-LE-P9-NEXT:    xxswapd vs0, f0
-; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT:    xxmrglw vs0, vs0, v2
+; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-LE-P9-NEXT:    lfd f1, 0(r3)
+; CHECK-LE-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -842,27 +845,22 @@ define void @test_v8i16_v2i64(ptr %a) {
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C6(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C5(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm vs1, vs0, vs2
+; CHECK-AIX-32-P9-NEXT:    stxv vs1, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <2 x i8>, ptr undef, align 1
@@ -922,7 +920,7 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C4(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C6(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
@@ -931,7 +929,7 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C5(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxsiwzx v2, 0, r4
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
@@ -940,7 +938,7 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C4(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C7(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
@@ -949,7 +947,7 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C3(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C6(r2) # %const.0
 ; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
@@ -968,88 +966,81 @@ define void @test_v4i32_v8i16(ptr %a) {
 ; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
 ; CHECK-LE-P8-NEXT:    lfiwzx f1, 0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs1, f1
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs0, vs1
+; CHECK-LE-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-LE-P9-NEXT:    xxswapd vs0, f0
-; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT:    xxmrglw vs0, v2, vs0
+; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-LE-P9-NEXT:    lfiwzx f1, 0, r3
+; CHECK-LE-P9-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P8-NEXT:    mtfprd f1, r3
-; CHECK-BE-P8-NEXT:    xxsldwi vs0, f0, f0, 1
-; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
+; CHECK-BE-P8-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI10_0 at toc@ha
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI10_0 at toc@l
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
+; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r3
-; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-BE-P9-NEXT:    xxsldwi vs0, f0, f0, 1
-; CHECK-BE-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-BE-P9-NEXT:    xxmrghw vs0, vs0, v2
+; CHECK-BE-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-BE-P9-NEXT:    lfiwzx f1, 0, r3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI10_0 at toc@ha
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI10_0 at toc@l
+; CHECK-BE-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm vs0, vs1, vs2
 ; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, f0, f0, 1
-; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
+; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C7(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
+; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r3
-; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT:    xxsldwi vs0, f0, f0, 1
-; CHECK-AIX-64-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-AIX-64-P9-NEXT:    xxmrghw vs0, vs0, v2
+; CHECK-AIX-64-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-AIX-64-P9-NEXT:    lfiwzx f1, 0, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C6(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm vs0, vs1, vs2
 ; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
-; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C8(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
+; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C7(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm vs0, vs1, vs2
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
@@ -1070,9 +1061,7 @@ define void @test_v4i32_v2i64(ptr %a) {
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lfdx f0, 0, r3
 ; CHECK-LE-P8-NEXT:    lfiwzx f1, 0, r3
-; CHECK-LE-P8-NEXT:    xxswapd vs0, f0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, f1
-; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs0, vs1
+; CHECK-LE-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
@@ -1081,9 +1070,7 @@ define void @test_v4i32_v2i64(ptr %a) {
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
 ; CHECK-LE-P9-NEXT:    lfiwzx f1, 0, r3
-; CHECK-LE-P9-NEXT:    xxswapd vs0, f0
-; CHECK-LE-P9-NEXT:    xxswapd vs1, f1
-; CHECK-LE-P9-NEXT:    xxmrglw vs0, vs0, vs1
+; CHECK-LE-P9-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -1253,9 +1240,7 @@ define void @test_v2i64_v4i32(ptr %a) {
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lfdx f0, 0, r3
 ; CHECK-LE-P8-NEXT:    lfiwzx f1, 0, r3
-; CHECK-LE-P8-NEXT:    xxswapd vs0, f0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, f1
-; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs1, vs0
+; CHECK-LE-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
@@ -1264,9 +1249,7 @@ define void @test_v2i64_v4i32(ptr %a) {
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
 ; CHECK-LE-P9-NEXT:    lfiwzx f1, 0, r3
-; CHECK-LE-P9-NEXT:    xxswapd vs0, f0
-; CHECK-LE-P9-NEXT:    xxswapd vs1, f1
-; CHECK-LE-P9-NEXT:    xxmrglw vs0, vs1, vs0
+; CHECK-LE-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -1349,20 +1332,16 @@ define void @test_v2i64_v8i16(ptr %a) {
 ; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
 ; CHECK-LE-P8-NEXT:    lfdx f1, 0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs1, f1
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs0, vs1
+; CHECK-LE-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
-; CHECK-LE-P9-NEXT:    xxswapd vs0, f0
-; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT:    xxmrglw vs0, v2, vs0
+; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-LE-P9-NEXT:    lfd f1, 0(r3)
+; CHECK-LE-P9-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -1407,26 +1386,21 @@ define void @test_v2i64_v8i16(ptr %a) {
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
-; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C9(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
+; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f1, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C8(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm vs0, vs1, vs2
 ; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
diff --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
index 201bc5be545068..bc756f07491d4a 100644
--- a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
@@ -241,16 +241,13 @@ entry:
 define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0 {
 ; CHECK-LE-P8-LABEL: test_none_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
+; CHECK-LE-P8-NEXT:    mtfprd f0, r5
 ; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI2_0 at toc@ha
-; CHECK-LE-P8-NEXT:    mtvsrd v3, r5
 ; CHECK-LE-P8-NEXT:    lxsiwzx v4, 0, r3
 ; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI2_1 at toc@ha
-; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI2_1 at toc@l
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-LE-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v2
 ; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
@@ -261,13 +258,11 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxsiwzx v2, 0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI2_0 at toc@ha
-; CHECK-LE-P9-NEXT:    mtvsrd v3, r5
+; CHECK-LE-P9-NEXT:    mtfprd f0, r5
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI2_0 at toc@l
-; CHECK-LE-P9-NEXT:    lxv vs0, 0(r3)
-; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI2_1 at toc@ha
-; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI2_1 at toc@l
+; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-LE-P9-NEXT:    xxperm v3, v3, vs0
+; CHECK-LE-P9-NEXT:    vmrglh v3, v3, v3
 ; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P9-NEXT:    xxswapd vs0, v2
 ; CHECK-LE-P9-NEXT:    stfd f0, 0(r3)
@@ -275,15 +270,13 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
 ;
 ; CHECK-BE-P8-LABEL: test_none_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    addis r4, r2, .LCPI2_0 at toc@ha
-; CHECK-BE-P8-NEXT:    mtvsrwz v2, r5
-; CHECK-BE-P8-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-BE-P8-NEXT:    vperm v2, v2, v2, v3
+; CHECK-BE-P8-NEXT:    sldi r4, r5, 56
 ; CHECK-BE-P8-NEXT:    lxsiwzx v3, 0, r3
-; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI2_1 at toc@ha
-; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI2_1 at toc@l
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI2_0 at toc@ha
+; CHECK-BE-P8-NEXT:    mtvsrd v2, r4
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI2_0 at toc@l
 ; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v2
 ; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-BE-P8-NEXT:    stxsdx v2, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
@@ -291,27 +284,24 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
 ; CHECK-BE-P9-LABEL: test_none_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-BE-P9-NEXT:    sldi r3, r5, 56
+; CHECK-BE-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI2_0 at toc@ha
-; CHECK-BE-P9-NEXT:    mtvsrwz v3, r5
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI2_0 at toc@l
-; CHECK-BE-P9-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI2_1 at toc@ha
-; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI2_1 at toc@l
+; CHECK-BE-P9-NEXT:    vmrghh v3, v3, v3
 ; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT:    xxperm v3, v3, vs0
 ; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P9-NEXT:    stxsd v2, 0(r3)
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C3(r2) # %const.0
-; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r5
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v2, v3
+; CHECK-AIX-64-P8-NEXT:    sldi r4, r5, 56
 ; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C4(r2) # %const.1
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r4
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v2
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-64-P8-NEXT:    stxsdx v2, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
@@ -319,12 +309,11 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
 ; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT:    sldi r3, r5, 56
+; CHECK-AIX-64-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C2(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r5
-; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.1
+; CHECK-AIX-64-P9-NEXT:    vmrghh v3, v3, v3
 ; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    xxperm v3, v3, vs0
 ; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P9-NEXT:    stxsd v2, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
@@ -421,7 +410,7 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C5(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C4(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    xxlxor v4, v4, v4
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
@@ -431,7 +420,7 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C4(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    xxlxor vs2, vs2, vs2
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    xxperm vs0, vs2, vs1
@@ -531,7 +520,7 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ; CHECK-AIX-64-P8-LABEL: test_none_v2i64:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lxsdx v2, 0, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C6(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C5(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
@@ -543,7 +532,7 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ; CHECK-AIX-64-P9-LABEL: test_none_v2i64:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxsd v2, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C5(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C4(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    xxlxor v3, v3, v3
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
@@ -635,7 +624,7 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ; CHECK-AIX-64-P8-LABEL: test_v2i64_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lxsdx v2, 0, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C7(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C6(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    xxlxor v4, v4, v4
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
@@ -645,7 +634,7 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ; CHECK-AIX-64-P9-LABEL: test_v2i64_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C6(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C5(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    xxlxor vs2, vs2, vs2
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    xxperm vs0, vs2, vs1
@@ -745,7 +734,7 @@ define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) {
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r3
 ; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r4)
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C8(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C7(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    blr
@@ -753,7 +742,7 @@ define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) {
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxsihzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C7(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C6(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r4
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
@@ -790,82 +779,75 @@ define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v3, f0
-; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r4
-; CHECK-LE-P9-NEXT:    xxswapd v3, f0
-; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-LE-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r4
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P8-NEXT:    xxsldwi v2, f0, f0, 1
-; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI7_0 at toc@ha
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI7_0 at toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r3
-; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r4
-; CHECK-BE-P9-NEXT:    xxsldwi v3, f0, f0, 1
-; CHECK-BE-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-BE-P9-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI7_0 at toc@ha
+; CHECK-BE-P9-NEXT:    lxsiwzx v2, 0, r4
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI7_0 at toc@l
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r4
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
-; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C8(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r3
-; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r4
-; CHECK-AIX-64-P9-NEXT:    xxsldwi v3, f0, f0, 1
-; CHECK-AIX-64-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C7(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    lxsiwzx v2, 0, r4
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C8(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C7(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <2 x i8>, ptr %a
@@ -884,20 +866,16 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    lfdx f0, 0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v3, f0
-; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r4
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
-; CHECK-LE-P9-NEXT:    xxswapd v3, f0
-; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT:    lxsd v3, 0(r4)
+; CHECK-LE-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
@@ -937,25 +915,20 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C9(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C8(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <2 x i8>, ptr %a
@@ -1059,7 +1032,7 @@ define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readon
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C8(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C10(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
@@ -1071,7 +1044,7 @@ define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readon
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C7(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C9(r2) # %const.0
 ; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    xxlxor v3, v3, v3
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
@@ -1092,82 +1065,75 @@ define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v3, f0
-; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r4
-; CHECK-LE-P9-NEXT:    xxswapd v3, f0
-; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-LE-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r4
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P8-NEXT:    xxsldwi v2, f0, f0, 1
-; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI10_0 at toc@ha
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI10_0 at toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI10_0 at toc@ha
 ; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r4
-; CHECK-BE-P9-NEXT:    xxsldwi v3, f0, f0, 1
-; CHECK-BE-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI10_0 at toc@l
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r4
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
-; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C10(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C9(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r4
-; CHECK-AIX-64-P9-NEXT:    xxsldwi v3, f0, f0, 1
-; CHECK-AIX-64-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C11(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-32-P9-NEXT:    lxsihzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C10(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <2 x i8>, ptr %a
@@ -1185,20 +1151,16 @@ entry:
 define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, f0
-; CHECK-LE-P8-NEXT:    lfdx f0, 0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v3, f0
-; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r4
+; CHECK-LE-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v2, f0
-; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
-; CHECK-LE-P9-NEXT:    xxswapd v3, f0
-; CHECK-LE-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-LE-P9-NEXT:    lxsd v3, 0(r4)
+; CHECK-LE-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
@@ -1236,7 +1198,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C9(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C12(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
@@ -1245,7 +1207,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C8(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C11(r2) # %const.0
 ; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
@@ -1328,7 +1290,7 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
 ; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lxsdx v2, 0, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C10(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C11(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
@@ -1340,7 +1302,7 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
 ; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxsd v2, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C9(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C10(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    xxlxor v3, v3, v3
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
@@ -1352,7 +1314,7 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C10(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C13(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
@@ -1364,7 +1326,7 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C9(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C12(r2) # %const.0
 ; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    xxlxor v3, v3, v3
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
@@ -1384,20 +1346,16 @@ entry:
 define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, f0
-; CHECK-LE-P8-NEXT:    lfdx f0, 0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v3, f0
-; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r4
+; CHECK-LE-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v2, f0
-; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
-; CHECK-LE-P9-NEXT:    xxswapd v3, f0
-; CHECK-LE-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-LE-P9-NEXT:    lxsd v3, 0(r4)
+; CHECK-LE-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
@@ -1435,7 +1393,7 @@ define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C11(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C14(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
@@ -1444,7 +1402,7 @@ define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C10(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C13(r2) # %const.0
 ; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
@@ -1466,20 +1424,16 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    lfdx f0, 0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v3, f0
-; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r4
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
-; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
-; CHECK-LE-P9-NEXT:    xxswapd v3, f0
-; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
-; CHECK-LE-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT:    lxsd v3, 0(r4)
+; CHECK-LE-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
@@ -1519,25 +1473,20 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C15(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P9-NEXT:    sth r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-32-P9-NEXT:    lxsihzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C14(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <2 x i8>, ptr %a

>From 6df51c468874fd140f202c56c1a19264aa264181 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Fri, 5 Apr 2024 10:50:47 -0500
Subject: [PATCH 2/2] Address coding guideline comment

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5df905a5f03b14..88cb61b4179c58 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15242,13 +15242,13 @@ static void fixupShuffleMaskForPermutedSToV(
     SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
     int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
     unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
-  for (int i = 0, e = ShuffV.size(); i < e; i++) {
-    int Idx = ShuffV[i];
+  for (int I = 0, E = ShuffV.size(); I < E; ++I) {
+    int Idx = ShuffV[I];
     if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
-      ShuffV[i] +=
+      ShuffV[I] +=
           Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
     if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
-      ShuffV[i] +=
+      ShuffV[I] +=
           Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
   }
 }
@@ -15291,8 +15291,8 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
 static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
                                  int HalfVec, int LHSLastElementDefined,
                                  int RHSLastElementDefined) {
-  for (int i : seq<int>(0, ShuffV.size())) {
-    int Index = ShuffV[i];
+  for (int I : seq<int>(0, ShuffV.size())) {
+    int Index = ShuffV[I];
     if (Index < 0) // Skip explicitly undefined mask indices.
       continue;
     // Handle first input vector of the vector_shuffle.



More information about the llvm-commits mailing list