[llvm] [PowerPC] Use rldimi/rlwimi to optimize build_vector (PR #67640)

Qiu Chaofan via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 28 01:05:08 PDT 2023


https://github.com/ecnelises created https://github.com/llvm/llvm-project/pull/67640

Leverage rldimi/rlwimi instructions to generate better code for BUILD_VECTOR:

- For v16i8, four groups of `(i8 << 24) | (i8 << 16) | (i8 << 8) | i8` to construct a vector.
- For v8i16, four groups of `(i16 << 16) | i16` to construct a vector.

We already have patterns for v4i32 and v2i64 construction.

Migrated from https://reviews.llvm.org/D94467

>From 7d899942d50976d298b774a792e5f7b6fff9a887 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Thu, 28 Sep 2023 16:02:28 +0800
Subject: [PATCH] [PowerPC] Use rldimi/rlwimi to optimize build_vector

Leverage rldimi/rlwimi instructions to generate better code for BUILD_VECTOR:

- For v16i8, four groups of (i8 << 24) | (i8 << 16) | (i8 << 8) | i8 to construct a vector.
- For v8i16, four groups of (i16 << 16) | i16 to construct a vector.

We already have patterns for v4i32 and v2i64 construction.
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |   50 +
 llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll   |  869 ++++----
 llvm/test/CodeGen/PowerPC/pr25080.ll          |  123 +-
 llvm/test/CodeGen/PowerPC/pre-inc-disable.ll  |  175 +-
 llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll  | 1606 ++++++++-------
 llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll  |  978 +++++----
 .../PowerPC/v16i8_scalar_to_vector_shuffle.ll |  177 +-
 llvm/test/CodeGen/PowerPC/vec-promote.ll      |   50 +-
 llvm/test/CodeGen/PowerPC/vec-trunc2.ll       |   72 +-
 .../PowerPC/vec_conv_fp32_to_i16_elts.ll      | 1747 ++++++++---------
 .../PowerPC/vec_conv_fp32_to_i8_elts.ll       | 1587 +++++++--------
 .../PowerPC/vec_conv_fp64_to_i16_elts.ll      | 1391 ++++++-------
 .../PowerPC/vec_conv_fp64_to_i8_elts.ll       | 1293 ++++++------
 llvm/test/CodeGen/PowerPC/vec_int_ext.ll      |  113 +-
 14 files changed, 4652 insertions(+), 5579 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 740e5e2ff4b4ccb..4b20e1a24aa57e6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9276,6 +9276,49 @@ bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
   return (!LosesInfo && !APFloatToConvert.isDenormal());
 }
 
+// Use rldimi/rlwimi to construct vectors:
+//   i32 = (i8 << 24) | (i8 << 16) | (i8 << 8) | i8
+//   i32 = (i16 << 16) | i16
+//   i64 = (i32 << 32) | i32
+// And put two i64 together to get a vector.
+static SDValue tryMaskInsertVector(SDValue Op, SelectionDAG &DAG,
+                                   bool LittleEndian) {
+  EVT VT = Op.getValueType();
+  SDLoc dl(Op);
+
+  // There are already patterns for v4i32 and v2i64 construction.
+  if (VT == MVT::v16i8 || VT == MVT::v8i16) {
+    int NumElt = VT.getVectorNumElements();
+    int ScalarSize = VT.getScalarSizeInBits();
+    int EltsFor32 = NumElt / 4;
+    SDValue NewVecElts[4];
+    SDValue Parts[4];
+    for (int i = 0; i < 4; ++i) {
+      for (int j = 0; j < EltsFor32; ++j) {
+        SDValue Elt = LittleEndian
+                          ? Op.getOperand(i * EltsFor32 + EltsFor32 - j - 1)
+                          : Op.getOperand(i * EltsFor32 + j);
+        Parts[j] = DAG.getZExtOrTrunc(Elt, dl, MVT::i32);
+
+        // Left-shift elements to insert, except the last, because offset is 0.
+        if (j != EltsFor32 - 1)
+          Parts[j] =
+              DAG.getNode(ISD::SHL, dl, MVT::i32, Parts[j],
+                          DAG.getTargetConstant(
+                              ScalarSize * (EltsFor32 - j - 1), dl, MVT::i32));
+        if (j > 0)
+          Parts[j] = DAG.getNode(ISD::OR, dl, MVT::i32, Parts[j - 1], Parts[j]);
+      }
+      NewVecElts[i] = Parts[EltsFor32 - 1];
+    }
+
+    // Count on v4i32 to get optimized BUILD_VECTOR pattern.
+    return DAG.getBitcast(VT, DAG.getBuildVector(MVT::v4i32, dl, NewVecElts));
+  }
+
+  return SDValue();
+}
+
 static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
                              unsigned &Opcode) {
   LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
@@ -9457,6 +9500,13 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
                                         Subtarget.hasP8Vector()))
       return Op;
+
+    // Try to construct vector using masked insert.
+    if (!BVN->isConstant() && !DAG.isSplatValue(Op, true))
+      if (SDValue Res =
+              tryMaskInsertVector(Op, DAG, Subtarget.isLittleEndian()))
+        return Res;
+
     return SDValue();
   }
 
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
index 7a6640fea2d1e42..8d3f5b66d516ea8 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -266,55 +266,65 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-LABEL: sub_absv_8_ext:
 ; CHECK-PWR9-LE:       # %bb.0: # %entry
 ; CHECK-PWR9-LE-NEXT:    li r3, 0
-; CHECK-PWR9-LE-NEXT:    li r5, 2
+; CHECK-PWR9-LE-NEXT:    li r7, 4
 ; CHECK-PWR9-LE-NEXT:    li r4, 1
+; CHECK-PWR9-LE-NEXT:    li r5, 2
 ; CHECK-PWR9-LE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-PWR9-LE-NEXT:    vextubrx r6, r3, v2
-; CHECK-PWR9-LE-NEXT:    vextubrx r3, r3, v3
-; CHECK-PWR9-LE-NEXT:    vextubrx r8, r5, v2
-; CHECK-PWR9-LE-NEXT:    vextubrx r5, r5, v3
+; CHECK-PWR9-LE-NEXT:    li r6, 3
 ; CHECK-PWR9-LE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-PWR9-LE-NEXT:    vextubrx r8, r3, v2
+; CHECK-PWR9-LE-NEXT:    vextubrx r3, r3, v3
+; CHECK-PWR9-LE-NEXT:    vextubrx r9, r4, v2
 ; CHECK-PWR9-LE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-LE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-LE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-LE-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-PWR9-LE-NEXT:    clrlwi r6, r6, 24
-; CHECK-PWR9-LE-NEXT:    clrlwi r3, r3, 24
-; CHECK-PWR9-LE-NEXT:    clrlwi r8, r8, 24
-; CHECK-PWR9-LE-NEXT:    clrlwi r5, r5, 24
-; CHECK-PWR9-LE-NEXT:    vextubrx r7, r4, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r4, r4, v3
-; CHECK-PWR9-LE-NEXT:    sub r3, r6, r3
-; CHECK-PWR9-LE-NEXT:    sub r6, r8, r5
-; CHECK-PWR9-LE-NEXT:    clrlwi r7, r7, 24
+; CHECK-PWR9-LE-NEXT:    clrlwi r8, r8, 24
+; CHECK-PWR9-LE-NEXT:    clrlwi r3, r3, 24
+; CHECK-PWR9-LE-NEXT:    clrlwi r9, r9, 24
+; CHECK-PWR9-LE-NEXT:    vextubrx r10, r5, v2
+; CHECK-PWR9-LE-NEXT:    vextubrx r5, r5, v3
 ; CHECK-PWR9-LE-NEXT:    clrlwi r4, r4, 24
-; CHECK-PWR9-LE-NEXT:    sub r4, r7, r4
-; CHECK-PWR9-LE-NEXT:    srawi r5, r3, 31
-; CHECK-PWR9-LE-NEXT:    srawi r7, r4, 31
-; CHECK-PWR9-LE-NEXT:    xor r3, r3, r5
-; CHECK-PWR9-LE-NEXT:    xor r4, r4, r7
-; CHECK-PWR9-LE-NEXT:    sub r5, r3, r5
-; CHECK-PWR9-LE-NEXT:    srawi r3, r6, 31
-; CHECK-PWR9-LE-NEXT:    sub r4, r4, r7
-; CHECK-PWR9-LE-NEXT:    xor r6, r6, r3
-; CHECK-PWR9-LE-NEXT:    sub r3, r6, r3
-; CHECK-PWR9-LE-NEXT:    li r6, 3
-; CHECK-PWR9-LE-NEXT:    vextubrx r7, r6, v2
+; CHECK-PWR9-LE-NEXT:    vextubrx r11, r6, v2
+; CHECK-PWR9-LE-NEXT:    clrlwi r10, r10, 24
 ; CHECK-PWR9-LE-NEXT:    vextubrx r6, r6, v3
-; CHECK-PWR9-LE-NEXT:    clrlwi r7, r7, 24
+; CHECK-PWR9-LE-NEXT:    clrlwi r5, r5, 24
+; CHECK-PWR9-LE-NEXT:    clrlwi r11, r11, 24
+; CHECK-PWR9-LE-NEXT:    sub r3, r8, r3
+; CHECK-PWR9-LE-NEXT:    sub r4, r9, r4
+; CHECK-PWR9-LE-NEXT:    sub r5, r10, r5
 ; CHECK-PWR9-LE-NEXT:    clrlwi r6, r6, 24
-; CHECK-PWR9-LE-NEXT:    sub r6, r7, r6
-; CHECK-PWR9-LE-NEXT:    srawi r7, r6, 31
-; CHECK-PWR9-LE-NEXT:    xor r6, r6, r7
-; CHECK-PWR9-LE-NEXT:    sub r6, r6, r7
-; CHECK-PWR9-LE-NEXT:    li r7, 4
+; CHECK-PWR9-LE-NEXT:    sub r6, r11, r6
+; CHECK-PWR9-LE-NEXT:    srawi r8, r3, 31
+; CHECK-PWR9-LE-NEXT:    xor r3, r3, r8
+; CHECK-PWR9-LE-NEXT:    srawi r9, r4, 31
+; CHECK-PWR9-LE-NEXT:    xor r4, r4, r9
+; CHECK-PWR9-LE-NEXT:    srawi r10, r5, 31
+; CHECK-PWR9-LE-NEXT:    xor r5, r5, r10
+; CHECK-PWR9-LE-NEXT:    srawi r11, r6, 31
+; CHECK-PWR9-LE-NEXT:    xor r6, r6, r11
+; CHECK-PWR9-LE-NEXT:    sub r3, r3, r8
 ; CHECK-PWR9-LE-NEXT:    vextubrx r8, r7, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r7, r7, v3
-; CHECK-PWR9-LE-NEXT:    mtvsrd v4, r6
+; CHECK-PWR9-LE-NEXT:    sub r4, r4, r9
+; CHECK-PWR9-LE-NEXT:    sub r5, r5, r10
+; CHECK-PWR9-LE-NEXT:    sub r6, r6, r11
+; CHECK-PWR9-LE-NEXT:    li r11, 7
+; CHECK-PWR9-LE-NEXT:    slwi r5, r5, 16
+; CHECK-PWR9-LE-NEXT:    vextubrx r12, r11, v2
+; CHECK-PWR9-LE-NEXT:    vextubrx r11, r11, v3
+; CHECK-PWR9-LE-NEXT:    slwi r6, r6, 24
+; CHECK-PWR9-LE-NEXT:    slwi r4, r4, 8
 ; CHECK-PWR9-LE-NEXT:    clrlwi r8, r8, 24
 ; CHECK-PWR9-LE-NEXT:    clrlwi r7, r7, 24
+; CHECK-PWR9-LE-NEXT:    or r5, r6, r5
 ; CHECK-PWR9-LE-NEXT:    sub r7, r8, r7
+; CHECK-PWR9-LE-NEXT:    clrlwi r12, r12, 24
+; CHECK-PWR9-LE-NEXT:    clrlwi r11, r11, 24
+; CHECK-PWR9-LE-NEXT:    or r4, r5, r4
 ; CHECK-PWR9-LE-NEXT:    srawi r8, r7, 31
+; CHECK-PWR9-LE-NEXT:    or r3, r4, r3
 ; CHECK-PWR9-LE-NEXT:    xor r7, r7, r8
 ; CHECK-PWR9-LE-NEXT:    sub r7, r7, r8
 ; CHECK-PWR9-LE-NEXT:    li r8, 5
@@ -329,40 +339,41 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    li r9, 6
 ; CHECK-PWR9-LE-NEXT:    vextubrx r10, r9, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r9, r9, v3
+; CHECK-PWR9-LE-NEXT:    slwi r8, r8, 8
 ; CHECK-PWR9-LE-NEXT:    clrlwi r10, r10, 24
 ; CHECK-PWR9-LE-NEXT:    clrlwi r9, r9, 24
 ; CHECK-PWR9-LE-NEXT:    sub r9, r10, r9
 ; CHECK-PWR9-LE-NEXT:    srawi r10, r9, 31
 ; CHECK-PWR9-LE-NEXT:    xor r9, r9, r10
-; CHECK-PWR9-LE-NEXT:    sub r9, r9, r10
-; CHECK-PWR9-LE-NEXT:    li r10, 7
-; CHECK-PWR9-LE-NEXT:    vextubrx r11, r10, v2
-; CHECK-PWR9-LE-NEXT:    vextubrx r10, r10, v3
+; CHECK-PWR9-LE-NEXT:    sub r10, r9, r10
+; CHECK-PWR9-LE-NEXT:    sub r9, r12, r11
+; CHECK-PWR9-LE-NEXT:    srawi r11, r9, 31
+; CHECK-PWR9-LE-NEXT:    slwi r10, r10, 16
+; CHECK-PWR9-LE-NEXT:    xor r9, r9, r11
+; CHECK-PWR9-LE-NEXT:    sub r12, r9, r11
+; CHECK-PWR9-LE-NEXT:    li r9, 8
+; CHECK-PWR9-LE-NEXT:    vextubrx r11, r9, v2
+; CHECK-PWR9-LE-NEXT:    vextubrx r9, r9, v3
+; CHECK-PWR9-LE-NEXT:    slwi r12, r12, 24
+; CHECK-PWR9-LE-NEXT:    or r10, r12, r10
 ; CHECK-PWR9-LE-NEXT:    clrlwi r11, r11, 24
-; CHECK-PWR9-LE-NEXT:    clrlwi r10, r10, 24
-; CHECK-PWR9-LE-NEXT:    sub r10, r11, r10
-; CHECK-PWR9-LE-NEXT:    srawi r11, r10, 31
-; CHECK-PWR9-LE-NEXT:    xor r10, r10, r11
-; CHECK-PWR9-LE-NEXT:    sub r10, r10, r11
-; CHECK-PWR9-LE-NEXT:    li r11, 8
-; CHECK-PWR9-LE-NEXT:    vextubrx r12, r11, v2
+; CHECK-PWR9-LE-NEXT:    clrlwi r9, r9, 24
+; CHECK-PWR9-LE-NEXT:    or r8, r10, r8
+; CHECK-PWR9-LE-NEXT:    sub r9, r11, r9
+; CHECK-PWR9-LE-NEXT:    or r7, r8, r7
+; CHECK-PWR9-LE-NEXT:    srawi r11, r9, 31
+; CHECK-PWR9-LE-NEXT:    rldimi r3, r7, 32, 0
+; CHECK-PWR9-LE-NEXT:    xor r9, r9, r11
+; CHECK-PWR9-LE-NEXT:    sub r9, r9, r11
+; CHECK-PWR9-LE-NEXT:    li r11, 9
+; CHECK-PWR9-LE-NEXT:    vextubrx r0, r11, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r11, r11, v3
-; CHECK-PWR9-LE-NEXT:    mtvsrd v5, r10
-; CHECK-PWR9-LE-NEXT:    clrlwi r12, r12, 24
-; CHECK-PWR9-LE-NEXT:    clrlwi r11, r11, 24
-; CHECK-PWR9-LE-NEXT:    sub r11, r12, r11
-; CHECK-PWR9-LE-NEXT:    srawi r12, r11, 31
-; CHECK-PWR9-LE-NEXT:    xor r11, r11, r12
-; CHECK-PWR9-LE-NEXT:    sub r11, r11, r12
-; CHECK-PWR9-LE-NEXT:    li r12, 9
-; CHECK-PWR9-LE-NEXT:    vextubrx r0, r12, v2
-; CHECK-PWR9-LE-NEXT:    vextubrx r12, r12, v3
 ; CHECK-PWR9-LE-NEXT:    clrlwi r0, r0, 24
-; CHECK-PWR9-LE-NEXT:    clrlwi r12, r12, 24
-; CHECK-PWR9-LE-NEXT:    sub r12, r0, r12
-; CHECK-PWR9-LE-NEXT:    srawi r0, r12, 31
-; CHECK-PWR9-LE-NEXT:    xor r12, r12, r0
-; CHECK-PWR9-LE-NEXT:    sub r12, r12, r0
+; CHECK-PWR9-LE-NEXT:    clrlwi r11, r11, 24
+; CHECK-PWR9-LE-NEXT:    sub r11, r0, r11
+; CHECK-PWR9-LE-NEXT:    srawi r0, r11, 31
+; CHECK-PWR9-LE-NEXT:    xor r11, r11, r0
+; CHECK-PWR9-LE-NEXT:    sub r11, r11, r0
 ; CHECK-PWR9-LE-NEXT:    li r0, 10
 ; CHECK-PWR9-LE-NEXT:    vextubrx r30, r0, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r0, r0, v3
@@ -384,6 +395,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    li r29, 12
 ; CHECK-PWR9-LE-NEXT:    vextubrx r28, r29, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r29, r29, v3
+; CHECK-PWR9-LE-NEXT:    slwi r6, r30, 24
+; CHECK-PWR9-LE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-LE-NEXT:    clrlwi r28, r28, 24
 ; CHECK-PWR9-LE-NEXT:    clrlwi r29, r29, 24
 ; CHECK-PWR9-LE-NEXT:    sub r29, r28, r29
@@ -411,105 +424,100 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    li r26, 15
 ; CHECK-PWR9-LE-NEXT:    vextubrx r25, r26, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r26, r26, v3
-; CHECK-PWR9-LE-NEXT:    mtvsrd v2, r5
-; CHECK-PWR9-LE-NEXT:    mtvsrd v3, r4
-; CHECK-PWR9-LE-NEXT:    vmrghb v2, v3, v2
-; CHECK-PWR9-LE-NEXT:    mtvsrd v3, r3
+; CHECK-PWR9-LE-NEXT:    slwi r4, r27, 16
+; CHECK-PWR9-LE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-LE-NEXT:    clrlwi r25, r25, 24
 ; CHECK-PWR9-LE-NEXT:    clrlwi r26, r26, 24
-; CHECK-PWR9-LE-NEXT:    vmrghb v3, v4, v3
-; CHECK-PWR9-LE-NEXT:    mtvsrd v4, r8
 ; CHECK-PWR9-LE-NEXT:    sub r26, r25, r26
-; CHECK-PWR9-LE-NEXT:    vmrglh v2, v3, v2
-; CHECK-PWR9-LE-NEXT:    mtvsrd v3, r7
 ; CHECK-PWR9-LE-NEXT:    srawi r25, r26, 31
-; CHECK-PWR9-LE-NEXT:    vmrghb v3, v4, v3
-; CHECK-PWR9-LE-NEXT:    mtvsrd v4, r9
 ; CHECK-PWR9-LE-NEXT:    xor r26, r26, r25
-; CHECK-PWR9-LE-NEXT:    vmrghb v4, v5, v4
 ; CHECK-PWR9-LE-NEXT:    sub r26, r26, r25
 ; CHECK-PWR9-LE-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT:    mtvsrd v5, r26
+; CHECK-PWR9-LE-NEXT:    slwi r5, r26, 24
 ; CHECK-PWR9-LE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT:    vmrglh v3, v4, v3
-; CHECK-PWR9-LE-NEXT:    mtvsrd v4, r30
-; CHECK-PWR9-LE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-PWR9-LE-NEXT:    mtvsrd v2, r11
-; CHECK-PWR9-LE-NEXT:    mtvsrd v3, r12
-; CHECK-PWR9-LE-NEXT:    vmrghb v2, v3, v2
-; CHECK-PWR9-LE-NEXT:    mtvsrd v3, r0
-; CHECK-PWR9-LE-NEXT:    vmrghb v3, v4, v3
-; CHECK-PWR9-LE-NEXT:    mtvsrd v4, r28
+; CHECK-PWR9-LE-NEXT:    or r4, r5, r4
+; CHECK-PWR9-LE-NEXT:    slwi r5, r28, 8
 ; CHECK-PWR9-LE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT:    vmrglh v2, v3, v2
-; CHECK-PWR9-LE-NEXT:    mtvsrd v3, r29
+; CHECK-PWR9-LE-NEXT:    or r4, r4, r5
+; CHECK-PWR9-LE-NEXT:    slwi r5, r0, 16
+; CHECK-PWR9-LE-NEXT:    or r5, r6, r5
+; CHECK-PWR9-LE-NEXT:    slwi r6, r11, 8
+; CHECK-PWR9-LE-NEXT:    or r4, r4, r29
 ; CHECK-PWR9-LE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT:    vmrghb v3, v4, v3
-; CHECK-PWR9-LE-NEXT:    mtvsrd v4, r27
-; CHECK-PWR9-LE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT:    vmrghb v4, v5, v4
-; CHECK-PWR9-LE-NEXT:    vmrglh v3, v4, v3
-; CHECK-PWR9-LE-NEXT:    xxmrglw vs1, v3, v2
-; CHECK-PWR9-LE-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-PWR9-LE-NEXT:    or r5, r5, r6
+; CHECK-PWR9-LE-NEXT:    or r5, r5, r9
+; CHECK-PWR9-LE-NEXT:    rldimi r5, r4, 32, 0
+; CHECK-PWR9-LE-NEXT:    mtvsrdd v2, r5, r3
 ; CHECK-PWR9-LE-NEXT:    blr
 ;
 ; CHECK-PWR9-BE-LABEL: sub_absv_8_ext:
 ; CHECK-PWR9-BE:       # %bb.0: # %entry
 ; CHECK-PWR9-BE-NEXT:    li r3, 0
 ; CHECK-PWR9-BE-NEXT:    li r4, 1
-; CHECK-PWR9-BE-NEXT:    li r5, 2
+; CHECK-PWR9-BE-NEXT:    li r7, 4
+; CHECK-PWR9-BE-NEXT:    li r6, 3
 ; CHECK-PWR9-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-PWR9-BE-NEXT:    vextublx r6, r3, v2
-; CHECK-PWR9-BE-NEXT:    vextublx r3, r3, v3
-; CHECK-PWR9-BE-NEXT:    vextublx r7, r4, v2
-; CHECK-PWR9-BE-NEXT:    vextublx r4, r4, v3
+; CHECK-PWR9-BE-NEXT:    li r5, 2
 ; CHECK-PWR9-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-PWR9-BE-NEXT:    vextublx r8, r3, v2
+; CHECK-PWR9-BE-NEXT:    vextublx r3, r3, v3
+; CHECK-PWR9-BE-NEXT:    vextublx r9, r4, v2
 ; CHECK-PWR9-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-BE-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-PWR9-BE-NEXT:    clrlwi r6, r6, 24
+; CHECK-PWR9-BE-NEXT:    vextublx r4, r4, v3
+; CHECK-PWR9-BE-NEXT:    clrlwi r8, r8, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r3, r3, 24
-; CHECK-PWR9-BE-NEXT:    clrlwi r7, r7, 24
+; CHECK-PWR9-BE-NEXT:    clrlwi r9, r9, 24
+; CHECK-PWR9-BE-NEXT:    vextublx r11, r6, v2
+; CHECK-PWR9-BE-NEXT:    vextublx r6, r6, v3
 ; CHECK-PWR9-BE-NEXT:    clrlwi r4, r4, 24
-; CHECK-PWR9-BE-NEXT:    vextublx r8, r5, v2
+; CHECK-PWR9-BE-NEXT:    vextublx r10, r5, v2
+; CHECK-PWR9-BE-NEXT:    clrlwi r11, r11, 24
 ; CHECK-PWR9-BE-NEXT:    vextublx r5, r5, v3
-; CHECK-PWR9-BE-NEXT:    sub r3, r6, r3
-; CHECK-PWR9-BE-NEXT:    sub r4, r7, r4
-; CHECK-PWR9-BE-NEXT:    clrlwi r8, r8, 24
-; CHECK-PWR9-BE-NEXT:    clrlwi r5, r5, 24
-; CHECK-PWR9-BE-NEXT:    sub r5, r8, r5
-; CHECK-PWR9-BE-NEXT:    srawi r6, r3, 31
-; CHECK-PWR9-BE-NEXT:    srawi r7, r4, 31
-; CHECK-PWR9-BE-NEXT:    srawi r8, r5, 31
-; CHECK-PWR9-BE-NEXT:    xor r3, r3, r6
-; CHECK-PWR9-BE-NEXT:    xor r4, r4, r7
-; CHECK-PWR9-BE-NEXT:    xor r5, r5, r8
-; CHECK-PWR9-BE-NEXT:    sub r3, r3, r6
-; CHECK-PWR9-BE-NEXT:    li r6, 3
-; CHECK-PWR9-BE-NEXT:    sub r4, r4, r7
-; CHECK-PWR9-BE-NEXT:    sub r5, r5, r8
-; CHECK-PWR9-BE-NEXT:    vextublx r7, r6, v2
-; CHECK-PWR9-BE-NEXT:    vextublx r6, r6, v3
-; CHECK-PWR9-BE-NEXT:    clrlwi r7, r7, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r6, r6, 24
-; CHECK-PWR9-BE-NEXT:    sub r6, r7, r6
-; CHECK-PWR9-BE-NEXT:    srawi r7, r6, 31
-; CHECK-PWR9-BE-NEXT:    xor r6, r6, r7
-; CHECK-PWR9-BE-NEXT:    sub r6, r6, r7
-; CHECK-PWR9-BE-NEXT:    li r7, 4
+; CHECK-PWR9-BE-NEXT:    clrlwi r10, r10, 24
+; CHECK-PWR9-BE-NEXT:    sub r3, r8, r3
+; CHECK-PWR9-BE-NEXT:    sub r4, r9, r4
+; CHECK-PWR9-BE-NEXT:    sub r6, r11, r6
+; CHECK-PWR9-BE-NEXT:    clrlwi r5, r5, 24
+; CHECK-PWR9-BE-NEXT:    sub r5, r10, r5
+; CHECK-PWR9-BE-NEXT:    srawi r8, r3, 31
+; CHECK-PWR9-BE-NEXT:    xor r3, r3, r8
+; CHECK-PWR9-BE-NEXT:    srawi r9, r4, 31
+; CHECK-PWR9-BE-NEXT:    xor r12, r4, r9
+; CHECK-PWR9-BE-NEXT:    srawi r11, r6, 31
+; CHECK-PWR9-BE-NEXT:    xor r0, r6, r11
+; CHECK-PWR9-BE-NEXT:    srawi r10, r5, 31
+; CHECK-PWR9-BE-NEXT:    xor r5, r5, r10
+; CHECK-PWR9-BE-NEXT:    sub r4, r3, r8
 ; CHECK-PWR9-BE-NEXT:    vextublx r8, r7, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r7, r7, v3
+; CHECK-PWR9-BE-NEXT:    sub r6, r12, r9
+; CHECK-PWR9-BE-NEXT:    sub r5, r5, r10
+; CHECK-PWR9-BE-NEXT:    sub r3, r0, r11
+; CHECK-PWR9-BE-NEXT:    li r11, 7
+; CHECK-PWR9-BE-NEXT:    slwi r6, r6, 16
+; CHECK-PWR9-BE-NEXT:    vextublx r12, r11, v2
+; CHECK-PWR9-BE-NEXT:    vextublx r11, r11, v3
+; CHECK-PWR9-BE-NEXT:    slwi r4, r4, 24
+; CHECK-PWR9-BE-NEXT:    slwi r5, r5, 8
 ; CHECK-PWR9-BE-NEXT:    clrlwi r8, r8, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r7, r7, 24
+; CHECK-PWR9-BE-NEXT:    or r4, r4, r6
 ; CHECK-PWR9-BE-NEXT:    sub r7, r8, r7
+; CHECK-PWR9-BE-NEXT:    clrlwi r12, r12, 24
+; CHECK-PWR9-BE-NEXT:    clrlwi r11, r11, 24
+; CHECK-PWR9-BE-NEXT:    or r4, r4, r5
 ; CHECK-PWR9-BE-NEXT:    srawi r8, r7, 31
+; CHECK-PWR9-BE-NEXT:    or r3, r4, r3
 ; CHECK-PWR9-BE-NEXT:    xor r7, r7, r8
 ; CHECK-PWR9-BE-NEXT:    sub r7, r7, r8
 ; CHECK-PWR9-BE-NEXT:    li r8, 5
 ; CHECK-PWR9-BE-NEXT:    vextublx r9, r8, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r8, r8, v3
+; CHECK-PWR9-BE-NEXT:    slwi r5, r7, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r9, r9, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r8, r8, 24
 ; CHECK-PWR9-BE-NEXT:    sub r8, r9, r8
@@ -519,25 +527,25 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    li r9, 6
 ; CHECK-PWR9-BE-NEXT:    vextublx r10, r9, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r9, r9, v3
+; CHECK-PWR9-BE-NEXT:    slwi r4, r8, 16
+; CHECK-PWR9-BE-NEXT:    or r4, r5, r4
 ; CHECK-PWR9-BE-NEXT:    clrlwi r10, r10, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r9, r9, 24
 ; CHECK-PWR9-BE-NEXT:    sub r9, r10, r9
 ; CHECK-PWR9-BE-NEXT:    srawi r10, r9, 31
 ; CHECK-PWR9-BE-NEXT:    xor r9, r9, r10
 ; CHECK-PWR9-BE-NEXT:    sub r9, r9, r10
-; CHECK-PWR9-BE-NEXT:    li r10, 7
-; CHECK-PWR9-BE-NEXT:    vextublx r11, r10, v2
-; CHECK-PWR9-BE-NEXT:    vextublx r10, r10, v3
-; CHECK-PWR9-BE-NEXT:    mtfprwz f2, r9
-; CHECK-PWR9-BE-NEXT:    clrlwi r11, r11, 24
-; CHECK-PWR9-BE-NEXT:    clrlwi r10, r10, 24
-; CHECK-PWR9-BE-NEXT:    sub r10, r11, r10
+; CHECK-PWR9-BE-NEXT:    sub r10, r12, r11
 ; CHECK-PWR9-BE-NEXT:    srawi r11, r10, 31
+; CHECK-PWR9-BE-NEXT:    slwi r5, r9, 8
 ; CHECK-PWR9-BE-NEXT:    xor r10, r10, r11
+; CHECK-PWR9-BE-NEXT:    or r4, r4, r5
 ; CHECK-PWR9-BE-NEXT:    sub r10, r10, r11
 ; CHECK-PWR9-BE-NEXT:    li r11, 8
 ; CHECK-PWR9-BE-NEXT:    vextublx r12, r11, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r11, r11, v3
+; CHECK-PWR9-BE-NEXT:    or r4, r4, r10
+; CHECK-PWR9-BE-NEXT:    rldimi r4, r3, 32, 0
 ; CHECK-PWR9-BE-NEXT:    clrlwi r12, r12, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r11, r11, 24
 ; CHECK-PWR9-BE-NEXT:    sub r11, r12, r11
@@ -547,6 +555,7 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    li r12, 9
 ; CHECK-PWR9-BE-NEXT:    vextublx r0, r12, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r12, r12, v3
+; CHECK-PWR9-BE-NEXT:    slwi r11, r11, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r0, r0, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r12, r12, 24
 ; CHECK-PWR9-BE-NEXT:    sub r12, r0, r12
@@ -556,7 +565,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    li r0, 10
 ; CHECK-PWR9-BE-NEXT:    vextublx r30, r0, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r0, r0, v3
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v4, r12
+; CHECK-PWR9-BE-NEXT:    slwi r12, r12, 16
+; CHECK-PWR9-BE-NEXT:    or r11, r11, r12
 ; CHECK-PWR9-BE-NEXT:    clrlwi r30, r30, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r0, r0, 24
 ; CHECK-PWR9-BE-NEXT:    sub r0, r30, r0
@@ -566,6 +576,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    li r30, 11
 ; CHECK-PWR9-BE-NEXT:    vextublx r29, r30, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r30, r30, v3
+; CHECK-PWR9-BE-NEXT:    slwi r12, r0, 8
+; CHECK-PWR9-BE-NEXT:    or r11, r11, r12
 ; CHECK-PWR9-BE-NEXT:    clrlwi r29, r29, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r30, r30, 24
 ; CHECK-PWR9-BE-NEXT:    sub r30, r29, r30
@@ -575,6 +587,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    li r29, 12
 ; CHECK-PWR9-BE-NEXT:    vextublx r28, r29, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r29, r29, v3
+; CHECK-PWR9-BE-NEXT:    or r11, r11, r30
+; CHECK-PWR9-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-BE-NEXT:    clrlwi r28, r28, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r29, r29, 24
 ; CHECK-PWR9-BE-NEXT:    sub r29, r28, r29
@@ -584,6 +598,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    li r28, 13
 ; CHECK-PWR9-BE-NEXT:    vextublx r27, r28, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r28, r28, v3
+; CHECK-PWR9-BE-NEXT:    slwi r0, r29, 24
+; CHECK-PWR9-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-BE-NEXT:    clrlwi r27, r27, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r28, r28, 24
 ; CHECK-PWR9-BE-NEXT:    sub r28, r27, r28
@@ -593,6 +609,9 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    li r27, 14
 ; CHECK-PWR9-BE-NEXT:    vextublx r26, r27, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r27, r27, v3
+; CHECK-PWR9-BE-NEXT:    slwi r12, r28, 16
+; CHECK-PWR9-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-PWR9-BE-NEXT:    or r12, r0, r12
 ; CHECK-PWR9-BE-NEXT:    clrlwi r26, r26, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r27, r27, 24
 ; CHECK-PWR9-BE-NEXT:    sub r27, r26, r27
@@ -602,145 +621,110 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    li r26, 15
 ; CHECK-PWR9-BE-NEXT:    vextublx r25, r26, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r26, r26, v3
-; CHECK-PWR9-BE-NEXT:    mtfprwz f0, r27
-; CHECK-PWR9-BE-NEXT:    addis r27, r2, .LCPI9_0@toc@ha
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v3, r28
-; CHECK-PWR9-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-PWR9-BE-NEXT:    addi r27, r27, .LCPI9_0@toc@l
+; CHECK-PWR9-BE-NEXT:    slwi r0, r27, 8
+; CHECK-PWR9-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-PWR9-BE-NEXT:    or r12, r12, r0
 ; CHECK-PWR9-BE-NEXT:    clrlwi r25, r25, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r26, r26, 24
-; CHECK-PWR9-BE-NEXT:    lxv vs1, 0(r27)
-; CHECK-PWR9-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-BE-NEXT:    sub r26, r25, r26
 ; CHECK-PWR9-BE-NEXT:    srawi r25, r26, 31
 ; CHECK-PWR9-BE-NEXT:    xor r26, r26, r25
 ; CHECK-PWR9-BE-NEXT:    sub r26, r26, r25
 ; CHECK-PWR9-BE-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v2, r26
+; CHECK-PWR9-BE-NEXT:    or r12, r12, r26
 ; CHECK-PWR9-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-PWR9-BE-NEXT:    xxperm v2, vs0, vs1
-; CHECK-PWR9-BE-NEXT:    mtfprwz f0, r29
-; CHECK-PWR9-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-PWR9-BE-NEXT:    xxperm v3, vs0, vs1
-; CHECK-PWR9-BE-NEXT:    mtfprwz f0, r0
-; CHECK-PWR9-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v3, r30
-; CHECK-PWR9-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-PWR9-BE-NEXT:    xxperm v3, vs0, vs1
-; CHECK-PWR9-BE-NEXT:    mtfprwz f0, r11
-; CHECK-PWR9-BE-NEXT:    xxperm v4, vs0, vs1
-; CHECK-PWR9-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v4, r4
-; CHECK-PWR9-BE-NEXT:    xxmrghw vs0, v3, v2
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v2, r10
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v3, r8
-; CHECK-PWR9-BE-NEXT:    xxperm v2, vs2, vs1
-; CHECK-PWR9-BE-NEXT:    mtfprwz f2, r7
-; CHECK-PWR9-BE-NEXT:    xxperm v3, vs2, vs1
-; CHECK-PWR9-BE-NEXT:    mtfprwz f2, r5
-; CHECK-PWR9-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v3, r6
-; CHECK-PWR9-BE-NEXT:    xxperm v3, vs2, vs1
-; CHECK-PWR9-BE-NEXT:    mtfprwz f2, r3
-; CHECK-PWR9-BE-NEXT:    xxperm v4, vs2, vs1
-; CHECK-PWR9-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-PWR9-BE-NEXT:    xxmrghw vs1, v3, v2
-; CHECK-PWR9-BE-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-PWR9-BE-NEXT:    rldimi r12, r11, 32, 0
+; CHECK-PWR9-BE-NEXT:    mtvsrdd v2, r4, r12
 ; CHECK-PWR9-BE-NEXT:    blr
 ;
 ; CHECK-PWR8-LABEL: sub_absv_8_ext:
 ; CHECK-PWR8:       # %bb.0: # %entry
 ; CHECK-PWR8-NEXT:    xxswapd vs0, v2
 ; CHECK-PWR8-NEXT:    xxswapd vs1, v3
+; CHECK-PWR8-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-PWR8-NEXT:    mffprd r10, f0
+; CHECK-PWR8-NEXT:    mffprd r11, f1
 ; CHECK-PWR8-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    mffprd r11, f0
-; CHECK-PWR8-NEXT:    mffprd r8, f1
-; CHECK-PWR8-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    clrldi r3, r11, 56
-; CHECK-PWR8-NEXT:    clrldi r4, r8, 56
-; CHECK-PWR8-NEXT:    rldicl r5, r11, 56, 56
-; CHECK-PWR8-NEXT:    rldicl r6, r8, 56, 56
-; CHECK-PWR8-NEXT:    rldicl r7, r11, 48, 56
-; CHECK-PWR8-NEXT:    rldicl r9, r8, 48, 56
-; CHECK-PWR8-NEXT:    rldicl r0, r11, 32, 56
-; CHECK-PWR8-NEXT:    rldicl r30, r8, 32, 56
-; CHECK-PWR8-NEXT:    rldicl r29, r11, 24, 56
-; CHECK-PWR8-NEXT:    rldicl r28, r8, 24, 56
-; CHECK-PWR8-NEXT:    rldicl r10, r11, 40, 56
-; CHECK-PWR8-NEXT:    rldicl r12, r8, 40, 56
-; CHECK-PWR8-NEXT:    rldicl r27, r11, 16, 56
-; CHECK-PWR8-NEXT:    rldicl r11, r11, 8, 56
+; CHECK-PWR8-NEXT:    clrldi r3, r10, 56
+; CHECK-PWR8-NEXT:    clrldi r4, r11, 56
+; CHECK-PWR8-NEXT:    rldicl r5, r10, 56, 56
+; CHECK-PWR8-NEXT:    rldicl r6, r11, 56, 56
+; CHECK-PWR8-NEXT:    rldicl r9, r10, 40, 56
+; CHECK-PWR8-NEXT:    rldicl r12, r11, 40, 56
+; CHECK-PWR8-NEXT:    rldicl r7, r10, 48, 56
+; CHECK-PWR8-NEXT:    rldicl r8, r11, 48, 56
+; CHECK-PWR8-NEXT:    rldicl r27, r10, 16, 56
+; CHECK-PWR8-NEXT:    rldicl r0, r10, 32, 56
+; CHECK-PWR8-NEXT:    rldicl r30, r11, 32, 56
+; CHECK-PWR8-NEXT:    rldicl r29, r10, 24, 56
+; CHECK-PWR8-NEXT:    rldicl r28, r11, 24, 56
+; CHECK-PWR8-NEXT:    rldicl r10, r10, 8, 56
 ; CHECK-PWR8-NEXT:    std r24, -64(r1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    clrlwi r3, r3, 24
 ; CHECK-PWR8-NEXT:    clrlwi r4, r4, 24
 ; CHECK-PWR8-NEXT:    clrlwi r5, r5, 24
 ; CHECK-PWR8-NEXT:    clrlwi r6, r6, 24
-; CHECK-PWR8-NEXT:    clrlwi r7, r7, 24
 ; CHECK-PWR8-NEXT:    clrlwi r9, r9, 24
+; CHECK-PWR8-NEXT:    clrlwi r12, r12, 24
 ; CHECK-PWR8-NEXT:    sub r3, r3, r4
+; CHECK-PWR8-NEXT:    sub r4, r5, r6
+; CHECK-PWR8-NEXT:    sub r6, r9, r12
+; CHECK-PWR8-NEXT:    clrlwi r7, r7, 24
+; CHECK-PWR8-NEXT:    clrlwi r8, r8, 24
+; CHECK-PWR8-NEXT:    clrlwi r27, r27, 24
 ; CHECK-PWR8-NEXT:    clrlwi r0, r0, 24
+; CHECK-PWR8-NEXT:    sub r5, r7, r8
 ; CHECK-PWR8-NEXT:    clrlwi r30, r30, 24
-; CHECK-PWR8-NEXT:    sub r4, r5, r6
-; CHECK-PWR8-NEXT:    sub r5, r7, r9
-; CHECK-PWR8-NEXT:    clrlwi r29, r29, 24
-; CHECK-PWR8-NEXT:    clrlwi r28, r28, 24
 ; CHECK-PWR8-NEXT:    sub r7, r0, r30
-; CHECK-PWR8-NEXT:    sub r9, r29, r28
 ; CHECK-PWR8-NEXT:    clrlwi r10, r10, 24
-; CHECK-PWR8-NEXT:    clrlwi r12, r12, 24
-; CHECK-PWR8-NEXT:    sub r6, r10, r12
-; CHECK-PWR8-NEXT:    clrlwi r27, r27, 24
-; CHECK-PWR8-NEXT:    clrlwi r11, r11, 24
-; CHECK-PWR8-NEXT:    srawi r0, r5, 31
-; CHECK-PWR8-NEXT:    srawi r29, r7, 31
+; CHECK-PWR8-NEXT:    clrlwi r29, r29, 24
+; CHECK-PWR8-NEXT:    clrlwi r28, r28, 24
+; CHECK-PWR8-NEXT:    sub r8, r29, r28
+; CHECK-PWR8-NEXT:    srawi r9, r3, 31
 ; CHECK-PWR8-NEXT:    srawi r12, r4, 31
-; CHECK-PWR8-NEXT:    srawi r28, r9, 31
+; CHECK-PWR8-NEXT:    srawi r0, r5, 31
 ; CHECK-PWR8-NEXT:    srawi r30, r6, 31
-; CHECK-PWR8-NEXT:    srawi r10, r3, 31
-; CHECK-PWR8-NEXT:    xor r5, r5, r0
-; CHECK-PWR8-NEXT:    xor r26, r7, r29
-; CHECK-PWR8-NEXT:    sub r7, r5, r0
-; CHECK-PWR8-NEXT:    rldicl r5, r8, 16, 56
-; CHECK-PWR8-NEXT:    rldicl r8, r8, 8, 56
+; CHECK-PWR8-NEXT:    srawi r29, r7, 31
+; CHECK-PWR8-NEXT:    srawi r28, r8, 31
+; CHECK-PWR8-NEXT:    xor r3, r3, r9
+; CHECK-PWR8-NEXT:    sub r3, r3, r9
+; CHECK-PWR8-NEXT:    rldicl r9, r11, 16, 56
 ; CHECK-PWR8-NEXT:    xor r4, r4, r12
-; CHECK-PWR8-NEXT:    xor r25, r9, r28
-; CHECK-PWR8-NEXT:    sub r9, r4, r12
-; CHECK-PWR8-NEXT:    sub r4, r26, r29
-; CHECK-PWR8-NEXT:    mtvsrd v1, r9
-; CHECK-PWR8-NEXT:    clrlwi r5, r5, 24
-; CHECK-PWR8-NEXT:    sub r5, r27, r5
-; CHECK-PWR8-NEXT:    clrlwi r8, r8, 24
-; CHECK-PWR8-NEXT:    sub r8, r11, r8
+; CHECK-PWR8-NEXT:    sub r4, r4, r12
+; CHECK-PWR8-NEXT:    rldicl r11, r11, 8, 56
+; CHECK-PWR8-NEXT:    xor r5, r5, r0
+; CHECK-PWR8-NEXT:    sub r5, r5, r0
+; CHECK-PWR8-NEXT:    mfvsrd r0, v3
 ; CHECK-PWR8-NEXT:    xor r6, r6, r30
 ; CHECK-PWR8-NEXT:    sub r6, r6, r30
-; CHECK-PWR8-NEXT:    xor r3, r3, r10
-; CHECK-PWR8-NEXT:    sub r10, r3, r10
-; CHECK-PWR8-NEXT:    sub r3, r25, r28
-; CHECK-PWR8-NEXT:    mtvsrd v6, r6
-; CHECK-PWR8-NEXT:    mtvsrd v7, r3
-; CHECK-PWR8-NEXT:    srawi r12, r5, 31
-; CHECK-PWR8-NEXT:    srawi r11, r8, 31
-; CHECK-PWR8-NEXT:    xor r5, r5, r12
-; CHECK-PWR8-NEXT:    xor r8, r8, r11
-; CHECK-PWR8-NEXT:    sub r5, r5, r12
-; CHECK-PWR8-NEXT:    sub r8, r8, r11
-; CHECK-PWR8-NEXT:    mfvsrd r11, v2
-; CHECK-PWR8-NEXT:    mfvsrd r12, v3
-; CHECK-PWR8-NEXT:    mtvsrd v8, r8
-; CHECK-PWR8-NEXT:    clrldi r0, r11, 56
-; CHECK-PWR8-NEXT:    clrldi r30, r12, 56
-; CHECK-PWR8-NEXT:    rldicl r29, r12, 56, 56
-; CHECK-PWR8-NEXT:    rldicl r28, r12, 48, 56
-; CHECK-PWR8-NEXT:    rldicl r27, r12, 40, 56
-; CHECK-PWR8-NEXT:    rldicl r26, r12, 32, 56
-; CHECK-PWR8-NEXT:    rldicl r25, r12, 24, 56
-; CHECK-PWR8-NEXT:    rldicl r24, r12, 16, 56
-; CHECK-PWR8-NEXT:    rldicl r12, r12, 8, 56
-; CHECK-PWR8-NEXT:    clrlwi r0, r0, 24
+; CHECK-PWR8-NEXT:    xor r7, r7, r29
+; CHECK-PWR8-NEXT:    sub r7, r7, r29
+; CHECK-PWR8-NEXT:    xor r8, r8, r28
+; CHECK-PWR8-NEXT:    sub r8, r8, r28
+; CHECK-PWR8-NEXT:    clrlwi r9, r9, 24
+; CHECK-PWR8-NEXT:    sub r9, r27, r9
+; CHECK-PWR8-NEXT:    clrlwi r11, r11, 24
+; CHECK-PWR8-NEXT:    sub r10, r10, r11
+; CHECK-PWR8-NEXT:    slwi r5, r5, 16
+; CHECK-PWR8-NEXT:    slwi r6, r6, 24
+; CHECK-PWR8-NEXT:    slwi r8, r8, 8
+; CHECK-PWR8-NEXT:    or r5, r6, r5
+; CHECK-PWR8-NEXT:    slwi r4, r4, 8
+; CHECK-PWR8-NEXT:    clrldi r30, r0, 56
+; CHECK-PWR8-NEXT:    rldicl r29, r0, 56, 56
+; CHECK-PWR8-NEXT:    rldicl r28, r0, 48, 56
+; CHECK-PWR8-NEXT:    rldicl r27, r0, 40, 56
+; CHECK-PWR8-NEXT:    rldicl r26, r0, 32, 56
+; CHECK-PWR8-NEXT:    rldicl r25, r0, 24, 56
+; CHECK-PWR8-NEXT:    rldicl r24, r0, 16, 56
+; CHECK-PWR8-NEXT:    rldicl r0, r0, 8, 56
+; CHECK-PWR8-NEXT:    srawi r12, r9, 31
+; CHECK-PWR8-NEXT:    srawi r11, r10, 31
+; CHECK-PWR8-NEXT:    or r4, r5, r4
 ; CHECK-PWR8-NEXT:    clrlwi r30, r30, 24
 ; CHECK-PWR8-NEXT:    clrlwi r29, r29, 24
 ; CHECK-PWR8-NEXT:    clrlwi r28, r28, 24
@@ -748,278 +732,253 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR8-NEXT:    clrlwi r26, r26, 24
 ; CHECK-PWR8-NEXT:    clrlwi r25, r25, 24
 ; CHECK-PWR8-NEXT:    clrlwi r24, r24, 24
-; CHECK-PWR8-NEXT:    clrlwi r12, r12, 24
-; CHECK-PWR8-NEXT:    sub r0, r0, r30
-; CHECK-PWR8-NEXT:    srawi r30, r0, 31
-; CHECK-PWR8-NEXT:    xor r0, r0, r30
-; CHECK-PWR8-NEXT:    sub r0, r0, r30
-; CHECK-PWR8-NEXT:    rldicl r30, r11, 56, 56
+; CHECK-PWR8-NEXT:    clrlwi r0, r0, 24
+; CHECK-PWR8-NEXT:    xor r9, r9, r12
+; CHECK-PWR8-NEXT:    sub r9, r9, r12
+; CHECK-PWR8-NEXT:    mfvsrd r12, v2
+; CHECK-PWR8-NEXT:    xor r10, r10, r11
+; CHECK-PWR8-NEXT:    sub r10, r10, r11
+; CHECK-PWR8-NEXT:    or r3, r4, r3
+; CHECK-PWR8-NEXT:    slwi r9, r9, 16
+; CHECK-PWR8-NEXT:    slwi r10, r10, 24
+; CHECK-PWR8-NEXT:    or r9, r10, r9
+; CHECK-PWR8-NEXT:    clrldi r11, r12, 56
+; CHECK-PWR8-NEXT:    or r8, r9, r8
+; CHECK-PWR8-NEXT:    clrlwi r11, r11, 24
+; CHECK-PWR8-NEXT:    sub r11, r11, r30
+; CHECK-PWR8-NEXT:    or r7, r8, r7
+; CHECK-PWR8-NEXT:    rldimi r3, r7, 32, 0
+; CHECK-PWR8-NEXT:    mtfprd f1, r3
+; CHECK-PWR8-NEXT:    srawi r30, r11, 31
+; CHECK-PWR8-NEXT:    xor r11, r11, r30
+; CHECK-PWR8-NEXT:    sub r11, r11, r30
+; CHECK-PWR8-NEXT:    rldicl r30, r12, 56, 56
 ; CHECK-PWR8-NEXT:    clrlwi r30, r30, 24
-; CHECK-PWR8-NEXT:    mtvsrd v2, r0
 ; CHECK-PWR8-NEXT:    sub r30, r30, r29
 ; CHECK-PWR8-NEXT:    srawi r29, r30, 31
 ; CHECK-PWR8-NEXT:    xor r30, r30, r29
 ; CHECK-PWR8-NEXT:    sub r30, r30, r29
-; CHECK-PWR8-NEXT:    rldicl r29, r11, 48, 56
+; CHECK-PWR8-NEXT:    rldicl r29, r12, 48, 56
 ; CHECK-PWR8-NEXT:    clrlwi r29, r29, 24
-; CHECK-PWR8-NEXT:    mtvsrd v3, r30
-; CHECK-PWR8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-PWR8-NEXT:    slwi r30, r30, 8
 ; CHECK-PWR8-NEXT:    sub r29, r29, r28
 ; CHECK-PWR8-NEXT:    srawi r28, r29, 31
 ; CHECK-PWR8-NEXT:    xor r29, r29, r28
 ; CHECK-PWR8-NEXT:    sub r29, r29, r28
-; CHECK-PWR8-NEXT:    rldicl r28, r11, 40, 56
+; CHECK-PWR8-NEXT:    rldicl r28, r12, 40, 56
 ; CHECK-PWR8-NEXT:    clrlwi r28, r28, 24
 ; CHECK-PWR8-NEXT:    sub r28, r28, r27
 ; CHECK-PWR8-NEXT:    srawi r27, r28, 31
 ; CHECK-PWR8-NEXT:    xor r28, r28, r27
 ; CHECK-PWR8-NEXT:    sub r28, r28, r27
-; CHECK-PWR8-NEXT:    rldicl r27, r11, 32, 56
+; CHECK-PWR8-NEXT:    rldicl r27, r12, 32, 56
 ; CHECK-PWR8-NEXT:    clrlwi r27, r27, 24
-; CHECK-PWR8-NEXT:    mtvsrd v4, r28
-; CHECK-PWR8-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
 ; CHECK-PWR8-NEXT:    sub r27, r27, r26
 ; CHECK-PWR8-NEXT:    srawi r26, r27, 31
 ; CHECK-PWR8-NEXT:    xor r27, r27, r26
 ; CHECK-PWR8-NEXT:    sub r27, r27, r26
-; CHECK-PWR8-NEXT:    rldicl r26, r11, 24, 56
+; CHECK-PWR8-NEXT:    rldicl r26, r12, 24, 56
 ; CHECK-PWR8-NEXT:    clrlwi r26, r26, 24
 ; CHECK-PWR8-NEXT:    sub r26, r26, r25
 ; CHECK-PWR8-NEXT:    srawi r25, r26, 31
 ; CHECK-PWR8-NEXT:    xor r26, r26, r25
 ; CHECK-PWR8-NEXT:    sub r26, r26, r25
-; CHECK-PWR8-NEXT:    rldicl r25, r11, 16, 56
-; CHECK-PWR8-NEXT:    rldicl r11, r11, 8, 56
+; CHECK-PWR8-NEXT:    rldicl r25, r12, 16, 56
+; CHECK-PWR8-NEXT:    rldicl r12, r12, 8, 56
 ; CHECK-PWR8-NEXT:    clrlwi r25, r25, 24
-; CHECK-PWR8-NEXT:    clrlwi r11, r11, 24
-; CHECK-PWR8-NEXT:    mtvsrd v5, r26
-; CHECK-PWR8-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-PWR8-NEXT:    clrlwi r12, r12, 24
 ; CHECK-PWR8-NEXT:    sub r25, r25, r24
-; CHECK-PWR8-NEXT:    sub r11, r11, r12
+; CHECK-PWR8-NEXT:    sub r12, r12, r0
 ; CHECK-PWR8-NEXT:    srawi r24, r25, 31
-; CHECK-PWR8-NEXT:    srawi r12, r11, 31
+; CHECK-PWR8-NEXT:    srawi r0, r12, 31
 ; CHECK-PWR8-NEXT:    xor r25, r25, r24
-; CHECK-PWR8-NEXT:    xor r11, r11, r12
+; CHECK-PWR8-NEXT:    xor r12, r12, r0
 ; CHECK-PWR8-NEXT:    sub r25, r25, r24
-; CHECK-PWR8-NEXT:    sub r11, r11, r12
+; CHECK-PWR8-NEXT:    sub r12, r12, r0
 ; CHECK-PWR8-NEXT:    ld r24, -64(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    mtvsrd v0, r11
-; CHECK-PWR8-NEXT:    vmrghb v2, v3, v2
-; CHECK-PWR8-NEXT:    mtvsrd v3, r29
+; CHECK-PWR8-NEXT:    slwi r0, r25, 16
+; CHECK-PWR8-NEXT:    slwi r12, r12, 24
+; CHECK-PWR8-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
+; CHECK-PWR8-NEXT:    or r12, r12, r0
+; CHECK-PWR8-NEXT:    slwi r0, r26, 8
+; CHECK-PWR8-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-PWR8-NEXT:    or r12, r12, r0
+; CHECK-PWR8-NEXT:    slwi r0, r29, 16
+; CHECK-PWR8-NEXT:    slwi r29, r28, 24
+; CHECK-PWR8-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-PWR8-NEXT:    or r0, r29, r0
+; CHECK-PWR8-NEXT:    or r12, r12, r27
 ; CHECK-PWR8-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    vmrghb v3, v4, v3
-; CHECK-PWR8-NEXT:    mtvsrd v4, r27
 ; CHECK-PWR8-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    vmrglh v2, v3, v2
-; CHECK-PWR8-NEXT:    vmrghb v4, v5, v4
-; CHECK-PWR8-NEXT:    mtvsrd v5, r25
-; CHECK-PWR8-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    vmrghb v5, v0, v5
-; CHECK-PWR8-NEXT:    mtvsrd v0, r10
-; CHECK-PWR8-NEXT:    vmrglh v3, v5, v4
-; CHECK-PWR8-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-PWR8-NEXT:    vmrghb v0, v1, v0
-; CHECK-PWR8-NEXT:    mtvsrd v1, r7
-; CHECK-PWR8-NEXT:    vmrghb v1, v6, v1
-; CHECK-PWR8-NEXT:    mtvsrd v6, r4
-; CHECK-PWR8-NEXT:    vmrglh v4, v1, v0
-; CHECK-PWR8-NEXT:    vmrghb v6, v7, v6
-; CHECK-PWR8-NEXT:    mtvsrd v7, r5
-; CHECK-PWR8-NEXT:    vmrghb v7, v8, v7
-; CHECK-PWR8-NEXT:    vmrglh v5, v7, v6
-; CHECK-PWR8-NEXT:    xxmrglw vs1, v5, v4
-; CHECK-PWR8-NEXT:    xxmrgld v2, vs0, vs1
+; CHECK-PWR8-NEXT:    or r0, r0, r30
+; CHECK-PWR8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-PWR8-NEXT:    or r11, r0, r11
+; CHECK-PWR8-NEXT:    rldimi r11, r12, 32, 0
+; CHECK-PWR8-NEXT:    mtfprd f0, r11
+; CHECK-PWR8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-PWR8-NEXT:    blr
 ;
 ; CHECK-PWR7-LABEL: sub_absv_8_ext:
 ; CHECK-PWR7:       # %bb.0: # %entry
-; CHECK-PWR7-NEXT:    stdu r1, -400(r1)
-; CHECK-PWR7-NEXT:    .cfi_def_cfa_offset 400
-; CHECK-PWR7-NEXT:    .cfi_offset r24, -64
-; CHECK-PWR7-NEXT:    .cfi_offset r25, -56
-; CHECK-PWR7-NEXT:    .cfi_offset r26, -48
-; CHECK-PWR7-NEXT:    .cfi_offset r27, -40
-; CHECK-PWR7-NEXT:    .cfi_offset r28, -32
-; CHECK-PWR7-NEXT:    .cfi_offset r29, -24
-; CHECK-PWR7-NEXT:    .cfi_offset r30, -16
-; CHECK-PWR7-NEXT:    addi r3, r1, 304
-; CHECK-PWR7-NEXT:    std r24, 336(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r25, 344(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r26, 352(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r27, 360(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r28, 368(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r29, 376(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r30, 384(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    addi r3, r1, -128
+; CHECK-PWR7-NEXT:    std r21, -88(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r22, -80(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r23, -72(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r24, -64(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-PWR7-NEXT:    stxvw4x v2, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 320
-; CHECK-PWR7-NEXT:    lbz r4, 304(r1)
+; CHECK-PWR7-NEXT:    addi r3, r1, -112
+; CHECK-PWR7-NEXT:    lbz r4, -128(r1)
 ; CHECK-PWR7-NEXT:    stxvw4x v3, 0, r3
-; CHECK-PWR7-NEXT:    lbz r5, 305(r1)
-; CHECK-PWR7-NEXT:    lbz r6, 321(r1)
-; CHECK-PWR7-NEXT:    lbz r7, 306(r1)
-; CHECK-PWR7-NEXT:    lbz r8, 322(r1)
-; CHECK-PWR7-NEXT:    lbz r9, 307(r1)
-; CHECK-PWR7-NEXT:    lbz r10, 323(r1)
-; CHECK-PWR7-NEXT:    lbz r0, 309(r1)
-; CHECK-PWR7-NEXT:    lbz r30, 325(r1)
-; CHECK-PWR7-NEXT:    lbz r29, 310(r1)
-; CHECK-PWR7-NEXT:    lbz r28, 326(r1)
-; CHECK-PWR7-NEXT:    lbz r11, 308(r1)
-; CHECK-PWR7-NEXT:    lbz r12, 324(r1)
-; CHECK-PWR7-NEXT:    lbz r27, 311(r1)
-; CHECK-PWR7-NEXT:    lbz r26, 327(r1)
-; CHECK-PWR7-NEXT:    lbz r25, 312(r1)
-; CHECK-PWR7-NEXT:    sub r5, r5, r6
-; CHECK-PWR7-NEXT:    sub r6, r7, r8
-; CHECK-PWR7-NEXT:    sub r7, r9, r10
-; CHECK-PWR7-NEXT:    sub r9, r0, r30
-; CHECK-PWR7-NEXT:    sub r10, r29, r28
-; CHECK-PWR7-NEXT:    sub r8, r11, r12
-; CHECK-PWR7-NEXT:    srawi r0, r5, 31
-; CHECK-PWR7-NEXT:    srawi r30, r6, 31
-; CHECK-PWR7-NEXT:    srawi r29, r7, 31
-; CHECK-PWR7-NEXT:    srawi r28, r8, 31
-; CHECK-PWR7-NEXT:    sub r11, r27, r26
-; CHECK-PWR7-NEXT:    srawi r27, r9, 31
-; CHECK-PWR7-NEXT:    lbz r24, 328(r1)
-; CHECK-PWR7-NEXT:    xor r5, r5, r0
-; CHECK-PWR7-NEXT:    xor r6, r6, r30
-; CHECK-PWR7-NEXT:    xor r7, r7, r29
-; CHECK-PWR7-NEXT:    xor r8, r8, r28
-; CHECK-PWR7-NEXT:    xor r9, r9, r27
-; CHECK-PWR7-NEXT:    srawi r26, r10, 31
-; CHECK-PWR7-NEXT:    sub r5, r5, r0
-; CHECK-PWR7-NEXT:    sub r6, r6, r30
-; CHECK-PWR7-NEXT:    lbz r0, 313(r1)
-; CHECK-PWR7-NEXT:    lbz r30, 329(r1)
-; CHECK-PWR7-NEXT:    sub r7, r7, r29
-; CHECK-PWR7-NEXT:    lbz r29, 330(r1)
-; CHECK-PWR7-NEXT:    sub r8, r8, r28
-; CHECK-PWR7-NEXT:    lbz r28, 331(r1)
-; CHECK-PWR7-NEXT:    sub r9, r9, r27
-; CHECK-PWR7-NEXT:    lbz r27, 332(r1)
-; CHECK-PWR7-NEXT:    xor r10, r10, r26
-; CHECK-PWR7-NEXT:    sub r10, r10, r26
-; CHECK-PWR7-NEXT:    lbz r26, 333(r1)
-; CHECK-PWR7-NEXT:    sub r12, r25, r24
-; CHECK-PWR7-NEXT:    srawi r25, r11, 31
-; CHECK-PWR7-NEXT:    lbz r3, 320(r1)
-; CHECK-PWR7-NEXT:    sub r0, r0, r30
-; CHECK-PWR7-NEXT:    xor r11, r11, r25
-; CHECK-PWR7-NEXT:    sub r11, r11, r25
-; CHECK-PWR7-NEXT:    lbz r25, 334(r1)
-; CHECK-PWR7-NEXT:    sub r4, r4, r3
-; CHECK-PWR7-NEXT:    srawi r30, r0, 31
-; CHECK-PWR7-NEXT:    srawi r24, r12, 31
-; CHECK-PWR7-NEXT:    xor r12, r12, r24
-; CHECK-PWR7-NEXT:    sub r12, r12, r24
-; CHECK-PWR7-NEXT:    lbz r24, 335(r1)
-; CHECK-PWR7-NEXT:    srawi r3, r4, 31
-; CHECK-PWR7-NEXT:    xor r4, r4, r3
-; CHECK-PWR7-NEXT:    xor r0, r0, r30
+; CHECK-PWR7-NEXT:    lbz r3, -112(r1)
+; CHECK-PWR7-NEXT:    lbz r5, -127(r1)
+; CHECK-PWR7-NEXT:    lbz r6, -111(r1)
+; CHECK-PWR7-NEXT:    lbz r7, -126(r1)
+; CHECK-PWR7-NEXT:    lbz r8, -110(r1)
+; CHECK-PWR7-NEXT:    lbz r11, -124(r1)
+; CHECK-PWR7-NEXT:    lbz r12, -108(r1)
+; CHECK-PWR7-NEXT:    lbz r0, -123(r1)
+; CHECK-PWR7-NEXT:    lbz r30, -107(r1)
+; CHECK-PWR7-NEXT:    lbz r9, -125(r1)
+; CHECK-PWR7-NEXT:    lbz r10, -109(r1)
+; CHECK-PWR7-NEXT:    lbz r29, -122(r1)
+; CHECK-PWR7-NEXT:    lbz r28, -106(r1)
 ; CHECK-PWR7-NEXT:    sub r3, r4, r3
-; CHECK-PWR7-NEXT:    stb r3, 48(r1)
-; CHECK-PWR7-NEXT:    addi r3, r1, 288
-; CHECK-PWR7-NEXT:    stb r12, 176(r1)
-; CHECK-PWR7-NEXT:    sub r0, r0, r30
-; CHECK-PWR7-NEXT:    lbz r30, 314(r1)
-; CHECK-PWR7-NEXT:    stb r11, 160(r1)
-; CHECK-PWR7-NEXT:    sub r30, r30, r29
-; CHECK-PWR7-NEXT:    stb r0, 192(r1)
-; CHECK-PWR7-NEXT:    stb r10, 144(r1)
-; CHECK-PWR7-NEXT:    stb r9, 128(r1)
-; CHECK-PWR7-NEXT:    stb r8, 112(r1)
-; CHECK-PWR7-NEXT:    stb r7, 96(r1)
-; CHECK-PWR7-NEXT:    stb r6, 80(r1)
-; CHECK-PWR7-NEXT:    srawi r29, r30, 31
-; CHECK-PWR7-NEXT:    stb r5, 64(r1)
-; CHECK-PWR7-NEXT:    xor r30, r30, r29
-; CHECK-PWR7-NEXT:    sub r30, r30, r29
-; CHECK-PWR7-NEXT:    lbz r29, 315(r1)
+; CHECK-PWR7-NEXT:    sub r4, r5, r6
+; CHECK-PWR7-NEXT:    sub r5, r7, r8
+; CHECK-PWR7-NEXT:    sub r7, r11, r12
+; CHECK-PWR7-NEXT:    sub r8, r0, r30
+; CHECK-PWR7-NEXT:    srawi r12, r3, 31
+; CHECK-PWR7-NEXT:    srawi r0, r4, 31
+; CHECK-PWR7-NEXT:    sub r6, r9, r10
+; CHECK-PWR7-NEXT:    sub r9, r29, r28
+; CHECK-PWR7-NEXT:    srawi r29, r6, 31
+; CHECK-PWR7-NEXT:    xor r3, r3, r12
+; CHECK-PWR7-NEXT:    xor r23, r4, r0
+; CHECK-PWR7-NEXT:    xor r22, r6, r29
+; CHECK-PWR7-NEXT:    srawi r30, r5, 31
+; CHECK-PWR7-NEXT:    xor r5, r5, r30
+; CHECK-PWR7-NEXT:    sub r4, r3, r12
+; CHECK-PWR7-NEXT:    sub r6, r23, r0
+; CHECK-PWR7-NEXT:    lbz r12, -119(r1)
+; CHECK-PWR7-NEXT:    lbz r0, -103(r1)
+; CHECK-PWR7-NEXT:    sub r5, r5, r30
+; CHECK-PWR7-NEXT:    lbz r30, -102(r1)
+; CHECK-PWR7-NEXT:    lbz r27, -121(r1)
+; CHECK-PWR7-NEXT:    lbz r26, -105(r1)
+; CHECK-PWR7-NEXT:    lbz r25, -120(r1)
+; CHECK-PWR7-NEXT:    lbz r24, -104(r1)
+; CHECK-PWR7-NEXT:    sub r10, r27, r26
+; CHECK-PWR7-NEXT:    sub r11, r25, r24
+; CHECK-PWR7-NEXT:    srawi r28, r7, 31
+; CHECK-PWR7-NEXT:    srawi r27, r8, 31
+; CHECK-PWR7-NEXT:    srawi r25, r10, 31
+; CHECK-PWR7-NEXT:    sub r12, r12, r0
+; CHECK-PWR7-NEXT:    xor r7, r7, r28
+; CHECK-PWR7-NEXT:    xor r8, r8, r27
+; CHECK-PWR7-NEXT:    sub r3, r22, r29
+; CHECK-PWR7-NEXT:    srawi r0, r12, 31
+; CHECK-PWR7-NEXT:    xor r21, r10, r25
+; CHECK-PWR7-NEXT:    sub r7, r7, r28
+; CHECK-PWR7-NEXT:    sub r10, r8, r27
+; CHECK-PWR7-NEXT:    lbz r29, -117(r1)
+; CHECK-PWR7-NEXT:    lbz r28, -101(r1)
+; CHECK-PWR7-NEXT:    lbz r27, -100(r1)
 ; CHECK-PWR7-NEXT:    sub r29, r29, r28
-; CHECK-PWR7-NEXT:    stb r30, 208(r1)
-; CHECK-PWR7-NEXT:    ld r30, 384(r1) # 8-byte Folded Reload
 ; CHECK-PWR7-NEXT:    srawi r28, r29, 31
+; CHECK-PWR7-NEXT:    xor r12, r12, r0
 ; CHECK-PWR7-NEXT:    xor r29, r29, r28
+; CHECK-PWR7-NEXT:    srawi r26, r9, 31
+; CHECK-PWR7-NEXT:    xor r9, r9, r26
+; CHECK-PWR7-NEXT:    srawi r24, r11, 31
+; CHECK-PWR7-NEXT:    sub r8, r9, r26
+; CHECK-PWR7-NEXT:    sub r9, r21, r25
+; CHECK-PWR7-NEXT:    sub r12, r12, r0
+; CHECK-PWR7-NEXT:    lbz r0, -118(r1)
 ; CHECK-PWR7-NEXT:    sub r29, r29, r28
-; CHECK-PWR7-NEXT:    lbz r28, 316(r1)
-; CHECK-PWR7-NEXT:    sub r28, r28, r27
-; CHECK-PWR7-NEXT:    stb r29, 224(r1)
-; CHECK-PWR7-NEXT:    ld r29, 376(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT:    srawi r27, r28, 31
-; CHECK-PWR7-NEXT:    xor r28, r28, r27
-; CHECK-PWR7-NEXT:    sub r28, r28, r27
-; CHECK-PWR7-NEXT:    lbz r27, 317(r1)
-; CHECK-PWR7-NEXT:    sub r27, r27, r26
-; CHECK-PWR7-NEXT:    stb r28, 240(r1)
-; CHECK-PWR7-NEXT:    ld r28, 368(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT:    srawi r26, r27, 31
-; CHECK-PWR7-NEXT:    xor r27, r27, r26
-; CHECK-PWR7-NEXT:    sub r27, r27, r26
-; CHECK-PWR7-NEXT:    lbz r26, 318(r1)
+; CHECK-PWR7-NEXT:    lbz r28, -115(r1)
+; CHECK-PWR7-NEXT:    lbz r26, -114(r1)
+; CHECK-PWR7-NEXT:    lbz r25, -98(r1)
+; CHECK-PWR7-NEXT:    xor r11, r11, r24
+; CHECK-PWR7-NEXT:    sub r11, r11, r24
 ; CHECK-PWR7-NEXT:    sub r26, r26, r25
-; CHECK-PWR7-NEXT:    stb r27, 256(r1)
-; CHECK-PWR7-NEXT:    ld r27, 360(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    lbz r24, -97(r1)
+; CHECK-PWR7-NEXT:    ld r23, -72(r1) # 8-byte Folded Reload
 ; CHECK-PWR7-NEXT:    srawi r25, r26, 31
 ; CHECK-PWR7-NEXT:    xor r26, r26, r25
+; CHECK-PWR7-NEXT:    slwi r12, r12, 16
+; CHECK-PWR7-NEXT:    sub r0, r0, r30
+; CHECK-PWR7-NEXT:    slwi r11, r11, 24
+; CHECK-PWR7-NEXT:    slwi r6, r6, 16
 ; CHECK-PWR7-NEXT:    sub r26, r26, r25
-; CHECK-PWR7-NEXT:    lbz r25, 319(r1)
-; CHECK-PWR7-NEXT:    sub r25, r25, r24
-; CHECK-PWR7-NEXT:    stb r26, 272(r1)
-; CHECK-PWR7-NEXT:    ld r26, 352(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT:    srawi r24, r25, 31
-; CHECK-PWR7-NEXT:    xor r25, r25, r24
-; CHECK-PWR7-NEXT:    sub r25, r25, r24
-; CHECK-PWR7-NEXT:    ld r24, 336(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT:    stb r25, 288(r1)
-; CHECK-PWR7-NEXT:    ld r25, 344(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT:    lxvw4x v2, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 272
-; CHECK-PWR7-NEXT:    lxvw4x v3, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 256
-; CHECK-PWR7-NEXT:    vmrghb v2, v3, v2
-; CHECK-PWR7-NEXT:    lxvw4x v3, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 240
-; CHECK-PWR7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 224
-; CHECK-PWR7-NEXT:    vmrghb v3, v4, v3
-; CHECK-PWR7-NEXT:    vmrghh v2, v3, v2
-; CHECK-PWR7-NEXT:    lxvw4x v3, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 208
-; CHECK-PWR7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 192
-; CHECK-PWR7-NEXT:    vmrghb v3, v4, v3
-; CHECK-PWR7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 176
-; CHECK-PWR7-NEXT:    lxvw4x v5, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 160
-; CHECK-PWR7-NEXT:    vmrghb v4, v5, v4
-; CHECK-PWR7-NEXT:    vmrghh v3, v4, v3
-; CHECK-PWR7-NEXT:    xxmrghw vs0, v3, v2
-; CHECK-PWR7-NEXT:    lxvw4x v2, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 144
-; CHECK-PWR7-NEXT:    lxvw4x v3, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 128
-; CHECK-PWR7-NEXT:    vmrghb v2, v3, v2
-; CHECK-PWR7-NEXT:    lxvw4x v3, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 112
-; CHECK-PWR7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 96
-; CHECK-PWR7-NEXT:    vmrghb v3, v4, v3
-; CHECK-PWR7-NEXT:    vmrghh v2, v3, v2
-; CHECK-PWR7-NEXT:    lxvw4x v3, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 80
-; CHECK-PWR7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 64
-; CHECK-PWR7-NEXT:    vmrghb v3, v4, v3
-; CHECK-PWR7-NEXT:    lxvw4x v4, 0, r3
-; CHECK-PWR7-NEXT:    addi r3, r1, 48
-; CHECK-PWR7-NEXT:    lxvw4x v5, 0, r3
-; CHECK-PWR7-NEXT:    vmrghb v4, v5, v4
-; CHECK-PWR7-NEXT:    vmrghh v3, v4, v3
-; CHECK-PWR7-NEXT:    xxmrghw vs1, v3, v2
+; CHECK-PWR7-NEXT:    srawi r30, r0, 31
+; CHECK-PWR7-NEXT:    slwi r4, r4, 24
+; CHECK-PWR7-NEXT:    slwi r10, r10, 16
+; CHECK-PWR7-NEXT:    slwi r7, r7, 24
+; CHECK-PWR7-NEXT:    slwi r5, r5, 8
+; CHECK-PWR7-NEXT:    or r11, r11, r12
+; CHECK-PWR7-NEXT:    or r4, r4, r6
+; CHECK-PWR7-NEXT:    xor r0, r0, r30
+; CHECK-PWR7-NEXT:    slwi r8, r8, 8
+; CHECK-PWR7-NEXT:    or r7, r7, r10
+; CHECK-PWR7-NEXT:    or r4, r4, r5
+; CHECK-PWR7-NEXT:    or r7, r7, r8
+; CHECK-PWR7-NEXT:    or r3, r4, r3
+; CHECK-PWR7-NEXT:    or r7, r7, r9
+; CHECK-PWR7-NEXT:    stw r3, -192(r1)
+; CHECK-PWR7-NEXT:    sub r0, r0, r30
+; CHECK-PWR7-NEXT:    lbz r30, -116(r1)
+; CHECK-PWR7-NEXT:    addi r3, r1, -144
+; CHECK-PWR7-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    ld r22, -80(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    slwi r12, r0, 8
+; CHECK-PWR7-NEXT:    stw r7, -176(r1)
+; CHECK-PWR7-NEXT:    ld r21, -88(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    sub r30, r30, r27
+; CHECK-PWR7-NEXT:    or r11, r11, r12
+; CHECK-PWR7-NEXT:    srawi r27, r30, 31
+; CHECK-PWR7-NEXT:    or r11, r11, r29
+; CHECK-PWR7-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    stw r11, -160(r1)
+; CHECK-PWR7-NEXT:    xor r30, r30, r27
+; CHECK-PWR7-NEXT:    sub r30, r30, r27
+; CHECK-PWR7-NEXT:    lbz r27, -99(r1)
+; CHECK-PWR7-NEXT:    slwi r30, r30, 24
+; CHECK-PWR7-NEXT:    sub r28, r28, r27
+; CHECK-PWR7-NEXT:    srawi r27, r28, 31
+; CHECK-PWR7-NEXT:    xor r28, r28, r27
+; CHECK-PWR7-NEXT:    sub r28, r28, r27
+; CHECK-PWR7-NEXT:    lbz r27, -113(r1)
+; CHECK-PWR7-NEXT:    slwi r28, r28, 16
+; CHECK-PWR7-NEXT:    sub r27, r27, r24
+; CHECK-PWR7-NEXT:    or r30, r30, r28
+; CHECK-PWR7-NEXT:    slwi r28, r26, 8
+; CHECK-PWR7-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    or r30, r30, r28
+; CHECK-PWR7-NEXT:    srawi r24, r27, 31
+; CHECK-PWR7-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    xor r27, r27, r24
+; CHECK-PWR7-NEXT:    sub r27, r27, r24
+; CHECK-PWR7-NEXT:    ld r24, -64(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    or r30, r30, r27
+; CHECK-PWR7-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    stw r30, -144(r1)
+; CHECK-PWR7-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    lxvw4x vs0, 0, r3
+; CHECK-PWR7-NEXT:    addi r3, r1, -160
+; CHECK-PWR7-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-PWR7-NEXT:    addi r3, r1, -176
+; CHECK-PWR7-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-PWR7-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-PWR7-NEXT:    addi r3, r1, -192
+; CHECK-PWR7-NEXT:    lxvw4x vs2, 0, r3
+; CHECK-PWR7-NEXT:    xxmrghw vs1, vs2, vs1
 ; CHECK-PWR7-NEXT:    xxmrghd v2, vs1, vs0
-; CHECK-PWR7-NEXT:    addi r1, r1, 400
 ; CHECK-PWR7-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %a, i32 0
diff --git a/llvm/test/CodeGen/PowerPC/pr25080.ll b/llvm/test/CodeGen/PowerPC/pr25080.ll
index c696f0b6bd3f22b..9de8c558be0eff0 100644
--- a/llvm/test/CodeGen/PowerPC/pr25080.ll
+++ b/llvm/test/CodeGen/PowerPC/pr25080.ll
@@ -13,40 +13,38 @@ define <8 x i16> @pr25080(<8 x i32> %a) {
 ; LE-NEXT:    xxland 35, 35, 0
 ; LE-NEXT:    vcmpequw 2, 2, 4
 ; LE-NEXT:    vcmpequw 3, 3, 4
-; LE-NEXT:    xxswapd 1, 34
-; LE-NEXT:    xxsldwi 2, 34, 34, 1
-; LE-NEXT:    xxsldwi 3, 34, 34, 3
-; LE-NEXT:    xxswapd 0, 35
-; LE-NEXT:    xxsldwi 4, 35, 35, 1
-; LE-NEXT:    xxsldwi 5, 35, 35, 3
-; LE-NEXT:    mffprwz 3, 1
-; LE-NEXT:    mtvsrd 36, 3
-; LE-NEXT:    mffprwz 3, 2
-; LE-NEXT:    mtvsrd 37, 3
 ; LE-NEXT:    mfvsrwz 3, 34
-; LE-NEXT:    mtvsrd 34, 3
-; LE-NEXT:    mffprwz 3, 3
-; LE-NEXT:    vmrghh 4, 5, 4
-; LE-NEXT:    mtvsrd 37, 3
-; LE-NEXT:    mffprwz 3, 0
-; LE-NEXT:    vmrghh 2, 5, 2
-; LE-NEXT:    mtvsrd 37, 3
-; LE-NEXT:    mffprwz 3, 4
-; LE-NEXT:    mtvsrd 32, 3
-; LE-NEXT:    mfvsrwz 3, 35
-; LE-NEXT:    mtvsrd 35, 3
-; LE-NEXT:    mffprwz 3, 5
-; LE-NEXT:    xxmrglw 0, 34, 36
-; LE-NEXT:    vmrghh 5, 0, 5
-; LE-NEXT:    mtvsrd 32, 3
+; LE-NEXT:    xxsldwi 0, 34, 34, 3
+; LE-NEXT:    xxsldwi 1, 34, 34, 1
+; LE-NEXT:    xxswapd 4, 34
+; LE-NEXT:    xxsldwi 2, 35, 35, 3
+; LE-NEXT:    xxsldwi 3, 35, 35, 1
+; LE-NEXT:    xxswapd 5, 35
+; LE-NEXT:    mffprwz 4, 0
+; LE-NEXT:    mffprwz 6, 1
+; LE-NEXT:    mffprwz 5, 4
+; LE-NEXT:    mffprwz 7, 5
+; LE-NEXT:    slwi 4, 4, 16
+; LE-NEXT:    or 3, 4, 3
+; LE-NEXT:    slwi 4, 6, 16
+; LE-NEXT:    mffprwz 6, 2
+; LE-NEXT:    or 4, 4, 5
+; LE-NEXT:    mfvsrwz 5, 35
+; LE-NEXT:    rldimi 4, 3, 32, 0
 ; LE-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
+; LE-NEXT:    slwi 6, 6, 16
+; LE-NEXT:    mtfprd 0, 4
 ; LE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
-; LE-NEXT:    vmrghh 3, 0, 3
-; LE-NEXT:    xxmrglw 1, 35, 37
-; LE-NEXT:    xxmrgld 34, 1, 0
-; LE-NEXT:    lxvd2x 0, 0, 3
-; LE-NEXT:    xxswapd 35, 0
-; LE-NEXT:    xxlor 34, 34, 35
+; LE-NEXT:    or 5, 6, 5
+; LE-NEXT:    mffprwz 6, 3
+; LE-NEXT:    slwi 6, 6, 16
+; LE-NEXT:    or 6, 6, 7
+; LE-NEXT:    rldimi 6, 5, 32, 0
+; LE-NEXT:    mtfprd 1, 6
+; LE-NEXT:    xxmrghd 0, 1, 0
+; LE-NEXT:    lxvd2x 1, 0, 3
+; LE-NEXT:    xxswapd 34, 1
+; LE-NEXT:    xxlor 34, 0, 34
 ; LE-NEXT:    blr
 ;
 ; BE-LABEL: pr25080:
@@ -55,46 +53,41 @@ define <8 x i16> @pr25080(<8 x i32> %a) {
 ; BE-NEXT:    xxlxor 36, 36, 36
 ; BE-NEXT:    addi 3, 3, .LCPI0_0@toc@l
 ; BE-NEXT:    lxvw4x 0, 0, 3
-; BE-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
-; BE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
 ; BE-NEXT:    xxland 35, 35, 0
 ; BE-NEXT:    xxland 34, 34, 0
 ; BE-NEXT:    vcmpequw 3, 3, 4
 ; BE-NEXT:    vcmpequw 2, 2, 4
-; BE-NEXT:    lxvw4x 36, 0, 3
-; BE-NEXT:    xxswapd 1, 35
-; BE-NEXT:    xxsldwi 2, 35, 35, 1
-; BE-NEXT:    xxsldwi 3, 35, 35, 3
-; BE-NEXT:    xxswapd 0, 34
-; BE-NEXT:    xxsldwi 4, 34, 34, 1
-; BE-NEXT:    xxsldwi 5, 34, 34, 3
-; BE-NEXT:    mffprwz 3, 1
-; BE-NEXT:    mtvsrwz 37, 3
-; BE-NEXT:    mffprwz 3, 2
-; BE-NEXT:    mtvsrwz 32, 3
 ; BE-NEXT:    mfvsrwz 3, 35
-; BE-NEXT:    mtvsrwz 35, 3
-; BE-NEXT:    mffprwz 3, 3
-; BE-NEXT:    vperm 5, 0, 5, 4
-; BE-NEXT:    mtvsrwz 32, 3
-; BE-NEXT:    mffprwz 3, 0
-; BE-NEXT:    vperm 3, 0, 3, 4
-; BE-NEXT:    mtvsrwz 32, 3
-; BE-NEXT:    mffprwz 3, 4
-; BE-NEXT:    mtvsrwz 33, 3
+; BE-NEXT:    xxsldwi 0, 35, 35, 3
+; BE-NEXT:    xxsldwi 1, 35, 35, 1
+; BE-NEXT:    xxswapd 4, 35
+; BE-NEXT:    xxsldwi 2, 34, 34, 3
+; BE-NEXT:    xxsldwi 3, 34, 34, 1
+; BE-NEXT:    xxswapd 5, 34
+; BE-NEXT:    mffprwz 4, 0
+; BE-NEXT:    mffprwz 5, 4
+; BE-NEXT:    slwi 4, 4, 16
+; BE-NEXT:    or 3, 4, 3
+; BE-NEXT:    mffprwz 4, 1
+; BE-NEXT:    slwi 4, 4, 16
+; BE-NEXT:    or 4, 4, 5
+; BE-NEXT:    mffprwz 5, 3
+; BE-NEXT:    rldimi 4, 3, 32, 0
 ; BE-NEXT:    mfvsrwz 3, 34
-; BE-NEXT:    mtvsrwz 34, 3
-; BE-NEXT:    mffprwz 3, 5
-; BE-NEXT:    xxmrghw 0, 35, 37
-; BE-NEXT:    vperm 0, 1, 0, 4
-; BE-NEXT:    mtvsrwz 33, 3
-; BE-NEXT:    addis 3, 2, .LCPI0_2@toc@ha
-; BE-NEXT:    addi 3, 3, .LCPI0_2@toc@l
-; BE-NEXT:    vperm 2, 1, 2, 4
-; BE-NEXT:    xxmrghw 1, 34, 32
-; BE-NEXT:    xxmrghd 34, 1, 0
-; BE-NEXT:    lxvw4x 0, 0, 3
-; BE-NEXT:    xxlor 34, 34, 0
+; BE-NEXT:    mtfprd 0, 4
+; BE-NEXT:    mffprwz 4, 2
+; BE-NEXT:    slwi 5, 5, 16
+; BE-NEXT:    slwi 4, 4, 16
+; BE-NEXT:    or 3, 4, 3
+; BE-NEXT:    mffprwz 4, 5
+; BE-NEXT:    or 4, 5, 4
+; BE-NEXT:    rldimi 4, 3, 32, 0
+; BE-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
+; BE-NEXT:    mtfprd 1, 4
+; BE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
+; BE-NEXT:    xxmrghd 0, 1, 0
+; BE-NEXT:    lxvw4x 1, 0, 3
+; BE-NEXT:    xxlor 34, 0, 1
 ; BE-NEXT:    blr
 entry:
   %0 = trunc <8 x i32> %a to <8 x i23>
diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index 4da36c9af5c101c..9770328b8568990 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -240,79 +240,18 @@ entry:
 define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) {
 ; P9LE-LABEL: test16:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    sldi 4, 4, 1
-; P9LE-NEXT:    li 7, 16
-; P9LE-NEXT:    add 6, 3, 4
-; P9LE-NEXT:    lxsihzx 4, 3, 4
-; P9LE-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
-; P9LE-NEXT:    lxsihzx 2, 6, 7
-; P9LE-NEXT:    li 6, 0
-; P9LE-NEXT:    addi 3, 3, .LCPI2_0@toc@l
-; P9LE-NEXT:    mtvsrd 3, 6
-; P9LE-NEXT:    lxv 0, 0(3)
-; P9LE-NEXT:    li 3, 0
-; P9LE-NEXT:    vmrghh 4, 3, 4
-; P9LE-NEXT:    vmrghh 2, 3, 2
-; P9LE-NEXT:    vsplth 3, 3, 3
-; P9LE-NEXT:    xxmrglw 3, 4, 3
-; P9LE-NEXT:    xxperm 3, 2, 0
-; P9LE-NEXT:    xxspltw 2, 3, 2
-; P9LE-NEXT:    vadduwm 2, 3, 2
-; P9LE-NEXT:    vextuwrx 3, 3, 2
 ; P9LE-NEXT:    cmpw 3, 5
 ; P9LE-NEXT:    bgelr+ 0
 ; P9LE-NEXT:  # %bb.1: # %if.then
 ;
 ; P9BE-LABEL: test16:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    sldi 4, 4, 1
-; P9BE-NEXT:    li 7, 16
-; P9BE-NEXT:    add 6, 3, 4
-; P9BE-NEXT:    lxsihzx 1, 3, 4
-; P9BE-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
-; P9BE-NEXT:    lxsihzx 2, 6, 7
-; P9BE-NEXT:    addis 6, 2, .LCPI2_0@toc@ha
-; P9BE-NEXT:    addi 3, 3, .LCPI2_1@toc@l
-; P9BE-NEXT:    addi 6, 6, .LCPI2_0@toc@l
-; P9BE-NEXT:    lxv 0, 0(6)
-; P9BE-NEXT:    li 6, 0
-; P9BE-NEXT:    mtvsrwz 3, 6
-; P9BE-NEXT:    xxperm 2, 3, 0
-; P9BE-NEXT:    xxperm 1, 3, 0
-; P9BE-NEXT:    vsplth 3, 3, 3
-; P9BE-NEXT:    lxv 0, 0(3)
-; P9BE-NEXT:    li 3, 0
-; P9BE-NEXT:    xxmrghw 3, 3, 1
-; P9BE-NEXT:    xxperm 2, 3, 0
-; P9BE-NEXT:    xxspltw 3, 2, 1
-; P9BE-NEXT:    vadduwm 2, 2, 3
-; P9BE-NEXT:    vextuwlx 3, 3, 2
 ; P9BE-NEXT:    cmpw 3, 5
 ; P9BE-NEXT:    bgelr+ 0
 ; P9BE-NEXT:  # %bb.1: # %if.then
 ;
 ; P9BE-AIX-LABEL: test16:
 ; P9BE-AIX:       # %bb.0: # %entry
-; P9BE-AIX-NEXT:    sldi 4, 4, 1
-; P9BE-AIX-NEXT:    li 7, 16
-; P9BE-AIX-NEXT:    add 6, 3, 4
-; P9BE-AIX-NEXT:    lxsihzx 1, 3, 4
-; P9BE-AIX-NEXT:    ld 3, L..C3(2) # %const.1
-; P9BE-AIX-NEXT:    lxsihzx 2, 6, 7
-; P9BE-AIX-NEXT:    ld 6, L..C4(2) # %const.0
-; P9BE-AIX-NEXT:    lxv 0, 0(6)
-; P9BE-AIX-NEXT:    li 6, 0
-; P9BE-AIX-NEXT:    mtvsrwz 3, 6
-; P9BE-AIX-NEXT:    xxperm 2, 3, 0
-; P9BE-AIX-NEXT:    xxperm 1, 3, 0
-; P9BE-AIX-NEXT:    vsplth 3, 3, 3
-; P9BE-AIX-NEXT:    lxv 0, 0(3)
-; P9BE-AIX-NEXT:    li 3, 0
-; P9BE-AIX-NEXT:    xxmrghw 3, 3, 1
-; P9BE-AIX-NEXT:    xxperm 2, 3, 0
-; P9BE-AIX-NEXT:    xxspltw 3, 2, 1
-; P9BE-AIX-NEXT:    vadduwm 2, 2, 3
-; P9BE-AIX-NEXT:    vextuwlx 3, 3, 2
 ; P9BE-AIX-NEXT:    cmpw 3, 5
 ; P9BE-AIX-NEXT:    bgelr+ 0
 ; P9BE-AIX-NEXT:  # %bb.1: # %if.then
@@ -320,24 +259,17 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
 ; P9BE-AIX32-LABEL: test16:
 ; P9BE-AIX32:       # %bb.0: # %entry
 ; P9BE-AIX32-NEXT:    slwi 4, 4, 1
-; P9BE-AIX32-NEXT:    li 6, 0
 ; P9BE-AIX32-NEXT:    lhzux 4, 3, 4
 ; P9BE-AIX32-NEXT:    lhz 3, 16(3)
-; P9BE-AIX32-NEXT:    sth 6, -64(1)
-; P9BE-AIX32-NEXT:    lxv 2, -64(1)
-; P9BE-AIX32-NEXT:    sth 4, -48(1)
-; P9BE-AIX32-NEXT:    lxv 4, -48(1)
-; P9BE-AIX32-NEXT:    sth 3, -32(1)
+; P9BE-AIX32-NEXT:    stw 4, -48(1)
+; P9BE-AIX32-NEXT:    lxv 1, -48(1)
+; P9BE-AIX32-NEXT:    stw 3, -32(1)
 ; P9BE-AIX32-NEXT:    lwz 3, L..C3(2) # %const.0
-; P9BE-AIX32-NEXT:    lxv 3, -32(1)
-; P9BE-AIX32-NEXT:    vmrghh 4, 2, 4
+; P9BE-AIX32-NEXT:    lxv 2, -32(1)
 ; P9BE-AIX32-NEXT:    lxv 0, 0(3)
-; P9BE-AIX32-NEXT:    vmrghh 3, 2, 3
-; P9BE-AIX32-NEXT:    vsplth 2, 2, 0
-; P9BE-AIX32-NEXT:    xxmrghw 2, 2, 4
-; P9BE-AIX32-NEXT:    xxperm 3, 2, 0
-; P9BE-AIX32-NEXT:    xxspltw 2, 3, 1
-; P9BE-AIX32-NEXT:    vadduwm 2, 3, 2
+; P9BE-AIX32-NEXT:    xxperm 2, 1, 0
+; P9BE-AIX32-NEXT:    xxspltw 3, 2, 1
+; P9BE-AIX32-NEXT:    vadduwm 2, 2, 3
 ; P9BE-AIX32-NEXT:    stxv 2, -16(1)
 ; P9BE-AIX32-NEXT:    lwz 3, -16(1)
 ; P9BE-AIX32-NEXT:    cmpw 3, 5
@@ -378,108 +310,35 @@ if.end:                                           ; preds = %for.body
 define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) {
 ; P9LE-LABEL: test8:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    add 6, 3, 4
-; P9LE-NEXT:    lxsibzx 2, 3, 4
-; P9LE-NEXT:    li 3, 0
-; P9LE-NEXT:    mtvsrd 3, 3
-; P9LE-NEXT:    li 3, 8
-; P9LE-NEXT:    lxsibzx 5, 6, 3
-; P9LE-NEXT:    vspltb 4, 3, 7
-; P9LE-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
-; P9LE-NEXT:    vmrghb 2, 3, 2
-; P9LE-NEXT:    addi 3, 3, .LCPI3_0@toc@l
-; P9LE-NEXT:    vmrglh 2, 2, 4
-; P9LE-NEXT:    lxv 0, 0(3)
-; P9LE-NEXT:    li 3, 0
-; P9LE-NEXT:    vmrghb 3, 3, 5
-; P9LE-NEXT:    xxmrglw 2, 2, 4
-; P9LE-NEXT:    vmrglh 3, 3, 4
-; P9LE-NEXT:    xxmrglw 3, 4, 3
-; P9LE-NEXT:    xxperm 2, 3, 0
-; P9LE-NEXT:    xxspltw 3, 2, 2
-; P9LE-NEXT:    vadduwm 2, 2, 3
-; P9LE-NEXT:    vextuwrx 3, 3, 2
 ; P9LE-NEXT:    cmpw 3, 5
 ; P9LE-NEXT:    bgelr+ 0
 ; P9LE-NEXT:  # %bb.1: # %if.then
 ;
 ; P9BE-LABEL: test8:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    add 6, 3, 4
-; P9BE-NEXT:    li 7, 8
-; P9BE-NEXT:    lxsibzx 3, 3, 4
-; P9BE-NEXT:    addis 3, 2, .LCPI3_1@toc@ha
-; P9BE-NEXT:    lxsibzx 0, 6, 7
-; P9BE-NEXT:    addis 6, 2, .LCPI3_0@toc@ha
-; P9BE-NEXT:    addi 3, 3, .LCPI3_1@toc@l
-; P9BE-NEXT:    addi 6, 6, .LCPI3_0@toc@l
-; P9BE-NEXT:    lxv 1, 0(6)
-; P9BE-NEXT:    li 6, 0
-; P9BE-NEXT:    mtvsrwz 2, 6
-; P9BE-NEXT:    xxperm 0, 2, 1
-; P9BE-NEXT:    xxperm 3, 2, 1
-; P9BE-NEXT:    vspltb 2, 2, 7
-; P9BE-NEXT:    vmrghh 3, 3, 2
-; P9BE-NEXT:    xxspltw 1, 2, 0
-; P9BE-NEXT:    xxmrghw 3, 3, 0
-; P9BE-NEXT:    lxv 0, 0(3)
-; P9BE-NEXT:    li 3, 0
-; P9BE-NEXT:    xxperm 3, 1, 0
-; P9BE-NEXT:    xxspltw 2, 3, 1
-; P9BE-NEXT:    vadduwm 2, 3, 2
-; P9BE-NEXT:    vextuwlx 3, 3, 2
 ; P9BE-NEXT:    cmpw 3, 5
 ; P9BE-NEXT:    bgelr+ 0
 ; P9BE-NEXT:  # %bb.1: # %if.then
 ;
 ; P9BE-AIX-LABEL: test8:
 ; P9BE-AIX:       # %bb.0: # %entry
-; P9BE-AIX-NEXT:    add 6, 3, 4
-; P9BE-AIX-NEXT:    li 7, 8
-; P9BE-AIX-NEXT:    lxsibzx 3, 3, 4
-; P9BE-AIX-NEXT:    ld 3, L..C5(2) # %const.1
-; P9BE-AIX-NEXT:    lxsibzx 0, 6, 7
-; P9BE-AIX-NEXT:    ld 6, L..C6(2) # %const.0
-; P9BE-AIX-NEXT:    lxv 1, 0(6)
-; P9BE-AIX-NEXT:    li 6, 0
-; P9BE-AIX-NEXT:    mtvsrwz 2, 6
-; P9BE-AIX-NEXT:    xxperm 0, 2, 1
-; P9BE-AIX-NEXT:    xxperm 3, 2, 1
-; P9BE-AIX-NEXT:    vspltb 2, 2, 7
-; P9BE-AIX-NEXT:    vmrghh 3, 3, 2
-; P9BE-AIX-NEXT:    xxspltw 1, 2, 0
-; P9BE-AIX-NEXT:    xxmrghw 3, 3, 0
-; P9BE-AIX-NEXT:    lxv 0, 0(3)
-; P9BE-AIX-NEXT:    li 3, 0
-; P9BE-AIX-NEXT:    xxperm 3, 1, 0
-; P9BE-AIX-NEXT:    xxspltw 2, 3, 1
-; P9BE-AIX-NEXT:    vadduwm 2, 3, 2
-; P9BE-AIX-NEXT:    vextuwlx 3, 3, 2
 ; P9BE-AIX-NEXT:    cmpw 3, 5
 ; P9BE-AIX-NEXT:    bgelr+ 0
 ; P9BE-AIX-NEXT:  # %bb.1: # %if.then
 ;
 ; P9BE-AIX32-LABEL: test8:
 ; P9BE-AIX32:       # %bb.0: # %entry
-; P9BE-AIX32-NEXT:    add 6, 3, 4
-; P9BE-AIX32-NEXT:    li 7, 8
-; P9BE-AIX32-NEXT:    lxsibzx 3, 3, 4
-; P9BE-AIX32-NEXT:    lwz 3, L..C4(2) # %const.1
-; P9BE-AIX32-NEXT:    lxsibzx 0, 6, 7
-; P9BE-AIX32-NEXT:    lwz 6, L..C5(2) # %const.0
-; P9BE-AIX32-NEXT:    lxv 1, 0(6)
-; P9BE-AIX32-NEXT:    li 6, 0
-; P9BE-AIX32-NEXT:    mtvsrwz 2, 6
-; P9BE-AIX32-NEXT:    xxperm 0, 2, 1
-; P9BE-AIX32-NEXT:    xxperm 3, 2, 1
-; P9BE-AIX32-NEXT:    vspltb 2, 2, 7
-; P9BE-AIX32-NEXT:    vmrghh 3, 3, 2
-; P9BE-AIX32-NEXT:    xxspltw 1, 2, 0
-; P9BE-AIX32-NEXT:    xxmrghw 3, 3, 0
+; P9BE-AIX32-NEXT:    lbzux 4, 3, 4
+; P9BE-AIX32-NEXT:    lbz 3, 8(3)
+; P9BE-AIX32-NEXT:    stw 4, -48(1)
+; P9BE-AIX32-NEXT:    lxv 1, -48(1)
+; P9BE-AIX32-NEXT:    stw 3, -32(1)
+; P9BE-AIX32-NEXT:    lwz 3, L..C4(2) # %const.0
+; P9BE-AIX32-NEXT:    lxv 2, -32(1)
 ; P9BE-AIX32-NEXT:    lxv 0, 0(3)
-; P9BE-AIX32-NEXT:    xxperm 3, 1, 0
-; P9BE-AIX32-NEXT:    xxspltw 2, 3, 1
-; P9BE-AIX32-NEXT:    vadduwm 2, 3, 2
+; P9BE-AIX32-NEXT:    xxperm 2, 1, 0
+; P9BE-AIX32-NEXT:    xxspltw 3, 2, 1
+; P9BE-AIX32-NEXT:    vadduwm 2, 2, 3
 ; P9BE-AIX32-NEXT:    stxv 2, -16(1)
 ; P9BE-AIX32-NEXT:    lwz 3, -16(1)
 ; P9BE-AIX32-NEXT:    cmpw 3, 5
diff --git a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
index df55b92997765d1..840a19f2f9a6a40 100644
--- a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
@@ -13,8 +13,10 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P9LE:       # %bb.0:
 ; P9LE-NEXT:    li r3, 0
 ; P9LE-NEXT:    lis r4, -21386
+; P9LE-NEXT:    lis r6, 31710
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
 ; P9LE-NEXT:    ori r4, r4, 37253
+; P9LE-NEXT:    ori r6, r6, 63421
 ; P9LE-NEXT:    extsh r3, r3
 ; P9LE-NEXT:    mulhw r4, r3, r4
 ; P9LE-NEXT:    add r4, r4, r3
@@ -23,25 +25,25 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P9LE-NEXT:    add r4, r4, r5
 ; P9LE-NEXT:    mulli r4, r4, 95
 ; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    lis r4, 31710
-; P9LE-NEXT:    mtvsrd v3, r3
-; P9LE-NEXT:    li r3, 2
-; P9LE-NEXT:    ori r4, r4, 63421
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    extsh r3, r3
-; P9LE-NEXT:    mulhw r4, r3, r4
-; P9LE-NEXT:    sub r4, r4, r3
-; P9LE-NEXT:    srwi r5, r4, 31
-; P9LE-NEXT:    srawi r4, r4, 6
-; P9LE-NEXT:    add r4, r4, r5
-; P9LE-NEXT:    mulli r4, r4, -124
-; P9LE-NEXT:    sub r3, r3, r4
+; P9LE-NEXT:    li r4, 2
+; P9LE-NEXT:    vextuhrx r4, r4, v2
+; P9LE-NEXT:    extsh r5, r4
+; P9LE-NEXT:    mulhw r6, r5, r6
+; P9LE-NEXT:    sub r5, r6, r5
+; P9LE-NEXT:    srwi r6, r5, 31
+; P9LE-NEXT:    srwi r5, r5, 6
+; P9LE-NEXT:    add r5, r5, r6
+; P9LE-NEXT:    lis r6, -16728
+; P9LE-NEXT:    mulli r5, r5, -124
+; P9LE-NEXT:    ori r6, r6, 63249
+; P9LE-NEXT:    sub r4, r4, r5
+; P9LE-NEXT:    slwi r4, r4, 16
+; P9LE-NEXT:    or r3, r4, r3
 ; P9LE-NEXT:    lis r4, 21399
-; P9LE-NEXT:    mtvsrd v4, r3
+; P9LE-NEXT:    mtfprwz f0, r3
 ; P9LE-NEXT:    li r3, 4
 ; P9LE-NEXT:    ori r4, r4, 33437
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    vmrghh v3, v4, v3
 ; P9LE-NEXT:    extsh r3, r3
 ; P9LE-NEXT:    mulhw r4, r3, r4
 ; P9LE-NEXT:    srwi r5, r4, 31
@@ -49,29 +51,29 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P9LE-NEXT:    add r4, r4, r5
 ; P9LE-NEXT:    mulli r4, r4, 98
 ; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    lis r4, -16728
-; P9LE-NEXT:    mtvsrd v4, r3
-; P9LE-NEXT:    li r3, 6
-; P9LE-NEXT:    ori r4, r4, 63249
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    extsh r3, r3
-; P9LE-NEXT:    mulhw r4, r3, r4
-; P9LE-NEXT:    srwi r5, r4, 31
-; P9LE-NEXT:    srawi r4, r4, 8
-; P9LE-NEXT:    add r4, r4, r5
-; P9LE-NEXT:    mulli r4, r4, -1003
-; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    mtvsrd v2, r3
-; P9LE-NEXT:    vmrghh v2, v2, v4
-; P9LE-NEXT:    xxmrglw v2, v2, v3
+; P9LE-NEXT:    li r4, 6
+; P9LE-NEXT:    vextuhrx r4, r4, v2
+; P9LE-NEXT:    extsh r5, r4
+; P9LE-NEXT:    mulhw r5, r5, r6
+; P9LE-NEXT:    srwi r6, r5, 31
+; P9LE-NEXT:    srwi r5, r5, 8
+; P9LE-NEXT:    add r5, r5, r6
+; P9LE-NEXT:    mulli r5, r5, -1003
+; P9LE-NEXT:    sub r4, r4, r5
+; P9LE-NEXT:    slwi r4, r4, 16
+; P9LE-NEXT:    or r3, r4, r3
+; P9LE-NEXT:    mtfprwz f1, r3
+; P9LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: fold_srem_vec_1:
 ; P9BE:       # %bb.0:
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    lis r4, 31710
+; P9BE-NEXT:    lis r6, -21386
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    ori r4, r4, 63421
+; P9BE-NEXT:    ori r6, r6, 37253
 ; P9BE-NEXT:    extsh r3, r3
 ; P9BE-NEXT:    mulhw r4, r3, r4
 ; P9BE-NEXT:    sub r4, r4, r3
@@ -80,162 +82,154 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    add r4, r4, r5
 ; P9BE-NEXT:    mulli r4, r4, -124
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    lis r4, -21386
-; P9BE-NEXT:    mtfprwz f0, r3
-; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    ori r4, r4, 37253
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    mulhw r4, r3, r4
-; P9BE-NEXT:    add r4, r4, r3
-; P9BE-NEXT:    srwi r5, r4, 31
-; P9BE-NEXT:    srawi r4, r4, 6
-; P9BE-NEXT:    add r4, r4, r5
-; P9BE-NEXT:    mulli r4, r4, 95
-; P9BE-NEXT:    sub r3, r3, r4
+; P9BE-NEXT:    li r4, 0
+; P9BE-NEXT:    vextuhlx r4, r4, v2
+; P9BE-NEXT:    extsh r5, r4
+; P9BE-NEXT:    mulhw r6, r5, r6
+; P9BE-NEXT:    add r5, r6, r5
+; P9BE-NEXT:    srwi r6, r5, 31
+; P9BE-NEXT:    srwi r5, r5, 6
+; P9BE-NEXT:    add r5, r5, r6
+; P9BE-NEXT:    lis r6, 21399
+; P9BE-NEXT:    mulli r5, r5, 95
+; P9BE-NEXT:    ori r6, r6, 33437
+; P9BE-NEXT:    sub r4, r4, r5
+; P9BE-NEXT:    slwi r4, r4, 16
+; P9BE-NEXT:    or r3, r4, r3
 ; P9BE-NEXT:    lis r4, -16728
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; P9BE-NEXT:    ori r4, r4, 63249
-; P9BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; P9BE-NEXT:    lxv vs2, 0(r3)
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 6
+; P9BE-NEXT:    ori r4, r4, 63249
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhw r4, r3, r4
 ; P9BE-NEXT:    srwi r5, r4, 31
 ; P9BE-NEXT:    srawi r4, r4, 8
 ; P9BE-NEXT:    add r4, r4, r5
 ; P9BE-NEXT:    mulli r4, r4, -1003
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    lis r4, 21399
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    li r3, 4
-; P9BE-NEXT:    ori r4, r4, 33437
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    mulhw r4, r3, r4
-; P9BE-NEXT:    srwi r5, r4, 31
-; P9BE-NEXT:    srawi r4, r4, 5
-; P9BE-NEXT:    add r4, r4, r5
-; P9BE-NEXT:    mulli r4, r4, 98
-; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtfprwz f3, r3
-; P9BE-NEXT:    xxperm vs1, vs3, vs2
-; P9BE-NEXT:    xxmrghw v2, vs0, vs1
+; P9BE-NEXT:    li r4, 4
+; P9BE-NEXT:    vextuhlx r4, r4, v2
+; P9BE-NEXT:    extsh r5, r4
+; P9BE-NEXT:    mulhw r5, r5, r6
+; P9BE-NEXT:    srwi r6, r5, 31
+; P9BE-NEXT:    srwi r5, r5, 5
+; P9BE-NEXT:    add r5, r5, r6
+; P9BE-NEXT:    mulli r5, r5, 98
+; P9BE-NEXT:    sub r4, r4, r5
+; P9BE-NEXT:    slwi r4, r4, 16
+; P9BE-NEXT:    or r3, r4, r3
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vmrgow v2, v3, v2
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fold_srem_vec_1:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r4, 21399
-; P8LE-NEXT:    lis r5, -16728
-; P8LE-NEXT:    lis r6, -21386
+; P8LE-NEXT:    lis r5, 21399
+; P8LE-NEXT:    lis r7, -16728
 ; P8LE-NEXT:    mffprd r3, f0
-; P8LE-NEXT:    ori r4, r4, 33437
-; P8LE-NEXT:    ori r5, r5, 63249
-; P8LE-NEXT:    ori r6, r6, 37253
-; P8LE-NEXT:    rldicl r7, r3, 32, 48
-; P8LE-NEXT:    rldicl r8, r3, 16, 48
-; P8LE-NEXT:    clrldi r9, r3, 48
+; P8LE-NEXT:    ori r5, r5, 33437
+; P8LE-NEXT:    ori r7, r7, 63249
+; P8LE-NEXT:    rldicl r4, r3, 32, 48
+; P8LE-NEXT:    extsh r4, r4
+; P8LE-NEXT:    mulhw r5, r4, r5
+; P8LE-NEXT:    srwi r6, r5, 31
+; P8LE-NEXT:    srawi r5, r5, 5
+; P8LE-NEXT:    add r5, r5, r6
+; P8LE-NEXT:    mulli r5, r5, 98
+; P8LE-NEXT:    sub r4, r4, r5
+; P8LE-NEXT:    rldicl r5, r3, 16, 48
+; P8LE-NEXT:    extsh r6, r5
+; P8LE-NEXT:    mulhw r6, r6, r7
+; P8LE-NEXT:    srwi r7, r6, 31
+; P8LE-NEXT:    srwi r6, r6, 8
+; P8LE-NEXT:    add r6, r6, r7
+; P8LE-NEXT:    mulli r6, r6, -1003
+; P8LE-NEXT:    sub r5, r5, r6
+; P8LE-NEXT:    slwi r5, r5, 16
+; P8LE-NEXT:    or r4, r5, r4
+; P8LE-NEXT:    lis r5, -21386
+; P8LE-NEXT:    mtfprwz f0, r4
+; P8LE-NEXT:    clrldi r4, r3, 48
+; P8LE-NEXT:    ori r5, r5, 37253
 ; P8LE-NEXT:    rldicl r3, r3, 48, 48
-; P8LE-NEXT:    extsh r7, r7
-; P8LE-NEXT:    extsh r8, r8
-; P8LE-NEXT:    extsh r9, r9
-; P8LE-NEXT:    extsh r3, r3
-; P8LE-NEXT:    mulhw r4, r7, r4
-; P8LE-NEXT:    mulhw r5, r8, r5
-; P8LE-NEXT:    mulhw r6, r9, r6
-; P8LE-NEXT:    srwi r10, r4, 31
-; P8LE-NEXT:    srawi r4, r4, 5
-; P8LE-NEXT:    add r6, r6, r9
-; P8LE-NEXT:    add r4, r4, r10
-; P8LE-NEXT:    srwi r10, r5, 31
-; P8LE-NEXT:    srawi r5, r5, 8
-; P8LE-NEXT:    mulli r4, r4, 98
-; P8LE-NEXT:    add r5, r5, r10
-; P8LE-NEXT:    srwi r10, r6, 31
-; P8LE-NEXT:    srawi r6, r6, 6
-; P8LE-NEXT:    add r6, r6, r10
-; P8LE-NEXT:    mulli r5, r5, -1003
-; P8LE-NEXT:    sub r4, r7, r4
-; P8LE-NEXT:    mtvsrd v2, r4
-; P8LE-NEXT:    mulli r4, r6, 95
-; P8LE-NEXT:    sub r5, r8, r5
-; P8LE-NEXT:    mtvsrd v3, r5
-; P8LE-NEXT:    sub r4, r9, r4
-; P8LE-NEXT:    mtvsrd v4, r4
-; P8LE-NEXT:    lis r4, 31710
-; P8LE-NEXT:    ori r4, r4, 63421
-; P8LE-NEXT:    mulhw r4, r3, r4
-; P8LE-NEXT:    sub r4, r4, r3
-; P8LE-NEXT:    srwi r5, r4, 31
-; P8LE-NEXT:    srawi r4, r4, 6
-; P8LE-NEXT:    add r4, r4, r5
-; P8LE-NEXT:    mulli r4, r4, -124
-; P8LE-NEXT:    sub r3, r3, r4
-; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    mtvsrd v3, r3
-; P8LE-NEXT:    vmrghh v3, v3, v4
-; P8LE-NEXT:    xxmrglw v2, v2, v3
+; P8LE-NEXT:    extsh r4, r4
+; P8LE-NEXT:    mulhw r5, r4, r5
+; P8LE-NEXT:    add r5, r5, r4
+; P8LE-NEXT:    srwi r6, r5, 31
+; P8LE-NEXT:    srawi r5, r5, 6
+; P8LE-NEXT:    add r5, r5, r6
+; P8LE-NEXT:    lis r6, 31710
+; P8LE-NEXT:    mulli r5, r5, 95
+; P8LE-NEXT:    ori r6, r6, 63421
+; P8LE-NEXT:    sub r4, r4, r5
+; P8LE-NEXT:    extsh r5, r3
+; P8LE-NEXT:    mulhw r6, r5, r6
+; P8LE-NEXT:    sub r5, r6, r5
+; P8LE-NEXT:    srwi r6, r5, 31
+; P8LE-NEXT:    srwi r5, r5, 6
+; P8LE-NEXT:    add r5, r5, r6
+; P8LE-NEXT:    mulli r5, r5, -124
+; P8LE-NEXT:    sub r3, r3, r5
+; P8LE-NEXT:    slwi r3, r3, 16
+; P8LE-NEXT:    or r3, r3, r4
+; P8LE-NEXT:    mtfprwz f1, r3
+; P8LE-NEXT:    xxmrghw v2, vs0, vs1
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fold_srem_vec_1:
 ; P8BE:       # %bb.0:
 ; P8BE-NEXT:    mfvsrd r3, v2
-; P8BE-NEXT:    addis r6, r2, .LCPI0_0@toc@ha
-; P8BE-NEXT:    lis r4, -16728
-; P8BE-NEXT:    lis r5, 21399
-; P8BE-NEXT:    lis r7, 31710
-; P8BE-NEXT:    addi r6, r6, .LCPI0_0@toc@l
-; P8BE-NEXT:    ori r4, r4, 63249
-; P8BE-NEXT:    ori r5, r5, 33437
-; P8BE-NEXT:    ori r7, r7, 63421
-; P8BE-NEXT:    lxvw4x v2, 0, r6
-; P8BE-NEXT:    clrldi r6, r3, 48
-; P8BE-NEXT:    rldicl r8, r3, 48, 48
-; P8BE-NEXT:    rldicl r9, r3, 32, 48
+; P8BE-NEXT:    lis r5, -16728
+; P8BE-NEXT:    lis r7, 21399
+; P8BE-NEXT:    ori r5, r5, 63249
+; P8BE-NEXT:    ori r7, r7, 33437
+; P8BE-NEXT:    clrldi r4, r3, 48
+; P8BE-NEXT:    extsh r4, r4
+; P8BE-NEXT:    mulhw r5, r4, r5
+; P8BE-NEXT:    srwi r6, r5, 31
+; P8BE-NEXT:    srawi r5, r5, 8
+; P8BE-NEXT:    add r5, r5, r6
+; P8BE-NEXT:    mulli r5, r5, -1003
+; P8BE-NEXT:    sub r4, r4, r5
+; P8BE-NEXT:    rldicl r5, r3, 48, 48
+; P8BE-NEXT:    extsh r6, r5
+; P8BE-NEXT:    mulhw r6, r6, r7
+; P8BE-NEXT:    srwi r7, r6, 31
+; P8BE-NEXT:    srwi r6, r6, 5
+; P8BE-NEXT:    add r6, r6, r7
+; P8BE-NEXT:    mulli r6, r6, 98
+; P8BE-NEXT:    sub r5, r5, r6
+; P8BE-NEXT:    slwi r5, r5, 16
+; P8BE-NEXT:    or r4, r5, r4
+; P8BE-NEXT:    lis r5, 31710
+; P8BE-NEXT:    mtvsrwz v2, r4
+; P8BE-NEXT:    rldicl r4, r3, 32, 48
+; P8BE-NEXT:    ori r5, r5, 63421
 ; P8BE-NEXT:    rldicl r3, r3, 16, 48
-; P8BE-NEXT:    extsh r6, r6
-; P8BE-NEXT:    extsh r8, r8
-; P8BE-NEXT:    extsh r9, r9
-; P8BE-NEXT:    extsh r3, r3
-; P8BE-NEXT:    mulhw r4, r6, r4
-; P8BE-NEXT:    mulhw r5, r8, r5
-; P8BE-NEXT:    mulhw r7, r9, r7
-; P8BE-NEXT:    srwi r10, r4, 31
-; P8BE-NEXT:    srawi r4, r4, 8
-; P8BE-NEXT:    sub r7, r7, r9
-; P8BE-NEXT:    add r4, r4, r10
-; P8BE-NEXT:    srwi r10, r5, 31
-; P8BE-NEXT:    srawi r5, r5, 5
-; P8BE-NEXT:    mulli r4, r4, -1003
-; P8BE-NEXT:    add r5, r5, r10
-; P8BE-NEXT:    srwi r10, r7, 31
-; P8BE-NEXT:    srawi r7, r7, 6
-; P8BE-NEXT:    add r7, r7, r10
-; P8BE-NEXT:    mulli r5, r5, 98
-; P8BE-NEXT:    sub r4, r6, r4
-; P8BE-NEXT:    mtvsrwz v3, r4
-; P8BE-NEXT:    mulli r4, r7, -124
-; P8BE-NEXT:    sub r5, r8, r5
-; P8BE-NEXT:    mtvsrwz v4, r5
-; P8BE-NEXT:    sub r4, r9, r4
-; P8BE-NEXT:    mtvsrwz v5, r4
-; P8BE-NEXT:    lis r4, -21386
-; P8BE-NEXT:    ori r4, r4, 37253
-; P8BE-NEXT:    mulhw r4, r3, r4
-; P8BE-NEXT:    add r4, r4, r3
-; P8BE-NEXT:    srwi r5, r4, 31
-; P8BE-NEXT:    srawi r4, r4, 6
-; P8BE-NEXT:    add r4, r4, r5
-; P8BE-NEXT:    mulli r4, r4, 95
-; P8BE-NEXT:    sub r3, r3, r4
-; P8BE-NEXT:    vperm v3, v4, v3, v2
-; P8BE-NEXT:    mtvsrwz v4, r3
-; P8BE-NEXT:    vperm v2, v4, v5, v2
-; P8BE-NEXT:    xxmrghw v2, v2, v3
+; P8BE-NEXT:    extsh r4, r4
+; P8BE-NEXT:    mulhw r5, r4, r5
+; P8BE-NEXT:    sub r5, r5, r4
+; P8BE-NEXT:    srwi r6, r5, 31
+; P8BE-NEXT:    srawi r5, r5, 6
+; P8BE-NEXT:    add r5, r5, r6
+; P8BE-NEXT:    lis r6, -21386
+; P8BE-NEXT:    mulli r5, r5, -124
+; P8BE-NEXT:    ori r6, r6, 37253
+; P8BE-NEXT:    sub r4, r4, r5
+; P8BE-NEXT:    extsh r5, r3
+; P8BE-NEXT:    mulhw r6, r5, r6
+; P8BE-NEXT:    add r5, r6, r5
+; P8BE-NEXT:    srwi r6, r5, 31
+; P8BE-NEXT:    srwi r5, r5, 6
+; P8BE-NEXT:    add r5, r5, r6
+; P8BE-NEXT:    mulli r5, r5, 95
+; P8BE-NEXT:    sub r3, r3, r5
+; P8BE-NEXT:    slwi r3, r3, 16
+; P8BE-NEXT:    or r3, r3, r4
+; P8BE-NEXT:    mtvsrwz v3, r3
+; P8BE-NEXT:    vmrgow v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
   ret <4 x i16> %1
@@ -256,21 +250,21 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P9LE-NEXT:    add r5, r5, r6
 ; P9LE-NEXT:    mulli r5, r5, 95
 ; P9LE-NEXT:    sub r3, r3, r5
-; P9LE-NEXT:    mtvsrd v3, r3
-; P9LE-NEXT:    li r3, 2
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    extsh r3, r3
-; P9LE-NEXT:    mulhw r5, r3, r4
-; P9LE-NEXT:    add r5, r5, r3
-; P9LE-NEXT:    srwi r6, r5, 31
-; P9LE-NEXT:    srawi r5, r5, 6
-; P9LE-NEXT:    add r5, r5, r6
-; P9LE-NEXT:    mulli r5, r5, 95
-; P9LE-NEXT:    sub r3, r3, r5
-; P9LE-NEXT:    mtvsrd v4, r3
+; P9LE-NEXT:    li r5, 2
+; P9LE-NEXT:    vextuhrx r5, r5, v2
+; P9LE-NEXT:    extsh r6, r5
+; P9LE-NEXT:    mulhw r7, r6, r4
+; P9LE-NEXT:    add r6, r7, r6
+; P9LE-NEXT:    srwi r7, r6, 31
+; P9LE-NEXT:    srwi r6, r6, 6
+; P9LE-NEXT:    add r6, r6, r7
+; P9LE-NEXT:    mulli r6, r6, 95
+; P9LE-NEXT:    sub r5, r5, r6
+; P9LE-NEXT:    slwi r5, r5, 16
+; P9LE-NEXT:    or r3, r5, r3
+; P9LE-NEXT:    mtfprwz f0, r3
 ; P9LE-NEXT:    li r3, 4
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    vmrghh v3, v4, v3
 ; P9LE-NEXT:    extsh r3, r3
 ; P9LE-NEXT:    mulhw r5, r3, r4
 ; P9LE-NEXT:    add r5, r5, r3
@@ -279,20 +273,20 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P9LE-NEXT:    add r5, r5, r6
 ; P9LE-NEXT:    mulli r5, r5, 95
 ; P9LE-NEXT:    sub r3, r3, r5
-; P9LE-NEXT:    mtvsrd v4, r3
-; P9LE-NEXT:    li r3, 6
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    extsh r3, r3
-; P9LE-NEXT:    mulhw r4, r3, r4
-; P9LE-NEXT:    add r4, r4, r3
-; P9LE-NEXT:    srwi r5, r4, 31
-; P9LE-NEXT:    srawi r4, r4, 6
-; P9LE-NEXT:    add r4, r4, r5
+; P9LE-NEXT:    li r5, 6
+; P9LE-NEXT:    vextuhrx r5, r5, v2
+; P9LE-NEXT:    extsh r6, r5
+; P9LE-NEXT:    mulhw r4, r6, r4
+; P9LE-NEXT:    add r4, r4, r6
+; P9LE-NEXT:    srwi r6, r4, 31
+; P9LE-NEXT:    srwi r4, r4, 6
+; P9LE-NEXT:    add r4, r4, r6
 ; P9LE-NEXT:    mulli r4, r4, 95
-; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    mtvsrd v2, r3
-; P9LE-NEXT:    vmrghh v2, v2, v4
-; P9LE-NEXT:    xxmrglw v2, v2, v3
+; P9LE-NEXT:    sub r4, r5, r4
+; P9LE-NEXT:    slwi r4, r4, 16
+; P9LE-NEXT:    or r3, r4, r3
+; P9LE-NEXT:    mtfprwz f1, r3
+; P9LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: fold_srem_vec_2:
@@ -309,25 +303,22 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    add r5, r5, r6
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    mtfprwz f0, r3
-; P9BE-NEXT:    li r3, 4
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    mulhw r5, r3, r4
-; P9BE-NEXT:    add r5, r5, r3
-; P9BE-NEXT:    srwi r6, r5, 31
-; P9BE-NEXT:    srawi r5, r5, 6
-; P9BE-NEXT:    add r5, r5, r6
-; P9BE-NEXT:    mulli r5, r5, 95
-; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; P9BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; P9BE-NEXT:    lxv vs2, 0(r3)
+; P9BE-NEXT:    li r5, 4
+; P9BE-NEXT:    vextuhlx r5, r5, v2
+; P9BE-NEXT:    extsh r6, r5
+; P9BE-NEXT:    mulhw r7, r6, r4
+; P9BE-NEXT:    add r6, r7, r6
+; P9BE-NEXT:    srwi r7, r6, 31
+; P9BE-NEXT:    srwi r6, r6, 6
+; P9BE-NEXT:    add r6, r6, r7
+; P9BE-NEXT:    mulli r6, r6, 95
+; P9BE-NEXT:    sub r5, r5, r6
+; P9BE-NEXT:    slwi r5, r5, 16
+; P9BE-NEXT:    or r3, r5, r3
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhw r5, r3, r4
 ; P9BE-NEXT:    add r5, r5, r3
 ; P9BE-NEXT:    srwi r6, r5, 31
@@ -335,124 +326,121 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    add r5, r5, r6
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    mulhw r4, r3, r4
-; P9BE-NEXT:    add r4, r4, r3
-; P9BE-NEXT:    srwi r5, r4, 31
-; P9BE-NEXT:    srawi r4, r4, 6
-; P9BE-NEXT:    add r4, r4, r5
+; P9BE-NEXT:    li r5, 0
+; P9BE-NEXT:    vextuhlx r5, r5, v2
+; P9BE-NEXT:    extsh r6, r5
+; P9BE-NEXT:    mulhw r4, r6, r4
+; P9BE-NEXT:    add r4, r4, r6
+; P9BE-NEXT:    srwi r6, r4, 31
+; P9BE-NEXT:    srwi r4, r4, 6
+; P9BE-NEXT:    add r4, r4, r6
 ; P9BE-NEXT:    mulli r4, r4, 95
-; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtfprwz f3, r3
-; P9BE-NEXT:    xxperm vs1, vs3, vs2
-; P9BE-NEXT:    xxmrghw v2, vs1, vs0
+; P9BE-NEXT:    sub r4, r5, r4
+; P9BE-NEXT:    slwi r4, r4, 16
+; P9BE-NEXT:    or r3, r4, r3
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vmrgow v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fold_srem_vec_2:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r4, -21386
+; P8LE-NEXT:    lis r5, -21386
 ; P8LE-NEXT:    mffprd r3, f0
-; P8LE-NEXT:    ori r4, r4, 37253
-; P8LE-NEXT:    clrldi r5, r3, 48
+; P8LE-NEXT:    ori r5, r5, 37253
+; P8LE-NEXT:    clrldi r4, r3, 48
+; P8LE-NEXT:    extsh r4, r4
+; P8LE-NEXT:    mulhw r6, r4, r5
+; P8LE-NEXT:    add r6, r6, r4
+; P8LE-NEXT:    srwi r7, r6, 31
+; P8LE-NEXT:    srawi r6, r6, 6
+; P8LE-NEXT:    add r6, r6, r7
+; P8LE-NEXT:    mulli r6, r6, 95
+; P8LE-NEXT:    sub r4, r4, r6
 ; P8LE-NEXT:    rldicl r6, r3, 48, 48
-; P8LE-NEXT:    rldicl r7, r3, 32, 48
+; P8LE-NEXT:    extsh r7, r6
+; P8LE-NEXT:    mulhw r8, r7, r5
+; P8LE-NEXT:    add r7, r8, r7
+; P8LE-NEXT:    srwi r8, r7, 31
+; P8LE-NEXT:    srwi r7, r7, 6
+; P8LE-NEXT:    add r7, r7, r8
+; P8LE-NEXT:    mulli r7, r7, 95
+; P8LE-NEXT:    sub r6, r6, r7
+; P8LE-NEXT:    slwi r6, r6, 16
+; P8LE-NEXT:    or r4, r6, r4
+; P8LE-NEXT:    mtfprwz f0, r4
+; P8LE-NEXT:    rldicl r4, r3, 32, 48
 ; P8LE-NEXT:    rldicl r3, r3, 16, 48
-; P8LE-NEXT:    extsh r5, r5
-; P8LE-NEXT:    extsh r6, r6
-; P8LE-NEXT:    extsh r7, r7
-; P8LE-NEXT:    extsh r3, r3
-; P8LE-NEXT:    mulhw r8, r5, r4
-; P8LE-NEXT:    mulhw r9, r6, r4
-; P8LE-NEXT:    mulhw r10, r7, r4
-; P8LE-NEXT:    mulhw r4, r3, r4
-; P8LE-NEXT:    add r8, r8, r5
-; P8LE-NEXT:    add r9, r9, r6
-; P8LE-NEXT:    add r10, r10, r7
-; P8LE-NEXT:    add r4, r4, r3
-; P8LE-NEXT:    srwi r11, r8, 31
-; P8LE-NEXT:    srawi r8, r8, 6
-; P8LE-NEXT:    add r8, r8, r11
-; P8LE-NEXT:    srwi r11, r9, 31
-; P8LE-NEXT:    srawi r9, r9, 6
-; P8LE-NEXT:    mulli r8, r8, 95
-; P8LE-NEXT:    add r9, r9, r11
-; P8LE-NEXT:    srwi r11, r10, 31
-; P8LE-NEXT:    srawi r10, r10, 6
-; P8LE-NEXT:    add r10, r10, r11
-; P8LE-NEXT:    srwi r11, r4, 31
-; P8LE-NEXT:    srawi r4, r4, 6
-; P8LE-NEXT:    add r4, r4, r11
-; P8LE-NEXT:    sub r5, r5, r8
-; P8LE-NEXT:    mulli r8, r9, 95
-; P8LE-NEXT:    mulli r4, r4, 95
-; P8LE-NEXT:    mtvsrd v2, r5
-; P8LE-NEXT:    sub r6, r6, r8
-; P8LE-NEXT:    mulli r8, r10, 95
-; P8LE-NEXT:    sub r3, r3, r4
-; P8LE-NEXT:    mtvsrd v3, r6
-; P8LE-NEXT:    sub r7, r7, r8
-; P8LE-NEXT:    mtvsrd v4, r7
-; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    mtvsrd v3, r3
-; P8LE-NEXT:    vmrghh v3, v3, v4
-; P8LE-NEXT:    xxmrglw v2, v3, v2
+; P8LE-NEXT:    extsh r4, r4
+; P8LE-NEXT:    mulhw r6, r4, r5
+; P8LE-NEXT:    add r6, r6, r4
+; P8LE-NEXT:    srwi r7, r6, 31
+; P8LE-NEXT:    srawi r6, r6, 6
+; P8LE-NEXT:    add r6, r6, r7
+; P8LE-NEXT:    mulli r6, r6, 95
+; P8LE-NEXT:    sub r4, r4, r6
+; P8LE-NEXT:    extsh r6, r3
+; P8LE-NEXT:    mulhw r5, r6, r5
+; P8LE-NEXT:    add r5, r5, r6
+; P8LE-NEXT:    srwi r6, r5, 31
+; P8LE-NEXT:    srwi r5, r5, 6
+; P8LE-NEXT:    add r5, r5, r6
+; P8LE-NEXT:    mulli r5, r5, 95
+; P8LE-NEXT:    sub r3, r3, r5
+; P8LE-NEXT:    slwi r3, r3, 16
+; P8LE-NEXT:    or r3, r3, r4
+; P8LE-NEXT:    mtfprwz f1, r3
+; P8LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fold_srem_vec_2:
 ; P8BE:       # %bb.0:
 ; P8BE-NEXT:    mfvsrd r3, v2
-; P8BE-NEXT:    lis r4, -21386
-; P8BE-NEXT:    ori r4, r4, 37253
-; P8BE-NEXT:    clrldi r5, r3, 48
+; P8BE-NEXT:    lis r5, -21386
+; P8BE-NEXT:    ori r5, r5, 37253
+; P8BE-NEXT:    clrldi r4, r3, 48
+; P8BE-NEXT:    extsh r4, r4
+; P8BE-NEXT:    mulhw r6, r4, r5
+; P8BE-NEXT:    add r6, r6, r4
+; P8BE-NEXT:    srwi r7, r6, 31
+; P8BE-NEXT:    srawi r6, r6, 6
+; P8BE-NEXT:    add r6, r6, r7
+; P8BE-NEXT:    mulli r6, r6, 95
+; P8BE-NEXT:    sub r4, r4, r6
 ; P8BE-NEXT:    rldicl r6, r3, 48, 48
-; P8BE-NEXT:    rldicl r7, r3, 32, 48
+; P8BE-NEXT:    extsh r7, r6
+; P8BE-NEXT:    mulhw r8, r7, r5
+; P8BE-NEXT:    add r7, r8, r7
+; P8BE-NEXT:    srwi r8, r7, 31
+; P8BE-NEXT:    srwi r7, r7, 6
+; P8BE-NEXT:    add r7, r7, r8
+; P8BE-NEXT:    mulli r7, r7, 95
+; P8BE-NEXT:    sub r6, r6, r7
+; P8BE-NEXT:    slwi r6, r6, 16
+; P8BE-NEXT:    or r4, r6, r4
+; P8BE-NEXT:    mtvsrwz v2, r4
+; P8BE-NEXT:    rldicl r4, r3, 32, 48
 ; P8BE-NEXT:    rldicl r3, r3, 16, 48
-; P8BE-NEXT:    extsh r5, r5
-; P8BE-NEXT:    extsh r6, r6
-; P8BE-NEXT:    extsh r7, r7
-; P8BE-NEXT:    extsh r3, r3
-; P8BE-NEXT:    mulhw r8, r5, r4
-; P8BE-NEXT:    mulhw r9, r6, r4
-; P8BE-NEXT:    mulhw r10, r7, r4
-; P8BE-NEXT:    mulhw r4, r3, r4
-; P8BE-NEXT:    add r8, r8, r5
-; P8BE-NEXT:    add r9, r9, r6
-; P8BE-NEXT:    add r10, r10, r7
-; P8BE-NEXT:    add r4, r4, r3
-; P8BE-NEXT:    srwi r11, r8, 31
-; P8BE-NEXT:    srawi r8, r8, 6
-; P8BE-NEXT:    add r8, r8, r11
-; P8BE-NEXT:    srwi r11, r9, 31
-; P8BE-NEXT:    srawi r9, r9, 6
-; P8BE-NEXT:    mulli r8, r8, 95
-; P8BE-NEXT:    add r9, r9, r11
-; P8BE-NEXT:    srwi r11, r10, 31
-; P8BE-NEXT:    srawi r10, r10, 6
-; P8BE-NEXT:    add r10, r10, r11
-; P8BE-NEXT:    srwi r11, r4, 31
-; P8BE-NEXT:    srawi r4, r4, 6
-; P8BE-NEXT:    add r4, r4, r11
-; P8BE-NEXT:    addis r11, r2, .LCPI1_0@toc@ha
-; P8BE-NEXT:    sub r5, r5, r8
-; P8BE-NEXT:    mulli r8, r9, 95
-; P8BE-NEXT:    addi r11, r11, .LCPI1_0@toc@l
-; P8BE-NEXT:    mulli r4, r4, 95
-; P8BE-NEXT:    mtvsrwz v3, r5
-; P8BE-NEXT:    lxvw4x v2, 0, r11
-; P8BE-NEXT:    sub r6, r6, r8
-; P8BE-NEXT:    mulli r8, r10, 95
-; P8BE-NEXT:    sub r3, r3, r4
-; P8BE-NEXT:    mtvsrwz v4, r6
-; P8BE-NEXT:    sub r7, r7, r8
-; P8BE-NEXT:    mtvsrwz v5, r7
-; P8BE-NEXT:    vperm v3, v4, v3, v2
-; P8BE-NEXT:    mtvsrwz v4, r3
-; P8BE-NEXT:    vperm v2, v4, v5, v2
-; P8BE-NEXT:    xxmrghw v2, v2, v3
+; P8BE-NEXT:    extsh r4, r4
+; P8BE-NEXT:    mulhw r6, r4, r5
+; P8BE-NEXT:    add r6, r6, r4
+; P8BE-NEXT:    srwi r7, r6, 31
+; P8BE-NEXT:    srawi r6, r6, 6
+; P8BE-NEXT:    add r6, r6, r7
+; P8BE-NEXT:    mulli r6, r6, 95
+; P8BE-NEXT:    sub r4, r4, r6
+; P8BE-NEXT:    extsh r6, r3
+; P8BE-NEXT:    mulhw r5, r6, r5
+; P8BE-NEXT:    add r5, r5, r6
+; P8BE-NEXT:    srwi r6, r5, 31
+; P8BE-NEXT:    srwi r5, r5, 6
+; P8BE-NEXT:    add r5, r5, r6
+; P8BE-NEXT:    mulli r5, r5, 95
+; P8BE-NEXT:    sub r3, r3, r5
+; P8BE-NEXT:    slwi r3, r3, 16
+; P8BE-NEXT:    or r3, r3, r4
+; P8BE-NEXT:    mtvsrwz v3, r3
+; P8BE-NEXT:    vmrgow v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
   ret <4 x i16> %1
@@ -463,68 +451,68 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P9LE-LABEL: combine_srem_sdiv:
 ; P9LE:       # %bb.0:
-; P9LE-NEXT:    li r3, 0
-; P9LE-NEXT:    lis r4, -21386
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    ori r4, r4, 37253
-; P9LE-NEXT:    extsh r3, r3
-; P9LE-NEXT:    mulhw r5, r3, r4
-; P9LE-NEXT:    add r5, r5, r3
-; P9LE-NEXT:    srwi r6, r5, 31
-; P9LE-NEXT:    srawi r5, r5, 6
-; P9LE-NEXT:    add r5, r5, r6
-; P9LE-NEXT:    mulli r6, r5, 95
-; P9LE-NEXT:    sub r3, r3, r6
-; P9LE-NEXT:    mtvsrd v3, r3
 ; P9LE-NEXT:    li r3, 2
+; P9LE-NEXT:    lis r5, -21386
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    extsh r6, r3
-; P9LE-NEXT:    mulhw r7, r6, r4
-; P9LE-NEXT:    add r6, r7, r6
-; P9LE-NEXT:    srwi r7, r6, 31
-; P9LE-NEXT:    srawi r6, r6, 6
-; P9LE-NEXT:    add r6, r6, r7
-; P9LE-NEXT:    mulli r7, r6, 95
-; P9LE-NEXT:    sub r3, r3, r7
-; P9LE-NEXT:    mtvsrd v4, r3
-; P9LE-NEXT:    li r3, 4
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    vmrghh v3, v4, v3
-; P9LE-NEXT:    extsh r7, r3
-; P9LE-NEXT:    mulhw r8, r7, r4
-; P9LE-NEXT:    add r7, r8, r7
+; P9LE-NEXT:    ori r5, r5, 37253
+; P9LE-NEXT:    extsh r4, r3
+; P9LE-NEXT:    mulhw r6, r4, r5
+; P9LE-NEXT:    add r4, r6, r4
+; P9LE-NEXT:    srwi r6, r4, 31
+; P9LE-NEXT:    srawi r4, r4, 6
+; P9LE-NEXT:    add r4, r4, r6
+; P9LE-NEXT:    mulli r6, r4, 95
+; P9LE-NEXT:    sub r3, r3, r6
+; P9LE-NEXT:    li r6, 0
+; P9LE-NEXT:    vextuhrx r6, r6, v2
+; P9LE-NEXT:    slwi r3, r3, 16
+; P9LE-NEXT:    extsh r6, r6
+; P9LE-NEXT:    mulhw r7, r6, r5
+; P9LE-NEXT:    add r7, r7, r6
 ; P9LE-NEXT:    srwi r8, r7, 31
 ; P9LE-NEXT:    srawi r7, r7, 6
 ; P9LE-NEXT:    add r7, r7, r8
 ; P9LE-NEXT:    mulli r8, r7, 95
-; P9LE-NEXT:    sub r3, r3, r8
-; P9LE-NEXT:    mtvsrd v4, r3
+; P9LE-NEXT:    sub r6, r6, r8
+; P9LE-NEXT:    or r3, r3, r6
+; P9LE-NEXT:    mtfprwz f0, r3
 ; P9LE-NEXT:    li r3, 6
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    extsh r8, r3
-; P9LE-NEXT:    mulhw r4, r8, r4
-; P9LE-NEXT:    add r4, r4, r8
-; P9LE-NEXT:    srwi r8, r4, 31
-; P9LE-NEXT:    srawi r4, r4, 6
-; P9LE-NEXT:    add r4, r4, r8
-; P9LE-NEXT:    mulli r8, r4, 95
-; P9LE-NEXT:    mtvsrd v5, r4
+; P9LE-NEXT:    extsh r6, r3
+; P9LE-NEXT:    mulhw r8, r6, r5
+; P9LE-NEXT:    add r6, r8, r6
+; P9LE-NEXT:    srwi r8, r6, 31
+; P9LE-NEXT:    srawi r6, r6, 6
+; P9LE-NEXT:    add r6, r6, r8
+; P9LE-NEXT:    mulli r8, r6, 95
 ; P9LE-NEXT:    sub r3, r3, r8
-; P9LE-NEXT:    mtvsrd v2, r3
-; P9LE-NEXT:    vmrghh v2, v2, v4
-; P9LE-NEXT:    mtvsrd v4, r6
-; P9LE-NEXT:    xxmrglw v2, v2, v3
-; P9LE-NEXT:    mtvsrd v3, r5
-; P9LE-NEXT:    vmrghh v3, v4, v3
-; P9LE-NEXT:    mtvsrd v4, r7
-; P9LE-NEXT:    vmrghh v4, v5, v4
-; P9LE-NEXT:    xxmrglw v3, v4, v3
+; P9LE-NEXT:    li r8, 4
+; P9LE-NEXT:    vextuhrx r8, r8, v2
+; P9LE-NEXT:    slwi r3, r3, 16
+; P9LE-NEXT:    extsh r9, r8
+; P9LE-NEXT:    mulhw r5, r9, r5
+; P9LE-NEXT:    add r5, r5, r9
+; P9LE-NEXT:    srwi r9, r5, 31
+; P9LE-NEXT:    srawi r5, r5, 6
+; P9LE-NEXT:    add r5, r5, r9
+; P9LE-NEXT:    mulli r9, r5, 95
+; P9LE-NEXT:    sub r8, r8, r9
+; P9LE-NEXT:    or r3, r3, r8
+; P9LE-NEXT:    mtfprwz f1, r3
+; P9LE-NEXT:    slwi r3, r4, 16
+; P9LE-NEXT:    or r3, r3, r7
+; P9LE-NEXT:    xxmrghw v2, vs1, vs0
+; P9LE-NEXT:    mtfprwz f0, r3
+; P9LE-NEXT:    slwi r3, r6, 16
+; P9LE-NEXT:    or r3, r3, r5
+; P9LE-NEXT:    mtfprwz f1, r3
+; P9LE-NEXT:    xxmrghw v3, vs1, vs0
 ; P9LE-NEXT:    vadduhm v2, v2, v3
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: combine_srem_sdiv:
 ; P9BE:       # %bb.0:
-; P9BE-NEXT:    li r3, 6
+; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    lis r5, -21386
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    ori r5, r5, 37253
@@ -536,113 +524,110 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P9BE-NEXT:    add r4, r4, r6
 ; P9BE-NEXT:    mulli r6, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r6
-; P9BE-NEXT:    mtfprwz f0, r3
-; P9BE-NEXT:    li r3, 4
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    extsh r6, r3
-; P9BE-NEXT:    mulhw r7, r6, r5
-; P9BE-NEXT:    add r6, r7, r6
-; P9BE-NEXT:    srwi r7, r6, 31
-; P9BE-NEXT:    srawi r6, r6, 6
-; P9BE-NEXT:    add r6, r6, r7
-; P9BE-NEXT:    mulli r7, r6, 95
-; P9BE-NEXT:    sub r3, r3, r7
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
-; P9BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; P9BE-NEXT:    lxv vs2, 0(r3)
-; P9BE-NEXT:    li r3, 2
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    extsh r7, r3
-; P9BE-NEXT:    xxperm vs0, vs1, vs2
+; P9BE-NEXT:    li r6, 6
+; P9BE-NEXT:    vextuhlx r6, r6, v2
+; P9BE-NEXT:    slwi r3, r3, 16
+; P9BE-NEXT:    extsh r7, r6
 ; P9BE-NEXT:    mulhw r8, r7, r5
 ; P9BE-NEXT:    add r7, r8, r7
 ; P9BE-NEXT:    srwi r8, r7, 31
 ; P9BE-NEXT:    srawi r7, r7, 6
 ; P9BE-NEXT:    add r7, r7, r8
 ; P9BE-NEXT:    mulli r8, r7, 95
-; P9BE-NEXT:    sub r3, r3, r8
-; P9BE-NEXT:    mtfprwz f1, r3
+; P9BE-NEXT:    sub r6, r6, r8
+; P9BE-NEXT:    or r3, r3, r6
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    mulhw r5, r3, r5
-; P9BE-NEXT:    add r5, r5, r3
-; P9BE-NEXT:    srwi r8, r5, 31
-; P9BE-NEXT:    srawi r5, r5, 6
-; P9BE-NEXT:    add r5, r5, r8
-; P9BE-NEXT:    mulli r8, r5, 95
+; P9BE-NEXT:    extsh r6, r3
+; P9BE-NEXT:    mulhw r8, r6, r5
+; P9BE-NEXT:    add r6, r8, r6
+; P9BE-NEXT:    srwi r8, r6, 31
+; P9BE-NEXT:    srawi r6, r6, 6
+; P9BE-NEXT:    add r6, r6, r8
+; P9BE-NEXT:    mulli r8, r6, 95
 ; P9BE-NEXT:    sub r3, r3, r8
-; P9BE-NEXT:    mtfprwz f3, r3
-; P9BE-NEXT:    xxperm vs1, vs3, vs2
-; P9BE-NEXT:    mtfprwz f3, r5
-; P9BE-NEXT:    xxmrghw v2, vs1, vs0
-; P9BE-NEXT:    mtfprwz f0, r4
-; P9BE-NEXT:    mtfprwz f1, r6
-; P9BE-NEXT:    xxperm vs0, vs1, vs2
-; P9BE-NEXT:    mtfprwz f1, r7
-; P9BE-NEXT:    xxperm vs1, vs3, vs2
-; P9BE-NEXT:    xxmrghw v3, vs1, vs0
+; P9BE-NEXT:    li r8, 2
+; P9BE-NEXT:    vextuhlx r8, r8, v2
+; P9BE-NEXT:    slwi r3, r3, 16
+; P9BE-NEXT:    extsh r9, r8
+; P9BE-NEXT:    mulhw r5, r9, r5
+; P9BE-NEXT:    add r5, r5, r9
+; P9BE-NEXT:    srwi r9, r5, 31
+; P9BE-NEXT:    srawi r5, r5, 6
+; P9BE-NEXT:    add r5, r5, r9
+; P9BE-NEXT:    mulli r9, r5, 95
+; P9BE-NEXT:    sub r8, r8, r9
+; P9BE-NEXT:    or r3, r3, r8
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    slwi r3, r4, 16
+; P9BE-NEXT:    or r3, r3, r7
+; P9BE-NEXT:    vmrgow v2, v2, v3
+; P9BE-NEXT:    mtvsrwz v3, r3
+; P9BE-NEXT:    slwi r3, r6, 16
+; P9BE-NEXT:    or r3, r3, r5
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    vmrgow v3, v4, v3
 ; P9BE-NEXT:    vadduhm v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: combine_srem_sdiv:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r4, -21386
-; P8LE-NEXT:    mffprd r3, f0
-; P8LE-NEXT:    ori r4, r4, 37253
-; P8LE-NEXT:    rldicl r6, r3, 48, 48
-; P8LE-NEXT:    rldicl r7, r3, 32, 48
-; P8LE-NEXT:    clrldi r5, r3, 48
-; P8LE-NEXT:    rldicl r3, r3, 16, 48
-; P8LE-NEXT:    extsh r8, r6
-; P8LE-NEXT:    extsh r9, r7
-; P8LE-NEXT:    extsh r5, r5
-; P8LE-NEXT:    extsh r10, r3
-; P8LE-NEXT:    mulhw r11, r8, r4
+; P8LE-NEXT:    lis r5, -21386
+; P8LE-NEXT:    mffprd r4, f0
+; P8LE-NEXT:    ori r5, r5, 37253
+; P8LE-NEXT:    rldicl r3, r4, 48, 48
+; P8LE-NEXT:    rldicl r6, r4, 16, 48
+; P8LE-NEXT:    clrldi r7, r4, 48
+; P8LE-NEXT:    rldicl r4, r4, 32, 48
+; P8LE-NEXT:    extsh r8, r3
+; P8LE-NEXT:    extsh r9, r6
+; P8LE-NEXT:    extsh r7, r7
+; P8LE-NEXT:    extsh r10, r4
+; P8LE-NEXT:    mulhw r11, r8, r5
 ; P8LE-NEXT:    add r8, r11, r8
-; P8LE-NEXT:    mulhw r11, r9, r4
+; P8LE-NEXT:    mulhw r11, r9, r5
 ; P8LE-NEXT:    add r9, r11, r9
-; P8LE-NEXT:    mulhw r11, r5, r4
-; P8LE-NEXT:    mulhw r4, r10, r4
-; P8LE-NEXT:    add r11, r11, r5
-; P8LE-NEXT:    add r4, r4, r10
+; P8LE-NEXT:    mulhw r11, r7, r5
+; P8LE-NEXT:    mulhw r5, r10, r5
+; P8LE-NEXT:    add r11, r11, r7
+; P8LE-NEXT:    add r5, r5, r10
+; P8LE-NEXT:    srwi r10, r8, 31
+; P8LE-NEXT:    srawi r8, r8, 6
+; P8LE-NEXT:    add r8, r8, r10
 ; P8LE-NEXT:    srwi r10, r11, 31
 ; P8LE-NEXT:    srawi r11, r11, 6
 ; P8LE-NEXT:    add r10, r11, r10
-; P8LE-NEXT:    srwi r11, r8, 31
-; P8LE-NEXT:    srawi r8, r8, 6
-; P8LE-NEXT:    add r8, r8, r11
 ; P8LE-NEXT:    srwi r11, r9, 31
 ; P8LE-NEXT:    srawi r9, r9, 6
-; P8LE-NEXT:    mtvsrd v2, r10
 ; P8LE-NEXT:    add r9, r9, r11
-; P8LE-NEXT:    srwi r11, r4, 31
-; P8LE-NEXT:    srawi r4, r4, 6
-; P8LE-NEXT:    mtvsrd v3, r8
-; P8LE-NEXT:    add r4, r4, r11
-; P8LE-NEXT:    mulli r11, r10, 95
-; P8LE-NEXT:    sub r5, r5, r11
-; P8LE-NEXT:    mulli r11, r8, 95
-; P8LE-NEXT:    mtvsrd v4, r5
-; P8LE-NEXT:    sub r6, r6, r11
-; P8LE-NEXT:    mulli r11, r9, 95
-; P8LE-NEXT:    mtvsrd v5, r6
-; P8LE-NEXT:    sub r7, r7, r11
-; P8LE-NEXT:    mulli r11, r4, 95
-; P8LE-NEXT:    mtvsrd v0, r7
-; P8LE-NEXT:    sub r3, r3, r11
-; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    mtvsrd v3, r9
-; P8LE-NEXT:    vmrghh v4, v5, v4
-; P8LE-NEXT:    mtvsrd v5, r3
-; P8LE-NEXT:    vmrghh v5, v5, v0
-; P8LE-NEXT:    mtvsrd v0, r4
-; P8LE-NEXT:    xxmrglw v4, v5, v4
-; P8LE-NEXT:    vmrghh v3, v0, v3
-; P8LE-NEXT:    xxmrglw v2, v3, v2
-; P8LE-NEXT:    vadduhm v2, v4, v2
+; P8LE-NEXT:    srwi r11, r5, 31
+; P8LE-NEXT:    srawi r5, r5, 6
+; P8LE-NEXT:    add r5, r5, r11
+; P8LE-NEXT:    slwi r11, r8, 16
+; P8LE-NEXT:    mulli r8, r8, 95
+; P8LE-NEXT:    or r11, r11, r10
+; P8LE-NEXT:    mtfprwz f0, r11
+; P8LE-NEXT:    sub r3, r3, r8
+; P8LE-NEXT:    mulli r8, r10, 95
+; P8LE-NEXT:    slwi r3, r3, 16
+; P8LE-NEXT:    sub r7, r7, r8
+; P8LE-NEXT:    mulli r8, r9, 95
+; P8LE-NEXT:    or r3, r3, r7
+; P8LE-NEXT:    mtfprwz f1, r3
+; P8LE-NEXT:    slwi r3, r9, 16
+; P8LE-NEXT:    sub r6, r6, r8
+; P8LE-NEXT:    mulli r8, r5, 95
+; P8LE-NEXT:    or r3, r3, r5
+; P8LE-NEXT:    slwi r6, r6, 16
+; P8LE-NEXT:    sub r4, r4, r8
+; P8LE-NEXT:    or r4, r6, r4
+; P8LE-NEXT:    mtfprwz f2, r4
+; P8LE-NEXT:    xxmrghw v2, vs2, vs1
+; P8LE-NEXT:    mtfprwz f1, r3
+; P8LE-NEXT:    xxmrghw v3, vs1, vs0
+; P8LE-NEXT:    vadduhm v2, v2, v3
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: combine_srem_sdiv:
@@ -650,60 +635,57 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P8BE-NEXT:    mfvsrd r4, v2
 ; P8BE-NEXT:    lis r5, -21386
 ; P8BE-NEXT:    ori r5, r5, 37253
-; P8BE-NEXT:    clrldi r3, r4, 48
-; P8BE-NEXT:    rldicl r6, r4, 48, 48
-; P8BE-NEXT:    rldicl r7, r4, 32, 48
-; P8BE-NEXT:    rldicl r4, r4, 16, 48
+; P8BE-NEXT:    rldicl r3, r4, 48, 48
+; P8BE-NEXT:    clrldi r7, r4, 48
+; P8BE-NEXT:    rldicl r6, r4, 16, 48
+; P8BE-NEXT:    rldicl r4, r4, 32, 48
 ; P8BE-NEXT:    extsh r8, r3
-; P8BE-NEXT:    extsh r9, r6
 ; P8BE-NEXT:    extsh r10, r7
-; P8BE-NEXT:    extsh r4, r4
+; P8BE-NEXT:    extsh r9, r6
 ; P8BE-NEXT:    mulhw r11, r8, r5
 ; P8BE-NEXT:    add r8, r11, r8
-; P8BE-NEXT:    mulhw r11, r9, r5
-; P8BE-NEXT:    add r9, r11, r9
 ; P8BE-NEXT:    mulhw r11, r10, r5
-; P8BE-NEXT:    mulhw r5, r4, r5
 ; P8BE-NEXT:    add r10, r11, r10
+; P8BE-NEXT:    mulhw r11, r9, r5
+; P8BE-NEXT:    add r9, r11, r9
+; P8BE-NEXT:    extsh r11, r4
+; P8BE-NEXT:    mulhw r5, r11, r5
+; P8BE-NEXT:    add r5, r5, r11
 ; P8BE-NEXT:    srwi r11, r8, 31
 ; P8BE-NEXT:    srawi r8, r8, 6
-; P8BE-NEXT:    add r5, r5, r4
 ; P8BE-NEXT:    add r8, r8, r11
-; P8BE-NEXT:    srwi r11, r9, 31
-; P8BE-NEXT:    srawi r9, r9, 6
-; P8BE-NEXT:    add r9, r9, r11
 ; P8BE-NEXT:    srwi r11, r10, 31
 ; P8BE-NEXT:    srawi r10, r10, 6
-; P8BE-NEXT:    mtvsrwz v3, r8
 ; P8BE-NEXT:    add r10, r10, r11
+; P8BE-NEXT:    srwi r11, r9, 31
+; P8BE-NEXT:    srawi r9, r9, 6
+; P8BE-NEXT:    add r9, r9, r11
 ; P8BE-NEXT:    srwi r11, r5, 31
 ; P8BE-NEXT:    srawi r5, r5, 6
-; P8BE-NEXT:    mtvsrwz v4, r9
 ; P8BE-NEXT:    add r5, r5, r11
-; P8BE-NEXT:    mulli r11, r8, 95
-; P8BE-NEXT:    sub r3, r3, r11
-; P8BE-NEXT:    mulli r11, r9, 95
-; P8BE-NEXT:    mtvsrwz v5, r3
-; P8BE-NEXT:    sub r6, r6, r11
-; P8BE-NEXT:    mulli r11, r10, 95
-; P8BE-NEXT:    mtvsrwz v0, r6
-; P8BE-NEXT:    sub r7, r7, r11
-; P8BE-NEXT:    mulli r11, r5, 95
-; P8BE-NEXT:    mtvsrwz v1, r7
-; P8BE-NEXT:    sub r4, r4, r11
-; P8BE-NEXT:    addis r11, r2, .LCPI2_0@toc@ha
-; P8BE-NEXT:    addi r11, r11, .LCPI2_0@toc@l
-; P8BE-NEXT:    lxvw4x v2, 0, r11
-; P8BE-NEXT:    vperm v5, v0, v5, v2
-; P8BE-NEXT:    mtvsrwz v0, r4
-; P8BE-NEXT:    vperm v3, v4, v3, v2
-; P8BE-NEXT:    mtvsrwz v4, r10
-; P8BE-NEXT:    vperm v0, v0, v1, v2
-; P8BE-NEXT:    mtvsrwz v1, r5
-; P8BE-NEXT:    vperm v2, v1, v4, v2
-; P8BE-NEXT:    xxmrghw v4, v0, v5
-; P8BE-NEXT:    xxmrghw v2, v2, v3
-; P8BE-NEXT:    vadduhm v2, v4, v2
+; P8BE-NEXT:    slwi r11, r8, 16
+; P8BE-NEXT:    mulli r8, r8, 95
+; P8BE-NEXT:    or r11, r11, r10
+; P8BE-NEXT:    mtvsrwz v2, r11
+; P8BE-NEXT:    sub r3, r3, r8
+; P8BE-NEXT:    mulli r8, r10, 95
+; P8BE-NEXT:    slwi r3, r3, 16
+; P8BE-NEXT:    sub r7, r7, r8
+; P8BE-NEXT:    mulli r8, r9, 95
+; P8BE-NEXT:    or r3, r3, r7
+; P8BE-NEXT:    mtvsrwz v3, r3
+; P8BE-NEXT:    slwi r3, r9, 16
+; P8BE-NEXT:    sub r6, r6, r8
+; P8BE-NEXT:    mulli r8, r5, 95
+; P8BE-NEXT:    or r3, r3, r5
+; P8BE-NEXT:    slwi r6, r6, 16
+; P8BE-NEXT:    sub r4, r4, r8
+; P8BE-NEXT:    or r4, r6, r4
+; P8BE-NEXT:    mtvsrwz v4, r4
+; P8BE-NEXT:    vmrgow v3, v4, v3
+; P8BE-NEXT:    mtvsrwz v4, r3
+; P8BE-NEXT:    vmrgow v2, v4, v2
+; P8BE-NEXT:    vadduhm v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
   %2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
@@ -722,39 +704,39 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P9LE-NEXT:    addze r4, r4
 ; P9LE-NEXT:    slwi r4, r4, 6
 ; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    mtvsrd v3, r3
-; P9LE-NEXT:    li r3, 2
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    extsh r3, r3
-; P9LE-NEXT:    srawi r4, r3, 5
-; P9LE-NEXT:    addze r4, r4
-; P9LE-NEXT:    slwi r4, r4, 5
-; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    lis r4, -21386
-; P9LE-NEXT:    mtvsrd v4, r3
+; P9LE-NEXT:    li r4, 2
+; P9LE-NEXT:    vextuhrx r4, r4, v2
+; P9LE-NEXT:    extsh r5, r4
+; P9LE-NEXT:    srawi r5, r5, 5
+; P9LE-NEXT:    addze r5, r5
+; P9LE-NEXT:    slwi r5, r5, 5
+; P9LE-NEXT:    sub r4, r4, r5
+; P9LE-NEXT:    lis r5, -21386
+; P9LE-NEXT:    slwi r4, r4, 16
+; P9LE-NEXT:    ori r5, r5, 37253
+; P9LE-NEXT:    or r3, r4, r3
+; P9LE-NEXT:    mtfprwz f0, r3
 ; P9LE-NEXT:    li r3, 6
-; P9LE-NEXT:    ori r4, r4, 37253
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    vmrghh v3, v4, v3
-; P9LE-NEXT:    extsh r3, r3
-; P9LE-NEXT:    mulhw r4, r3, r4
-; P9LE-NEXT:    add r4, r4, r3
+; P9LE-NEXT:    extsh r4, r3
+; P9LE-NEXT:    mulhw r5, r4, r5
+; P9LE-NEXT:    add r4, r5, r4
 ; P9LE-NEXT:    srwi r5, r4, 31
-; P9LE-NEXT:    srawi r4, r4, 6
+; P9LE-NEXT:    srwi r4, r4, 6
 ; P9LE-NEXT:    add r4, r4, r5
 ; P9LE-NEXT:    mulli r4, r4, 95
 ; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    mtvsrd v4, r3
-; P9LE-NEXT:    li r3, 4
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    extsh r3, r3
-; P9LE-NEXT:    srawi r4, r3, 3
-; P9LE-NEXT:    addze r4, r4
-; P9LE-NEXT:    slwi r4, r4, 3
-; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    mtvsrd v2, r3
-; P9LE-NEXT:    vmrghh v2, v4, v2
-; P9LE-NEXT:    xxmrglw v2, v2, v3
+; P9LE-NEXT:    li r4, 4
+; P9LE-NEXT:    vextuhrx r4, r4, v2
+; P9LE-NEXT:    slwi r3, r3, 16
+; P9LE-NEXT:    extsh r4, r4
+; P9LE-NEXT:    srawi r5, r4, 3
+; P9LE-NEXT:    addze r5, r5
+; P9LE-NEXT:    slwi r5, r5, 3
+; P9LE-NEXT:    sub r4, r4, r5
+; P9LE-NEXT:    or r3, r3, r4
+; P9LE-NEXT:    mtfprwz f1, r3
+; P9LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: dont_fold_srem_power_of_two:
@@ -766,24 +748,21 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    addze r4, r4
 ; P9BE-NEXT:    slwi r4, r4, 5
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtfprwz f0, r3
-; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    srawi r4, r3, 6
-; P9BE-NEXT:    addze r4, r4
-; P9BE-NEXT:    slwi r4, r4, 6
-; P9BE-NEXT:    sub r3, r3, r4
+; P9BE-NEXT:    li r4, 0
+; P9BE-NEXT:    vextuhlx r4, r4, v2
+; P9BE-NEXT:    extsh r5, r4
+; P9BE-NEXT:    srawi r5, r5, 6
+; P9BE-NEXT:    addze r5, r5
+; P9BE-NEXT:    slwi r5, r5, 6
+; P9BE-NEXT:    sub r4, r4, r5
+; P9BE-NEXT:    slwi r4, r4, 16
+; P9BE-NEXT:    or r3, r4, r3
 ; P9BE-NEXT:    lis r4, -21386
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; P9BE-NEXT:    ori r4, r4, 37253
-; P9BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; P9BE-NEXT:    lxv vs2, 0(r3)
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 6
+; P9BE-NEXT:    ori r4, r4, 37253
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhw r4, r3, r4
 ; P9BE-NEXT:    add r4, r4, r3
 ; P9BE-NEXT:    srwi r5, r4, 31
@@ -791,17 +770,17 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    add r4, r4, r5
 ; P9BE-NEXT:    mulli r4, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    li r3, 4
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    srawi r4, r3, 3
-; P9BE-NEXT:    addze r4, r4
-; P9BE-NEXT:    slwi r4, r4, 3
-; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtfprwz f3, r3
-; P9BE-NEXT:    xxperm vs1, vs3, vs2
-; P9BE-NEXT:    xxmrghw v2, vs0, vs1
+; P9BE-NEXT:    li r4, 4
+; P9BE-NEXT:    vextuhlx r4, r4, v2
+; P9BE-NEXT:    extsh r5, r4
+; P9BE-NEXT:    srawi r5, r5, 3
+; P9BE-NEXT:    addze r5, r5
+; P9BE-NEXT:    slwi r5, r5, 3
+; P9BE-NEXT:    sub r4, r4, r5
+; P9BE-NEXT:    slwi r4, r4, 16
+; P9BE-NEXT:    or r3, r4, r3
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vmrgow v2, v3, v2
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: dont_fold_srem_power_of_two:
@@ -814,36 +793,36 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P8LE-NEXT:    addze r5, r5
 ; P8LE-NEXT:    slwi r5, r5, 6
 ; P8LE-NEXT:    sub r4, r4, r5
-; P8LE-NEXT:    mtvsrd v2, r4
-; P8LE-NEXT:    rldicl r4, r3, 48, 48
-; P8LE-NEXT:    extsh r4, r4
-; P8LE-NEXT:    srawi r5, r4, 5
-; P8LE-NEXT:    addze r5, r5
-; P8LE-NEXT:    slwi r5, r5, 5
-; P8LE-NEXT:    sub r4, r4, r5
-; P8LE-NEXT:    lis r5, -21386
-; P8LE-NEXT:    mtvsrd v3, r4
+; P8LE-NEXT:    rldicl r5, r3, 48, 48
+; P8LE-NEXT:    extsh r6, r5
+; P8LE-NEXT:    srawi r6, r6, 5
+; P8LE-NEXT:    addze r6, r6
+; P8LE-NEXT:    slwi r6, r6, 5
+; P8LE-NEXT:    sub r5, r5, r6
+; P8LE-NEXT:    lis r6, -21386
+; P8LE-NEXT:    slwi r5, r5, 16
+; P8LE-NEXT:    ori r6, r6, 37253
+; P8LE-NEXT:    or r4, r5, r4
+; P8LE-NEXT:    mtfprwz f0, r4
 ; P8LE-NEXT:    rldicl r4, r3, 16, 48
-; P8LE-NEXT:    ori r5, r5, 37253
 ; P8LE-NEXT:    rldicl r3, r3, 32, 48
-; P8LE-NEXT:    extsh r4, r4
+; P8LE-NEXT:    extsh r5, r4
 ; P8LE-NEXT:    extsh r3, r3
-; P8LE-NEXT:    mulhw r5, r4, r5
-; P8LE-NEXT:    add r5, r5, r4
+; P8LE-NEXT:    mulhw r6, r5, r6
+; P8LE-NEXT:    add r5, r6, r5
 ; P8LE-NEXT:    srwi r6, r5, 31
-; P8LE-NEXT:    srawi r5, r5, 6
+; P8LE-NEXT:    srwi r5, r5, 6
 ; P8LE-NEXT:    add r5, r5, r6
 ; P8LE-NEXT:    mulli r5, r5, 95
 ; P8LE-NEXT:    sub r4, r4, r5
-; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    mtvsrd v3, r4
-; P8LE-NEXT:    srawi r4, r3, 3
-; P8LE-NEXT:    addze r4, r4
-; P8LE-NEXT:    slwi r4, r4, 3
-; P8LE-NEXT:    sub r3, r3, r4
-; P8LE-NEXT:    mtvsrd v4, r3
-; P8LE-NEXT:    vmrghh v3, v3, v4
-; P8LE-NEXT:    xxmrglw v2, v3, v2
+; P8LE-NEXT:    srawi r5, r3, 3
+; P8LE-NEXT:    addze r5, r5
+; P8LE-NEXT:    slwi r4, r4, 16
+; P8LE-NEXT:    slwi r5, r5, 3
+; P8LE-NEXT:    sub r3, r3, r5
+; P8LE-NEXT:    or r3, r4, r3
+; P8LE-NEXT:    mtfprwz f1, r3
+; P8LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_srem_power_of_two:
@@ -855,23 +834,20 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P8BE-NEXT:    addze r5, r5
 ; P8BE-NEXT:    slwi r5, r5, 5
 ; P8BE-NEXT:    sub r4, r4, r5
-; P8BE-NEXT:    mtvsrwz v2, r4
-; P8BE-NEXT:    rldicl r4, r3, 16, 48
-; P8BE-NEXT:    extsh r4, r4
-; P8BE-NEXT:    srawi r5, r4, 6
-; P8BE-NEXT:    addze r5, r5
-; P8BE-NEXT:    slwi r5, r5, 6
-; P8BE-NEXT:    sub r4, r4, r5
+; P8BE-NEXT:    rldicl r5, r3, 16, 48
+; P8BE-NEXT:    extsh r6, r5
+; P8BE-NEXT:    srawi r6, r6, 6
+; P8BE-NEXT:    addze r6, r6
+; P8BE-NEXT:    slwi r6, r6, 6
+; P8BE-NEXT:    sub r5, r5, r6
+; P8BE-NEXT:    slwi r5, r5, 16
+; P8BE-NEXT:    or r4, r5, r4
 ; P8BE-NEXT:    lis r5, -21386
-; P8BE-NEXT:    mtvsrwz v3, r4
-; P8BE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
-; P8BE-NEXT:    ori r5, r5, 37253
-; P8BE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
-; P8BE-NEXT:    lxvw4x v4, 0, r4
+; P8BE-NEXT:    mtvsrwz v2, r4
 ; P8BE-NEXT:    clrldi r4, r3, 48
+; P8BE-NEXT:    ori r5, r5, 37253
 ; P8BE-NEXT:    rldicl r3, r3, 48, 48
 ; P8BE-NEXT:    extsh r4, r4
-; P8BE-NEXT:    extsh r3, r3
 ; P8BE-NEXT:    mulhw r5, r4, r5
 ; P8BE-NEXT:    add r5, r5, r4
 ; P8BE-NEXT:    srwi r6, r5, 31
@@ -879,15 +855,15 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P8BE-NEXT:    add r5, r5, r6
 ; P8BE-NEXT:    mulli r5, r5, 95
 ; P8BE-NEXT:    sub r4, r4, r5
-; P8BE-NEXT:    vperm v2, v3, v2, v4
-; P8BE-NEXT:    mtvsrwz v3, r4
-; P8BE-NEXT:    srawi r4, r3, 3
-; P8BE-NEXT:    addze r4, r4
-; P8BE-NEXT:    slwi r4, r4, 3
-; P8BE-NEXT:    sub r3, r3, r4
-; P8BE-NEXT:    mtvsrwz v5, r3
-; P8BE-NEXT:    vperm v3, v5, v3, v4
-; P8BE-NEXT:    xxmrghw v2, v2, v3
+; P8BE-NEXT:    extsh r5, r3
+; P8BE-NEXT:    srawi r5, r5, 3
+; P8BE-NEXT:    addze r5, r5
+; P8BE-NEXT:    slwi r5, r5, 3
+; P8BE-NEXT:    sub r3, r3, r5
+; P8BE-NEXT:    slwi r3, r3, 16
+; P8BE-NEXT:    or r3, r3, r4
+; P8BE-NEXT:    mtvsrwz v3, r3
+; P8BE-NEXT:    vmrgow v2, v2, v3
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
   ret <4 x i16> %1
@@ -898,25 +874,25 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
 ; P9LE-LABEL: dont_fold_srem_one:
 ; P9LE:       # %bb.0:
 ; P9LE-NEXT:    li r3, 2
-; P9LE-NEXT:    lis r4, -14230
+; P9LE-NEXT:    lis r5, -14230
+; P9LE-NEXT:    lis r6, 24749
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    ori r4, r4, 30865
-; P9LE-NEXT:    extsh r3, r3
-; P9LE-NEXT:    mulhw r4, r3, r4
-; P9LE-NEXT:    add r4, r4, r3
+; P9LE-NEXT:    ori r5, r5, 30865
+; P9LE-NEXT:    ori r6, r6, 47143
+; P9LE-NEXT:    extsh r4, r3
+; P9LE-NEXT:    mulhw r5, r4, r5
+; P9LE-NEXT:    add r4, r5, r4
 ; P9LE-NEXT:    srwi r5, r4, 31
-; P9LE-NEXT:    srawi r4, r4, 9
+; P9LE-NEXT:    srwi r4, r4, 9
 ; P9LE-NEXT:    add r4, r4, r5
 ; P9LE-NEXT:    mulli r4, r4, 654
 ; P9LE-NEXT:    sub r3, r3, r4
 ; P9LE-NEXT:    lis r4, -19946
-; P9LE-NEXT:    mtvsrd v3, r3
-; P9LE-NEXT:    li r3, 0
+; P9LE-NEXT:    slwi r3, r3, 16
 ; P9LE-NEXT:    ori r4, r4, 17097
-; P9LE-NEXT:    mtvsrd v4, r3
+; P9LE-NEXT:    mtfprwz f0, r3
 ; P9LE-NEXT:    li r3, 4
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    vmrghh v3, v3, v4
 ; P9LE-NEXT:    extsh r3, r3
 ; P9LE-NEXT:    mulhw r4, r3, r4
 ; P9LE-NEXT:    add r4, r4, r3
@@ -925,165 +901,149 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
 ; P9LE-NEXT:    add r4, r4, r5
 ; P9LE-NEXT:    mulli r4, r4, 23
 ; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    lis r4, 24749
-; P9LE-NEXT:    mtvsrd v4, r3
-; P9LE-NEXT:    li r3, 6
-; P9LE-NEXT:    ori r4, r4, 47143
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    extsh r3, r3
-; P9LE-NEXT:    mulhw r4, r3, r4
-; P9LE-NEXT:    srwi r5, r4, 31
-; P9LE-NEXT:    srawi r4, r4, 11
-; P9LE-NEXT:    add r4, r4, r5
-; P9LE-NEXT:    mulli r4, r4, 5423
-; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    mtvsrd v2, r3
-; P9LE-NEXT:    vmrghh v2, v2, v4
-; P9LE-NEXT:    xxmrglw v2, v2, v3
+; P9LE-NEXT:    li r4, 6
+; P9LE-NEXT:    vextuhrx r4, r4, v2
+; P9LE-NEXT:    extsh r5, r4
+; P9LE-NEXT:    mulhw r5, r5, r6
+; P9LE-NEXT:    srwi r6, r5, 31
+; P9LE-NEXT:    srwi r5, r5, 11
+; P9LE-NEXT:    add r5, r5, r6
+; P9LE-NEXT:    mulli r5, r5, 5423
+; P9LE-NEXT:    sub r4, r4, r5
+; P9LE-NEXT:    slwi r4, r4, 16
+; P9LE-NEXT:    or r3, r4, r3
+; P9LE-NEXT:    mtfprwz f1, r3
+; P9LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: dont_fold_srem_one:
 ; P9BE:       # %bb.0:
-; P9BE-NEXT:    li r3, 4
-; P9BE-NEXT:    lis r4, -19946
+; P9BE-NEXT:    li r3, 2
+; P9BE-NEXT:    lis r4, -14230
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    ori r4, r4, 17097
+; P9BE-NEXT:    ori r4, r4, 30865
 ; P9BE-NEXT:    extsh r3, r3
 ; P9BE-NEXT:    mulhw r4, r3, r4
 ; P9BE-NEXT:    add r4, r4, r3
 ; P9BE-NEXT:    srwi r5, r4, 31
-; P9BE-NEXT:    srawi r4, r4, 4
-; P9BE-NEXT:    add r4, r4, r5
-; P9BE-NEXT:    mulli r4, r4, 23
-; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    lis r4, 24749
-; P9BE-NEXT:    mtfprwz f0, r3
-; P9BE-NEXT:    li r3, 6
-; P9BE-NEXT:    ori r4, r4, 47143
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    mulhw r4, r3, r4
-; P9BE-NEXT:    srwi r5, r4, 31
-; P9BE-NEXT:    srawi r4, r4, 11
+; P9BE-NEXT:    srawi r4, r4, 9
 ; P9BE-NEXT:    add r4, r4, r5
-; P9BE-NEXT:    mulli r4, r4, 5423
+; P9BE-NEXT:    lis r5, -19946
+; P9BE-NEXT:    mulli r4, r4, 654
+; P9BE-NEXT:    ori r5, r5, 17097
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    lis r4, -14230
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; P9BE-NEXT:    ori r4, r4, 30865
-; P9BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; P9BE-NEXT:    lxv vs2, 0(r3)
-; P9BE-NEXT:    li r3, 2
+; P9BE-NEXT:    mtvsrwz v3, r3
+; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    xxperm vs1, vs0, vs2
-; P9BE-NEXT:    mulhw r4, r3, r4
-; P9BE-NEXT:    add r4, r4, r3
+; P9BE-NEXT:    extsh r4, r3
+; P9BE-NEXT:    mulhw r5, r4, r5
+; P9BE-NEXT:    add r4, r5, r4
 ; P9BE-NEXT:    srwi r5, r4, 31
-; P9BE-NEXT:    srawi r4, r4, 9
+; P9BE-NEXT:    srwi r4, r4, 4
 ; P9BE-NEXT:    add r4, r4, r5
-; P9BE-NEXT:    mulli r4, r4, 654
+; P9BE-NEXT:    lis r5, 24749
+; P9BE-NEXT:    mulli r4, r4, 23
+; P9BE-NEXT:    ori r5, r5, 47143
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtfprwz f0, r3
-; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    mtfprwz f3, r3
-; P9BE-NEXT:    xxperm vs0, vs3, vs2
-; P9BE-NEXT:    xxmrghw v2, vs0, vs1
+; P9BE-NEXT:    li r4, 6
+; P9BE-NEXT:    vextuhlx r4, r4, v2
+; P9BE-NEXT:    slwi r3, r3, 16
+; P9BE-NEXT:    extsh r4, r4
+; P9BE-NEXT:    mulhw r5, r4, r5
+; P9BE-NEXT:    srwi r6, r5, 31
+; P9BE-NEXT:    srawi r5, r5, 11
+; P9BE-NEXT:    add r5, r5, r6
+; P9BE-NEXT:    mulli r5, r5, 5423
+; P9BE-NEXT:    sub r4, r4, r5
+; P9BE-NEXT:    or r3, r3, r4
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vmrgow v2, v3, v2
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: dont_fold_srem_one:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r8, 24749
-; P8LE-NEXT:    lis r4, -19946
-; P8LE-NEXT:    lis r5, -14230
+; P8LE-NEXT:    lis r5, -19946
+; P8LE-NEXT:    lis r7, 24749
 ; P8LE-NEXT:    mffprd r3, f0
-; P8LE-NEXT:    ori r8, r8, 47143
-; P8LE-NEXT:    ori r4, r4, 17097
+; P8LE-NEXT:    ori r5, r5, 17097
+; P8LE-NEXT:    ori r7, r7, 47143
+; P8LE-NEXT:    rldicl r4, r3, 32, 48
+; P8LE-NEXT:    extsh r4, r4
+; P8LE-NEXT:    mulhw r5, r4, r5
+; P8LE-NEXT:    add r5, r5, r4
+; P8LE-NEXT:    srwi r6, r5, 31
+; P8LE-NEXT:    srawi r5, r5, 4
+; P8LE-NEXT:    add r5, r5, r6
+; P8LE-NEXT:    mulli r5, r5, 23
+; P8LE-NEXT:    sub r4, r4, r5
+; P8LE-NEXT:    rldicl r5, r3, 16, 48
+; P8LE-NEXT:    rldicl r3, r3, 48, 48
+; P8LE-NEXT:    extsh r6, r5
+; P8LE-NEXT:    mulhw r6, r6, r7
+; P8LE-NEXT:    srwi r7, r6, 31
+; P8LE-NEXT:    srwi r6, r6, 11
+; P8LE-NEXT:    add r6, r6, r7
+; P8LE-NEXT:    mulli r6, r6, 5423
+; P8LE-NEXT:    sub r5, r5, r6
+; P8LE-NEXT:    slwi r5, r5, 16
+; P8LE-NEXT:    or r4, r5, r4
+; P8LE-NEXT:    lis r5, -14230
+; P8LE-NEXT:    mtfprwz f0, r4
+; P8LE-NEXT:    extsh r4, r3
 ; P8LE-NEXT:    ori r5, r5, 30865
-; P8LE-NEXT:    rldicl r6, r3, 32, 48
-; P8LE-NEXT:    rldicl r7, r3, 48, 48
-; P8LE-NEXT:    rldicl r3, r3, 16, 48
-; P8LE-NEXT:    extsh r3, r3
-; P8LE-NEXT:    extsh r6, r6
-; P8LE-NEXT:    extsh r7, r7
-; P8LE-NEXT:    mulhw r8, r3, r8
-; P8LE-NEXT:    mulhw r4, r6, r4
-; P8LE-NEXT:    mulhw r5, r7, r5
-; P8LE-NEXT:    srwi r9, r8, 31
-; P8LE-NEXT:    srawi r8, r8, 11
-; P8LE-NEXT:    add r4, r4, r6
-; P8LE-NEXT:    add r5, r5, r7
-; P8LE-NEXT:    add r8, r8, r9
-; P8LE-NEXT:    srwi r9, r4, 31
-; P8LE-NEXT:    srawi r4, r4, 4
-; P8LE-NEXT:    mulli r8, r8, 5423
-; P8LE-NEXT:    add r4, r4, r9
-; P8LE-NEXT:    srwi r9, r5, 31
-; P8LE-NEXT:    srawi r5, r5, 9
-; P8LE-NEXT:    add r5, r5, r9
-; P8LE-NEXT:    sub r3, r3, r8
-; P8LE-NEXT:    mtvsrd v2, r3
-; P8LE-NEXT:    mulli r3, r4, 23
-; P8LE-NEXT:    mulli r4, r5, 654
-; P8LE-NEXT:    sub r3, r6, r3
-; P8LE-NEXT:    sub r4, r7, r4
-; P8LE-NEXT:    mtvsrd v3, r3
-; P8LE-NEXT:    li r3, 0
-; P8LE-NEXT:    mtvsrd v4, r3
-; P8LE-NEXT:    vmrghh v2, v2, v3
-; P8LE-NEXT:    mtvsrd v3, r4
-; P8LE-NEXT:    vmrghh v3, v3, v4
-; P8LE-NEXT:    xxmrglw v2, v2, v3
+; P8LE-NEXT:    mulhw r5, r4, r5
+; P8LE-NEXT:    add r4, r5, r4
+; P8LE-NEXT:    srwi r5, r4, 31
+; P8LE-NEXT:    srwi r4, r4, 9
+; P8LE-NEXT:    add r4, r4, r5
+; P8LE-NEXT:    mulli r4, r4, 654
+; P8LE-NEXT:    sub r3, r3, r4
+; P8LE-NEXT:    slwi r3, r3, 16
+; P8LE-NEXT:    mtfprwz f1, r3
+; P8LE-NEXT:    xxmrghw v2, vs0, vs1
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_srem_one:
 ; P8BE:       # %bb.0:
 ; P8BE-NEXT:    mfvsrd r3, v2
-; P8BE-NEXT:    lis r4, -19946
-; P8BE-NEXT:    lis r8, 24749
-; P8BE-NEXT:    lis r5, -14230
-; P8BE-NEXT:    ori r4, r4, 17097
-; P8BE-NEXT:    ori r8, r8, 47143
-; P8BE-NEXT:    ori r5, r5, 30865
-; P8BE-NEXT:    rldicl r6, r3, 48, 48
-; P8BE-NEXT:    rldicl r7, r3, 32, 48
-; P8BE-NEXT:    clrldi r3, r3, 48
-; P8BE-NEXT:    extsh r6, r6
+; P8BE-NEXT:    lis r5, 24749
+; P8BE-NEXT:    lis r7, -19946
+; P8BE-NEXT:    ori r5, r5, 47143
+; P8BE-NEXT:    ori r7, r7, 17097
+; P8BE-NEXT:    clrldi r4, r3, 48
+; P8BE-NEXT:    extsh r4, r4
+; P8BE-NEXT:    mulhw r5, r4, r5
+; P8BE-NEXT:    srwi r6, r5, 31
+; P8BE-NEXT:    srawi r5, r5, 11
+; P8BE-NEXT:    add r5, r5, r6
+; P8BE-NEXT:    mulli r5, r5, 5423
+; P8BE-NEXT:    sub r4, r4, r5
+; P8BE-NEXT:    rldicl r5, r3, 48, 48
+; P8BE-NEXT:    rldicl r3, r3, 32, 48
+; P8BE-NEXT:    extsh r6, r5
 ; P8BE-NEXT:    extsh r3, r3
-; P8BE-NEXT:    extsh r7, r7
-; P8BE-NEXT:    mulhw r4, r6, r4
-; P8BE-NEXT:    mulhw r8, r3, r8
-; P8BE-NEXT:    mulhw r5, r7, r5
-; P8BE-NEXT:    add r4, r4, r6
-; P8BE-NEXT:    srwi r9, r8, 31
-; P8BE-NEXT:    srawi r8, r8, 11
-; P8BE-NEXT:    add r5, r5, r7
-; P8BE-NEXT:    add r8, r8, r9
-; P8BE-NEXT:    srwi r9, r4, 31
-; P8BE-NEXT:    srawi r4, r4, 4
-; P8BE-NEXT:    add r4, r4, r9
-; P8BE-NEXT:    mulli r8, r8, 5423
-; P8BE-NEXT:    srwi r9, r5, 31
-; P8BE-NEXT:    srawi r5, r5, 9
-; P8BE-NEXT:    mulli r4, r4, 23
-; P8BE-NEXT:    add r5, r5, r9
-; P8BE-NEXT:    addis r9, r2, .LCPI4_0@toc@ha
-; P8BE-NEXT:    addi r9, r9, .LCPI4_0@toc@l
-; P8BE-NEXT:    mulli r5, r5, 654
-; P8BE-NEXT:    sub r3, r3, r8
-; P8BE-NEXT:    lxvw4x v2, 0, r9
-; P8BE-NEXT:    sub r4, r6, r4
+; P8BE-NEXT:    mulhw r7, r6, r7
+; P8BE-NEXT:    add r6, r7, r6
+; P8BE-NEXT:    srwi r7, r6, 31
+; P8BE-NEXT:    srwi r6, r6, 4
+; P8BE-NEXT:    add r6, r6, r7
+; P8BE-NEXT:    mulli r6, r6, 23
+; P8BE-NEXT:    sub r5, r5, r6
+; P8BE-NEXT:    slwi r5, r5, 16
+; P8BE-NEXT:    or r4, r5, r4
+; P8BE-NEXT:    mtvsrwz v2, r4
+; P8BE-NEXT:    lis r4, -14230
+; P8BE-NEXT:    ori r4, r4, 30865
+; P8BE-NEXT:    mulhw r4, r3, r4
+; P8BE-NEXT:    add r4, r4, r3
+; P8BE-NEXT:    srwi r5, r4, 31
+; P8BE-NEXT:    srawi r4, r4, 9
+; P8BE-NEXT:    add r4, r4, r5
+; P8BE-NEXT:    mulli r4, r4, 654
+; P8BE-NEXT:    sub r3, r3, r4
 ; P8BE-NEXT:    mtvsrwz v3, r3
-; P8BE-NEXT:    mtvsrwz v4, r4
-; P8BE-NEXT:    sub r3, r7, r5
-; P8BE-NEXT:    vperm v3, v4, v3, v2
-; P8BE-NEXT:    mtvsrwz v4, r3
-; P8BE-NEXT:    li r3, 0
-; P8BE-NEXT:    mtvsrwz v5, r3
-; P8BE-NEXT:    vperm v2, v5, v4, v2
-; P8BE-NEXT:    xxmrghw v2, v2, v3
+; P8BE-NEXT:    vmrgow v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
   ret <4 x i16> %1
@@ -1095,8 +1055,10 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
 ; P9LE:       # %bb.0:
 ; P9LE-NEXT:    li r3, 4
 ; P9LE-NEXT:    lis r4, -19946
+; P9LE-NEXT:    lis r6, 24749
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
 ; P9LE-NEXT:    ori r4, r4, 17097
+; P9LE-NEXT:    ori r6, r6, 47143
 ; P9LE-NEXT:    extsh r3, r3
 ; P9LE-NEXT:    mulhw r4, r3, r4
 ; P9LE-NEXT:    add r4, r4, r3
@@ -1105,161 +1067,143 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
 ; P9LE-NEXT:    add r4, r4, r5
 ; P9LE-NEXT:    mulli r4, r4, 23
 ; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    lis r4, 24749
-; P9LE-NEXT:    mtvsrd v3, r3
-; P9LE-NEXT:    li r3, 6
-; P9LE-NEXT:    ori r4, r4, 47143
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    extsh r3, r3
-; P9LE-NEXT:    mulhw r4, r3, r4
-; P9LE-NEXT:    srwi r5, r4, 31
-; P9LE-NEXT:    srawi r4, r4, 11
-; P9LE-NEXT:    add r4, r4, r5
-; P9LE-NEXT:    mulli r4, r4, 5423
-; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    mtvsrd v4, r3
+; P9LE-NEXT:    li r4, 6
+; P9LE-NEXT:    vextuhrx r4, r4, v2
+; P9LE-NEXT:    extsh r5, r4
+; P9LE-NEXT:    mulhw r5, r5, r6
+; P9LE-NEXT:    srwi r6, r5, 31
+; P9LE-NEXT:    srwi r5, r5, 11
+; P9LE-NEXT:    add r5, r5, r6
+; P9LE-NEXT:    mulli r5, r5, 5423
+; P9LE-NEXT:    sub r4, r4, r5
+; P9LE-NEXT:    slwi r4, r4, 16
+; P9LE-NEXT:    or r3, r4, r3
+; P9LE-NEXT:    mtfprwz f0, r3
 ; P9LE-NEXT:    li r3, 2
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    vmrghh v3, v4, v3
-; P9LE-NEXT:    extsh r3, r3
-; P9LE-NEXT:    srawi r4, r3, 15
+; P9LE-NEXT:    extsh r4, r3
+; P9LE-NEXT:    srawi r4, r4, 15
 ; P9LE-NEXT:    addze r4, r4
 ; P9LE-NEXT:    slwi r4, r4, 15
 ; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    mtvsrd v2, r3
-; P9LE-NEXT:    li r3, 0
-; P9LE-NEXT:    mtvsrd v4, r3
-; P9LE-NEXT:    vmrghh v2, v2, v4
-; P9LE-NEXT:    xxmrglw v2, v3, v2
+; P9LE-NEXT:    slwi r3, r3, 16
+; P9LE-NEXT:    mtfprwz f1, r3
+; P9LE-NEXT:    xxmrghw v2, vs0, vs1
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: dont_fold_urem_i16_smax:
 ; P9BE:       # %bb.0:
 ; P9BE-NEXT:    li r3, 4
-; P9BE-NEXT:    lis r4, -19946
+; P9BE-NEXT:    lis r5, -19946
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    ori r4, r4, 17097
-; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    mulhw r4, r3, r4
-; P9BE-NEXT:    add r4, r4, r3
+; P9BE-NEXT:    ori r5, r5, 17097
+; P9BE-NEXT:    extsh r4, r3
+; P9BE-NEXT:    mulhw r5, r4, r5
+; P9BE-NEXT:    add r4, r5, r4
 ; P9BE-NEXT:    srwi r5, r4, 31
-; P9BE-NEXT:    srawi r4, r4, 4
+; P9BE-NEXT:    srwi r4, r4, 4
 ; P9BE-NEXT:    add r4, r4, r5
+; P9BE-NEXT:    lis r5, 24749
 ; P9BE-NEXT:    mulli r4, r4, 23
+; P9BE-NEXT:    ori r5, r5, 47143
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    lis r4, 24749
-; P9BE-NEXT:    mtfprwz f0, r3
-; P9BE-NEXT:    li r3, 6
-; P9BE-NEXT:    ori r4, r4, 47143
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    mulhw r4, r3, r4
-; P9BE-NEXT:    srwi r5, r4, 31
-; P9BE-NEXT:    srawi r4, r4, 11
-; P9BE-NEXT:    add r4, r4, r5
-; P9BE-NEXT:    mulli r4, r4, 5423
-; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; P9BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; P9BE-NEXT:    lxv vs2, 0(r3)
+; P9BE-NEXT:    li r4, 6
+; P9BE-NEXT:    vextuhlx r4, r4, v2
+; P9BE-NEXT:    slwi r3, r3, 16
+; P9BE-NEXT:    extsh r4, r4
+; P9BE-NEXT:    mulhw r5, r4, r5
+; P9BE-NEXT:    srwi r6, r5, 31
+; P9BE-NEXT:    srawi r5, r5, 11
+; P9BE-NEXT:    add r5, r5, r6
+; P9BE-NEXT:    mulli r5, r5, 5423
+; P9BE-NEXT:    sub r4, r4, r5
+; P9BE-NEXT:    or r3, r3, r4
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    extsh r3, r3
-; P9BE-NEXT:    xxperm vs1, vs0, vs2
 ; P9BE-NEXT:    srawi r4, r3, 15
 ; P9BE-NEXT:    addze r4, r4
 ; P9BE-NEXT:    slwi r4, r4, 15
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtfprwz f0, r3
-; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    mtfprwz f3, r3
-; P9BE-NEXT:    xxperm vs0, vs3, vs2
-; P9BE-NEXT:    xxmrghw v2, vs0, vs1
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vmrgow v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: dont_fold_urem_i16_smax:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r4, -19946
-; P8LE-NEXT:    lis r6, 24749
-; P8LE-NEXT:    li r8, 0
+; P8LE-NEXT:    lis r5, -19946
+; P8LE-NEXT:    lis r7, 24749
 ; P8LE-NEXT:    mffprd r3, f0
-; P8LE-NEXT:    ori r4, r4, 17097
-; P8LE-NEXT:    ori r6, r6, 47143
-; P8LE-NEXT:    mtvsrd v2, r8
-; P8LE-NEXT:    rldicl r5, r3, 32, 48
-; P8LE-NEXT:    rldicl r7, r3, 16, 48
+; P8LE-NEXT:    ori r5, r5, 17097
+; P8LE-NEXT:    ori r7, r7, 47143
+; P8LE-NEXT:    rldicl r4, r3, 32, 48
+; P8LE-NEXT:    extsh r4, r4
+; P8LE-NEXT:    mulhw r5, r4, r5
+; P8LE-NEXT:    add r5, r5, r4
+; P8LE-NEXT:    srwi r6, r5, 31
+; P8LE-NEXT:    srawi r5, r5, 4
+; P8LE-NEXT:    add r5, r5, r6
+; P8LE-NEXT:    mulli r5, r5, 23
+; P8LE-NEXT:    sub r4, r4, r5
+; P8LE-NEXT:    rldicl r5, r3, 16, 48
 ; P8LE-NEXT:    rldicl r3, r3, 48, 48
-; P8LE-NEXT:    extsh r5, r5
-; P8LE-NEXT:    extsh r7, r7
-; P8LE-NEXT:    extsh r3, r3
-; P8LE-NEXT:    mulhw r4, r5, r4
-; P8LE-NEXT:    mulhw r6, r7, r6
-; P8LE-NEXT:    add r4, r4, r5
-; P8LE-NEXT:    srwi r8, r6, 31
-; P8LE-NEXT:    srawi r6, r6, 11
-; P8LE-NEXT:    add r6, r6, r8
-; P8LE-NEXT:    srwi r8, r4, 31
-; P8LE-NEXT:    srawi r4, r4, 4
-; P8LE-NEXT:    add r4, r4, r8
+; P8LE-NEXT:    extsh r6, r5
+; P8LE-NEXT:    mulhw r6, r6, r7
+; P8LE-NEXT:    srwi r7, r6, 31
+; P8LE-NEXT:    srwi r6, r6, 11
+; P8LE-NEXT:    add r6, r6, r7
 ; P8LE-NEXT:    mulli r6, r6, 5423
-; P8LE-NEXT:    mulli r4, r4, 23
-; P8LE-NEXT:    sub r6, r7, r6
-; P8LE-NEXT:    sub r4, r5, r4
-; P8LE-NEXT:    srawi r5, r3, 15
-; P8LE-NEXT:    mtvsrd v3, r6
-; P8LE-NEXT:    addze r5, r5
-; P8LE-NEXT:    mtvsrd v4, r4
-; P8LE-NEXT:    slwi r4, r5, 15
+; P8LE-NEXT:    sub r5, r5, r6
+; P8LE-NEXT:    slwi r5, r5, 16
+; P8LE-NEXT:    or r4, r5, r4
+; P8LE-NEXT:    mtfprwz f0, r4
+; P8LE-NEXT:    extsh r4, r3
+; P8LE-NEXT:    srawi r4, r4, 15
+; P8LE-NEXT:    addze r4, r4
+; P8LE-NEXT:    slwi r4, r4, 15
 ; P8LE-NEXT:    sub r3, r3, r4
-; P8LE-NEXT:    vmrghh v3, v3, v4
-; P8LE-NEXT:    mtvsrd v4, r3
-; P8LE-NEXT:    vmrghh v2, v4, v2
-; P8LE-NEXT:    xxmrglw v2, v3, v2
+; P8LE-NEXT:    slwi r3, r3, 16
+; P8LE-NEXT:    mtfprwz f1, r3
+; P8LE-NEXT:    xxmrghw v2, vs0, vs1
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_urem_i16_smax:
 ; P8BE:       # %bb.0:
 ; P8BE-NEXT:    mfvsrd r3, v2
-; P8BE-NEXT:    lis r4, -19946
-; P8BE-NEXT:    lis r6, 24749
-; P8BE-NEXT:    ori r4, r4, 17097
-; P8BE-NEXT:    ori r6, r6, 47143
+; P8BE-NEXT:    lis r5, 24749
+; P8BE-NEXT:    lis r7, -19946
+; P8BE-NEXT:    ori r5, r5, 47143
+; P8BE-NEXT:    ori r7, r7, 17097
+; P8BE-NEXT:    clrldi r4, r3, 48
+; P8BE-NEXT:    extsh r4, r4
+; P8BE-NEXT:    mulhw r5, r4, r5
+; P8BE-NEXT:    srwi r6, r5, 31
+; P8BE-NEXT:    srawi r5, r5, 11
+; P8BE-NEXT:    add r5, r5, r6
+; P8BE-NEXT:    mulli r5, r5, 5423
+; P8BE-NEXT:    sub r4, r4, r5
 ; P8BE-NEXT:    rldicl r5, r3, 48, 48
-; P8BE-NEXT:    clrldi r7, r3, 48
 ; P8BE-NEXT:    rldicl r3, r3, 32, 48
-; P8BE-NEXT:    extsh r5, r5
-; P8BE-NEXT:    extsh r7, r7
+; P8BE-NEXT:    extsh r6, r5
 ; P8BE-NEXT:    extsh r3, r3
-; P8BE-NEXT:    mulhw r4, r5, r4
-; P8BE-NEXT:    mulhw r6, r7, r6
-; P8BE-NEXT:    add r4, r4, r5
-; P8BE-NEXT:    srwi r8, r6, 31
-; P8BE-NEXT:    srawi r6, r6, 11
-; P8BE-NEXT:    add r6, r6, r8
-; P8BE-NEXT:    srwi r8, r4, 31
-; P8BE-NEXT:    srawi r4, r4, 4
-; P8BE-NEXT:    add r4, r4, r8
-; P8BE-NEXT:    addis r8, r2, .LCPI5_0@toc@ha
-; P8BE-NEXT:    mulli r6, r6, 5423
-; P8BE-NEXT:    mulli r4, r4, 23
-; P8BE-NEXT:    addi r8, r8, .LCPI5_0@toc@l
-; P8BE-NEXT:    lxvw4x v2, 0, r8
-; P8BE-NEXT:    srawi r8, r3, 15
-; P8BE-NEXT:    sub r6, r7, r6
-; P8BE-NEXT:    addze r8, r8
-; P8BE-NEXT:    sub r4, r5, r4
-; P8BE-NEXT:    mtvsrwz v3, r6
-; P8BE-NEXT:    slwi r8, r8, 15
-; P8BE-NEXT:    mtvsrwz v4, r4
-; P8BE-NEXT:    sub r3, r3, r8
-; P8BE-NEXT:    vperm v3, v4, v3, v2
-; P8BE-NEXT:    mtvsrwz v4, r3
-; P8BE-NEXT:    li r3, 0
-; P8BE-NEXT:    mtvsrwz v5, r3
-; P8BE-NEXT:    vperm v2, v5, v4, v2
-; P8BE-NEXT:    xxmrghw v2, v2, v3
+; P8BE-NEXT:    mulhw r7, r6, r7
+; P8BE-NEXT:    add r6, r7, r6
+; P8BE-NEXT:    srwi r7, r6, 31
+; P8BE-NEXT:    srwi r6, r6, 4
+; P8BE-NEXT:    add r6, r6, r7
+; P8BE-NEXT:    mulli r6, r6, 23
+; P8BE-NEXT:    sub r5, r5, r6
+; P8BE-NEXT:    slwi r5, r5, 16
+; P8BE-NEXT:    or r4, r5, r4
+; P8BE-NEXT:    mtvsrwz v2, r4
+; P8BE-NEXT:    srawi r4, r3, 15
+; P8BE-NEXT:    addze r4, r4
+; P8BE-NEXT:    slwi r4, r4, 15
+; P8BE-NEXT:    sub r3, r3, r4
+; P8BE-NEXT:    mtvsrwz v3, r3
+; P8BE-NEXT:    vmrgow v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
   ret <4 x i16> %1
diff --git a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
index a2ad2946cc8ec12..808f1908461b750 100644
--- a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
@@ -13,173 +13,167 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
 ; P9LE:       # %bb.0:
 ; P9LE-NEXT:    li r3, 0
 ; P9LE-NEXT:    lis r4, 689
+; P9LE-NEXT:    lis r6, 528
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
 ; P9LE-NEXT:    ori r4, r4, 55879
+; P9LE-NEXT:    ori r6, r6, 33826
 ; P9LE-NEXT:    clrlwi r3, r3, 16
 ; P9LE-NEXT:    mulhwu r4, r3, r4
 ; P9LE-NEXT:    mulli r4, r4, 95
 ; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    lis r4, 528
-; P9LE-NEXT:    mtvsrd v3, r3
-; P9LE-NEXT:    li r3, 2
-; P9LE-NEXT:    ori r4, r4, 33826
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    clrlwi r3, r3, 16
-; P9LE-NEXT:    mulhwu r4, r3, r4
-; P9LE-NEXT:    mulli r4, r4, 124
-; P9LE-NEXT:    sub r3, r3, r4
+; P9LE-NEXT:    li r4, 2
+; P9LE-NEXT:    vextuhrx r4, r4, v2
+; P9LE-NEXT:    clrlwi r5, r4, 16
+; P9LE-NEXT:    mulhwu r5, r5, r6
+; P9LE-NEXT:    lis r6, 65
+; P9LE-NEXT:    ori r6, r6, 22281
+; P9LE-NEXT:    mulli r5, r5, 124
+; P9LE-NEXT:    sub r4, r4, r5
+; P9LE-NEXT:    slwi r4, r4, 16
+; P9LE-NEXT:    or r3, r4, r3
 ; P9LE-NEXT:    lis r4, 668
-; P9LE-NEXT:    mtvsrd v4, r3
+; P9LE-NEXT:    mtfprwz f0, r3
 ; P9LE-NEXT:    li r3, 4
 ; P9LE-NEXT:    ori r4, r4, 48149
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    vmrghh v3, v4, v3
 ; P9LE-NEXT:    clrlwi r3, r3, 16
 ; P9LE-NEXT:    mulhwu r4, r3, r4
 ; P9LE-NEXT:    mulli r4, r4, 98
 ; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    lis r4, 65
-; P9LE-NEXT:    mtvsrd v4, r3
-; P9LE-NEXT:    li r3, 6
-; P9LE-NEXT:    ori r4, r4, 22281
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    clrlwi r3, r3, 16
-; P9LE-NEXT:    mulhwu r4, r3, r4
-; P9LE-NEXT:    mulli r4, r4, 1003
-; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    mtvsrd v2, r3
-; P9LE-NEXT:    vmrghh v2, v2, v4
-; P9LE-NEXT:    xxmrglw v2, v2, v3
+; P9LE-NEXT:    li r4, 6
+; P9LE-NEXT:    vextuhrx r4, r4, v2
+; P9LE-NEXT:    clrlwi r5, r4, 16
+; P9LE-NEXT:    mulhwu r5, r5, r6
+; P9LE-NEXT:    mulli r5, r5, 1003
+; P9LE-NEXT:    sub r4, r4, r5
+; P9LE-NEXT:    slwi r4, r4, 16
+; P9LE-NEXT:    or r3, r4, r3
+; P9LE-NEXT:    mtfprwz f1, r3
+; P9LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: fold_urem_vec_1:
 ; P9BE:       # %bb.0:
 ; P9BE-NEXT:    li r3, 6
 ; P9BE-NEXT:    lis r4, 65
+; P9BE-NEXT:    lis r6, 668
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    ori r4, r4, 22281
+; P9BE-NEXT:    ori r6, r6, 48149
 ; P9BE-NEXT:    clrlwi r3, r3, 16
 ; P9BE-NEXT:    mulhwu r4, r3, r4
 ; P9BE-NEXT:    mulli r4, r4, 1003
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    lis r4, 668
-; P9BE-NEXT:    mtfprwz f0, r3
-; P9BE-NEXT:    li r3, 4
-; P9BE-NEXT:    ori r4, r4, 48149
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    clrlwi r3, r3, 16
-; P9BE-NEXT:    mulhwu r4, r3, r4
-; P9BE-NEXT:    mulli r4, r4, 98
-; P9BE-NEXT:    sub r3, r3, r4
+; P9BE-NEXT:    li r4, 4
+; P9BE-NEXT:    vextuhlx r4, r4, v2
+; P9BE-NEXT:    clrlwi r5, r4, 16
+; P9BE-NEXT:    mulhwu r5, r5, r6
+; P9BE-NEXT:    lis r6, 689
+; P9BE-NEXT:    ori r6, r6, 55879
+; P9BE-NEXT:    mulli r5, r5, 98
+; P9BE-NEXT:    sub r4, r4, r5
+; P9BE-NEXT:    slwi r4, r4, 16
+; P9BE-NEXT:    or r3, r4, r3
 ; P9BE-NEXT:    lis r4, 528
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; P9BE-NEXT:    ori r4, r4, 33826
-; P9BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; P9BE-NEXT:    lxv vs2, 0(r3)
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 2
+; P9BE-NEXT:    ori r4, r4, 33826
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 16
-; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhwu r4, r3, r4
 ; P9BE-NEXT:    mulli r4, r4, 124
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    lis r4, 689
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    ori r4, r4, 55879
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    clrlwi r3, r3, 16
-; P9BE-NEXT:    mulhwu r4, r3, r4
-; P9BE-NEXT:    mulli r4, r4, 95
-; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtfprwz f3, r3
-; P9BE-NEXT:    xxperm vs1, vs3, vs2
-; P9BE-NEXT:    xxmrghw v2, vs1, vs0
+; P9BE-NEXT:    li r4, 0
+; P9BE-NEXT:    vextuhlx r4, r4, v2
+; P9BE-NEXT:    clrlwi r5, r4, 16
+; P9BE-NEXT:    mulhwu r5, r5, r6
+; P9BE-NEXT:    mulli r5, r5, 95
+; P9BE-NEXT:    sub r4, r4, r5
+; P9BE-NEXT:    slwi r4, r4, 16
+; P9BE-NEXT:    or r3, r4, r3
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vmrgow v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fold_urem_vec_1:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r4, 689
-; P8LE-NEXT:    lis r5, 528
-; P8LE-NEXT:    lis r6, 668
+; P8LE-NEXT:    lis r5, 689
+; P8LE-NEXT:    lis r7, 528
 ; P8LE-NEXT:    mffprd r3, f0
-; P8LE-NEXT:    ori r4, r4, 55879
-; P8LE-NEXT:    ori r5, r5, 33826
-; P8LE-NEXT:    ori r6, r6, 48149
-; P8LE-NEXT:    clrldi r7, r3, 48
-; P8LE-NEXT:    clrlwi r7, r7, 16
-; P8LE-NEXT:    mulhwu r4, r7, r4
-; P8LE-NEXT:    mulli r4, r4, 95
-; P8LE-NEXT:    sub r4, r7, r4
-; P8LE-NEXT:    rldicl r7, r3, 48, 48
-; P8LE-NEXT:    clrlwi r7, r7, 16
-; P8LE-NEXT:    mtvsrd v2, r4
-; P8LE-NEXT:    lis r4, 65
-; P8LE-NEXT:    mulhwu r5, r7, r5
-; P8LE-NEXT:    ori r4, r4, 22281
-; P8LE-NEXT:    mulli r5, r5, 124
-; P8LE-NEXT:    sub r5, r7, r5
-; P8LE-NEXT:    rldicl r7, r3, 32, 48
+; P8LE-NEXT:    ori r5, r5, 55879
+; P8LE-NEXT:    ori r7, r7, 33826
+; P8LE-NEXT:    clrldi r4, r3, 48
+; P8LE-NEXT:    clrlwi r4, r4, 16
+; P8LE-NEXT:    mulhwu r5, r4, r5
+; P8LE-NEXT:    mulli r5, r5, 95
+; P8LE-NEXT:    sub r4, r4, r5
+; P8LE-NEXT:    rldicl r5, r3, 48, 48
+; P8LE-NEXT:    clrlwi r6, r5, 16
+; P8LE-NEXT:    mulhwu r6, r6, r7
+; P8LE-NEXT:    mulli r6, r6, 124
+; P8LE-NEXT:    sub r5, r5, r6
+; P8LE-NEXT:    lis r6, 65
+; P8LE-NEXT:    slwi r5, r5, 16
+; P8LE-NEXT:    ori r6, r6, 22281
+; P8LE-NEXT:    or r4, r5, r4
+; P8LE-NEXT:    lis r5, 668
+; P8LE-NEXT:    mtfprwz f0, r4
+; P8LE-NEXT:    rldicl r4, r3, 32, 48
+; P8LE-NEXT:    ori r5, r5, 48149
 ; P8LE-NEXT:    rldicl r3, r3, 16, 48
-; P8LE-NEXT:    clrlwi r7, r7, 16
-; P8LE-NEXT:    clrlwi r3, r3, 16
-; P8LE-NEXT:    mtvsrd v3, r5
-; P8LE-NEXT:    mulhwu r6, r7, r6
-; P8LE-NEXT:    mulhwu r4, r3, r4
-; P8LE-NEXT:    mulli r6, r6, 98
-; P8LE-NEXT:    mulli r4, r4, 1003
-; P8LE-NEXT:    sub r6, r7, r6
-; P8LE-NEXT:    sub r3, r3, r4
-; P8LE-NEXT:    mtvsrd v4, r3
-; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    mtvsrd v3, r6
-; P8LE-NEXT:    vmrghh v3, v4, v3
-; P8LE-NEXT:    xxmrglw v2, v3, v2
+; P8LE-NEXT:    clrlwi r4, r4, 16
+; P8LE-NEXT:    mulhwu r5, r4, r5
+; P8LE-NEXT:    mulli r5, r5, 98
+; P8LE-NEXT:    sub r4, r4, r5
+; P8LE-NEXT:    clrlwi r5, r3, 16
+; P8LE-NEXT:    mulhwu r5, r5, r6
+; P8LE-NEXT:    mulli r5, r5, 1003
+; P8LE-NEXT:    sub r3, r3, r5
+; P8LE-NEXT:    slwi r3, r3, 16
+; P8LE-NEXT:    or r3, r3, r4
+; P8LE-NEXT:    mtfprwz f1, r3
+; P8LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fold_urem_vec_1:
 ; P8BE:       # %bb.0:
 ; P8BE-NEXT:    mfvsrd r3, v2
-; P8BE-NEXT:    addis r6, r2, .LCPI0_0@toc@ha
-; P8BE-NEXT:    lis r4, 65
-; P8BE-NEXT:    lis r5, 668
-; P8BE-NEXT:    lis r7, 528
-; P8BE-NEXT:    addi r6, r6, .LCPI0_0@toc@l
-; P8BE-NEXT:    ori r4, r4, 22281
-; P8BE-NEXT:    ori r5, r5, 48149
-; P8BE-NEXT:    ori r7, r7, 33826
-; P8BE-NEXT:    lxvw4x v2, 0, r6
-; P8BE-NEXT:    clrldi r6, r3, 48
-; P8BE-NEXT:    clrlwi r6, r6, 16
-; P8BE-NEXT:    mulhwu r4, r6, r4
-; P8BE-NEXT:    mulli r4, r4, 1003
-; P8BE-NEXT:    sub r4, r6, r4
-; P8BE-NEXT:    rldicl r6, r3, 48, 48
-; P8BE-NEXT:    clrlwi r6, r6, 16
-; P8BE-NEXT:    mtvsrwz v3, r4
-; P8BE-NEXT:    lis r4, 689
-; P8BE-NEXT:    mulhwu r5, r6, r5
-; P8BE-NEXT:    ori r4, r4, 55879
-; P8BE-NEXT:    mulli r5, r5, 98
-; P8BE-NEXT:    sub r5, r6, r5
-; P8BE-NEXT:    rldicl r6, r3, 32, 48
+; P8BE-NEXT:    lis r5, 65
+; P8BE-NEXT:    lis r7, 668
+; P8BE-NEXT:    ori r5, r5, 22281
+; P8BE-NEXT:    ori r7, r7, 48149
+; P8BE-NEXT:    clrldi r4, r3, 48
+; P8BE-NEXT:    clrlwi r4, r4, 16
+; P8BE-NEXT:    mulhwu r5, r4, r5
+; P8BE-NEXT:    mulli r5, r5, 1003
+; P8BE-NEXT:    sub r4, r4, r5
+; P8BE-NEXT:    rldicl r5, r3, 48, 48
+; P8BE-NEXT:    clrlwi r6, r5, 16
+; P8BE-NEXT:    mulhwu r6, r6, r7
+; P8BE-NEXT:    mulli r6, r6, 98
+; P8BE-NEXT:    sub r5, r5, r6
+; P8BE-NEXT:    lis r6, 689
+; P8BE-NEXT:    slwi r5, r5, 16
+; P8BE-NEXT:    ori r6, r6, 55879
+; P8BE-NEXT:    or r4, r5, r4
+; P8BE-NEXT:    lis r5, 528
+; P8BE-NEXT:    mtvsrwz v2, r4
+; P8BE-NEXT:    rldicl r4, r3, 32, 48
+; P8BE-NEXT:    ori r5, r5, 33826
 ; P8BE-NEXT:    rldicl r3, r3, 16, 48
-; P8BE-NEXT:    clrlwi r6, r6, 16
-; P8BE-NEXT:    clrlwi r3, r3, 16
-; P8BE-NEXT:    mtvsrwz v4, r5
-; P8BE-NEXT:    mulhwu r7, r6, r7
-; P8BE-NEXT:    mulhwu r4, r3, r4
-; P8BE-NEXT:    mulli r7, r7, 124
-; P8BE-NEXT:    mulli r4, r4, 95
-; P8BE-NEXT:    sub r6, r6, r7
-; P8BE-NEXT:    sub r3, r3, r4
-; P8BE-NEXT:    mtvsrwz v5, r3
-; P8BE-NEXT:    vperm v3, v4, v3, v2
-; P8BE-NEXT:    mtvsrwz v4, r6
-; P8BE-NEXT:    vperm v2, v5, v4, v2
-; P8BE-NEXT:    xxmrghw v2, v2, v3
+; P8BE-NEXT:    clrlwi r4, r4, 16
+; P8BE-NEXT:    mulhwu r5, r4, r5
+; P8BE-NEXT:    mulli r5, r5, 124
+; P8BE-NEXT:    sub r4, r4, r5
+; P8BE-NEXT:    clrlwi r5, r3, 16
+; P8BE-NEXT:    mulhwu r5, r5, r6
+; P8BE-NEXT:    mulli r5, r5, 95
+; P8BE-NEXT:    sub r3, r3, r5
+; P8BE-NEXT:    slwi r3, r3, 16
+; P8BE-NEXT:    or r3, r3, r4
+; P8BE-NEXT:    mtvsrwz v3, r3
+; P8BE-NEXT:    vmrgow v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
   ret <4 x i16> %1
@@ -196,31 +190,31 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P9LE-NEXT:    mulhwu r5, r3, r4
 ; P9LE-NEXT:    mulli r5, r5, 95
 ; P9LE-NEXT:    sub r3, r3, r5
-; P9LE-NEXT:    mtvsrd v3, r3
-; P9LE-NEXT:    li r3, 2
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    clrlwi r3, r3, 16
-; P9LE-NEXT:    mulhwu r5, r3, r4
-; P9LE-NEXT:    mulli r5, r5, 95
-; P9LE-NEXT:    sub r3, r3, r5
-; P9LE-NEXT:    mtvsrd v4, r3
+; P9LE-NEXT:    li r5, 2
+; P9LE-NEXT:    vextuhrx r5, r5, v2
+; P9LE-NEXT:    clrlwi r6, r5, 16
+; P9LE-NEXT:    mulhwu r6, r6, r4
+; P9LE-NEXT:    mulli r6, r6, 95
+; P9LE-NEXT:    sub r5, r5, r6
+; P9LE-NEXT:    slwi r5, r5, 16
+; P9LE-NEXT:    or r3, r5, r3
+; P9LE-NEXT:    mtfprwz f0, r3
 ; P9LE-NEXT:    li r3, 4
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    vmrghh v3, v4, v3
 ; P9LE-NEXT:    clrlwi r3, r3, 16
 ; P9LE-NEXT:    mulhwu r5, r3, r4
 ; P9LE-NEXT:    mulli r5, r5, 95
 ; P9LE-NEXT:    sub r3, r3, r5
-; P9LE-NEXT:    mtvsrd v4, r3
-; P9LE-NEXT:    li r3, 6
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    clrlwi r3, r3, 16
-; P9LE-NEXT:    mulhwu r4, r3, r4
+; P9LE-NEXT:    li r5, 6
+; P9LE-NEXT:    vextuhrx r5, r5, v2
+; P9LE-NEXT:    clrlwi r6, r5, 16
+; P9LE-NEXT:    mulhwu r4, r6, r4
 ; P9LE-NEXT:    mulli r4, r4, 95
-; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    mtvsrd v2, r3
-; P9LE-NEXT:    vmrghh v2, v2, v4
-; P9LE-NEXT:    xxmrglw v2, v2, v3
+; P9LE-NEXT:    sub r4, r5, r4
+; P9LE-NEXT:    slwi r4, r4, 16
+; P9LE-NEXT:    or r3, r4, r3
+; P9LE-NEXT:    mtfprwz f1, r3
+; P9LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: fold_urem_vec_2:
@@ -233,106 +227,100 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    mulhwu r5, r3, r4
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    mtfprwz f0, r3
-; P9BE-NEXT:    li r3, 4
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    clrlwi r3, r3, 16
-; P9BE-NEXT:    mulhwu r5, r3, r4
-; P9BE-NEXT:    mulli r5, r5, 95
-; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; P9BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; P9BE-NEXT:    lxv vs2, 0(r3)
+; P9BE-NEXT:    li r5, 4
+; P9BE-NEXT:    vextuhlx r5, r5, v2
+; P9BE-NEXT:    clrlwi r6, r5, 16
+; P9BE-NEXT:    mulhwu r6, r6, r4
+; P9BE-NEXT:    mulli r6, r6, 95
+; P9BE-NEXT:    sub r5, r5, r6
+; P9BE-NEXT:    slwi r5, r5, 16
+; P9BE-NEXT:    or r3, r5, r3
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 16
-; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhwu r5, r3, r4
 ; P9BE-NEXT:    mulli r5, r5, 95
 ; P9BE-NEXT:    sub r3, r3, r5
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    clrlwi r3, r3, 16
-; P9BE-NEXT:    mulhwu r4, r3, r4
+; P9BE-NEXT:    li r5, 0
+; P9BE-NEXT:    vextuhlx r5, r5, v2
+; P9BE-NEXT:    clrlwi r6, r5, 16
+; P9BE-NEXT:    mulhwu r4, r6, r4
 ; P9BE-NEXT:    mulli r4, r4, 95
-; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtfprwz f3, r3
-; P9BE-NEXT:    xxperm vs1, vs3, vs2
-; P9BE-NEXT:    xxmrghw v2, vs1, vs0
+; P9BE-NEXT:    sub r4, r5, r4
+; P9BE-NEXT:    slwi r4, r4, 16
+; P9BE-NEXT:    or r3, r4, r3
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vmrgow v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fold_urem_vec_2:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r4, 689
+; P8LE-NEXT:    lis r5, 689
 ; P8LE-NEXT:    mffprd r3, f0
-; P8LE-NEXT:    ori r4, r4, 55879
-; P8LE-NEXT:    clrldi r5, r3, 48
+; P8LE-NEXT:    ori r5, r5, 55879
+; P8LE-NEXT:    clrldi r4, r3, 48
+; P8LE-NEXT:    clrlwi r4, r4, 16
+; P8LE-NEXT:    mulhwu r6, r4, r5
+; P8LE-NEXT:    mulli r6, r6, 95
+; P8LE-NEXT:    sub r4, r4, r6
 ; P8LE-NEXT:    rldicl r6, r3, 48, 48
-; P8LE-NEXT:    rldicl r7, r3, 32, 48
+; P8LE-NEXT:    clrlwi r7, r6, 16
+; P8LE-NEXT:    mulhwu r7, r7, r5
+; P8LE-NEXT:    mulli r7, r7, 95
+; P8LE-NEXT:    sub r6, r6, r7
+; P8LE-NEXT:    slwi r6, r6, 16
+; P8LE-NEXT:    or r4, r6, r4
+; P8LE-NEXT:    mtfprwz f0, r4
+; P8LE-NEXT:    rldicl r4, r3, 32, 48
 ; P8LE-NEXT:    rldicl r3, r3, 16, 48
-; P8LE-NEXT:    clrlwi r5, r5, 16
-; P8LE-NEXT:    clrlwi r6, r6, 16
-; P8LE-NEXT:    clrlwi r7, r7, 16
-; P8LE-NEXT:    clrlwi r3, r3, 16
-; P8LE-NEXT:    mulhwu r8, r5, r4
-; P8LE-NEXT:    mulli r8, r8, 95
-; P8LE-NEXT:    sub r5, r5, r8
-; P8LE-NEXT:    mulhwu r8, r6, r4
-; P8LE-NEXT:    mtvsrd v2, r5
-; P8LE-NEXT:    mulli r8, r8, 95
-; P8LE-NEXT:    sub r6, r6, r8
-; P8LE-NEXT:    mulhwu r8, r7, r4
-; P8LE-NEXT:    mulhwu r4, r3, r4
-; P8LE-NEXT:    mtvsrd v3, r6
-; P8LE-NEXT:    mulli r8, r8, 95
-; P8LE-NEXT:    mulli r4, r4, 95
-; P8LE-NEXT:    sub r7, r7, r8
-; P8LE-NEXT:    sub r3, r3, r4
-; P8LE-NEXT:    mtvsrd v4, r7
-; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    mtvsrd v3, r3
-; P8LE-NEXT:    vmrghh v3, v3, v4
-; P8LE-NEXT:    xxmrglw v2, v3, v2
+; P8LE-NEXT:    clrlwi r4, r4, 16
+; P8LE-NEXT:    mulhwu r6, r4, r5
+; P8LE-NEXT:    mulli r6, r6, 95
+; P8LE-NEXT:    sub r4, r4, r6
+; P8LE-NEXT:    clrlwi r6, r3, 16
+; P8LE-NEXT:    mulhwu r5, r6, r5
+; P8LE-NEXT:    mulli r5, r5, 95
+; P8LE-NEXT:    sub r3, r3, r5
+; P8LE-NEXT:    slwi r3, r3, 16
+; P8LE-NEXT:    or r3, r3, r4
+; P8LE-NEXT:    mtfprwz f1, r3
+; P8LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fold_urem_vec_2:
 ; P8BE:       # %bb.0:
 ; P8BE-NEXT:    mfvsrd r3, v2
-; P8BE-NEXT:    addis r5, r2, .LCPI1_0@toc@ha
-; P8BE-NEXT:    lis r4, 689
-; P8BE-NEXT:    addi r5, r5, .LCPI1_0@toc@l
-; P8BE-NEXT:    ori r4, r4, 55879
-; P8BE-NEXT:    lxvw4x v2, 0, r5
-; P8BE-NEXT:    clrldi r5, r3, 48
+; P8BE-NEXT:    lis r5, 689
+; P8BE-NEXT:    ori r5, r5, 55879
+; P8BE-NEXT:    clrldi r4, r3, 48
+; P8BE-NEXT:    clrlwi r4, r4, 16
+; P8BE-NEXT:    mulhwu r6, r4, r5
+; P8BE-NEXT:    mulli r6, r6, 95
+; P8BE-NEXT:    sub r4, r4, r6
 ; P8BE-NEXT:    rldicl r6, r3, 48, 48
-; P8BE-NEXT:    rldicl r7, r3, 32, 48
+; P8BE-NEXT:    clrlwi r7, r6, 16
+; P8BE-NEXT:    mulhwu r7, r7, r5
+; P8BE-NEXT:    mulli r7, r7, 95
+; P8BE-NEXT:    sub r6, r6, r7
+; P8BE-NEXT:    slwi r6, r6, 16
+; P8BE-NEXT:    or r4, r6, r4
+; P8BE-NEXT:    mtvsrwz v2, r4
+; P8BE-NEXT:    rldicl r4, r3, 32, 48
 ; P8BE-NEXT:    rldicl r3, r3, 16, 48
-; P8BE-NEXT:    clrlwi r5, r5, 16
-; P8BE-NEXT:    clrlwi r6, r6, 16
-; P8BE-NEXT:    clrlwi r7, r7, 16
-; P8BE-NEXT:    clrlwi r3, r3, 16
-; P8BE-NEXT:    mulhwu r8, r5, r4
-; P8BE-NEXT:    mulli r8, r8, 95
-; P8BE-NEXT:    sub r5, r5, r8
-; P8BE-NEXT:    mulhwu r8, r6, r4
-; P8BE-NEXT:    mtvsrwz v3, r5
-; P8BE-NEXT:    mulli r8, r8, 95
-; P8BE-NEXT:    sub r6, r6, r8
-; P8BE-NEXT:    mulhwu r8, r7, r4
-; P8BE-NEXT:    mulhwu r4, r3, r4
-; P8BE-NEXT:    mtvsrwz v4, r6
-; P8BE-NEXT:    mulli r8, r8, 95
-; P8BE-NEXT:    mulli r4, r4, 95
-; P8BE-NEXT:    sub r7, r7, r8
-; P8BE-NEXT:    sub r3, r3, r4
-; P8BE-NEXT:    mtvsrwz v5, r7
-; P8BE-NEXT:    vperm v3, v4, v3, v2
-; P8BE-NEXT:    mtvsrwz v4, r3
-; P8BE-NEXT:    vperm v2, v4, v5, v2
-; P8BE-NEXT:    xxmrghw v2, v2, v3
+; P8BE-NEXT:    clrlwi r4, r4, 16
+; P8BE-NEXT:    mulhwu r6, r4, r5
+; P8BE-NEXT:    mulli r6, r6, 95
+; P8BE-NEXT:    sub r4, r4, r6
+; P8BE-NEXT:    clrlwi r6, r3, 16
+; P8BE-NEXT:    mulhwu r5, r6, r5
+; P8BE-NEXT:    mulli r5, r5, 95
+; P8BE-NEXT:    sub r3, r3, r5
+; P8BE-NEXT:    slwi r3, r3, 16
+; P8BE-NEXT:    or r3, r3, r4
+; P8BE-NEXT:    mtvsrwz v3, r3
+; P8BE-NEXT:    vmrgow v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
   ret <4 x i16> %1
@@ -343,52 +331,52 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P9LE-LABEL: combine_urem_udiv:
 ; P9LE:       # %bb.0:
-; P9LE-NEXT:    li r3, 0
-; P9LE-NEXT:    lis r4, 689
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    ori r4, r4, 55879
-; P9LE-NEXT:    clrlwi r3, r3, 16
-; P9LE-NEXT:    mulhwu r5, r3, r4
-; P9LE-NEXT:    mulli r6, r5, 95
-; P9LE-NEXT:    sub r3, r3, r6
-; P9LE-NEXT:    mtvsrd v3, r3
 ; P9LE-NEXT:    li r3, 2
+; P9LE-NEXT:    lis r5, 689
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    clrlwi r6, r3, 16
-; P9LE-NEXT:    mulhwu r6, r6, r4
-; P9LE-NEXT:    mulli r7, r6, 95
-; P9LE-NEXT:    sub r3, r3, r7
-; P9LE-NEXT:    mtvsrd v4, r3
-; P9LE-NEXT:    li r3, 4
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    vmrghh v3, v4, v3
-; P9LE-NEXT:    clrlwi r7, r3, 16
-; P9LE-NEXT:    mulhwu r7, r7, r4
+; P9LE-NEXT:    ori r5, r5, 55879
+; P9LE-NEXT:    clrlwi r4, r3, 16
+; P9LE-NEXT:    mulhwu r4, r4, r5
+; P9LE-NEXT:    mulli r6, r4, 95
+; P9LE-NEXT:    sub r3, r3, r6
+; P9LE-NEXT:    li r6, 0
+; P9LE-NEXT:    vextuhrx r6, r6, v2
+; P9LE-NEXT:    slwi r3, r3, 16
+; P9LE-NEXT:    clrlwi r6, r6, 16
+; P9LE-NEXT:    mulhwu r7, r6, r5
 ; P9LE-NEXT:    mulli r8, r7, 95
-; P9LE-NEXT:    sub r3, r3, r8
-; P9LE-NEXT:    mtvsrd v4, r3
+; P9LE-NEXT:    sub r6, r6, r8
+; P9LE-NEXT:    or r3, r3, r6
+; P9LE-NEXT:    mtfprwz f0, r3
 ; P9LE-NEXT:    li r3, 6
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    clrlwi r8, r3, 16
-; P9LE-NEXT:    mulhwu r4, r8, r4
-; P9LE-NEXT:    mulli r8, r4, 95
-; P9LE-NEXT:    mtvsrd v5, r4
+; P9LE-NEXT:    clrlwi r6, r3, 16
+; P9LE-NEXT:    mulhwu r6, r6, r5
+; P9LE-NEXT:    mulli r8, r6, 95
 ; P9LE-NEXT:    sub r3, r3, r8
-; P9LE-NEXT:    mtvsrd v2, r3
-; P9LE-NEXT:    vmrghh v2, v2, v4
-; P9LE-NEXT:    mtvsrd v4, r6
-; P9LE-NEXT:    xxmrglw v2, v2, v3
-; P9LE-NEXT:    mtvsrd v3, r5
-; P9LE-NEXT:    vmrghh v3, v4, v3
-; P9LE-NEXT:    mtvsrd v4, r7
-; P9LE-NEXT:    vmrghh v4, v5, v4
-; P9LE-NEXT:    xxmrglw v3, v4, v3
+; P9LE-NEXT:    li r8, 4
+; P9LE-NEXT:    vextuhrx r8, r8, v2
+; P9LE-NEXT:    slwi r3, r3, 16
+; P9LE-NEXT:    clrlwi r9, r8, 16
+; P9LE-NEXT:    mulhwu r5, r9, r5
+; P9LE-NEXT:    mulli r9, r5, 95
+; P9LE-NEXT:    sub r8, r8, r9
+; P9LE-NEXT:    or r3, r3, r8
+; P9LE-NEXT:    mtfprwz f1, r3
+; P9LE-NEXT:    slwi r3, r4, 16
+; P9LE-NEXT:    rlwimi r3, r7, 0, 22, 31
+; P9LE-NEXT:    xxmrghw v2, vs1, vs0
+; P9LE-NEXT:    mtfprwz f0, r3
+; P9LE-NEXT:    slwi r3, r6, 16
+; P9LE-NEXT:    rlwimi r3, r5, 0, 22, 31
+; P9LE-NEXT:    mtfprwz f1, r3
+; P9LE-NEXT:    xxmrghw v3, vs1, vs0
 ; P9LE-NEXT:    vadduhm v2, v2, v3
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: combine_urem_udiv:
 ; P9BE:       # %bb.0:
-; P9BE-NEXT:    li r3, 6
+; P9BE-NEXT:    li r3, 4
 ; P9BE-NEXT:    lis r5, 689
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    ori r5, r5, 55879
@@ -396,41 +384,38 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P9BE-NEXT:    mulhwu r4, r4, r5
 ; P9BE-NEXT:    mulli r6, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r6
-; P9BE-NEXT:    mtfprwz f0, r3
-; P9BE-NEXT:    li r3, 4
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    clrlwi r6, r3, 16
-; P9BE-NEXT:    mulhwu r6, r6, r5
-; P9BE-NEXT:    mulli r7, r6, 95
-; P9BE-NEXT:    sub r3, r3, r7
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
-; P9BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; P9BE-NEXT:    lxv vs2, 0(r3)
-; P9BE-NEXT:    li r3, 2
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    clrlwi r7, r3, 16
-; P9BE-NEXT:    xxperm vs0, vs1, vs2
+; P9BE-NEXT:    li r6, 6
+; P9BE-NEXT:    vextuhlx r6, r6, v2
+; P9BE-NEXT:    slwi r3, r3, 16
+; P9BE-NEXT:    clrlwi r7, r6, 16
 ; P9BE-NEXT:    mulhwu r7, r7, r5
 ; P9BE-NEXT:    mulli r8, r7, 95
-; P9BE-NEXT:    sub r3, r3, r8
-; P9BE-NEXT:    mtfprwz f1, r3
+; P9BE-NEXT:    sub r6, r6, r8
+; P9BE-NEXT:    or r3, r3, r6
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    clrlwi r3, r3, 16
-; P9BE-NEXT:    mulhwu r5, r3, r5
-; P9BE-NEXT:    mulli r8, r5, 95
+; P9BE-NEXT:    clrlwi r6, r3, 16
+; P9BE-NEXT:    mulhwu r6, r6, r5
+; P9BE-NEXT:    mulli r8, r6, 95
 ; P9BE-NEXT:    sub r3, r3, r8
-; P9BE-NEXT:    mtfprwz f3, r3
-; P9BE-NEXT:    xxperm vs1, vs3, vs2
-; P9BE-NEXT:    mtfprwz f3, r5
-; P9BE-NEXT:    xxmrghw v2, vs1, vs0
-; P9BE-NEXT:    mtfprwz f0, r4
-; P9BE-NEXT:    mtfprwz f1, r6
-; P9BE-NEXT:    xxperm vs0, vs1, vs2
-; P9BE-NEXT:    mtfprwz f1, r7
-; P9BE-NEXT:    xxperm vs1, vs3, vs2
-; P9BE-NEXT:    xxmrghw v3, vs1, vs0
+; P9BE-NEXT:    li r8, 2
+; P9BE-NEXT:    vextuhlx r8, r8, v2
+; P9BE-NEXT:    slwi r3, r3, 16
+; P9BE-NEXT:    clrlwi r9, r8, 16
+; P9BE-NEXT:    mulhwu r5, r9, r5
+; P9BE-NEXT:    mulli r9, r5, 95
+; P9BE-NEXT:    sub r8, r8, r9
+; P9BE-NEXT:    or r3, r3, r8
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    slwi r3, r4, 16
+; P9BE-NEXT:    rlwimi r3, r7, 0, 22, 31
+; P9BE-NEXT:    vmrgow v2, v2, v3
+; P9BE-NEXT:    mtvsrwz v3, r3
+; P9BE-NEXT:    slwi r3, r6, 16
+; P9BE-NEXT:    rlwimi r3, r5, 0, 22, 31
+; P9BE-NEXT:    mtvsrwz v4, r3
+; P9BE-NEXT:    vmrgow v3, v4, v3
 ; P9BE-NEXT:    vadduhm v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
@@ -440,41 +425,41 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P8LE-NEXT:    lis r4, 689
 ; P8LE-NEXT:    mffprd r3, f0
 ; P8LE-NEXT:    ori r4, r4, 55879
-; P8LE-NEXT:    clrldi r5, r3, 48
-; P8LE-NEXT:    rldicl r6, r3, 48, 48
-; P8LE-NEXT:    rldicl r7, r3, 32, 48
-; P8LE-NEXT:    rldicl r3, r3, 16, 48
-; P8LE-NEXT:    clrlwi r5, r5, 16
-; P8LE-NEXT:    clrlwi r8, r6, 16
-; P8LE-NEXT:    clrlwi r9, r7, 16
-; P8LE-NEXT:    clrlwi r10, r3, 16
-; P8LE-NEXT:    mulhwu r11, r5, r4
+; P8LE-NEXT:    rldicl r5, r3, 48, 48
+; P8LE-NEXT:    rldicl r6, r3, 16, 48
+; P8LE-NEXT:    clrldi r7, r3, 48
+; P8LE-NEXT:    rldicl r3, r3, 32, 48
+; P8LE-NEXT:    clrlwi r8, r5, 16
+; P8LE-NEXT:    clrlwi r9, r6, 16
+; P8LE-NEXT:    clrlwi r7, r7, 16
 ; P8LE-NEXT:    mulhwu r8, r8, r4
 ; P8LE-NEXT:    mulhwu r9, r9, r4
-; P8LE-NEXT:    mulhwu r4, r10, r4
-; P8LE-NEXT:    mulli r10, r11, 95
-; P8LE-NEXT:    mtvsrd v2, r11
-; P8LE-NEXT:    mtvsrd v3, r8
-; P8LE-NEXT:    sub r5, r5, r10
 ; P8LE-NEXT:    mulli r10, r8, 95
-; P8LE-NEXT:    mtvsrd v4, r5
-; P8LE-NEXT:    sub r6, r6, r10
-; P8LE-NEXT:    mulli r10, r9, 95
-; P8LE-NEXT:    mtvsrd v5, r6
-; P8LE-NEXT:    sub r7, r7, r10
-; P8LE-NEXT:    mulli r10, r4, 95
-; P8LE-NEXT:    mtvsrd v0, r7
-; P8LE-NEXT:    sub r3, r3, r10
-; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    mtvsrd v3, r9
-; P8LE-NEXT:    vmrghh v4, v5, v4
-; P8LE-NEXT:    mtvsrd v5, r3
-; P8LE-NEXT:    vmrghh v5, v5, v0
-; P8LE-NEXT:    mtvsrd v0, r4
-; P8LE-NEXT:    xxmrglw v4, v5, v4
-; P8LE-NEXT:    vmrghh v3, v0, v3
-; P8LE-NEXT:    xxmrglw v2, v3, v2
-; P8LE-NEXT:    vadduhm v2, v4, v2
+; P8LE-NEXT:    slwi r8, r8, 16
+; P8LE-NEXT:    sub r5, r5, r10
+; P8LE-NEXT:    clrlwi r10, r3, 16
+; P8LE-NEXT:    mulhwu r10, r10, r4
+; P8LE-NEXT:    mulhwu r4, r7, r4
+; P8LE-NEXT:    slwi r5, r5, 16
+; P8LE-NEXT:    rlwimi r8, r4, 0, 22, 31
+; P8LE-NEXT:    mulli r4, r4, 95
+; P8LE-NEXT:    mtfprwz f0, r8
+; P8LE-NEXT:    sub r4, r7, r4
+; P8LE-NEXT:    mulli r7, r9, 95
+; P8LE-NEXT:    or r4, r5, r4
+; P8LE-NEXT:    mtfprwz f1, r4
+; P8LE-NEXT:    sub r6, r6, r7
+; P8LE-NEXT:    mulli r7, r10, 95
+; P8LE-NEXT:    slwi r6, r6, 16
+; P8LE-NEXT:    sub r3, r3, r7
+; P8LE-NEXT:    or r3, r6, r3
+; P8LE-NEXT:    mtfprwz f2, r3
+; P8LE-NEXT:    slwi r3, r9, 16
+; P8LE-NEXT:    rlwimi r3, r10, 0, 22, 31
+; P8LE-NEXT:    xxmrghw v2, vs2, vs1
+; P8LE-NEXT:    mtfprwz f1, r3
+; P8LE-NEXT:    xxmrghw v3, vs1, vs0
+; P8LE-NEXT:    vadduhm v2, v2, v3
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: combine_urem_udiv:
@@ -482,44 +467,41 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P8BE-NEXT:    mfvsrd r3, v2
 ; P8BE-NEXT:    lis r4, 689
 ; P8BE-NEXT:    ori r4, r4, 55879
-; P8BE-NEXT:    clrldi r5, r3, 48
-; P8BE-NEXT:    rldicl r6, r3, 48, 48
-; P8BE-NEXT:    rldicl r7, r3, 32, 48
-; P8BE-NEXT:    rldicl r3, r3, 16, 48
+; P8BE-NEXT:    rldicl r5, r3, 48, 48
+; P8BE-NEXT:    clrldi r7, r3, 48
+; P8BE-NEXT:    rldicl r6, r3, 16, 48
+; P8BE-NEXT:    rldicl r3, r3, 32, 48
 ; P8BE-NEXT:    clrlwi r8, r5, 16
-; P8BE-NEXT:    clrlwi r9, r6, 16
 ; P8BE-NEXT:    clrlwi r10, r7, 16
-; P8BE-NEXT:    clrlwi r3, r3, 16
+; P8BE-NEXT:    clrlwi r9, r6, 16
+; P8BE-NEXT:    clrlwi r11, r3, 16
 ; P8BE-NEXT:    mulhwu r8, r8, r4
-; P8BE-NEXT:    mulhwu r9, r9, r4
 ; P8BE-NEXT:    mulhwu r10, r10, r4
-; P8BE-NEXT:    mulhwu r4, r3, r4
+; P8BE-NEXT:    mulhwu r9, r9, r4
+; P8BE-NEXT:    mulhwu r4, r11, r4
 ; P8BE-NEXT:    mulli r11, r8, 95
-; P8BE-NEXT:    mtvsrwz v3, r8
-; P8BE-NEXT:    mtvsrwz v4, r9
+; P8BE-NEXT:    slwi r8, r8, 16
+; P8BE-NEXT:    rlwimi r8, r10, 0, 22, 31
+; P8BE-NEXT:    mulli r10, r10, 95
+; P8BE-NEXT:    mtvsrwz v2, r8
 ; P8BE-NEXT:    sub r5, r5, r11
-; P8BE-NEXT:    mulli r11, r9, 95
-; P8BE-NEXT:    mtvsrwz v5, r5
-; P8BE-NEXT:    sub r6, r6, r11
-; P8BE-NEXT:    mulli r11, r10, 95
-; P8BE-NEXT:    mtvsrwz v0, r6
-; P8BE-NEXT:    sub r7, r7, r11
-; P8BE-NEXT:    mulli r11, r4, 95
-; P8BE-NEXT:    mtvsrwz v1, r7
-; P8BE-NEXT:    sub r3, r3, r11
-; P8BE-NEXT:    addis r11, r2, .LCPI2_0@toc@ha
-; P8BE-NEXT:    addi r11, r11, .LCPI2_0@toc@l
-; P8BE-NEXT:    lxvw4x v2, 0, r11
-; P8BE-NEXT:    vperm v5, v0, v5, v2
-; P8BE-NEXT:    mtvsrwz v0, r3
-; P8BE-NEXT:    vperm v3, v4, v3, v2
-; P8BE-NEXT:    mtvsrwz v4, r10
-; P8BE-NEXT:    vperm v0, v0, v1, v2
-; P8BE-NEXT:    mtvsrwz v1, r4
-; P8BE-NEXT:    vperm v2, v1, v4, v2
-; P8BE-NEXT:    xxmrghw v4, v0, v5
-; P8BE-NEXT:    xxmrghw v2, v2, v3
-; P8BE-NEXT:    vadduhm v2, v4, v2
+; P8BE-NEXT:    sub r7, r7, r10
+; P8BE-NEXT:    mulli r10, r9, 95
+; P8BE-NEXT:    slwi r5, r5, 16
+; P8BE-NEXT:    or r5, r5, r7
+; P8BE-NEXT:    mtvsrwz v3, r5
+; P8BE-NEXT:    sub r6, r6, r10
+; P8BE-NEXT:    mulli r10, r4, 95
+; P8BE-NEXT:    slwi r6, r6, 16
+; P8BE-NEXT:    sub r3, r3, r10
+; P8BE-NEXT:    or r3, r6, r3
+; P8BE-NEXT:    mtvsrwz v4, r3
+; P8BE-NEXT:    slwi r3, r9, 16
+; P8BE-NEXT:    rlwimi r3, r4, 0, 22, 31
+; P8BE-NEXT:    vmrgow v3, v4, v3
+; P8BE-NEXT:    mtvsrwz v4, r3
+; P8BE-NEXT:    vmrgow v2, v4, v2
+; P8BE-NEXT:    vadduhm v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
   %2 = udiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
@@ -531,115 +513,97 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
 ; P9LE-LABEL: dont_fold_urem_power_of_two:
 ; P9LE:       # %bb.0:
-; P9LE-NEXT:    li r3, 0
-; P9LE-NEXT:    lis r4, 689
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    ori r4, r4, 55879
-; P9LE-NEXT:    clrlwi r3, r3, 26
-; P9LE-NEXT:    mtvsrd v3, r3
+; P9LE-NEXT:    li r4, 0
 ; P9LE-NEXT:    li r3, 2
+; P9LE-NEXT:    lis r5, 689
+; P9LE-NEXT:    vextuhrx r4, r4, v2
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    clrlwi r3, r3, 27
-; P9LE-NEXT:    mtvsrd v4, r3
+; P9LE-NEXT:    ori r5, r5, 55879
+; P9LE-NEXT:    clrlwi r4, r4, 26
+; P9LE-NEXT:    rlwimi r4, r3, 16, 11, 15
 ; P9LE-NEXT:    li r3, 6
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    vmrghh v3, v4, v3
-; P9LE-NEXT:    clrlwi r3, r3, 16
-; P9LE-NEXT:    mulhwu r4, r3, r4
+; P9LE-NEXT:    mtfprwz f0, r4
+; P9LE-NEXT:    clrlwi r4, r3, 16
+; P9LE-NEXT:    mulhwu r4, r4, r5
 ; P9LE-NEXT:    mulli r4, r4, 95
 ; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    mtvsrd v4, r3
-; P9LE-NEXT:    li r3, 4
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    clrlwi r3, r3, 29
-; P9LE-NEXT:    mtvsrd v2, r3
-; P9LE-NEXT:    vmrghh v2, v4, v2
-; P9LE-NEXT:    xxmrglw v2, v2, v3
+; P9LE-NEXT:    li r4, 4
+; P9LE-NEXT:    vextuhrx r4, r4, v2
+; P9LE-NEXT:    clrlwi r4, r4, 29
+; P9LE-NEXT:    rlwimi r4, r3, 16, 0, 15
+; P9LE-NEXT:    mtfprwz f1, r4
+; P9LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: dont_fold_urem_power_of_two:
 ; P9BE:       # %bb.0:
-; P9BE-NEXT:    li r3, 2
-; P9BE-NEXT:    lis r4, 689
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    ori r4, r4, 55879
-; P9BE-NEXT:    clrlwi r3, r3, 27
-; P9BE-NEXT:    mtfprwz f0, r3
+; P9BE-NEXT:    li r4, 2
 ; P9BE-NEXT:    li r3, 0
+; P9BE-NEXT:    vextuhlx r4, r4, v2
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    clrlwi r3, r3, 26
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    addis r3, r2, .LCPI3_0 at toc@ha
-; P9BE-NEXT:    addi r3, r3, .LCPI3_0 at toc@l
-; P9BE-NEXT:    lxv vs2, 0(r3)
+; P9BE-NEXT:    clrlwi r4, r4, 27
+; P9BE-NEXT:    rlwimi r4, r3, 16, 10, 15
 ; P9BE-NEXT:    li r3, 6
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
+; P9BE-NEXT:    mtvsrwz v3, r4
+; P9BE-NEXT:    lis r4, 689
+; P9BE-NEXT:    ori r4, r4, 55879
 ; P9BE-NEXT:    clrlwi r3, r3, 16
-; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhwu r4, r3, r4
 ; P9BE-NEXT:    mulli r4, r4, 95
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    li r3, 4
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    clrlwi r3, r3, 29
-; P9BE-NEXT:    mtfprwz f3, r3
-; P9BE-NEXT:    xxperm vs1, vs3, vs2
-; P9BE-NEXT:    xxmrghw v2, vs0, vs1
+; P9BE-NEXT:    li r4, 4
+; P9BE-NEXT:    vextuhlx r4, r4, v2
+; P9BE-NEXT:    rlwinm r4, r4, 16, 13, 15
+; P9BE-NEXT:    or r3, r4, r3
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vmrgow v2, v3, v2
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: dont_fold_urem_power_of_two:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
+; P8LE-NEXT:    lis r6, 689
 ; P8LE-NEXT:    mffprd r3, f0
-; P8LE-NEXT:    clrldi r4, r3, 48
-; P8LE-NEXT:    clrlwi r4, r4, 26
-; P8LE-NEXT:    mtvsrd v2, r4
+; P8LE-NEXT:    ori r6, r6, 55879
+; P8LE-NEXT:    clrldi r5, r3, 48
 ; P8LE-NEXT:    rldicl r4, r3, 48, 48
-; P8LE-NEXT:    clrlwi r4, r4, 27
-; P8LE-NEXT:    mtvsrd v3, r4
-; P8LE-NEXT:    rldicl r4, r3, 32, 48
-; P8LE-NEXT:    rldicl r3, r3, 16, 48
-; P8LE-NEXT:    clrlwi r4, r4, 29
-; P8LE-NEXT:    clrlwi r3, r3, 16
-; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    mtvsrd v3, r4
-; P8LE-NEXT:    lis r4, 689
-; P8LE-NEXT:    ori r4, r4, 55879
-; P8LE-NEXT:    mulhwu r4, r3, r4
-; P8LE-NEXT:    mulli r4, r4, 95
-; P8LE-NEXT:    sub r3, r3, r4
-; P8LE-NEXT:    mtvsrd v4, r3
-; P8LE-NEXT:    vmrghh v3, v4, v3
-; P8LE-NEXT:    xxmrglw v2, v3, v2
+; P8LE-NEXT:    clrlwi r5, r5, 26
+; P8LE-NEXT:    rlwimi r5, r4, 16, 11, 15
+; P8LE-NEXT:    rldicl r4, r3, 16, 48
+; P8LE-NEXT:    rldicl r3, r3, 32, 48
+; P8LE-NEXT:    mtfprwz f0, r5
+; P8LE-NEXT:    clrlwi r5, r4, 16
+; P8LE-NEXT:    clrlwi r3, r3, 29
+; P8LE-NEXT:    mulhwu r5, r5, r6
+; P8LE-NEXT:    mulli r5, r5, 95
+; P8LE-NEXT:    sub r4, r4, r5
+; P8LE-NEXT:    rlwimi r3, r4, 16, 0, 15
+; P8LE-NEXT:    mtfprwz f1, r3
+; P8LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_urem_power_of_two:
 ; P8BE:       # %bb.0:
-; P8BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; P8BE-NEXT:    lis r5, 689
-; P8BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; P8BE-NEXT:    ori r5, r5, 55879
-; P8BE-NEXT:    lxvw4x v3, 0, r3
 ; P8BE-NEXT:    mfvsrd r3, v2
-; P8BE-NEXT:    rldicl r4, r3, 32, 48
-; P8BE-NEXT:    clrlwi r4, r4, 27
-; P8BE-NEXT:    mtvsrwz v2, r4
+; P8BE-NEXT:    rldicl r5, r3, 32, 48
 ; P8BE-NEXT:    rldicl r4, r3, 16, 48
-; P8BE-NEXT:    clrlwi r4, r4, 26
-; P8BE-NEXT:    mtvsrwz v4, r4
+; P8BE-NEXT:    clrlwi r5, r5, 27
+; P8BE-NEXT:    rlwimi r5, r4, 16, 10, 15
 ; P8BE-NEXT:    clrldi r4, r3, 48
 ; P8BE-NEXT:    rldicl r3, r3, 48, 48
+; P8BE-NEXT:    mtvsrwz v2, r5
+; P8BE-NEXT:    lis r5, 689
 ; P8BE-NEXT:    clrlwi r4, r4, 16
-; P8BE-NEXT:    clrlwi r3, r3, 29
+; P8BE-NEXT:    rlwinm r3, r3, 16, 13, 15
+; P8BE-NEXT:    ori r5, r5, 55879
 ; P8BE-NEXT:    mulhwu r5, r4, r5
-; P8BE-NEXT:    mtvsrwz v5, r3
 ; P8BE-NEXT:    mulli r5, r5, 95
 ; P8BE-NEXT:    sub r4, r4, r5
-; P8BE-NEXT:    vperm v2, v4, v2, v3
-; P8BE-NEXT:    mtvsrwz v4, r4
-; P8BE-NEXT:    vperm v3, v5, v4, v3
-; P8BE-NEXT:    xxmrghw v2, v2, v3
+; P8BE-NEXT:    or r3, r3, r4
+; P8BE-NEXT:    mtvsrwz v3, r3
+; P8BE-NEXT:    vmrgow v2, v2, v3
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
   ret <4 x i16> %1
@@ -651,147 +615,131 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
 ; P9LE:       # %bb.0:
 ; P9LE-NEXT:    li r3, 4
 ; P9LE-NEXT:    lis r4, 2849
+; P9LE-NEXT:    lis r6, 12
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
 ; P9LE-NEXT:    ori r4, r4, 25645
+; P9LE-NEXT:    ori r6, r6, 5560
 ; P9LE-NEXT:    clrlwi r3, r3, 16
 ; P9LE-NEXT:    mulhwu r4, r3, r4
 ; P9LE-NEXT:    mulli r4, r4, 23
 ; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    lis r4, 12
-; P9LE-NEXT:    mtvsrd v3, r3
-; P9LE-NEXT:    li r3, 6
-; P9LE-NEXT:    ori r4, r4, 5560
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    clrlwi r3, r3, 16
-; P9LE-NEXT:    mulhwu r4, r3, r4
-; P9LE-NEXT:    mulli r4, r4, 5423
-; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    lis r4, 100
-; P9LE-NEXT:    mtvsrd v4, r3
+; P9LE-NEXT:    li r4, 6
+; P9LE-NEXT:    vextuhrx r4, r4, v2
+; P9LE-NEXT:    clrlwi r5, r4, 16
+; P9LE-NEXT:    mulhwu r5, r5, r6
+; P9LE-NEXT:    mulli r5, r5, 5423
+; P9LE-NEXT:    sub r4, r4, r5
+; P9LE-NEXT:    lis r5, 100
+; P9LE-NEXT:    slwi r4, r4, 16
+; P9LE-NEXT:    ori r5, r5, 13629
+; P9LE-NEXT:    or r3, r4, r3
+; P9LE-NEXT:    mtfprwz f0, r3
 ; P9LE-NEXT:    li r3, 2
-; P9LE-NEXT:    ori r4, r4, 13629
 ; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    vmrghh v3, v4, v3
-; P9LE-NEXT:    clrlwi r3, r3, 16
-; P9LE-NEXT:    mulhwu r4, r3, r4
+; P9LE-NEXT:    clrlwi r4, r3, 16
+; P9LE-NEXT:    mulhwu r4, r4, r5
 ; P9LE-NEXT:    mulli r4, r4, 654
 ; P9LE-NEXT:    sub r3, r3, r4
-; P9LE-NEXT:    mtvsrd v2, r3
-; P9LE-NEXT:    li r3, 0
-; P9LE-NEXT:    mtvsrd v4, r3
-; P9LE-NEXT:    vmrghh v2, v2, v4
-; P9LE-NEXT:    xxmrglw v2, v3, v2
+; P9LE-NEXT:    slwi r3, r3, 16
+; P9LE-NEXT:    mtfprwz f1, r3
+; P9LE-NEXT:    xxmrghw v2, vs0, vs1
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: dont_fold_urem_one:
 ; P9BE:       # %bb.0:
 ; P9BE-NEXT:    li r3, 6
 ; P9BE-NEXT:    lis r4, 12
+; P9BE-NEXT:    lis r6, 2849
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    ori r4, r4, 5560
+; P9BE-NEXT:    ori r6, r6, 25645
 ; P9BE-NEXT:    clrlwi r3, r3, 16
 ; P9BE-NEXT:    mulhwu r4, r3, r4
 ; P9BE-NEXT:    mulli r4, r4, 5423
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    lis r4, 2849
-; P9BE-NEXT:    mtfprwz f0, r3
-; P9BE-NEXT:    li r3, 4
-; P9BE-NEXT:    ori r4, r4, 25645
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    clrlwi r3, r3, 16
-; P9BE-NEXT:    mulhwu r4, r3, r4
-; P9BE-NEXT:    mulli r4, r4, 23
-; P9BE-NEXT:    sub r3, r3, r4
+; P9BE-NEXT:    li r4, 4
+; P9BE-NEXT:    vextuhlx r4, r4, v2
+; P9BE-NEXT:    clrlwi r5, r4, 16
+; P9BE-NEXT:    mulhwu r5, r5, r6
+; P9BE-NEXT:    mulli r5, r5, 23
+; P9BE-NEXT:    sub r4, r4, r5
+; P9BE-NEXT:    slwi r4, r4, 16
+; P9BE-NEXT:    or r3, r4, r3
 ; P9BE-NEXT:    lis r4, 100
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; P9BE-NEXT:    ori r4, r4, 13629
-; P9BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; P9BE-NEXT:    lxv vs2, 0(r3)
+; P9BE-NEXT:    mtvsrwz v3, r3
 ; P9BE-NEXT:    li r3, 2
+; P9BE-NEXT:    ori r4, r4, 13629
 ; P9BE-NEXT:    vextuhlx r3, r3, v2
 ; P9BE-NEXT:    clrlwi r3, r3, 16
-; P9BE-NEXT:    xxperm vs0, vs1, vs2
 ; P9BE-NEXT:    mulhwu r4, r3, r4
 ; P9BE-NEXT:    mulli r4, r4, 654
 ; P9BE-NEXT:    sub r3, r3, r4
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    mtfprwz f3, r3
-; P9BE-NEXT:    xxperm vs1, vs3, vs2
-; P9BE-NEXT:    xxmrghw v2, vs1, vs0
+; P9BE-NEXT:    mtvsrwz v2, r3
+; P9BE-NEXT:    vmrgow v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: dont_fold_urem_one:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    li r4, 0
-; P8LE-NEXT:    lis r5, 100
-; P8LE-NEXT:    lis r6, 2849
+; P8LE-NEXT:    lis r5, 2849
+; P8LE-NEXT:    lis r7, 12
 ; P8LE-NEXT:    mffprd r3, f0
-; P8LE-NEXT:    mtvsrd v2, r4
-; P8LE-NEXT:    ori r4, r5, 13629
-; P8LE-NEXT:    ori r5, r6, 25645
-; P8LE-NEXT:    rldicl r6, r3, 48, 48
-; P8LE-NEXT:    clrlwi r6, r6, 16
-; P8LE-NEXT:    mulhwu r4, r6, r4
-; P8LE-NEXT:    mulli r4, r4, 654
-; P8LE-NEXT:    sub r4, r6, r4
-; P8LE-NEXT:    rldicl r6, r3, 32, 48
-; P8LE-NEXT:    rldicl r3, r3, 16, 48
-; P8LE-NEXT:    mtvsrd v3, r4
-; P8LE-NEXT:    lis r4, 12
-; P8LE-NEXT:    clrlwi r6, r6, 16
-; P8LE-NEXT:    clrlwi r3, r3, 16
-; P8LE-NEXT:    ori r4, r4, 5560
-; P8LE-NEXT:    mulhwu r5, r6, r5
-; P8LE-NEXT:    mulhwu r4, r3, r4
+; P8LE-NEXT:    ori r5, r5, 25645
+; P8LE-NEXT:    ori r7, r7, 5560
+; P8LE-NEXT:    rldicl r4, r3, 32, 48
+; P8LE-NEXT:    clrlwi r4, r4, 16
+; P8LE-NEXT:    mulhwu r5, r4, r5
 ; P8LE-NEXT:    mulli r5, r5, 23
-; P8LE-NEXT:    mulli r4, r4, 5423
-; P8LE-NEXT:    sub r5, r6, r5
+; P8LE-NEXT:    sub r4, r4, r5
+; P8LE-NEXT:    rldicl r5, r3, 16, 48
+; P8LE-NEXT:    rldicl r3, r3, 48, 48
+; P8LE-NEXT:    clrlwi r6, r5, 16
+; P8LE-NEXT:    mulhwu r6, r6, r7
+; P8LE-NEXT:    mulli r6, r6, 5423
+; P8LE-NEXT:    sub r5, r5, r6
+; P8LE-NEXT:    slwi r5, r5, 16
+; P8LE-NEXT:    or r4, r5, r4
+; P8LE-NEXT:    lis r5, 100
+; P8LE-NEXT:    mtfprwz f0, r4
+; P8LE-NEXT:    clrlwi r4, r3, 16
+; P8LE-NEXT:    ori r5, r5, 13629
+; P8LE-NEXT:    mulhwu r4, r4, r5
+; P8LE-NEXT:    mulli r4, r4, 654
 ; P8LE-NEXT:    sub r3, r3, r4
-; P8LE-NEXT:    mtvsrd v4, r3
-; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    mtvsrd v3, r5
-; P8LE-NEXT:    vmrghh v3, v4, v3
-; P8LE-NEXT:    xxmrglw v2, v3, v2
+; P8LE-NEXT:    slwi r3, r3, 16
+; P8LE-NEXT:    mtfprwz f1, r3
+; P8LE-NEXT:    xxmrghw v2, vs0, vs1
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_urem_one:
 ; P8BE:       # %bb.0:
 ; P8BE-NEXT:    mfvsrd r3, v2
-; P8BE-NEXT:    addis r6, r2, .LCPI4_0@toc@ha
-; P8BE-NEXT:    lis r4, 12
-; P8BE-NEXT:    lis r5, 2849
-; P8BE-NEXT:    addi r6, r6, .LCPI4_0@toc@l
-; P8BE-NEXT:    ori r4, r4, 5560
-; P8BE-NEXT:    ori r5, r5, 25645
-; P8BE-NEXT:    lxvw4x v2, 0, r6
-; P8BE-NEXT:    clrldi r6, r3, 48
-; P8BE-NEXT:    clrlwi r6, r6, 16
-; P8BE-NEXT:    mulhwu r4, r6, r4
-; P8BE-NEXT:    mulli r4, r4, 5423
-; P8BE-NEXT:    sub r4, r6, r4
-; P8BE-NEXT:    rldicl r6, r3, 48, 48
+; P8BE-NEXT:    lis r5, 12
+; P8BE-NEXT:    lis r7, 2849
+; P8BE-NEXT:    ori r5, r5, 5560
+; P8BE-NEXT:    ori r7, r7, 25645
+; P8BE-NEXT:    clrldi r4, r3, 48
+; P8BE-NEXT:    clrlwi r4, r4, 16
+; P8BE-NEXT:    mulhwu r5, r4, r5
+; P8BE-NEXT:    mulli r5, r5, 5423
+; P8BE-NEXT:    sub r4, r4, r5
+; P8BE-NEXT:    rldicl r5, r3, 48, 48
 ; P8BE-NEXT:    rldicl r3, r3, 32, 48
-; P8BE-NEXT:    clrlwi r6, r6, 16
+; P8BE-NEXT:    clrlwi r6, r5, 16
 ; P8BE-NEXT:    clrlwi r3, r3, 16
-; P8BE-NEXT:    mtvsrwz v3, r4
-; P8BE-NEXT:    mulhwu r5, r6, r5
-; P8BE-NEXT:    mulli r5, r5, 23
-; P8BE-NEXT:    sub r5, r6, r5
-; P8BE-NEXT:    lis r6, 100
-; P8BE-NEXT:    ori r6, r6, 13629
-; P8BE-NEXT:    mtvsrwz v4, r5
-; P8BE-NEXT:    mulhwu r6, r3, r6
-; P8BE-NEXT:    mulli r6, r6, 654
-; P8BE-NEXT:    sub r3, r3, r6
-; P8BE-NEXT:    vperm v3, v4, v3, v2
-; P8BE-NEXT:    mtvsrwz v4, r3
-; P8BE-NEXT:    li r3, 0
-; P8BE-NEXT:    mtvsrwz v5, r3
-; P8BE-NEXT:    vperm v2, v5, v4, v2
-; P8BE-NEXT:    xxmrghw v2, v2, v3
+; P8BE-NEXT:    mulhwu r6, r6, r7
+; P8BE-NEXT:    mulli r6, r6, 23
+; P8BE-NEXT:    sub r5, r5, r6
+; P8BE-NEXT:    slwi r5, r5, 16
+; P8BE-NEXT:    or r4, r5, r4
+; P8BE-NEXT:    mtvsrwz v2, r4
+; P8BE-NEXT:    lis r4, 100
+; P8BE-NEXT:    ori r4, r4, 13629
+; P8BE-NEXT:    mulhwu r4, r3, r4
+; P8BE-NEXT:    mulli r4, r4, 654
+; P8BE-NEXT:    sub r3, r3, r4
+; P8BE-NEXT:    mtvsrwz v3, r3
+; P8BE-NEXT:    vmrgow v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
   ret <4 x i16> %1
diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
index 11cc8abd2c7fa3d..6fdc867936a00c0 100644
--- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
@@ -28,80 +28,51 @@
 define <16 x i8> @test_v16i8_v16i8(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
 ; CHECK-LE-P8-LABEL: test_v16i8_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    lbz r3, 0(r3)
 ; CHECK-LE-P8-NEXT:    lbz r4, 0(r4)
-; CHECK-LE-P8-NEXT:    mtvsrd v2, r3
-; CHECK-LE-P8-NEXT:    mtvsrd v3, r4
-; CHECK-LE-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-LE-P8-NEXT:    lbz r3, 0(r3)
+; CHECK-LE-P8-NEXT:    slwi r4, r4, 16
+; CHECK-LE-P8-NEXT:    or r3, r4, r3
+; CHECK-LE-P8-NEXT:    mtfprd f0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lxsibzx v2, 0, r3
-; CHECK-LE-P9-NEXT:    lxsibzx v3, 0, r4
-; CHECK-LE-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-LE-P9-NEXT:    lbz r4, 0(r4)
+; CHECK-LE-P9-NEXT:    lbz r3, 0(r3)
+; CHECK-LE-P9-NEXT:    slwi r4, r4, 16
+; CHECK-LE-P9-NEXT:    or r3, r4, r3
+; CHECK-LE-P9-NEXT:    mtvsrws v2, r3
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
-; CHECK-BE-P8-NEXT:    lbz r3, 0(r3)
-; CHECK-BE-P8-NEXT:    lbz r4, 0(r4)
-; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI0_0@toc@l
-; CHECK-BE-P8-NEXT:    mtvsrwz v3, r4
-; CHECK-BE-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-P8-NEXT:    lxvw4x v2, 0, r5
-; CHECK-BE-P8-NEXT:    vperm v2, v4, v3, v2
+; CHECK-BE-P8-NEXT:    xxleqv v2, v2, v2
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v16i8_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
-; CHECK-BE-P9-NEXT:    lxsibzx v2, 0, r4
-; CHECK-BE-P9-NEXT:    lxsibzx f1, 0, r3
-; CHECK-BE-P9-NEXT:    addi r5, r5, .LCPI0_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv vs0, 0(r5)
-; CHECK-BE-P9-NEXT:    xxperm v2, vs1, vs0
+; CHECK-BE-P9-NEXT:    xxleqv v2, v2, v2
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    lbz r3, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    lbz r4, 0(r4)
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C0(r2) # %const.0
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v2, 0, r5
-; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r4
-; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v3, v2
+; CHECK-AIX-64-P8-NEXT:    xxleqv v2, v2, v2
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v16i8_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    ld r5, L..C0(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    lxsibzx v2, 0, r4
-; CHECK-AIX-64-P9-NEXT:    lxsibzx f1, 0, r3
-; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r5)
-; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs1, vs0
+; CHECK-AIX-64-P9-NEXT:    xxleqv v2, v2, v2
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lbz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lbz r4, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C0(r2) # %const.0
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
-; CHECK-AIX-32-P8-NEXT:    mtvsrwz v3, r4
-; CHECK-AIX-32-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v3, v2
+; CHECK-AIX-32-P8-NEXT:    xxleqv v2, v2, v2
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v16i8_v16i8:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    lwz r5, L..C0(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT:    lxsibzx v2, 0, r4
-; CHECK-AIX-32-P9-NEXT:    lxsibzx f1, 0, r3
-; CHECK-AIX-32-P9-NEXT:    lxv vs0, 0(r5)
-; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs1, vs0
+; CHECK-AIX-32-P9-NEXT:    xxleqv v2, v2, v2
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %0 = load <1 x i8>, ptr %a, align 4
@@ -150,7 +121,7 @@ define <16 x i8> @test_v16i8_none(<16 x i8> %a, i8 %b) {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C1(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r4, L..C0(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
@@ -164,7 +135,7 @@ define <16 x i8> @test_v16i8_none(<16 x i8> %a, i8 %b) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C1(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C0(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    mtvsrwz v4, r3
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v2, v3
@@ -225,7 +196,7 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
 ; CHECK-AIX-64-P8-LABEL: test_none_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C2(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C1(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
@@ -234,7 +205,7 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
 ; CHECK-AIX-64-P9-LABEL: test_none_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r3
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C1(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C0(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
@@ -472,7 +443,7 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
 ; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C2(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
@@ -481,7 +452,7 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
 ; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r3
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C2(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C1(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
@@ -545,7 +516,7 @@ define <8 x i16> @test_v8i16_none(<8 x i16> %a, i16 %b) {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C4(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r4, L..C3(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
@@ -560,7 +531,7 @@ define <8 x i16> @test_v8i16_none(<8 x i16> %a, i16 %b) {
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C2(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C1(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
@@ -781,9 +752,9 @@ define <4 x i32> @test_none_v4i32(<4 x i32> %a, i64 %b) {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C5(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r4, L..C4(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C6(r2) # %const.1
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C5(r2) # %const.1
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r3
@@ -799,12 +770,12 @@ define <4 x i32> @test_none_v4i32(<4 x i32> %a, i64 %b) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C3(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C2(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C4(r2) # %const.1
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C3(r2) # %const.1
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v4, v3
@@ -875,7 +846,7 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lbzx r4, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C7(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C6(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vspltb v2, v2, 7
@@ -885,7 +856,7 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C2(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxsibzx v2, 0, r4
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    vspltb v2, v2, 7
@@ -896,7 +867,7 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lbzx r4, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C5(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C4(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vspltb v2, v2, 7
@@ -906,7 +877,7 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-AIX-32-P9-LABEL: test_v4i32_none:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C1(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C0(r2) # %const.0
 ; CHECK-AIX-32-P9-NEXT:    lxsibzx v2, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    vspltb v2, v2, 7
@@ -1146,7 +1117,7 @@ define dso_local <16 x i8> @test_1_2(ptr nocapture noundef readonly %a, ptr noca
 ; CHECK-AIX-32-P8-NEXT:    lbzx r3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C6(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C5(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vspltb v2, v2, 7
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
@@ -1155,7 +1126,7 @@ define dso_local <16 x i8> @test_1_2(ptr nocapture noundef readonly %a, ptr noca
 ; CHECK-AIX-32-P9-LABEL: test_1_2:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lxsibzx v2, 0, r3
-; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C2(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C1(r2) # %const.0
 ; CHECK-AIX-32-P9-NEXT:    vspltb v3, v2, 7
 ; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    lxv vs0, 0(r3)
@@ -1238,7 +1209,7 @@ define <16 x i8> @test_none_v2i64(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-AIX-32-P8-NEXT:    lbzx r3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C7(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C6(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vspltb v2, v2, 7
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
@@ -1247,7 +1218,7 @@ define <16 x i8> @test_none_v2i64(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-AIX-32-P9-LABEL: test_none_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lxsibzx v2, 0, r3
-; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C3(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C2(r2) # %const.0
 ; CHECK-AIX-32-P9-NEXT:    vspltb v3, v2, 7
 ; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    lxv vs0, 0(r3)
@@ -1358,74 +1329,64 @@ entry:
 define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtvsrd v2, r3
-; CHECK-LE-P8-NEXT:    mtvsrd v3, r4
-; CHECK-LE-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-LE-P8-NEXT:    slwi r4, r4, 16
+; CHECK-LE-P8-NEXT:    or r3, r4, r3
+; CHECK-LE-P8-NEXT:    mtfprd f0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtvsrd v2, r3
-; CHECK-LE-P9-NEXT:    mtvsrd v3, r4
-; CHECK-LE-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-LE-P9-NEXT:    slwi r4, r4, 16
+; CHECK-LE-P9-NEXT:    or r3, r4, r3
+; CHECK-LE-P9-NEXT:    mtvsrws v2, r3
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI16_0@toc@ha
-; CHECK-BE-P8-NEXT:    mtvsrwz v3, r4
-; CHECK-BE-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI16_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v2, 0, r5
-; CHECK-BE-P8-NEXT:    vperm v2, v4, v3, v2
+; CHECK-BE-P8-NEXT:    slwi r3, r3, 16
+; CHECK-BE-P8-NEXT:    or r3, r3, r4
+; CHECK-BE-P8-NEXT:    sldi r3, r3, 32
+; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    addis r5, r2, .LCPI16_0@toc@ha
-; CHECK-BE-P9-NEXT:    mtvsrwz v2, r4
-; CHECK-BE-P9-NEXT:    mtfprwz f1, r3
-; CHECK-BE-P9-NEXT:    addi r5, r5, .LCPI16_0@toc@l
-; CHECK-BE-P9-NEXT:    lxv vs0, 0(r5)
-; CHECK-BE-P9-NEXT:    xxperm v2, vs1, vs0
+; CHECK-BE-P9-NEXT:    slwi r3, r3, 16
+; CHECK-BE-P9-NEXT:    or r3, r3, r4
+; CHECK-BE-P9-NEXT:    mtvsrws v2, r3
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C8(r2) # %const.0
-; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r4
-; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v2, 0, r5
-; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v3, v2
+; CHECK-AIX-64-P8-NEXT:    slwi r3, r3, 16
+; CHECK-AIX-64-P8-NEXT:    or r3, r3, r4
+; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 32
+; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    ld r5, L..C4(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r4
-; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r3
-; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r5)
-; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs1, vs0
+; CHECK-AIX-64-P9-NEXT:    slwi r3, r3, 16
+; CHECK-AIX-64-P9-NEXT:    or r3, r3, r4
+; CHECK-AIX-64-P9-NEXT:    mtvsrws v2, r3
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT:    slwi r3, r3, 16
+; CHECK-AIX-32-P8-NEXT:    or r3, r3, r4
+; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT:    sth r4, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    sth r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    slwi r3, r3, 16
+; CHECK-AIX-32-P9-NEXT:    or r3, r3, r4
+; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
 ; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
-; CHECK-AIX-32-P9-NEXT:    lxv v3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P9-NEXT:    blr
 entry:
   %rhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0
@@ -1792,7 +1753,7 @@ define <16 x i8> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C8(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C7(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
@@ -1802,7 +1763,7 @@ define <16 x i8> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P9:       # %bb.0: # %entry
 ; CHECK-AIX-32-P9-NEXT:    lxvwsx vs0, 0, r3
-; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C4(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C3(r2) # %const.0
 ; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
@@ -2026,7 +1987,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C9(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C7(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
@@ -2036,7 +1997,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C5(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxsd v2, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    xxsldwi vs0, f0, f0, 1
 ; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
@@ -2054,7 +2015,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C9(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C8(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw v3, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
@@ -2068,7 +2029,7 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
 ; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C5(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C4(r2) # %const.0
 ; CHECK-AIX-32-P9-NEXT:    lxv vs2, -32(r1)
 ; CHECK-AIX-32-P9-NEXT:    xxmrghw v2, vs2, vs1
 ; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
diff --git a/llvm/test/CodeGen/PowerPC/vec-promote.ll b/llvm/test/CodeGen/PowerPC/vec-promote.ll
index 628c5101c079652..8b5af689a525eea 100644
--- a/llvm/test/CodeGen/PowerPC/vec-promote.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-promote.ll
@@ -176,27 +176,26 @@ entry:
 define noundef <8 x i16> @vec_promote_short_zeroed(ptr nocapture noundef readonly %p) {
 ; CHECK-BE-LABEL: vec_promote_short_zeroed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis 4, 2, .LCPI8_0@toc@ha
 ; CHECK-BE-NEXT:    lhz 3, 0(3)
-; CHECK-BE-NEXT:    addi 4, 4, .LCPI8_0@toc@l
-; CHECK-BE-NEXT:    mtvsrwz 36, 3
-; CHECK-BE-NEXT:    lxvw4x 34, 0, 4
 ; CHECK-BE-NEXT:    li 4, 0
-; CHECK-BE-NEXT:    mtvsrwz 35, 4
-; CHECK-BE-NEXT:    vperm 2, 4, 3, 2
+; CHECK-BE-NEXT:    li 5, 0
+; CHECK-BE-NEXT:    rldimi 4, 4, 32, 0
+; CHECK-BE-NEXT:    slwi 3, 3, 16
+; CHECK-BE-NEXT:    mtfprd 1, 4
+; CHECK-BE-NEXT:    rldimi 5, 3, 32, 0
+; CHECK-BE-NEXT:    mtfprd 0, 5
+; CHECK-BE-NEXT:    xxmrghd 34, 0, 1
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: vec_promote_short_zeroed:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    addis 4, 2, .LCPI8_0@toc@ha
 ; CHECK-LE-NEXT:    lhz 3, 0(3)
-; CHECK-LE-NEXT:    addi 4, 4, .LCPI8_0@toc@l
-; CHECK-LE-NEXT:    mtvsrd 36, 3
-; CHECK-LE-NEXT:    lxvd2x 0, 0, 4
 ; CHECK-LE-NEXT:    li 4, 0
-; CHECK-LE-NEXT:    mtvsrd 35, 4
-; CHECK-LE-NEXT:    xxswapd 34, 0
-; CHECK-LE-NEXT:    vperm 2, 3, 4, 2
+; CHECK-LE-NEXT:    rldimi 3, 4, 32, 0
+; CHECK-LE-NEXT:    rldimi 4, 4, 32, 0
+; CHECK-LE-NEXT:    mtfprd 0, 3
+; CHECK-LE-NEXT:    mtfprd 1, 4
+; CHECK-LE-NEXT:    xxmrghd 34, 1, 0
 ; CHECK-LE-NEXT:    blr
 entry:
   %0 = load i16, ptr %p, align 2
@@ -227,27 +226,26 @@ entry:
 define noundef <16 x i8> @vec_promote_char_zeroed(ptr nocapture noundef readonly %p) {
 ; CHECK-BE-LABEL: vec_promote_char_zeroed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis 4, 2, .LCPI10_0@toc@ha
 ; CHECK-BE-NEXT:    lbz 3, 0(3)
-; CHECK-BE-NEXT:    addi 4, 4, .LCPI10_0@toc@l
-; CHECK-BE-NEXT:    mtvsrwz 36, 3
-; CHECK-BE-NEXT:    lxvw4x 34, 0, 4
 ; CHECK-BE-NEXT:    li 4, 0
-; CHECK-BE-NEXT:    mtvsrwz 35, 4
-; CHECK-BE-NEXT:    vperm 2, 4, 3, 2
+; CHECK-BE-NEXT:    li 5, 0
+; CHECK-BE-NEXT:    rldimi 4, 4, 32, 0
+; CHECK-BE-NEXT:    slwi 3, 3, 24
+; CHECK-BE-NEXT:    mtfprd 1, 4
+; CHECK-BE-NEXT:    rldimi 5, 3, 32, 0
+; CHECK-BE-NEXT:    mtfprd 0, 5
+; CHECK-BE-NEXT:    xxmrghd 34, 0, 1
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: vec_promote_char_zeroed:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    addis 4, 2, .LCPI10_0@toc@ha
 ; CHECK-LE-NEXT:    lbz 3, 0(3)
-; CHECK-LE-NEXT:    addi 4, 4, .LCPI10_0@toc@l
-; CHECK-LE-NEXT:    mtvsrd 36, 3
-; CHECK-LE-NEXT:    lxvd2x 0, 0, 4
 ; CHECK-LE-NEXT:    li 4, 0
-; CHECK-LE-NEXT:    mtvsrd 35, 4
-; CHECK-LE-NEXT:    xxswapd 34, 0
-; CHECK-LE-NEXT:    vperm 2, 3, 4, 2
+; CHECK-LE-NEXT:    rldimi 3, 4, 32, 0
+; CHECK-LE-NEXT:    rldimi 4, 4, 32, 0
+; CHECK-LE-NEXT:    mtfprd 0, 3
+; CHECK-LE-NEXT:    mtfprd 1, 4
+; CHECK-LE-NEXT:    xxmrghd 34, 1, 0
 ; CHECK-LE-NEXT:    blr
 entry:
   %0 = load i8, ptr %p, align 1
diff --git a/llvm/test/CodeGen/PowerPC/vec-trunc2.ll b/llvm/test/CodeGen/PowerPC/vec-trunc2.ll
index 7a09d5a5e8bb2e2..72755ef5648294f 100644
--- a/llvm/test/CodeGen/PowerPC/vec-trunc2.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-trunc2.ll
@@ -117,55 +117,45 @@ ret <4 x i16> %v2
 define dso_local <8 x i16> @test8x24(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8) {
 ; CHECK-LABEL: test8x24:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    mtvsrd v2, r3
-; CHECK-NEXT:    mtvsrd v3, r4
-; CHECK-NEXT:    mtvsrd v4, r5
-; CHECK-NEXT:    mtvsrd v5, r6
-; CHECK-NEXT:    mtvsrd v0, r7
-; CHECK-NEXT:    vmrghh v2, v3, v2
-; CHECK-NEXT:    mtvsrd v3, r8
-; CHECK-NEXT:    vmrghh v4, v5, v4
-; CHECK-NEXT:    mtvsrd v5, r9
-; CHECK-NEXT:    xxmrglw vs0, v4, v2
-; CHECK-NEXT:    vmrghh v3, v3, v0
-; CHECK-NEXT:    mtvsrd v0, r10
-; CHECK-NEXT:    vmrghh v5, v0, v5
-; CHECK-NEXT:    xxmrglw vs1, v5, v3
-; CHECK-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-NEXT:    slwi r6, r6, 16
+; CHECK-NEXT:    slwi r4, r4, 16
+; CHECK-NEXT:    or r5, r6, r5
+; CHECK-NEXT:    or r3, r4, r3
+; CHECK-NEXT:    slwi r4, r8, 16
+; CHECK-NEXT:    rldimi r3, r5, 32, 0
+; CHECK-NEXT:    or r4, r4, r7
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    slwi r3, r10, 16
+; CHECK-NEXT:    or r3, r3, r9
+; CHECK-NEXT:    rldimi r4, r3, 32, 0
+; CHECK-NEXT:    mtfprd f1, r4
+; CHECK-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8x24:
 ; CHECK-BE:       # %bb.0:
-; CHECK-BE-NEXT:    sth r10, -16(r1)
-; CHECK-BE-NEXT:    sth r9, -32(r1)
-; CHECK-BE-NEXT:    sth r8, -48(r1)
-; CHECK-BE-NEXT:    sth r7, -64(r1)
-; CHECK-BE-NEXT:    sth r6, -80(r1)
-; CHECK-BE-NEXT:    sth r5, -96(r1)
-; CHECK-BE-NEXT:    sth r4, -112(r1)
-; CHECK-BE-NEXT:    sth r3, -128(r1)
+; CHECK-BE-NEXT:    slwi r9, r9, 16
+; CHECK-BE-NEXT:    slwi r7, r7, 16
+; CHECK-BE-NEXT:    slwi r5, r5, 16
+; CHECK-BE-NEXT:    slwi r3, r3, 16
+; CHECK-BE-NEXT:    or r9, r9, r10
+; CHECK-BE-NEXT:    or r7, r7, r8
+; CHECK-BE-NEXT:    or r5, r5, r6
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    stw r9, -16(r1)
+; CHECK-BE-NEXT:    stw r7, -32(r1)
+; CHECK-BE-NEXT:    stw r5, -48(r1)
+; CHECK-BE-NEXT:    stw r3, -64(r1)
 ; CHECK-BE-NEXT:    addi r3, r1, -16
-; CHECK-BE-NEXT:    lxvw4x v2, 0, r3
+; CHECK-BE-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -32
-; CHECK-BE-NEXT:    lxvw4x v3, 0, r3
+; CHECK-BE-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -48
-; CHECK-BE-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-NEXT:    lxvw4x vs2, 0, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -64
-; CHECK-BE-NEXT:    lxvw4x v5, 0, r3
-; CHECK-BE-NEXT:    addi r3, r1, -80
-; CHECK-BE-NEXT:    lxvw4x v0, 0, r3
-; CHECK-BE-NEXT:    addi r3, r1, -96
-; CHECK-BE-NEXT:    lxvw4x v1, 0, r3
-; CHECK-BE-NEXT:    addi r3, r1, -112
-; CHECK-BE-NEXT:    lxvw4x v6, 0, r3
-; CHECK-BE-NEXT:    addi r3, r1, -128
-; CHECK-BE-NEXT:    lxvw4x v7, 0, r3
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    vmrghh v3, v5, v4
-; CHECK-BE-NEXT:    vmrghh v4, v1, v0
-; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
-; CHECK-BE-NEXT:    vmrghh v5, v7, v6
-; CHECK-BE-NEXT:    xxmrghw vs1, v5, v4
+; CHECK-BE-NEXT:    lxvw4x vs3, 0, r3
+; CHECK-BE-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghw vs1, vs3, vs2
 ; CHECK-BE-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-BE-NEXT:    blr
 %i11 = trunc i32 %i1 to i24
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
index cc38f921b117bff..f7e9c7ed8e1ccf6 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
@@ -16,16 +16,12 @@ define i32 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r4, f0
 ; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    vmrghh v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvspdpn f0, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    rlwimi r3, r4, 16, 0, 15
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test2elt:
@@ -33,37 +29,26 @@ define i32 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    mtfprd f0, r3
 ; CHECK-P9-NEXT:    xxswapd v2, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xxsldwi vs1, v2, v2, 3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v2, r3
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
-; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    rlwimi r3, r4, 16, 0, 15
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtfprd f0, r3
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-BE-NEXT:    xscvspdpn f2, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-NEXT:    lxv vs1, 0(r3)
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtfprwz f2, r3
+; CHECK-BE-NEXT:    mffprwz r4, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    xxperm v2, vs2, vs1
-; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    rlwimi r3, r4, 16, 0, 15
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <2 x float>
@@ -75,87 +60,78 @@ entry:
 define i64 @test4elt(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v2
-; CHECK-P8-NEXT:    mtvsrd v3, r3
+; CHECK-P8-NEXT:    xscvspdpn f0, vs1
+; CHECK-P8-NEXT:    xscvspdpn f1, v2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    rlwimi r4, r3, 16, 0, 15
 ; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghh v3, v4, v3
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    vmrghh v2, v2, v4
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P8-NEXT:    mtfprwz f0, r4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-P8-NEXT:    mtfprwz f1, r4
+; CHECK-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    xscvspdpn f1, v2
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    xscvspdpn f0, v2
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghh v3, v4, v3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    vmrghh v2, v4, v2
-; CHECK-P9-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-P9-NEXT:    mffprwz r3, f1
+; CHECK-P9-NEXT:    xxsldwi vs1, v2, v2, 1
+; CHECK-P9-NEXT:    mtfprwz f0, r4
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-P9-NEXT:    mtfprwz f1, r4
+; CHECK-P9-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 3
-; CHECK-BE-NEXT:    xxswapd vs2, v2
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-NEXT:    xxsldwi vs3, v2, v2, 1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtfprwz f2, r3
-; CHECK-BE-NEXT:    xxperm vs1, vs2, vs0
-; CHECK-BE-NEXT:    xscvspdpn f2, v2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtfprwz f2, r3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    xxperm vs3, vs2, vs0
-; CHECK-BE-NEXT:    xxmrghw vs0, vs3, vs1
-; CHECK-BE-NEXT:    mffprd r3, f0
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r3, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    xscvspdpn f0, v2
+; CHECK-BE-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrwz v3, r4
+; CHECK-BE-NEXT:    mffprwz r3, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-BE-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-NEXT:    vmrgow v2, v2, v3
+; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = fptoui <4 x float> %a to <4 x i16>
@@ -166,159 +142,134 @@ entry:
 define <8 x i16> @test8elt(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v3
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxsldwi vs1, v3, v3, 3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xxsldwi vs3, v3, v3, 1
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvspdpn f0, vs2
-; CHECK-P8-NEXT:    xxswapd v2, vs2
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    mtvsrd v3, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvspdpn f6, v2
+; CHECK-P8-NEXT:    xxsldwi vs4, v3, v3, 1
+; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 3
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    xscvdpsxws f6, f6
+; CHECK-P8-NEXT:    mffprwz r5, f0
+; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f0, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r3, f6
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, v3
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f1, vs4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xxsldwi vs5, v2, v2, 1
-; CHECK-P8-NEXT:    vmrghh v4, v4, v5
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs5
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    vmrghh v3, v3, v5
-; CHECK-P8-NEXT:    mtvsrd v5, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghh v5, v5, v0
-; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v4
-; CHECK-P8-NEXT:    vmrghh v2, v2, v0
-; CHECK-P8-NEXT:    xxmrglw vs1, v2, v5
-; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-P8-NEXT:    rlwimi r3, r5, 16, 0, 15
+; CHECK-P8-NEXT:    rldimi r3, r4, 32, 0
+; CHECK-P8-NEXT:    mtfprd f0, r3
+; CHECK-P8-NEXT:    mffprwz r3, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs5
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mffprwz r5, f1
+; CHECK-P8-NEXT:    rlwimi r5, r3, 16, 0, 15
+; CHECK-P8-NEXT:    rldimi r5, r4, 32, 0
+; CHECK-P8-NEXT:    mtfprd f1, r5
+; CHECK-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-P9-NEXT:    lxv vs0, 16(r3)
-; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xxswapd vs2, vs1
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mffprwz r3, f2
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xxsldwi vs2, vs0, vs0, 3
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 1
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xxmrglw vs1, v3, v2
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xxswapd vs2, vs0
-; CHECK-P9-NEXT:    mtvsrd v2, r3
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 3
+; CHECK-P9-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r5, f1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs0
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xscvspdpn f2, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    rlwimi r5, r3, 16, 0, 15
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    rldimi r5, r4, 32, 0
+; CHECK-P9-NEXT:    mffprwz r3, f1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; CHECK-P9-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs1
+; CHECK-P9-NEXT:    mffprwz r3, f1
+; CHECK-P9-NEXT:    mffprwz r6, f0
+; CHECK-P9-NEXT:    rlwimi r6, r3, 16, 0, 15
+; CHECK-P9-NEXT:    rldimi r6, r4, 32, 0
+; CHECK-P9-NEXT:    mtvsrdd v2, r6, r5
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-NEXT:    lxv vs2, 0(r3)
-; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs4, vs1
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtfprwz f4, r3
-; CHECK-BE-NEXT:    xxperm vs3, vs4, vs2
-; CHECK-BE-NEXT:    xscvspdpn f4, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 3
+; CHECK-BE-NEXT:    rlwimi r4, r3, 16, 0, 15
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r5, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    rlwimi r5, r3, 16, 0, 15
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtfprwz f4, r3
+; CHECK-BE-NEXT:    rldimi r5, r4, 32, 0
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    xxperm vs1, vs4, vs2
-; CHECK-BE-NEXT:    xxswapd vs4, vs0
-; CHECK-BE-NEXT:    xxmrghw vs1, vs1, vs3
-; CHECK-BE-NEXT:    xxsldwi vs3, vs0, vs0, 3
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtfprwz f4, r3
-; CHECK-BE-NEXT:    xxperm vs3, vs4, vs2
-; CHECK-BE-NEXT:    xscvspdpn f4, vs0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; CHECK-BE-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtfprwz f4, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtfprwz f0, r3
-; CHECK-BE-NEXT:    xxperm vs0, vs4, vs2
-; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs3
-; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mffprwz r6, f0
+; CHECK-BE-NEXT:    rlwimi r6, r3, 16, 0, 15
+; CHECK-BE-NEXT:    rldimi r6, r4, 32, 0
+; CHECK-BE-NEXT:    mtvsrdd v2, r6, r5
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x float>, ptr %0, align 32
@@ -329,309 +280,262 @@ entry:
 define void @test16elt(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
+; CHECK-P8-NEXT:    li r5, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r5
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lxvd2x vs6, r4, r6
-; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
-; CHECK-P8-NEXT:    lxvd2x vs8, r4, r6
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, v3
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xxsldwi vs2, v3, v3, 3
-; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xxsldwi vs3, v3, v3, 1
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xscvspdpn f2, vs3
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xxswapd v5, vs6
-; CHECK-P8-NEXT:    xxswapd v4, vs8
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 3
-; CHECK-P8-NEXT:    xxsldwi vs5, v2, v2, 1
+; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xxsldwi vs10, v2, v2, 1
+; CHECK-P8-NEXT:    xxsldwi vs11, v2, v2, 3
 ; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs4
-; CHECK-P8-NEXT:    xxsldwi vs7, v5, v5, 3
-; CHECK-P8-NEXT:    xxsldwi vs9, v5, v5, 1
-; CHECK-P8-NEXT:    xxsldwi vs10, v4, v4, 3
-; CHECK-P8-NEXT:    xxsldwi vs11, v4, v4, 1
-; CHECK-P8-NEXT:    mtvsrd v3, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xxswapd v4, vs3
+; CHECK-P8-NEXT:    xxsldwi vs4, v5, v5, 1
+; CHECK-P8-NEXT:    xxsldwi vs5, v5, v5, 3
+; CHECK-P8-NEXT:    mffprwz r6, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, v5
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r7, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, v4
+; CHECK-P8-NEXT:    xxsldwi vs6, v4, v4, 1
+; CHECK-P8-NEXT:    xxsldwi vs7, v4, v4, 3
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    vmrghh v0, v0, v1
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v2
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghh v3, v3, v1
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs5
-; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs6
-; CHECK-P8-NEXT:    mtvsrd v2, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs7
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    vmrghh v1, v1, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r4
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r8, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs4
+; CHECK-P8-NEXT:    xxsldwi vs9, v3, v3, 3
+; CHECK-P8-NEXT:    xxsldwi vs8, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f3, vs8
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r9, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs5
+; CHECK-P8-NEXT:    rlwimi r9, r7, 16, 0, 15
+; CHECK-P8-NEXT:    mffprwz r7, f3
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r10, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs6
+; CHECK-P8-NEXT:    rlwimi r10, r4, 16, 0, 15
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    rldimi r10, r9, 32, 0
+; CHECK-P8-NEXT:    mffprwz r11, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs7
+; CHECK-P8-NEXT:    rlwimi r11, r8, 16, 0, 15
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, v3
+; CHECK-P8-NEXT:    rlwimi r4, r6, 16, 0, 15
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    rldimi r4, r11, 32, 0
+; CHECK-P8-NEXT:    mtfprd f3, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v5
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghh v2, v2, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs9
-; CHECK-P8-NEXT:    mtvsrd v7, r4
+; CHECK-P8-NEXT:    mffprwz r6, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs9
+; CHECK-P8-NEXT:    rlwimi r7, r6, 16, 0, 15
+; CHECK-P8-NEXT:    mffprwz r6, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, v2
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs10
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghh v6, v6, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs8
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xxmrglw vs1, v2, v1
-; CHECK-P8-NEXT:    vmrghh v5, v5, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v4
-; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs11
-; CHECK-P8-NEXT:    xxmrglw vs2, v5, v6
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    vmrghh v7, v8, v7
-; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v0
-; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, v2
-; CHECK-P8-NEXT:    vmrghh v4, v4, v8
-; CHECK-P8-NEXT:    xxmrglw vs3, v4, v7
-; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
-; CHECK-P8-NEXT:    xxswapd vs0, v3
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    mffprwz r8, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs10
+; CHECK-P8-NEXT:    rlwimi r8, r6, 16, 0, 15
+; CHECK-P8-NEXT:    mffprwz r6, f1
+; CHECK-P8-NEXT:    mtfprd f1, r10
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    rldimi r8, r7, 32, 0
+; CHECK-P8-NEXT:    mtfprd f0, r8
+; CHECK-P8-NEXT:    mffprwz r12, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs11
+; CHECK-P8-NEXT:    rlwimi r12, r6, 16, 0, 15
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r6, f2
+; CHECK-P8-NEXT:    rlwimi r6, r4, 16, 0, 15
+; CHECK-P8-NEXT:    rldimi r6, r12, 32, 0
+; CHECK-P8-NEXT:    mtfprd f2, r6
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs1
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs1, 0(r4)
-; CHECK-P9-NEXT:    lxv vs0, 16(r4)
-; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxswapd vs3, vs1
-; CHECK-P9-NEXT:    xscvspdpn f4, vs1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-P9-NEXT:    lxv vs2, 0(r4)
+; CHECK-P9-NEXT:    lxv vs0, 48(r4)
+; CHECK-P9-NEXT:    lxv vs1, 32(r4)
+; CHECK-P9-NEXT:    xxsldwi vs4, vs2, vs2, 1
+; CHECK-P9-NEXT:    xscvspdpn f3, vs2
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    lxv vs3, 16(r4)
+; CHECK-P9-NEXT:    mffprwz r4, f4
+; CHECK-P9-NEXT:    xxswapd vs4, vs2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 3
+; CHECK-P9-NEXT:    rlwimi r4, r5, 16, 0, 15
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
 ; CHECK-P9-NEXT:    mffprwz r5, f2
-; CHECK-P9-NEXT:    xxswapd vs2, vs0
-; CHECK-P9-NEXT:    mtvsrd v2, r5
-; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f4
+; CHECK-P9-NEXT:    xxsldwi vs2, vs3, vs3, 1
+; CHECK-P9-NEXT:    mffprwz r6, f4
+; CHECK-P9-NEXT:    xscvspdpn f4, vs3
+; CHECK-P9-NEXT:    rlwimi r5, r6, 16, 0, 15
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    mtvsrd v3, r5
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    rldimi r5, r4, 32, 0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r6, f4
+; CHECK-P9-NEXT:    xxswapd vs4, vs1
+; CHECK-P9-NEXT:    mffprwz r7, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs3
+; CHECK-P9-NEXT:    xxsldwi vs3, vs3, vs3, 3
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    rlwimi r7, r6, 16, 0, 15
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    xscvspdpn f3, vs5
-; CHECK-P9-NEXT:    mtvsrd v3, r5
-; CHECK-P9-NEXT:    mffprwz r5, f1
-; CHECK-P9-NEXT:    xscvspdpn f1, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    mtvsrd v4, r5
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r5, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r5
-; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    mtvsrd v5, r5
-; CHECK-P9-NEXT:    mffprwz r5, f2
-; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v0, r5
-; CHECK-P9-NEXT:    mffprwz r5, f0
-; CHECK-P9-NEXT:    lxv vs0, 32(r4)
-; CHECK-P9-NEXT:    vmrghh v5, v0, v5
-; CHECK-P9-NEXT:    mtvsrd v0, r5
-; CHECK-P9-NEXT:    vmrghh v4, v4, v0
-; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxswapd vs3, vs0
-; CHECK-P9-NEXT:    xscvspdpn f4, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    mffprwz r6, f2
+; CHECK-P9-NEXT:    mffprwz r8, f3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs1
+; CHECK-P9-NEXT:    xxsldwi vs3, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 3
+; CHECK-P9-NEXT:    rlwimi r8, r6, 16, 0, 15
+; CHECK-P9-NEXT:    mffprwz r6, f4
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    rldimi r8, r7, 32, 0
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r5, f1
-; CHECK-P9-NEXT:    lxv vs1, 48(r4)
-; CHECK-P9-NEXT:    mffprwz r4, f4
-; CHECK-P9-NEXT:    mtvsrd v2, r5
-; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    xxmrglw vs3, v4, v5
-; CHECK-P9-NEXT:    mtvsrd v3, r5
-; CHECK-P9-NEXT:    xxmrgld vs2, vs3, vs2
-; CHECK-P9-NEXT:    xxsldwi vs3, vs1, vs1, 3
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r4
-; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    mtvsrd v4, r4
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    mtvsrdd vs2, r8, r5
 ; CHECK-P9-NEXT:    stxv vs2, 0(r3)
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P9-NEXT:    mffprwz r4, f3
-; CHECK-P9-NEXT:    xxswapd vs3, vs1
-; CHECK-P9-NEXT:    mtvsrd v2, r4
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    mffprwz r4, f3
-; CHECK-P9-NEXT:    xscvspdpn f3, vs1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-P9-NEXT:    mtvsrd v3, r4
+; CHECK-P9-NEXT:    mffprwz r7, f1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs0
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    rlwimi r5, r4, 16, 0, 15
+; CHECK-P9-NEXT:    rlwimi r7, r6, 16, 0, 15
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    rldimi r7, r5, 32, 0
+; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r4, f3
-; CHECK-P9-NEXT:    mtvsrd v3, r4
+; CHECK-P9-NEXT:    mffprwz r5, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; CHECK-P9-NEXT:    rlwimi r5, r4, 16, 0, 15
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mffprwz r4, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r4
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xxmrglw vs1, v3, v2
-; CHECK-P9-NEXT:    xxmrgld vs0, vs1, vs0
+; CHECK-P9-NEXT:    mffprwz r6, f0
+; CHECK-P9-NEXT:    rlwimi r6, r4, 16, 0, 15
+; CHECK-P9-NEXT:    rldimi r6, r5, 32, 0
+; CHECK-P9-NEXT:    mtvsrdd vs0, r6, r7
 ; CHECK-P9-NEXT:    stxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs3, 16(r4)
-; CHECK-BE-NEXT:    lxv vs2, 0(r4)
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI3_0 at toc@ha
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs0, 32(r4)
 ; CHECK-BE-NEXT:    lxv vs1, 48(r4)
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI3_0 at toc@l
-; CHECK-BE-NEXT:    lxv vs0, 0(r5)
-; CHECK-BE-NEXT:    xscvspdpn f6, vs3
-; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
-; CHECK-BE-NEXT:    xscvspdpn f9, vs2
-; CHECK-BE-NEXT:    xxswapd vs5, vs3
-; CHECK-BE-NEXT:    xxsldwi vs3, vs3, vs3, 1
-; CHECK-BE-NEXT:    xxsldwi vs7, vs2, vs2, 3
-; CHECK-BE-NEXT:    xxswapd vs8, vs2
-; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
-; CHECK-BE-NEXT:    xxsldwi vs10, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs11, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    xxsldwi vs4, vs2, vs2, 1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs2
 ; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f9, f9
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f7, vs7
-; CHECK-BE-NEXT:    xscvspdpn f8, vs8
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f10, vs10
-; CHECK-BE-NEXT:    xscvspdpn f11, vs11
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    xscvdpsxws f8, f8
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f10, f10
-; CHECK-BE-NEXT:    xscvdpsxws f11, f11
-; CHECK-BE-NEXT:    mffprwz r5, f6
-; CHECK-BE-NEXT:    mtfprwz f6, r5
-; CHECK-BE-NEXT:    mffprwz r5, f9
-; CHECK-BE-NEXT:    mtfprwz f9, r5
-; CHECK-BE-NEXT:    mffprwz r5, f4
-; CHECK-BE-NEXT:    mtfprwz f4, r5
-; CHECK-BE-NEXT:    mffprwz r5, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r5
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xxperm vs4, vs5, vs0
-; CHECK-BE-NEXT:    xscvspdpn f5, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    mtfprwz f3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f7
-; CHECK-BE-NEXT:    mtfprwz f7, r5
-; CHECK-BE-NEXT:    mffprwz r5, f8
-; CHECK-BE-NEXT:    xxperm vs3, vs6, vs0
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    mtfprwz f8, r5
+; CHECK-BE-NEXT:    lxv vs3, 0(r4)
+; CHECK-BE-NEXT:    mffprwz r4, f4
+; CHECK-BE-NEXT:    xxswapd vs4, vs2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 3
+; CHECK-BE-NEXT:    rlwimi r4, r5, 16, 0, 15
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    xxmrghw vs3, vs3, vs4
-; CHECK-BE-NEXT:    lxv vs4, 32(r4)
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mtfprwz f2, r5
-; CHECK-BE-NEXT:    xxperm vs7, vs8, vs0
-; CHECK-BE-NEXT:    mffprwz r5, f10
-; CHECK-BE-NEXT:    xxperm vs2, vs9, vs0
-; CHECK-BE-NEXT:    mtfprwz f10, r5
-; CHECK-BE-NEXT:    mffprwz r5, f11
-; CHECK-BE-NEXT:    mffprwz r4, f5
-; CHECK-BE-NEXT:    mtfprwz f11, r5
-; CHECK-BE-NEXT:    xxmrghw vs2, vs2, vs7
-; CHECK-BE-NEXT:    mtfprwz f5, r4
-; CHECK-BE-NEXT:    xxperm vs10, vs11, vs0
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs3
-; CHECK-BE-NEXT:    xxsldwi vs3, vs4, vs4, 3
-; CHECK-BE-NEXT:    mtfprwz f1, r4
-; CHECK-BE-NEXT:    xxperm vs1, vs5, vs0
-; CHECK-BE-NEXT:    xxswapd vs5, vs4
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    stxv vs2, 0(r3)
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xxmrghw vs1, vs1, vs10
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r4
-; CHECK-BE-NEXT:    mffprwz r4, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r4
-; CHECK-BE-NEXT:    xxperm vs3, vs5, vs0
-; CHECK-BE-NEXT:    xscvspdpn f5, vs4
-; CHECK-BE-NEXT:    xxsldwi vs4, vs4, vs4, 1
+; CHECK-BE-NEXT:    xxsldwi vs2, vs3, vs3, 1
+; CHECK-BE-NEXT:    mffprwz r6, f4
+; CHECK-BE-NEXT:    xscvspdpn f4, vs3
+; CHECK-BE-NEXT:    rlwimi r5, r6, 16, 0, 15
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    rldimi r5, r4, 32, 0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r6, f4
+; CHECK-BE-NEXT:    xxswapd vs4, vs1
+; CHECK-BE-NEXT:    mffprwz r7, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs3
+; CHECK-BE-NEXT:    xxsldwi vs3, vs3, vs3, 3
 ; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    rlwimi r7, r6, 16, 0, 15
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    mffprwz r4, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r4
-; CHECK-BE-NEXT:    mffprwz r4, f4
-; CHECK-BE-NEXT:    mtfprwz f4, r4
-; CHECK-BE-NEXT:    xxperm vs4, vs5, vs0
-; CHECK-BE-NEXT:    xxmrghw vs0, vs4, vs3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r6, f2
+; CHECK-BE-NEXT:    mffprwz r8, f3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 3
+; CHECK-BE-NEXT:    rlwimi r8, r6, 16, 0, 15
+; CHECK-BE-NEXT:    mffprwz r6, f4
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    rldimi r8, r7, 32, 0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    mtvsrdd vs2, r8, r5
+; CHECK-BE-NEXT:    stxv vs2, 0(r3)
+; CHECK-BE-NEXT:    mffprwz r7, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    rlwimi r5, r4, 16, 0, 15
+; CHECK-BE-NEXT:    rlwimi r7, r6, 16, 0, 15
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    rldimi r7, r5, 32, 0
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r5, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; CHECK-BE-NEXT:    rlwimi r5, r4, 16, 0, 15
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    mffprwz r6, f0
+; CHECK-BE-NEXT:    rlwimi r6, r4, 16, 0, 15
+; CHECK-BE-NEXT:    rldimi r6, r5, 32, 0
+; CHECK-BE-NEXT:    mtvsrdd vs0, r6, r7
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -648,16 +552,13 @@ define i32 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    slwi r4, r4, 16
 ; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    vmrghh v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    or r3, r4, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
@@ -669,33 +570,24 @@ define i32 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    slwi r4, r4, 16
 ; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
-; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    or r3, r4, r3
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtfprd f0, r3
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0 at toc@ha
-; CHECK-BE-NEXT:    xscvspdpn f2, vs0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0 at toc@l
-; CHECK-BE-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtfprwz f2, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    xxperm v2, vs2, vs1
-; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    slwi r4, r4, 16
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    or r3, r4, r3
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <2 x float>
@@ -707,87 +599,84 @@ entry:
 define i64 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT:    xxswapd vs1, v2
-; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 1
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 1
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v2
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs2
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghh v3, v4, v3
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    vmrghh v2, v2, v4
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, v2
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    or r3, r4, r3
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    mtfprwz f1, r3
+; CHECK-P8-NEXT:    mffprwz r3, f2
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    or r3, r4, r3
+; CHECK-P8-NEXT:    mtfprwz f0, r3
+; CHECK-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-P9-NEXT:    xxsldwi vs1, v2, v2, 3
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    mffprwz r3, f0
 ; CHECK-P9-NEXT:    xscvspdpn f0, v2
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghh v3, v4, v3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    vmrghh v2, v4, v2
-; CHECK-P9-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    slwi r4, r4, 16
+; CHECK-P9-NEXT:    or r3, r4, r3
+; CHECK-P9-NEXT:    mtfprwz f0, r3
+; CHECK-P9-NEXT:    mffprwz r3, f1
+; CHECK-P9-NEXT:    xxswapd vs1, v2
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    slwi r4, r4, 16
+; CHECK-P9-NEXT:    or r3, r4, r3
+; CHECK-P9-NEXT:    mtfprwz f1, r3
+; CHECK-P9-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 3
-; CHECK-BE-NEXT:    xxswapd vs2, v2
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0 at toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0 at toc@l
-; CHECK-BE-NEXT:    xxsldwi vs3, v2, v2, 1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtfprwz f2, r3
-; CHECK-BE-NEXT:    xxperm vs1, vs2, vs0
-; CHECK-BE-NEXT:    xscvspdpn f2, v2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtfprwz f2, r3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    xxperm vs3, vs2, vs0
-; CHECK-BE-NEXT:    xxmrghw vs0, vs3, vs1
-; CHECK-BE-NEXT:    mffprd r3, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r3, f0
+; CHECK-BE-NEXT:    xscvspdpn f0, v2
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    slwi r4, r4, 16
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    or r3, r4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r3, f0
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    slwi r4, r4, 16
+; CHECK-BE-NEXT:    or r3, r4, r3
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    vmrgow v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = fptosi <4 x float> %a to <4 x i16>
@@ -798,159 +687,146 @@ entry:
 define <8 x i16> @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v3
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxsldwi vs1, v3, v3, 3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xxsldwi vs3, v3, v3, 1
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvspdpn f0, vs2
-; CHECK-P8-NEXT:    xxswapd v2, vs2
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xxsldwi vs4, v3, v3, 1
+; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 3
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f1, vs4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xxsldwi vs5, v2, v2, 1
-; CHECK-P8-NEXT:    vmrghh v4, v4, v5
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs5
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    vmrghh v3, v3, v5
-; CHECK-P8-NEXT:    mtvsrd v5, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghh v5, v5, v0
-; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v4
-; CHECK-P8-NEXT:    vmrghh v2, v2, v0
-; CHECK-P8-NEXT:    xxmrglw vs1, v2, v5
-; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 3
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r3, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, v2
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs3
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    or r3, r4, r3
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    mffprwz r5, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs4
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    or r4, r4, r5
+; CHECK-P8-NEXT:    mffprwz r5, f1
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    rldimi r4, r3, 32, 0
+; CHECK-P8-NEXT:    mtfprd f0, r4
+; CHECK-P8-NEXT:    slwi r5, r5, 16
+; CHECK-P8-NEXT:    mffprwz r3, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, v3
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs5
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    or r3, r4, r3
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    or r4, r5, r4
+; CHECK-P8-NEXT:    rldimi r4, r3, 32, 0
+; CHECK-P8-NEXT:    mtfprd f1, r4
+; CHECK-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-P9-NEXT:    lxv vs0, 16(r3)
-; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xxswapd vs2, vs1
-; CHECK-P9-NEXT:    mtvsrd v2, r3
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 1
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    mffprwz r3, f2
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
+; CHECK-P9-NEXT:    slwi r4, r4, 16
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xxsldwi vs2, vs0, vs0, 3
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xxmrglw vs1, v3, v2
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xxswapd vs2, vs0
-; CHECK-P9-NEXT:    mtvsrd v2, r3
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    or r3, r4, r3
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xscvspdpn f2, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    mffprwz r5, f1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    slwi r5, r5, 16
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    or r4, r5, r4
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    rldimi r4, r3, 32, 0
+; CHECK-P9-NEXT:    mffprwz r3, f1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r5, f1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
+; CHECK-P9-NEXT:    slwi r5, r5, 16
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    or r3, r5, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r6, f0
+; CHECK-P9-NEXT:    mffprwz r5, f1
+; CHECK-P9-NEXT:    slwi r6, r6, 16
+; CHECK-P9-NEXT:    or r5, r6, r5
+; CHECK-P9-NEXT:    rldimi r5, r3, 32, 0
+; CHECK-P9-NEXT:    mtvsrdd v2, r5, r4
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI6_0 at toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI6_0 at toc@l
-; CHECK-BE-NEXT:    lxv vs2, 0(r3)
-; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs4, vs1
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtfprwz f4, r3
-; CHECK-BE-NEXT:    xxperm vs3, vs4, vs2
-; CHECK-BE-NEXT:    xscvspdpn f4, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    slwi r4, r4, 16
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    or r3, r4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r5, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    slwi r5, r5, 16
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    or r4, r5, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtfprwz f4, r3
+; CHECK-BE-NEXT:    rldimi r4, r3, 32, 0
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    xxperm vs1, vs4, vs2
-; CHECK-BE-NEXT:    xxswapd vs4, vs0
-; CHECK-BE-NEXT:    xxmrghw vs1, vs1, vs3
-; CHECK-BE-NEXT:    xxsldwi vs3, vs0, vs0, 3
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtfprwz f4, r3
-; CHECK-BE-NEXT:    xxperm vs3, vs4, vs2
-; CHECK-BE-NEXT:    xscvspdpn f4, vs0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r5, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    slwi r5, r5, 16
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    or r3, r5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtfprwz f4, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtfprwz f0, r3
-; CHECK-BE-NEXT:    xxperm vs0, vs4, vs2
-; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs3
-; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r6, f0
+; CHECK-BE-NEXT:    mffprwz r5, f1
+; CHECK-BE-NEXT:    slwi r6, r6, 16
+; CHECK-BE-NEXT:    or r5, r6, r5
+; CHECK-BE-NEXT:    rldimi r5, r3, 32, 0
+; CHECK-BE-NEXT:    mtvsrdd v2, r5, r4
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x float>, ptr %0, align 32
@@ -961,309 +837,286 @@ entry:
 define void @test16elt_signed(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
+; CHECK-P8-NEXT:    li r5, 48
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r5
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lxvd2x vs6, r4, r6
-; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
-; CHECK-P8-NEXT:    lxvd2x vs8, r4, r6
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, v3
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xxsldwi vs2, v3, v3, 3
-; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xxsldwi vs3, v3, v3, 1
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xscvspdpn f2, vs3
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xxswapd v5, vs6
-; CHECK-P8-NEXT:    xxswapd v4, vs8
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 3
-; CHECK-P8-NEXT:    xxsldwi vs5, v2, v2, 1
+; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xxsldwi vs10, v2, v2, 1
+; CHECK-P8-NEXT:    xxsldwi vs11, v2, v2, 3
 ; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs4
-; CHECK-P8-NEXT:    xxsldwi vs7, v5, v5, 3
-; CHECK-P8-NEXT:    xxsldwi vs9, v5, v5, 1
-; CHECK-P8-NEXT:    xxsldwi vs10, v4, v4, 3
-; CHECK-P8-NEXT:    xxsldwi vs11, v4, v4, 1
-; CHECK-P8-NEXT:    mtvsrd v3, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs3
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    xxswapd v4, vs3
+; CHECK-P8-NEXT:    xxsldwi vs4, v5, v5, 1
+; CHECK-P8-NEXT:    xxsldwi vs5, v5, v5, 3
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r6, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, v5
+; CHECK-P8-NEXT:    slwi r6, r6, 16
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xxsldwi vs6, v4, v4, 1
+; CHECK-P8-NEXT:    xxsldwi vs7, v4, v4, 3
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    vmrghh v0, v0, v1
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v2
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghh v3, v3, v1
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs5
-; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs6
-; CHECK-P8-NEXT:    mtvsrd v2, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs7
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    vmrghh v1, v1, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v5
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghh v2, v2, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs9
-; CHECK-P8-NEXT:    mtvsrd v7, r4
+; CHECK-P8-NEXT:    mffprwz r7, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, v4
+; CHECK-P8-NEXT:    slwi r7, r7, 16
+; CHECK-P8-NEXT:    xxsldwi vs9, v3, v3, 3
+; CHECK-P8-NEXT:    xxsldwi vs8, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f3, vs8
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r8, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs4
+; CHECK-P8-NEXT:    slwi r8, r8, 16
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r9, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs5
+; CHECK-P8-NEXT:    or r7, r7, r9
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r10, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs6
+; CHECK-P8-NEXT:    or r4, r4, r10
+; CHECK-P8-NEXT:    mffprwz r10, f3
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    rldimi r4, r7, 32, 0
+; CHECK-P8-NEXT:    mffprwz r11, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs7
+; CHECK-P8-NEXT:    or r8, r8, r11
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r9, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, v3
+; CHECK-P8-NEXT:    or r6, r6, r9
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    rldimi r6, r8, 32, 0
+; CHECK-P8-NEXT:    mtfprd f3, r6
+; CHECK-P8-NEXT:    mffprwz r9, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs9
+; CHECK-P8-NEXT:    slwi r9, r9, 16
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    or r9, r9, r10
+; CHECK-P8-NEXT:    mffprwz r10, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, v2
+; CHECK-P8-NEXT:    mffprwz r11, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs10
+; CHECK-P8-NEXT:    slwi r10, r10, 16
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    or r10, r10, r11
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r11, f1
+; CHECK-P8-NEXT:    mtfprd f1, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs10
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghh v6, v6, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs8
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xxmrglw vs1, v2, v1
-; CHECK-P8-NEXT:    vmrghh v5, v5, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v4
-; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs11
-; CHECK-P8-NEXT:    xxmrglw vs2, v5, v6
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    vmrghh v7, v8, v7
-; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v0
-; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, v2
-; CHECK-P8-NEXT:    vmrghh v4, v4, v8
-; CHECK-P8-NEXT:    xxmrglw vs3, v4, v7
-; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
-; CHECK-P8-NEXT:    xxswapd vs0, v3
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    rldimi r10, r9, 32, 0
+; CHECK-P8-NEXT:    mtfprd f0, r10
+; CHECK-P8-NEXT:    mffprwz r12, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs11
+; CHECK-P8-NEXT:    slwi r11, r11, 16
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    or r11, r11, r12
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r6, f2
+; CHECK-P8-NEXT:    or r4, r4, r6
+; CHECK-P8-NEXT:    rldimi r4, r11, 32, 0
+; CHECK-P8-NEXT:    mtfprd f2, r4
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs1
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    xxmrghd vs0, vs0, vs2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs1, 0(r4)
-; CHECK-P9-NEXT:    lxv vs0, 16(r4)
-; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxswapd vs3, vs1
+; CHECK-P9-NEXT:    lxv vs2, 0(r4)
+; CHECK-P9-NEXT:    lxv vs1, 16(r4)
+; CHECK-P9-NEXT:    lxv vs0, 48(r4)
+; CHECK-P9-NEXT:    xscvspdpn f3, vs2
 ; CHECK-P9-NEXT:    xscvspdpn f4, vs1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    lxv vs3, 32(r4)
+; CHECK-P9-NEXT:    mffprwz r4, f4
+; CHECK-P9-NEXT:    slwi r5, r5, 16
+; CHECK-P9-NEXT:    slwi r4, r4, 16
+; CHECK-P9-NEXT:    xscvspdpn f4, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    mffprwz r6, f4
+; CHECK-P9-NEXT:    xxsldwi vs4, vs2, vs2, 1
+; CHECK-P9-NEXT:    slwi r6, r6, 16
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    mffprwz r7, f4
+; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 1
+; CHECK-P9-NEXT:    or r5, r5, r7
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    mffprwz r7, f4
+; CHECK-P9-NEXT:    xxsldwi vs4, vs3, vs3, 1
+; CHECK-P9-NEXT:    or r4, r4, r7
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    mffprwz r7, f4
+; CHECK-P9-NEXT:    xxsldwi vs4, vs2, vs2, 3
+; CHECK-P9-NEXT:    xxswapd vs2, vs2
+; CHECK-P9-NEXT:    or r6, r6, r7
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    mffprwz r8, f2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    mffprwz r7, f4
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
+; CHECK-P9-NEXT:    slwi r8, r8, 16
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    or r7, r8, r7
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r5, f2
-; CHECK-P9-NEXT:    xxswapd vs2, vs0
-; CHECK-P9-NEXT:    mtvsrd v2, r5
-; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f4
+; CHECK-P9-NEXT:    rldimi r7, r5, 32, 0
+; CHECK-P9-NEXT:    mffprwz r8, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs3
+; CHECK-P9-NEXT:    mffprwz r9, f1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs3, vs3, 3
+; CHECK-P9-NEXT:    slwi r9, r9, 16
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    mtvsrd v3, r5
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    or r8, r9, r8
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    xscvspdpn f3, vs5
-; CHECK-P9-NEXT:    mtvsrd v3, r5
-; CHECK-P9-NEXT:    mffprwz r5, f1
-; CHECK-P9-NEXT:    xscvspdpn f1, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    mtvsrd v4, r5
+; CHECK-P9-NEXT:    rldimi r8, r4, 32, 0
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r5, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r5
-; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    mtvsrd v5, r5
+; CHECK-P9-NEXT:    mffprwz r10, f2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs0, vs0, 1
+; CHECK-P9-NEXT:    mffprwz r9, f1
+; CHECK-P9-NEXT:    mtvsrdd vs1, r8, r7
+; CHECK-P9-NEXT:    slwi r10, r10, 16
+; CHECK-P9-NEXT:    stxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    or r9, r10, r9
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    rldimi r9, r6, 32, 0
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    xscvspdpn f2, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    mffprwz r5, f2
-; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v0, r5
-; CHECK-P9-NEXT:    mffprwz r5, f0
-; CHECK-P9-NEXT:    lxv vs0, 32(r4)
-; CHECK-P9-NEXT:    vmrghh v5, v0, v5
-; CHECK-P9-NEXT:    mtvsrd v0, r5
-; CHECK-P9-NEXT:    vmrghh v4, v4, v0
-; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxswapd vs3, vs0
-; CHECK-P9-NEXT:    xscvspdpn f4, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xxsldwi vs2, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
+; CHECK-P9-NEXT:    slwi r5, r5, 16
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    or r4, r5, r4
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r5, f1
-; CHECK-P9-NEXT:    lxv vs1, 48(r4)
-; CHECK-P9-NEXT:    mffprwz r4, f4
-; CHECK-P9-NEXT:    mtvsrd v2, r5
-; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    xxmrglw vs3, v4, v5
-; CHECK-P9-NEXT:    mtvsrd v3, r5
-; CHECK-P9-NEXT:    xxmrgld vs2, vs3, vs2
-; CHECK-P9-NEXT:    xxsldwi vs3, vs1, vs1, 3
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r4
-; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    mtvsrd v4, r4
-; CHECK-P9-NEXT:    stxv vs2, 0(r3)
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P9-NEXT:    mffprwz r4, f3
-; CHECK-P9-NEXT:    xxswapd vs3, vs1
-; CHECK-P9-NEXT:    mtvsrd v2, r4
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    mffprwz r4, f3
-; CHECK-P9-NEXT:    xscvspdpn f3, vs1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-P9-NEXT:    mtvsrd v3, r4
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r4, f3
-; CHECK-P9-NEXT:    mtvsrd v3, r4
-; CHECK-P9-NEXT:    mffprwz r4, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r4
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xxmrglw vs1, v3, v2
-; CHECK-P9-NEXT:    xxmrgld vs0, vs1, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r6, f0
+; CHECK-P9-NEXT:    mffprwz r5, f2
+; CHECK-P9-NEXT:    slwi r6, r6, 16
+; CHECK-P9-NEXT:    or r5, r6, r5
+; CHECK-P9-NEXT:    rldimi r5, r4, 32, 0
+; CHECK-P9-NEXT:    mtvsrdd vs0, r5, r9
 ; CHECK-P9-NEXT:    stxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs3, 16(r4)
-; CHECK-BE-NEXT:    lxv vs2, 0(r4)
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_0@toc@ha
-; CHECK-BE-NEXT:    lxv vs1, 48(r4)
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_0@toc@l
-; CHECK-BE-NEXT:    lxv vs0, 0(r5)
-; CHECK-BE-NEXT:    xscvspdpn f6, vs3
-; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
-; CHECK-BE-NEXT:    xscvspdpn f9, vs2
-; CHECK-BE-NEXT:    xxswapd vs5, vs3
-; CHECK-BE-NEXT:    xxsldwi vs3, vs3, vs3, 1
-; CHECK-BE-NEXT:    xxsldwi vs7, vs2, vs2, 3
-; CHECK-BE-NEXT:    xxswapd vs8, vs2
-; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
-; CHECK-BE-NEXT:    xxsldwi vs10, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs11, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    lxv vs0, 32(r4)
+; CHECK-BE-NEXT:    xscvspdpn f3, vs2
+; CHECK-BE-NEXT:    xscvspdpn f4, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    lxv vs3, 48(r4)
+; CHECK-BE-NEXT:    mffprwz r4, f4
+; CHECK-BE-NEXT:    slwi r5, r5, 16
+; CHECK-BE-NEXT:    slwi r4, r4, 16
+; CHECK-BE-NEXT:    xscvspdpn f4, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r6, f4
+; CHECK-BE-NEXT:    xxsldwi vs4, vs2, vs2, 1
+; CHECK-BE-NEXT:    slwi r6, r6, 16
 ; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f9, f9
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f7, vs7
-; CHECK-BE-NEXT:    xscvspdpn f8, vs8
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r7, f4
+; CHECK-BE-NEXT:    xxsldwi vs4, vs1, vs1, 1
+; CHECK-BE-NEXT:    or r5, r5, r7
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r7, f4
+; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 1
+; CHECK-BE-NEXT:    or r4, r4, r7
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r7, f4
+; CHECK-BE-NEXT:    xxsldwi vs4, vs2, vs2, 3
+; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    or r6, r6, r7
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f10, vs10
-; CHECK-BE-NEXT:    xscvspdpn f11, vs11
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    xscvdpsxws f8, f8
+; CHECK-BE-NEXT:    mffprwz r8, f2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT:    mffprwz r7, f4
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    slwi r8, r8, 16
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    or r7, r8, r7
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f10, f10
-; CHECK-BE-NEXT:    xscvdpsxws f11, f11
-; CHECK-BE-NEXT:    mffprwz r5, f6
-; CHECK-BE-NEXT:    mtfprwz f6, r5
-; CHECK-BE-NEXT:    mffprwz r5, f9
-; CHECK-BE-NEXT:    mtfprwz f9, r5
-; CHECK-BE-NEXT:    mffprwz r5, f4
-; CHECK-BE-NEXT:    mtfprwz f4, r5
-; CHECK-BE-NEXT:    mffprwz r5, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r5
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xxperm vs4, vs5, vs0
-; CHECK-BE-NEXT:    xscvspdpn f5, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    mtfprwz f3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f7
-; CHECK-BE-NEXT:    mtfprwz f7, r5
-; CHECK-BE-NEXT:    mffprwz r5, f8
-; CHECK-BE-NEXT:    xxperm vs3, vs6, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    rldimi r7, r5, 32, 0
+; CHECK-BE-NEXT:    mffprwz r8, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs3
+; CHECK-BE-NEXT:    mffprwz r9, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs3, vs3, 3
+; CHECK-BE-NEXT:    slwi r9, r9, 16
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    mtfprwz f8, r5
-; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    xxmrghw vs3, vs3, vs4
-; CHECK-BE-NEXT:    lxv vs4, 32(r4)
+; CHECK-BE-NEXT:    or r8, r9, r8
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    rldimi r8, r4, 32, 0
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mtfprwz f2, r5
-; CHECK-BE-NEXT:    xxperm vs7, vs8, vs0
-; CHECK-BE-NEXT:    mffprwz r5, f10
-; CHECK-BE-NEXT:    xxperm vs2, vs9, vs0
-; CHECK-BE-NEXT:    mtfprwz f10, r5
-; CHECK-BE-NEXT:    mffprwz r5, f11
-; CHECK-BE-NEXT:    mffprwz r4, f5
-; CHECK-BE-NEXT:    mtfprwz f11, r5
-; CHECK-BE-NEXT:    xxmrghw vs2, vs2, vs7
-; CHECK-BE-NEXT:    mtfprwz f5, r4
-; CHECK-BE-NEXT:    xxperm vs10, vs11, vs0
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs3
-; CHECK-BE-NEXT:    xxsldwi vs3, vs4, vs4, 3
-; CHECK-BE-NEXT:    mtfprwz f1, r4
-; CHECK-BE-NEXT:    xxperm vs1, vs5, vs0
-; CHECK-BE-NEXT:    xxswapd vs5, vs4
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    stxv vs2, 0(r3)
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xxmrghw vs1, vs1, vs10
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r4
-; CHECK-BE-NEXT:    mffprwz r4, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r4
-; CHECK-BE-NEXT:    xxperm vs3, vs5, vs0
-; CHECK-BE-NEXT:    xscvspdpn f5, vs4
-; CHECK-BE-NEXT:    xxsldwi vs4, vs4, vs4, 1
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    mffprwz r4, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r4
-; CHECK-BE-NEXT:    mffprwz r4, f4
-; CHECK-BE-NEXT:    mtfprwz f4, r4
-; CHECK-BE-NEXT:    xxperm vs4, vs5, vs0
-; CHECK-BE-NEXT:    xxmrghw vs0, vs4, vs3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT:    mffprwz r10, f2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
+; CHECK-BE-NEXT:    mffprwz r9, f1
+; CHECK-BE-NEXT:    mtvsrdd vs1, r8, r7
+; CHECK-BE-NEXT:    slwi r10, r10, 16
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    or r9, r10, r9
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    rldimi r9, r6, 32, 0
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    xscvspdpn f2, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    slwi r5, r5, 16
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    or r4, r5, r4
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r6, f0
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    slwi r6, r6, 16
+; CHECK-BE-NEXT:    or r5, r6, r5
+; CHECK-BE-NEXT:    rldimi r5, r4, 32, 0
+; CHECK-BE-NEXT:    mtvsrdd vs0, r5, r9
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
index c6e808d145ebb36..31a4f64a20afea6 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
@@ -16,18 +16,13 @@ define i16 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    vmrghb v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    mffprd r3, f0
-; CHECK-P8-NEXT:    clrldi r3, r3, 48
-; CHECK-P8-NEXT:    sth r3, -2(r1)
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f0, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    rlwimi r4, r3, 8, 0, 23
+; CHECK-P8-NEXT:    sth r4, -2(r1)
 ; CHECK-P8-NEXT:    lhz r3, -2(r1)
 ; CHECK-P8-NEXT:    blr
 ;
@@ -36,40 +31,21 @@ define i16 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    mtfprd f0, r3
 ; CHECK-P9-NEXT:    xxswapd v2, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xxsldwi vs1, v2, v2, 3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v2, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    addi r3, r1, -2
-; CHECK-P9-NEXT:    vmrghb v2, v3, v2
-; CHECK-P9-NEXT:    vsldoi v2, v2, v2, 8
-; CHECK-P9-NEXT:    stxsihx v2, 0, r3
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    rlwimi r4, r3, 8, 0, 23
+; CHECK-P9-NEXT:    sth r4, -2(r1)
 ; CHECK-P9-NEXT:    lhz r3, -2(r1)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtfprd f0, r3
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-BE-NEXT:    xscvspdpn f2, vs0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-NEXT:    lxv vs1, 0(r3)
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtfprwz f2, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    xxperm v2, vs2, vs1
-; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
-; CHECK-BE-NEXT:    stxsihx v2, 0, r3
+; CHECK-BE-NEXT:    li r3, -1
+; CHECK-BE-NEXT:    sth r3, -2(r1)
 ; CHECK-BE-NEXT:    lhz r3, -2(r1)
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -82,89 +58,68 @@ entry:
 define i32 @test4elt(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs2
+; CHECK-P8-NEXT:    rlwimi r3, r4, 8, 16, 23
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r4, f0
 ; CHECK-P8-NEXT:    xscvspdpn f0, v2
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs2
+; CHECK-P8-NEXT:    rlwimi r3, r4, 16, 8, 15
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghb v3, v4, v3
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    vmrghb v2, v2, v4
-; CHECK-P8-NEXT:    vmrglh v2, v2, v3
-; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    rlwimi r3, r4, 24, 0, 7
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    xscvspdpn f0, v2
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghb v3, v4, v3
 ; CHECK-P9-NEXT:    mffprwz r3, f0
 ; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    rlwimi r3, r4, 8, 16, 23
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    vmrghb v2, v4, v2
-; CHECK-P9-NEXT:    vmrglh v2, v2, v3
-; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    xscvspdpn f0, v2
+; CHECK-P9-NEXT:    rlwimi r3, r4, 16, 8, 15
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    rlwimi r3, r4, 24, 0, 7
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 3
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-BE-NEXT:    xxsldwi vs2, v2, v2, 1
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xxswapd vs1, v2
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    xxperm v3, vs1, vs0
-; CHECK-BE-NEXT:    xscvspdpn f1, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    xxperm v2, vs1, vs0
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
-; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r3, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-BE-NEXT:    rlwimi r3, r4, 8, 16, 23
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    xscvspdpn f0, v2
+; CHECK-BE-NEXT:    rlwimi r3, r4, 16, 8, 15
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    rlwimi r3, r4, 24, 0, 7
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = fptoui <4 x float> %a to <4 x i8>
@@ -175,54 +130,48 @@ entry:
 define i64 @test8elt(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xxsldwi vs3, v3, v3, 3
+; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v3
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxsldwi vs1, v3, v3, 3
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xxsldwi vs3, v3, v3, 1
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 3
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs2
-; CHECK-P8-NEXT:    xxswapd v2, vs2
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    mtvsrd v3, r3
+; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs4
+; CHECK-P8-NEXT:    rlwimi r4, r3, 8, 16, 23
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    mffprwz r3, f0
 ; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    rlwimi r4, r3, 16, 8, 15
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs3
+; CHECK-P8-NEXT:    rlwimi r4, r3, 24, 0, 7
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f1, vs4
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xxsldwi vs5, v2, v2, 1
-; CHECK-P8-NEXT:    vmrghb v4, v4, v5
-; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mtfprwz f0, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs5
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    vmrghb v3, v3, v5
-; CHECK-P8-NEXT:    mtvsrd v5, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrglh v3, v3, v4
-; CHECK-P8-NEXT:    vmrghb v5, v5, v0
-; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    vmrghb v2, v2, v0
-; CHECK-P8-NEXT:    vmrglh v2, v2, v5
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P8-NEXT:    rlwimi r4, r3, 8, 16, 23
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, v3
+; CHECK-P8-NEXT:    rlwimi r4, r3, 16, 8, 15
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    rlwimi r4, r3, 24, 0, 7
+; CHECK-P8-NEXT:    mtfprwz f1, r4
+; CHECK-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
@@ -231,51 +180,45 @@ define i64 @test8elt(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-P9-NEXT:    lxv vs0, 16(r3)
-; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xxswapd vs2, vs1
-; CHECK-P9-NEXT:    mtvsrd v2, r3
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xscvspdpn f2, vs1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 1
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghb v2, v3, v2
+; CHECK-P9-NEXT:    rlwimi r4, r3, 8, 16, 23
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xxswapd vs1, vs0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xxswapd vs2, vs0
+; CHECK-P9-NEXT:    rlwimi r4, r3, 16, 8, 15
 ; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xscvspdpn f1, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    rlwimi r4, r3, 24, 0, 7
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mtfprwz f1, r4
+; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs0, vs0, 1
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrghb v3, v4, v3
+; CHECK-P9-NEXT:    rlwimi r4, r3, 8, 16, 23
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    rlwimi r4, r3, 16, 8, 15
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    rlwimi r4, r3, 24, 0, 7
+; CHECK-P9-NEXT:    mtfprwz f0, r4
+; CHECK-P9-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
@@ -283,55 +226,46 @@ define i64 @test8elt(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-NEXT:    lxv vs2, 0(r3)
-; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    xxperm v2, vs3, vs2
-; CHECK-BE-NEXT:    xscvspdpn f3, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 1
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    rlwimi r4, r3, 8, 16, 23
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    rlwimi r4, r3, 16, 8, 15
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
+; CHECK-BE-NEXT:    rlwimi r4, r3, 24, 0, 7
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xxperm v3, vs3, vs2
+; CHECK-BE-NEXT:    mtvsrwz v2, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    xxperm v3, vs1, vs2
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    rlwimi r4, r3, 8, 16, 23
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    rlwimi r4, r3, 16, 8, 15
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xxperm v4, vs1, vs2
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
-; CHECK-BE-NEXT:    mffprd r3, f0
+; CHECK-BE-NEXT:    rlwimi r4, r3, 24, 0, 7
+; CHECK-BE-NEXT:    mtvsrwz v3, r4
+; CHECK-BE-NEXT:    vmrgow v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x float>, ptr %0, align 32
@@ -343,305 +277,256 @@ entry:
 define <16 x i8> @test16elt(ptr nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
-; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    lxvd2x vs4, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    lxvd2x vs8, r3, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, v3
-; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    xxsldwi vs2, v3, v3, 3
-; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxsldwi vs3, v3, v3, 1
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs4
+; CHECK-P8-NEXT:    xxswapd v5, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xscvspdpn f2, vs3
+; CHECK-P8-NEXT:    xxsldwi vs8, v2, v2, 3
+; CHECK-P8-NEXT:    xxsldwi vs11, v2, v2, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    xscvspdpn f1, vs4
-; CHECK-P8-NEXT:    xxsldwi vs5, v2, v2, 1
-; CHECK-P8-NEXT:    xxswapd v5, vs6
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xxswapd v4, vs8
-; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v2
-; CHECK-P8-NEXT:    xxsldwi vs7, v5, v5, 3
-; CHECK-P8-NEXT:    xxsldwi vs9, v5, v5, 1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvspdpn f0, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xxsldwi vs10, v4, v4, 3
-; CHECK-P8-NEXT:    xxsldwi vs11, v4, v4, 1
-; CHECK-P8-NEXT:    vmrghb v0, v0, v1
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs5
-; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs7
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    vmrghb v3, v3, v1
-; CHECK-P8-NEXT:    mtvsrd v1, r3
+; CHECK-P8-NEXT:    xxsldwi vs3, v5, v5, 3
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    xxsldwi vs7, v5, v5, 1
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r5, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs3
+; CHECK-P8-NEXT:    xxsldwi vs5, v4, v4, 3
+; CHECK-P8-NEXT:    xxswapd v3, vs2
+; CHECK-P8-NEXT:    xxsldwi vs9, v4, v4, 1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r6, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs5
+; CHECK-P8-NEXT:    rlwimi r6, r3, 8, 16, 23
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxsldwi vs6, v3, v3, 3
+; CHECK-P8-NEXT:    xxsldwi vs10, v3, v3, 1
 ; CHECK-P8-NEXT:    mffprwz r3, f0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs6
+; CHECK-P8-NEXT:    rlwimi r3, r4, 8, 16, 23
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs7
+; CHECK-P8-NEXT:    rlwimi r4, r5, 8, 16, 23
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r5, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs9
+; CHECK-P8-NEXT:    rlwimi r6, r5, 16, 8, 15
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r5, f0
 ; CHECK-P8-NEXT:    xscvspdpn f0, v5
-; CHECK-P8-NEXT:    vmrglh v3, v3, v0
+; CHECK-P8-NEXT:    rlwimi r3, r5, 16, 8, 15
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghb v1, v1, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs9
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    vmrghb v2, v2, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mffprwz r5, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v4
+; CHECK-P8-NEXT:    rlwimi r6, r5, 24, 0, 7
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r5, f0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs10
+; CHECK-P8-NEXT:    rlwimi r3, r5, 24, 0, 7
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    rldimi r3, r6, 32, 0
+; CHECK-P8-NEXT:    mtfprd f1, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs8
-; CHECK-P8-NEXT:    vmrglh v2, v2, v1
+; CHECK-P8-NEXT:    xscvspdpn f0, v3
+; CHECK-P8-NEXT:    rlwimi r4, r3, 16, 8, 15
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghb v6, v6, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    vmrghb v5, v5, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v4
-; CHECK-P8-NEXT:    mtvsrd v8, r3
+; CHECK-P8-NEXT:    xscvspdpn f0, vs4
+; CHECK-P8-NEXT:    rlwimi r4, r3, 24, 0, 7
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs8
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r5, f0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs11
-; CHECK-P8-NEXT:    vmrglh v5, v5, v6
-; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    rlwimi r5, r3, 8, 16, 23
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    rlwimi r5, r3, 16, 8, 15
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghb v7, v8, v7
-; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
-; CHECK-P8-NEXT:    vmrghb v4, v4, v8
-; CHECK-P8-NEXT:    vmrglh v4, v4, v7
-; CHECK-P8-NEXT:    xxmrglw vs1, v4, v5
-; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-P8-NEXT:    rlwimi r5, r3, 24, 0, 7
+; CHECK-P8-NEXT:    rldimi r5, r4, 32, 0
+; CHECK-P8-NEXT:    mtfprd f0, r5
+; CHECK-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs3, 0(r3)
-; CHECK-P9-NEXT:    lxv vs0, 48(r3)
-; CHECK-P9-NEXT:    lxv vs1, 32(r3)
-; CHECK-P9-NEXT:    lxv vs2, 16(r3)
-; CHECK-P9-NEXT:    xxsldwi vs4, vs3, vs3, 3
+; CHECK-P9-NEXT:    lxv vs3, 16(r3)
+; CHECK-P9-NEXT:    lxv vs0, 32(r3)
+; CHECK-P9-NEXT:    lxv vs1, 48(r3)
+; CHECK-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-P9-NEXT:    xxswapd vs4, vs3
 ; CHECK-P9-NEXT:    xscvspdpn f4, vs4
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
 ; CHECK-P9-NEXT:    mffprwz r3, f4
-; CHECK-P9-NEXT:    xxswapd vs4, vs3
-; CHECK-P9-NEXT:    mtvsrd v2, r3
+; CHECK-P9-NEXT:    xxsldwi vs4, vs3, vs3, 3
 ; CHECK-P9-NEXT:    xscvspdpn f4, vs4
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    mffprwz r3, f4
-; CHECK-P9-NEXT:    xscvspdpn f4, vs3
-; CHECK-P9-NEXT:    xxsldwi vs3, vs3, vs3, 1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    mffprwz r4, f4
+; CHECK-P9-NEXT:    xxsldwi vs4, vs3, vs3, 1
 ; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    vmrghb v2, v3, v2
+; CHECK-P9-NEXT:    rlwimi r4, r3, 8, 16, 23
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
 ; CHECK-P9-NEXT:    mffprwz r3, f4
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xxsldwi vs3, vs2, vs2, 3
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    rlwimi r4, r3, 16, 8, 15
 ; CHECK-P9-NEXT:    mffprwz r3, f3
 ; CHECK-P9-NEXT:    xxswapd vs3, vs2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    rlwimi r4, r3, 24, 0, 7
 ; CHECK-P9-NEXT:    xscvspdpn f3, vs3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xscvspdpn f3, vs2
-; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    vmrghb v3, v4, v3
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xxsldwi vs3, vs1, vs1, 3
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    xxsldwi vs3, vs2, vs2, 3
 ; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    mtvsrd v5, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
 ; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xxswapd vs3, vs1
-; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v2, r3
+; CHECK-P9-NEXT:    xxsldwi vs3, vs2, vs2, 1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    rlwimi r3, r5, 8, 16, 23
 ; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xscvspdpn f3, vs1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    rlwimi r3, r5, 16, 8, 15
+; CHECK-P9-NEXT:    mffprwz r5, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    rlwimi r3, r5, 24, 0, 7
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    rldimi r3, r4, 32, 0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r5, f2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 1
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    vmrghb v2, v3, v2
+; CHECK-P9-NEXT:    rlwimi r5, r4, 8, 16, 23
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    rlwimi r5, r4, 16, 8, 15
+; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs0
+; CHECK-P9-NEXT:    rlwimi r5, r4, 24, 0, 7
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xxswapd vs1, vs0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xscvspdpn f1, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mffprwz r6, f1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrghb v3, v4, v3
+; CHECK-P9-NEXT:    rlwimi r6, r4, 8, 16, 23
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    rlwimi r6, r4, 16, 8, 15
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    rlwimi r6, r4, 24, 0, 7
+; CHECK-P9-NEXT:    rldimi r6, r5, 32, 0
+; CHECK-P9-NEXT:    mtvsrdd v2, r6, r3
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-BE-NEXT:    lxv vs4, 0(r3)
-; CHECK-BE-NEXT:    xxsldwi vs5, vs3, vs3, 3
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    xxswapd vs5, vs3
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r3
-; CHECK-BE-NEXT:    xxperm v2, vs5, vs4
-; CHECK-BE-NEXT:    xscvspdpn f5, vs3
-; CHECK-BE-NEXT:    xxsldwi vs3, vs3, vs3, 1
+; CHECK-BE-NEXT:    lxv vs3, 32(r3)
+; CHECK-BE-NEXT:    lxv vs0, 16(r3)
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-NEXT:    lxv vs2, 48(r3)
+; CHECK-BE-NEXT:    xxswapd vs4, vs3
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r4, f4
+; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 1
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    rlwimi r4, r3, 8, 16, 23
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    xxsldwi vs3, vs2, vs2, 3
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xxperm v3, vs5, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    rlwimi r4, r3, 16, 8, 15
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs2
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    rlwimi r4, r3, 24, 0, 7
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    xxperm v3, vs3, vs4
-; CHECK-BE-NEXT:    xscvspdpn f3, vs2
-; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xxperm v4, vs3, vs4
-; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    xxsldwi vs3, vs2, vs2, 3
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xxmrghw vs2, v3, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    xxsldwi vs3, vs2, vs2, 1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    rlwimi r3, r5, 8, 16, 23
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    xxperm v2, vs3, vs4
-; CHECK-BE-NEXT:    xscvspdpn f3, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    rlwimi r3, r5, 16, 8, 15
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
+; CHECK-BE-NEXT:    rlwimi r3, r5, 24, 0, 7
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    rldimi r3, r4, 32, 0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 1
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    rlwimi r5, r4, 8, 16, 23
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    rlwimi r5, r4, 16, 8, 15
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
+; CHECK-BE-NEXT:    rlwimi r5, r4, 24, 0, 7
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xxperm v3, vs3, vs4
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    xxperm v3, vs1, vs4
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    mffprwz r6, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    rlwimi r6, r4, 8, 16, 23
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xxperm v4, vs1, vs4
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
-; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    rlwimi r6, r4, 16, 8, 15
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    rlwimi r6, r4, 24, 0, 7
+; CHECK-BE-NEXT:    rldimi r6, r5, 32, 0
+; CHECK-BE-NEXT:    mtvsrdd v2, r6, r3
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x float>, ptr %0, align 64
@@ -656,17 +541,13 @@ define i16 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    slwi r4, r4, 8
 ; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    vmrghb v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    mffprd r3, f0
-; CHECK-P8-NEXT:    clrldi r3, r3, 48
+; CHECK-P8-NEXT:    or r3, r4, r3
 ; CHECK-P8-NEXT:    sth r3, -2(r1)
 ; CHECK-P8-NEXT:    lhz r3, -2(r1)
 ; CHECK-P8-NEXT:    blr
@@ -680,36 +561,18 @@ define i16 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    slwi r4, r4, 8
 ; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    addi r3, r1, -2
-; CHECK-P9-NEXT:    vmrghb v2, v3, v2
-; CHECK-P9-NEXT:    vsldoi v2, v2, v2, 8
-; CHECK-P9-NEXT:    stxsihx v2, 0, r3
+; CHECK-P9-NEXT:    or r3, r4, r3
+; CHECK-P9-NEXT:    sth r3, -2(r1)
 ; CHECK-P9-NEXT:    lhz r3, -2(r1)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtfprd f0, r3
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-BE-NEXT:    xscvspdpn f2, vs0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-BE-NEXT:    lxv vs1, 0(r3)
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtfprwz f2, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    xxperm v2, vs2, vs1
-; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
-; CHECK-BE-NEXT:    stxsihx v2, 0, r3
+; CHECK-BE-NEXT:    li r3, -1
+; CHECK-BE-NEXT:    sth r3, -2(r1)
 ; CHECK-BE-NEXT:    lhz r3, -2(r1)
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -722,89 +585,77 @@ entry:
 define i32 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f3, v2
 ; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 3
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 1
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v2
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs2
+; CHECK-P8-NEXT:    mffprwz r3, f3
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs1
+; CHECK-P8-NEXT:    slwi r3, r3, 24
+; CHECK-P8-NEXT:    slwi r4, r4, 16
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghb v3, v4, v3
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    vmrghb v2, v2, v4
-; CHECK-P8-NEXT:    vmrglh v2, v2, v3
-; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs2
+; CHECK-P8-NEXT:    slwi r4, r4, 8
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    or r3, r3, r4
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, v2
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-P9-NEXT:    slwi r3, r3, 24
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    xscvspdpn f0, v2
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    xxswapd vs0, v2
+; CHECK-P9-NEXT:    slwi r4, r4, 16
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    or r3, r3, r4
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghb v3, v4, v3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-P9-NEXT:    slwi r4, r4, 8
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    or r3, r3, r4
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    vmrghb v2, v4, v2
-; CHECK-P9-NEXT:    vmrglh v2, v2, v3
-; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    or r3, r3, r4
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 3
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-BE-NEXT:    xxsldwi vs2, v2, v2, 1
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xxswapd vs1, v2
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    xxperm v3, vs1, vs0
-; CHECK-BE-NEXT:    xscvspdpn f1, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    xxperm v2, vs1, vs0
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
-; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    xscvspdpn f0, v2
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r3, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-BE-NEXT:    slwi r3, r3, 24
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    slwi r4, r4, 16
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-NEXT:    slwi r4, r4, 8
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    or r3, r3, r4
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = fptosi <4 x float> %a to <4 x i8>
@@ -815,54 +666,54 @@ entry:
 define i64 @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v3
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxsldwi vs1, v3, v3, 3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xscvspdpn f6, v2
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P8-NEXT:    xxsldwi vs3, v3, v3, 1
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvspdpn f0, vs2
-; CHECK-P8-NEXT:    xxswapd v2, vs2
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 3
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f6, f6
+; CHECK-P8-NEXT:    mffprwz r3, f6
+; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f1, vs4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xxsldwi vs5, v2, v2, 1
-; CHECK-P8-NEXT:    vmrghb v4, v4, v5
-; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, v3
+; CHECK-P8-NEXT:    slwi r3, r3, 24
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs4
+; CHECK-P8-NEXT:    slwi r4, r4, 8
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    mtfprwz f0, r3
+; CHECK-P8-NEXT:    mffprwz r3, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs3
+; CHECK-P8-NEXT:    slwi r3, r3, 24
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    or r3, r3, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs5
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    vmrghb v3, v3, v5
-; CHECK-P8-NEXT:    mtvsrd v5, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrglh v3, v3, v4
-; CHECK-P8-NEXT:    vmrghb v5, v5, v0
-; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    vmrghb v2, v2, v0
-; CHECK-P8-NEXT:    vmrglh v2, v2, v5
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P8-NEXT:    slwi r4, r4, 8
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    mtfprwz f1, r3
+; CHECK-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
@@ -871,51 +722,51 @@ define i64 @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-P9-NEXT:    lxv vs0, 16(r3)
-; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f2, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xxswapd vs2, vs1
-; CHECK-P9-NEXT:    mtvsrd v2, r3
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 1
+; CHECK-P9-NEXT:    slwi r3, r3, 24
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xscvspdpn f2, vs1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 3
+; CHECK-P9-NEXT:    slwi r4, r4, 16
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    or r3, r3, r4
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghb v2, v3, v2
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    xscvspdpn f2, vs0
+; CHECK-P9-NEXT:    slwi r4, r4, 8
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    mtfprwz f1, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xxswapd vs1, vs0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xscvspdpn f1, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    xxsldwi vs2, vs0, vs0, 1
+; CHECK-P9-NEXT:    slwi r3, r3, 24
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; CHECK-P9-NEXT:    slwi r4, r4, 16
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrghb v3, v4, v3
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    slwi r4, r4, 8
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    mtfprwz f0, r3
+; CHECK-P9-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
@@ -923,55 +774,52 @@ define i64 @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
-; CHECK-BE-NEXT:    lxv vs2, 0(r3)
-; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    xxperm v2, vs3, vs2
-; CHECK-BE-NEXT:    xscvspdpn f3, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 1
+; CHECK-BE-NEXT:    slwi r3, r3, 24
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 3
+; CHECK-BE-NEXT:    slwi r4, r4, 16
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    slwi r4, r4, 8
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    slwi r3, r3, 24
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xxperm v3, vs3, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mffprwz r4, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; CHECK-BE-NEXT:    slwi r4, r4, 16
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    xxperm v3, vs1, vs2
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    or r3, r3, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xxperm v4, vs1, vs2
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
-; CHECK-BE-NEXT:    mffprd r3, f0
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    slwi r4, r4, 8
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    vmrgow v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x float>, ptr %0, align 32
@@ -983,305 +831,292 @@ entry:
 define <16 x i8> @test16elt_signed(ptr nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    li r5, 48
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
-; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    lxvd2x vs8, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r5
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs3, r3, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    xscvspdpn f12, v4
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, v3
-; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    xxsldwi vs2, v3, v3, 3
 ; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxsldwi vs7, v2, v2, 1
+; CHECK-P8-NEXT:    xxsldwi vs11, v2, v2, 3
 ; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxsldwi vs3, v3, v3, 1
-; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xxsldwi vs4, v4, v4, 1
+; CHECK-P8-NEXT:    xscvspdpn f4, vs4
+; CHECK-P8-NEXT:    xxswapd v5, vs3
+; CHECK-P8-NEXT:    xxsldwi vs6, v3, v3, 1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs8, v4, v4, 3
+; CHECK-P8-NEXT:    xxsldwi vs10, v3, v3, 3
+; CHECK-P8-NEXT:    xscvdpsxws f12, f12
+; CHECK-P8-NEXT:    xxsldwi vs5, v5, v5, 1
+; CHECK-P8-NEXT:    xxsldwi vs9, v5, v5, 3
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    mffprwz r3, f12
+; CHECK-P8-NEXT:    xscvspdpn f12, v5
+; CHECK-P8-NEXT:    mffprwz r6, f4
+; CHECK-P8-NEXT:    xscvspdpn f4, vs5
+; CHECK-P8-NEXT:    slwi r3, r3, 24
+; CHECK-P8-NEXT:    xscvdpsxws f12, f12
+; CHECK-P8-NEXT:    slwi r6, r6, 16
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    or r3, r3, r6
+; CHECK-P8-NEXT:    mffprwz r4, f12
+; CHECK-P8-NEXT:    xscvspdpn f12, v3
+; CHECK-P8-NEXT:    mffprwz r6, f4
+; CHECK-P8-NEXT:    xscvspdpn f4, vs6
+; CHECK-P8-NEXT:    slwi r4, r4, 24
+; CHECK-P8-NEXT:    xscvdpsxws f12, f12
+; CHECK-P8-NEXT:    slwi r6, r6, 16
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    or r4, r4, r6
+; CHECK-P8-NEXT:    mffprwz r5, f12
+; CHECK-P8-NEXT:    mffprwz r6, f4
+; CHECK-P8-NEXT:    slwi r5, r5, 24
+; CHECK-P8-NEXT:    slwi r6, r6, 16
+; CHECK-P8-NEXT:    or r5, r5, r6
+; CHECK-P8-NEXT:    mffprwz r6, f2
 ; CHECK-P8-NEXT:    xscvspdpn f2, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    slwi r6, r6, 8
 ; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    xscvspdpn f1, vs4
-; CHECK-P8-NEXT:    xxsldwi vs5, v2, v2, 1
-; CHECK-P8-NEXT:    xxswapd v5, vs6
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xxswapd v4, vs8
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v2
-; CHECK-P8-NEXT:    xxsldwi vs7, v5, v5, 3
-; CHECK-P8-NEXT:    xxsldwi vs9, v5, v5, 1
+; CHECK-P8-NEXT:    or r3, r3, r6
+; CHECK-P8-NEXT:    mffprwz r6, f2
+; CHECK-P8-NEXT:    xscvspdpn f2, vs9
+; CHECK-P8-NEXT:    slwi r6, r6, 8
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    or r4, r4, r6
+; CHECK-P8-NEXT:    mffprwz r6, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs8
+; CHECK-P8-NEXT:    slwi r6, r6, 8
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xxsldwi vs10, v4, v4, 3
-; CHECK-P8-NEXT:    xxsldwi vs11, v4, v4, 1
-; CHECK-P8-NEXT:    vmrghb v0, v0, v1
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs5
-; CHECK-P8-NEXT:    mtvsrd v6, r4
+; CHECK-P8-NEXT:    or r5, r5, r6
+; CHECK-P8-NEXT:    mffprwz r6, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs10
+; CHECK-P8-NEXT:    or r3, r3, r6
+; CHECK-P8-NEXT:    mffprwz r6, f2
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs7
+; CHECK-P8-NEXT:    or r4, r4, r6
+; CHECK-P8-NEXT:    rldimi r4, r3, 32, 0
+; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, v2
+; CHECK-P8-NEXT:    mtfprd f2, r4
+; CHECK-P8-NEXT:    or r3, r5, r3
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    vmrghb v3, v3, v1
-; CHECK-P8-NEXT:    mtvsrd v1, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs6
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v5
-; CHECK-P8-NEXT:    vmrglh v3, v3, v0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghb v1, v1, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs9
-; CHECK-P8-NEXT:    mtvsrd v7, r4
+; CHECK-P8-NEXT:    xscvspdpn f1, vs7
+; CHECK-P8-NEXT:    slwi r4, r4, 24
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    vmrghb v2, v2, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs10
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mtvsrd v5, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs8
-; CHECK-P8-NEXT:    vmrglh v2, v2, v1
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghb v6, v6, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    vmrghb v5, v5, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, v4
-; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mffprwz r5, f1
+; CHECK-P8-NEXT:    slwi r5, r5, 16
+; CHECK-P8-NEXT:    or r4, r4, r5
+; CHECK-P8-NEXT:    mffprwz r5, f0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs11
-; CHECK-P8-NEXT:    vmrglh v5, v5, v6
-; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    slwi r5, r5, 8
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghb v7, v8, v7
-; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
-; CHECK-P8-NEXT:    vmrghb v4, v4, v8
-; CHECK-P8-NEXT:    vmrglh v4, v4, v7
-; CHECK-P8-NEXT:    xxmrglw vs1, v4, v5
-; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-P8-NEXT:    or r4, r4, r5
+; CHECK-P8-NEXT:    mffprwz r5, f0
+; CHECK-P8-NEXT:    or r4, r4, r5
+; CHECK-P8-NEXT:    rldimi r4, r3, 32, 0
+; CHECK-P8-NEXT:    mtfprd f0, r4
+; CHECK-P8-NEXT:    xxmrghd v2, vs2, vs0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs3, 0(r3)
-; CHECK-P9-NEXT:    lxv vs0, 48(r3)
-; CHECK-P9-NEXT:    lxv vs1, 32(r3)
-; CHECK-P9-NEXT:    lxv vs2, 16(r3)
-; CHECK-P9-NEXT:    xxsldwi vs4, vs3, vs3, 3
-; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    lxv vs3, 16(r3)
+; CHECK-P9-NEXT:    lxv vs0, 32(r3)
+; CHECK-P9-NEXT:    lxv vs1, 48(r3)
+; CHECK-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-P9-NEXT:    xscvspdpn f4, vs3
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
 ; CHECK-P9-NEXT:    mffprwz r3, f4
-; CHECK-P9-NEXT:    xxswapd vs4, vs3
-; CHECK-P9-NEXT:    mtvsrd v2, r3
+; CHECK-P9-NEXT:    xxsldwi vs4, vs3, vs3, 1
+; CHECK-P9-NEXT:    slwi r3, r3, 24
 ; CHECK-P9-NEXT:    xscvspdpn f4, vs4
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    mffprwz r3, f4
-; CHECK-P9-NEXT:    xscvspdpn f4, vs3
-; CHECK-P9-NEXT:    xxsldwi vs3, vs3, vs3, 1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    mffprwz r4, f4
+; CHECK-P9-NEXT:    xxswapd vs4, vs3
+; CHECK-P9-NEXT:    xxsldwi vs3, vs3, vs3, 3
+; CHECK-P9-NEXT:    slwi r4, r4, 16
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
 ; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    or r3, r3, r4
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    vmrghb v2, v3, v2
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    mffprwz r3, f4
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xxsldwi vs3, vs2, vs2, 3
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mffprwz r4, f4
+; CHECK-P9-NEXT:    slwi r4, r4, 8
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs2
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    xxsldwi vs3, vs2, vs2, 1
+; CHECK-P9-NEXT:    slwi r4, r4, 24
 ; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    mffprwz r5, f3
 ; CHECK-P9-NEXT:    xxswapd vs3, vs2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 3
+; CHECK-P9-NEXT:    slwi r5, r5, 16
 ; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xscvspdpn f3, vs2
-; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    vmrghb v3, v4, v3
+; CHECK-P9-NEXT:    or r4, r4, r5
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xxsldwi vs3, vs1, vs1, 3
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    slwi r5, r5, 8
+; CHECK-P9-NEXT:    or r4, r4, r5
+; CHECK-P9-NEXT:    mffprwz r5, f2
+; CHECK-P9-NEXT:    xscvspdpn f2, vs1
+; CHECK-P9-NEXT:    or r4, r4, r5
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    rldimi r4, r3, 32, 0
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xxswapd vs3, vs1
-; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xscvspdpn f3, vs1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 1
+; CHECK-P9-NEXT:    slwi r3, r3, 24
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r5, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 3
+; CHECK-P9-NEXT:    slwi r5, r5, 16
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    vmrghb v2, v3, v2
+; CHECK-P9-NEXT:    or r3, r3, r5
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mffprwz r5, f2
+; CHECK-P9-NEXT:    slwi r5, r5, 8
+; CHECK-P9-NEXT:    or r3, r3, r5
+; CHECK-P9-NEXT:    mffprwz r5, f1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs0
+; CHECK-P9-NEXT:    or r3, r3, r5
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r5, f1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-P9-NEXT:    slwi r5, r5, 24
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    mffprwz r3, f1
+; CHECK-P9-NEXT:    mffprwz r6, f1
 ; CHECK-P9-NEXT:    xxswapd vs1, vs0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; CHECK-P9-NEXT:    slwi r6, r6, 16
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xscvspdpn f1, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    or r5, r5, r6
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrghb v3, v4, v3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs2
+; CHECK-P9-NEXT:    mffprwz r6, f1
+; CHECK-P9-NEXT:    slwi r6, r6, 8
+; CHECK-P9-NEXT:    or r5, r5, r6
+; CHECK-P9-NEXT:    mffprwz r6, f0
+; CHECK-P9-NEXT:    or r5, r5, r6
+; CHECK-P9-NEXT:    rldimi r5, r3, 32, 0
+; CHECK-P9-NEXT:    mtvsrdd v2, r5, r4
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI7_0@toc@l
-; CHECK-BE-NEXT:    lxv vs4, 0(r3)
-; CHECK-BE-NEXT:    xxsldwi vs5, vs3, vs3, 3
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    xxswapd vs5, vs3
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r3
-; CHECK-BE-NEXT:    xxperm v2, vs5, vs4
-; CHECK-BE-NEXT:    xscvspdpn f5, vs3
-; CHECK-BE-NEXT:    xxsldwi vs3, vs3, vs3, 1
+; CHECK-BE-NEXT:    lxv vs3, 32(r3)
+; CHECK-BE-NEXT:    lxv vs0, 16(r3)
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-NEXT:    lxv vs2, 48(r3)
+; CHECK-BE-NEXT:    xscvspdpn f4, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 1
+; CHECK-BE-NEXT:    slwi r3, r3, 24
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r4, f4
+; CHECK-BE-NEXT:    xxswapd vs4, vs3
+; CHECK-BE-NEXT:    xxsldwi vs3, vs3, vs3, 3
+; CHECK-BE-NEXT:    slwi r4, r4, 16
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    xxsldwi vs3, vs2, vs2, 3
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r4, f4
+; CHECK-BE-NEXT:    slwi r4, r4, 8
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs2
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    xxsldwi vs3, vs2, vs2, 1
+; CHECK-BE-NEXT:    slwi r4, r4, 24
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xxperm v3, vs5, vs4
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mffprwz r5, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs2
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 3
+; CHECK-BE-NEXT:    slwi r5, r5, 16
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    xxperm v3, vs3, vs4
-; CHECK-BE-NEXT:    xscvspdpn f3, vs2
-; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    or r4, r4, r5
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    slwi r5, r5, 8
+; CHECK-BE-NEXT:    or r4, r4, r5
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    or r4, r4, r5
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    rldimi r4, r3, 32, 0
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xxperm v4, vs3, vs4
-; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xxmrghw vs2, v3, v2
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    xxperm v2, vs3, vs4
-; CHECK-BE-NEXT:    xscvspdpn f3, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 1
+; CHECK-BE-NEXT:    slwi r3, r3, 24
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 3
+; CHECK-BE-NEXT:    slwi r5, r5, 16
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    or r3, r3, r5
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    slwi r5, r5, 8
+; CHECK-BE-NEXT:    or r3, r3, r5
+; CHECK-BE-NEXT:    mffprwz r5, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    or r3, r3, r5
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r5, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    slwi r5, r5, 24
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xxperm v3, vs3, vs4
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mffprwz r6, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; CHECK-BE-NEXT:    slwi r6, r6, 16
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    xxperm v3, vs1, vs4
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    or r5, r5, r6
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xxperm v4, vs1, vs4
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
-; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs2
+; CHECK-BE-NEXT:    mffprwz r6, f1
+; CHECK-BE-NEXT:    slwi r6, r6, 8
+; CHECK-BE-NEXT:    or r5, r5, r6
+; CHECK-BE-NEXT:    mffprwz r6, f0
+; CHECK-BE-NEXT:    or r5, r5, r6
+; CHECK-BE-NEXT:    rldimi r5, r3, 32, 0
+; CHECK-BE-NEXT:    mtvsrdd v2, r5, r4
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x float>, ptr %0, align 64
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
index 00ca205e8597257..3bfaa01462b7ea8 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
@@ -12,47 +12,32 @@
 define i32 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpsxws f1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpsxws f1, v2
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    vmrghh v2, v2, v3
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    mffprwz r4, f1
 ; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    rlwimi r3, r4, 16, 0, 15
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xscvdpsxws f0, v2
-; CHECK-P9-NEXT:    mffprwz r3, f0
+; CHECK-P9-NEXT:    mffprwz r4, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
-; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    rlwimi r3, r4, 16, 0, 15
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxswapd vs2, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, v2
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    xxperm v2, vs1, vs0
-; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    xscvdpsxws f0, v2
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r3, f0
+; CHECK-BE-NEXT:    rlwimi r3, r4, 16, 0, 15
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = fptoui <2 x double> %a to <2 x i16>
@@ -63,26 +48,24 @@ entry:
 define i64 @test4elt(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, vs2
 ; CHECK-P8-NEXT:    xxswapd vs1, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f2
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f3
+; CHECK-P8-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f2
+; CHECK-P8-NEXT:    mtfprwz f0, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    xxswapd vs3, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mffprwz r3, f3
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghh v2, v3, v2
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    vmrghh v3, v4, v3
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-P8-NEXT:    mtfprwz f1, r4
+; CHECK-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
@@ -95,19 +78,17 @@ define i64 @test4elt(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xscvdpsxws f1, f0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghh v2, v2, v3
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-P9-NEXT:    mtfprwz f1, r4
+; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-P9-NEXT:    mtfprwz f0, r4
+; CHECK-P9-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
@@ -115,27 +96,22 @@ define i64 @test4elt(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-NEXT:    lxv vs2, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f3, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    xxperm vs1, vs3, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f0
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    rlwimi r4, r3, 16, 0, 15
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtfprwz f0, r3
-; CHECK-BE-NEXT:    xxperm vs0, vs3, vs2
-; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-BE-NEXT:    mffprd r3, f0
+; CHECK-BE-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-BE-NEXT:    mtvsrwz v3, r4
+; CHECK-BE-NEXT:    vmrgow v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <4 x double>, ptr %0, align 32
@@ -148,136 +124,111 @@ define <8 x i16> @test8elt(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    lxvd2x vs6, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
-; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs7, vs6
 ; CHECK-P8-NEXT:    xxswapd vs3, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
 ; CHECK-P8-NEXT:    xxswapd vs2, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f0, f6
-; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r5, f1
 ; CHECK-P8-NEXT:    xxswapd vs5, vs4
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    mffprwz r3, f4
 ; CHECK-P8-NEXT:    xscvdpsxws f5, f5
-; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    mffprwz r4, f4
+; CHECK-P8-NEXT:    mffprwz r3, f5
+; CHECK-P8-NEXT:    rlwimi r4, r3, 16, 0, 15
+; CHECK-P8-NEXT:    mffprwz r3, f3
+; CHECK-P8-NEXT:    rlwimi r5, r3, 16, 0, 15
 ; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    mffprwz r4, f5
-; CHECK-P8-NEXT:    xxswapd vs7, vs6
-; CHECK-P8-NEXT:    xscvdpsxws f7, f7
-; CHECK-P8-NEXT:    vmrghh v2, v5, v2
-; CHECK-P8-NEXT:    mtvsrd v5, r3
-; CHECK-P8-NEXT:    mffprwz r3, f7
-; CHECK-P8-NEXT:    mtvsrd v0, r3
+; CHECK-P8-NEXT:    rldimi r5, r4, 32, 0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f7
+; CHECK-P8-NEXT:    mtfprd f1, r5
+; CHECK-P8-NEXT:    rlwimi r4, r3, 16, 0, 15
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghh v3, v5, v3
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P8-NEXT:    vmrghh v4, v5, v4
-; CHECK-P8-NEXT:    mtvsrd v5, r3
-; CHECK-P8-NEXT:    vmrghh v5, v0, v5
-; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
-; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f6
+; CHECK-P8-NEXT:    mffprwz r5, f0
+; CHECK-P8-NEXT:    rlwimi r5, r3, 16, 0, 15
+; CHECK-P8-NEXT:    rldimi r5, r4, 32, 0
+; CHECK-P8-NEXT:    mtfprd f0, r5
+; CHECK-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs3, 0(r3)
-; CHECK-P9-NEXT:    lxv vs2, 16(r3)
-; CHECK-P9-NEXT:    lxv vs0, 48(r3)
-; CHECK-P9-NEXT:    lxv vs1, 32(r3)
+; CHECK-P9-NEXT:    lxv vs3, 16(r3)
+; CHECK-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-P9-NEXT:    lxv vs1, 48(r3)
+; CHECK-P9-NEXT:    lxv vs0, 32(r3)
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f3
 ; CHECK-P9-NEXT:    xxswapd vs3, vs3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    mffprwz r3, f4
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    mffprwz r4, f3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f2
 ; CHECK-P9-NEXT:    xxswapd vs2, vs2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    rlwimi r4, r3, 16, 0, 15
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f1
+; CHECK-P9-NEXT:    mffprwz r5, f2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f1
 ; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    rlwimi r5, r3, 16, 0, 15
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    mffprwz r3, f1
+; CHECK-P9-NEXT:    rldimi r5, r4, 32, 0
+; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    mffprwz r4, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    rlwimi r4, r3, 16, 0, 15
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs2
+; CHECK-P9-NEXT:    mffprwz r6, f0
+; CHECK-P9-NEXT:    rlwimi r6, r3, 16, 0, 15
+; CHECK-P9-NEXT:    rldimi r6, r4, 32, 0
+; CHECK-P9-NEXT:    mtvsrdd v2, r6, r5
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-NEXT:    lxv vs4, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f5, f3
+; CHECK-BE-NEXT:    lxv vs3, 32(r3)
+; CHECK-BE-NEXT:    lxv vs2, 48(r3)
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-NEXT:    lxv vs0, 16(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    xxperm vs3, vs5, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f2
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    rlwimi r4, r3, 16, 0, 15
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtfprwz f2, r3
-; CHECK-BE-NEXT:    xxperm vs2, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghw vs2, vs2, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f1
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    rlwimi r5, r3, 16, 0, 15
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    xxperm vs1, vs3, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f3, f0
+; CHECK-BE-NEXT:    rldimi r5, r4, 32, 0
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    rlwimi r4, r3, 16, 0, 15
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtfprwz f0, r3
-; CHECK-BE-NEXT:    xxperm vs0, vs3, vs4
-; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs2
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mffprwz r6, f0
+; CHECK-BE-NEXT:    rlwimi r6, r3, 16, 0, 15
+; CHECK-BE-NEXT:    rldimi r6, r4, 32, 0
+; CHECK-BE-NEXT:    mtvsrdd v2, r6, r5
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x double>, ptr %0, align 64
@@ -288,265 +239,218 @@ entry:
 define void @test16elt(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lxvd2x vs5, 0, r4
-; CHECK-P8-NEXT:    li r5, 80
-; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    lxvd2x vs3, r4, r5
+; CHECK-P8-NEXT:    li r11, 80
+; CHECK-P8-NEXT:    li r10, 64
+; CHECK-P8-NEXT:    li r9, 112
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lxvd2x vs9, r4, r6
-; CHECK-P8-NEXT:    li r6, 64
-; CHECK-P8-NEXT:    lxvd2x vs7, r4, r5
-; CHECK-P8-NEXT:    lxvd2x vs11, r4, r6
-; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    lxvd2x vs12, r4, r6
-; CHECK-P8-NEXT:    li r6, 96
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
-; CHECK-P8-NEXT:    li r6, 112
-; CHECK-P8-NEXT:    lxvd2x vs0, r4, r6
-; CHECK-P8-NEXT:    xxswapd vs6, vs5
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    li r8, 96
+; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r11
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r10
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r9
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r8
+; CHECK-P8-NEXT:    xxswapd v3, vs7
+; CHECK-P8-NEXT:    xxswapd vs11, vs6
 ; CHECK-P8-NEXT:    xscvdpsxws f6, f6
-; CHECK-P8-NEXT:    mffprwz r4, f5
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    xxswapd vs10, vs9
-; CHECK-P8-NEXT:    xscvdpsxws f9, f9
-; CHECK-P8-NEXT:    xxswapd vs4, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    xscvdpsxws f10, f10
+; CHECK-P8-NEXT:    xxswapd vs10, vs5
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    xxswapd vs9, vs4
 ; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    xxswapd vs8, vs7
-; CHECK-P8-NEXT:    xscvdpsxws f7, f7
-; CHECK-P8-NEXT:    mffprwz r4, f7
-; CHECK-P8-NEXT:    xxswapd vs13, vs11
+; CHECK-P8-NEXT:    xxswapd vs8, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P8-NEXT:    xscvdpsxws f11, f11
+; CHECK-P8-NEXT:    xscvdpsxws f10, f10
+; CHECK-P8-NEXT:    xscvdpsxws f9, f9
 ; CHECK-P8-NEXT:    xscvdpsxws f8, f8
-; CHECK-P8-NEXT:    xscvdpsxws f13, f13
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    xxswapd v2, vs12
-; CHECK-P8-NEXT:    xscvdpsxws f12, f12
-; CHECK-P8-NEXT:    mffprwz r4, f9
-; CHECK-P8-NEXT:    xscvdpsxws v2, v2
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    mffprwz r4, f12
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f11
-; CHECK-P8-NEXT:    xxswapd v3, vs2
-; CHECK-P8-NEXT:    xscvdpsxws v3, v3
+; CHECK-P8-NEXT:    xxswapd v2, vs2
 ; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    mffprwz r4, f8
-; CHECK-P8-NEXT:    xxswapd vs1, vs0
+; CHECK-P8-NEXT:    xscvdpsxws v2, v2
+; CHECK-P8-NEXT:    xxswapd vs13, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f13, f13
+; CHECK-P8-NEXT:    xxswapd vs12, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mffprwz r4, f10
-; CHECK-P8-NEXT:    mtvsrd v10, r4
-; CHECK-P8-NEXT:    mfvsrwz r4, v2
-; CHECK-P8-NEXT:    mtvsrd v2, r4
-; CHECK-P8-NEXT:    mffprwz r4, f13
-; CHECK-P8-NEXT:    vmrghh v4, v8, v4
-; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
-; CHECK-P8-NEXT:    vmrghh v5, v9, v5
-; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mfvsrwz r4, v3
-; CHECK-P8-NEXT:    vmrghh v0, v10, v0
-; CHECK-P8-NEXT:    vmrghh v2, v2, v1
-; CHECK-P8-NEXT:    vmrghh v3, v8, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    vmrghh v1, v9, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r4
+; CHECK-P8-NEXT:    xscvdpsxws f12, f12
+; CHECK-P8-NEXT:    mffprwz r4, f6
+; CHECK-P8-NEXT:    mffprwz r6, f5
+; CHECK-P8-NEXT:    mffprwz r7, f4
+; CHECK-P8-NEXT:    mffprwz r8, f3
+; CHECK-P8-NEXT:    mffprwz r9, f2
+; CHECK-P8-NEXT:    mffprwz r10, f1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f7
+; CHECK-P8-NEXT:    xscvdpsxws f2, v3
+; CHECK-P8-NEXT:    mffprwz r11, f0
+; CHECK-P8-NEXT:    mffprwz r12, f11
+; CHECK-P8-NEXT:    rlwimi r4, r12, 16, 0, 15
+; CHECK-P8-NEXT:    mffprwz r12, f10
+; CHECK-P8-NEXT:    rlwimi r6, r12, 16, 0, 15
+; CHECK-P8-NEXT:    mffprwz r12, f9
+; CHECK-P8-NEXT:    rldimi r6, r4, 32, 0
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xxmrglw vs2, v1, v3
-; CHECK-P8-NEXT:    xxmrglw vs1, v2, v0
-; CHECK-P8-NEXT:    vmrghh v6, v6, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xxmrglw vs0, v5, v4
-; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, v2
-; CHECK-P8-NEXT:    vmrghh v7, v7, v8
-; CHECK-P8-NEXT:    xxmrglw vs3, v7, v6
-; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
-; CHECK-P8-NEXT:    xxswapd vs0, v3
+; CHECK-P8-NEXT:    mtfprd f0, r6
+; CHECK-P8-NEXT:    mffprwz r6, f2
+; CHECK-P8-NEXT:    rlwimi r7, r12, 16, 0, 15
+; CHECK-P8-NEXT:    mffprwz r12, f8
+; CHECK-P8-NEXT:    rlwimi r4, r6, 16, 0, 15
+; CHECK-P8-NEXT:    rlwimi r8, r12, 16, 0, 15
+; CHECK-P8-NEXT:    mfvsrwz r12, v2
+; CHECK-P8-NEXT:    rldimi r8, r7, 32, 0
+; CHECK-P8-NEXT:    mtfprd f3, r8
+; CHECK-P8-NEXT:    rlwimi r9, r12, 16, 0, 15
+; CHECK-P8-NEXT:    mffprwz r12, f13
+; CHECK-P8-NEXT:    rlwimi r10, r12, 16, 0, 15
+; CHECK-P8-NEXT:    mffprwz r12, f12
+; CHECK-P8-NEXT:    rldimi r10, r9, 32, 0
+; CHECK-P8-NEXT:    mtfprd f1, r10
+; CHECK-P8-NEXT:    rlwimi r11, r12, 16, 0, 15
+; CHECK-P8-NEXT:    rldimi r4, r11, 32, 0
+; CHECK-P8-NEXT:    mtfprd f2, r4
+; CHECK-P8-NEXT:    xxmrghd vs0, vs3, vs0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    xxmrghd vs1, vs1, vs2
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs2, 0(r4)
-; CHECK-P9-NEXT:    lxv vs1, 16(r4)
-; CHECK-P9-NEXT:    lxv vs0, 32(r4)
-; CHECK-P9-NEXT:    xscvdpsxws f3, f2
-; CHECK-P9-NEXT:    xscvdpsxws f4, f1
-; CHECK-P9-NEXT:    xxswapd vs2, vs2
-; CHECK-P9-NEXT:    xscvdpsxws f5, f0
-; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    lxv vs3, 64(r4)
-; CHECK-P9-NEXT:    mtvsrd v2, r5
-; CHECK-P9-NEXT:    mffprwz r5, f4
-; CHECK-P9-NEXT:    lxv vs4, 48(r4)
-; CHECK-P9-NEXT:    mtvsrd v3, r5
-; CHECK-P9-NEXT:    mffprwz r5, f5
-; CHECK-P9-NEXT:    xscvdpsxws f7, f3
-; CHECK-P9-NEXT:    xxswapd vs3, vs3
-; CHECK-P9-NEXT:    mtvsrd v4, r5
-; CHECK-P9-NEXT:    mffprwz r5, f2
-; CHECK-P9-NEXT:    lxv vs2, 80(r4)
+; CHECK-P9-NEXT:    lxv vs7, 16(r4)
+; CHECK-P9-NEXT:    lxv vs6, 0(r4)
+; CHECK-P9-NEXT:    lxv vs5, 48(r4)
+; CHECK-P9-NEXT:    lxv vs4, 32(r4)
+; CHECK-P9-NEXT:    xscvdpsxws f8, f7
+; CHECK-P9-NEXT:    xxswapd vs7, vs7
+; CHECK-P9-NEXT:    lxv vs3, 80(r4)
+; CHECK-P9-NEXT:    lxv vs0, 96(r4)
+; CHECK-P9-NEXT:    lxv vs1, 112(r4)
+; CHECK-P9-NEXT:    lxv vs2, 64(r4)
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    mffprwz r4, f8
+; CHECK-P9-NEXT:    mffprwz r5, f7
+; CHECK-P9-NEXT:    xscvdpsxws f7, f6
+; CHECK-P9-NEXT:    xxswapd vs6, vs6
+; CHECK-P9-NEXT:    rlwimi r5, r4, 16, 0, 15
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    mffprwz r4, f7
+; CHECK-P9-NEXT:    mffprwz r6, f6
+; CHECK-P9-NEXT:    xscvdpsxws f6, f5
+; CHECK-P9-NEXT:    xxswapd vs5, vs5
+; CHECK-P9-NEXT:    rlwimi r6, r4, 16, 0, 15
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    rldimi r6, r5, 32, 0
+; CHECK-P9-NEXT:    mffprwz r4, f6
+; CHECK-P9-NEXT:    mffprwz r7, f5
 ; CHECK-P9-NEXT:    xscvdpsxws f5, f4
 ; CHECK-P9-NEXT:    xxswapd vs4, vs4
-; CHECK-P9-NEXT:    mtvsrd v5, r5
-; CHECK-P9-NEXT:    mffprwz r5, f1
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    lxv vs1, 96(r4)
+; CHECK-P9-NEXT:    rlwimi r7, r4, 16, 0, 15
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    vmrghh v2, v2, v5
-; CHECK-P9-NEXT:    mtvsrd v5, r5
-; CHECK-P9-NEXT:    mffprwz r5, f0
-; CHECK-P9-NEXT:    lxv vs0, 112(r4)
-; CHECK-P9-NEXT:    vmrghh v3, v3, v5
-; CHECK-P9-NEXT:    mtvsrd v5, r5
 ; CHECK-P9-NEXT:    mffprwz r4, f5
-; CHECK-P9-NEXT:    vmrghh v4, v4, v5
-; CHECK-P9-NEXT:    xxmrglw vs6, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v2, r4
+; CHECK-P9-NEXT:    mffprwz r8, f4
+; CHECK-P9-NEXT:    xscvdpsxws f4, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs3
+; CHECK-P9-NEXT:    rlwimi r8, r4, 16, 0, 15
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    rldimi r8, r7, 32, 0
 ; CHECK-P9-NEXT:    mffprwz r4, f4
-; CHECK-P9-NEXT:    mtvsrd v3, r4
-; CHECK-P9-NEXT:    mffprwz r4, f7
-; CHECK-P9-NEXT:    vmrghh v2, v2, v3
-; CHECK-P9-NEXT:    mtvsrd v3, r4
-; CHECK-P9-NEXT:    mffprwz r4, f3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f2
+; CHECK-P9-NEXT:    xscvdpsxws f4, f2
 ; CHECK-P9-NEXT:    xxswapd vs2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xxmrglw vs4, v2, v4
-; CHECK-P9-NEXT:    mtvsrd v2, r4
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
-; CHECK-P9-NEXT:    xxmrgld vs4, vs4, vs6
-; CHECK-P9-NEXT:    mffprwz r4, f3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f1
+; CHECK-P9-NEXT:    mffprwz r9, f3
+; CHECK-P9-NEXT:    mtvsrdd vs3, r8, r6
+; CHECK-P9-NEXT:    rlwimi r9, r4, 16, 0, 15
+; CHECK-P9-NEXT:    stxv vs3, 0(r3)
+; CHECK-P9-NEXT:    mffprwz r4, f4
+; CHECK-P9-NEXT:    mffprwz r5, f2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f1
 ; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    mtvsrd v3, r4
-; CHECK-P9-NEXT:    stxv vs4, 0(r3)
-; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    rlwimi r5, r4, 16, 0, 15
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r4
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r4, f3
-; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v2, r4
-; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    rldimi r5, r9, 32, 0
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    mffprwz r6, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v3, r4
+; CHECK-P9-NEXT:    rlwimi r6, r4, 16, 0, 15
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-P9-NEXT:    mffprwz r4, f1
-; CHECK-P9-NEXT:    mtvsrd v3, r4
-; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    mtvsrd v4, r4
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P9-NEXT:    xxmrgld vs0, vs0, vs2
+; CHECK-P9-NEXT:    mffprwz r7, f0
+; CHECK-P9-NEXT:    rlwimi r7, r4, 16, 0, 15
+; CHECK-P9-NEXT:    rldimi r7, r6, 32, 0
+; CHECK-P9-NEXT:    mtvsrdd vs0, r7, r5
 ; CHECK-P9-NEXT:    stxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs7, 48(r4)
-; CHECK-BE-NEXT:    lxv vs0, 64(r4)
-; CHECK-BE-NEXT:    lxv vs1, 80(r4)
-; CHECK-BE-NEXT:    lxv vs2, 96(r4)
-; CHECK-BE-NEXT:    xscvdpsxws f9, f7
+; CHECK-BE-NEXT:    lxv vs7, 32(r4)
+; CHECK-BE-NEXT:    lxv vs6, 48(r4)
+; CHECK-BE-NEXT:    lxv vs5, 0(r4)
+; CHECK-BE-NEXT:    lxv vs4, 16(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f8, f7
 ; CHECK-BE-NEXT:    xxswapd vs7, vs7
-; CHECK-BE-NEXT:    lxv vs3, 112(r4)
-; CHECK-BE-NEXT:    lxv vs4, 0(r4)
-; CHECK-BE-NEXT:    lxv vs5, 16(r4)
-; CHECK-BE-NEXT:    lxv vs6, 32(r4)
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
-; CHECK-BE-NEXT:    lxv vs8, 0(r4)
+; CHECK-BE-NEXT:    lxv vs3, 96(r4)
+; CHECK-BE-NEXT:    lxv vs0, 80(r4)
+; CHECK-BE-NEXT:    lxv vs1, 64(r4)
+; CHECK-BE-NEXT:    lxv vs2, 112(r4)
 ; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    mffprwz r4, f9
-; CHECK-BE-NEXT:    mtfprwz f9, r4
-; CHECK-BE-NEXT:    mffprwz r4, f7
-; CHECK-BE-NEXT:    mtfprwz f7, r4
-; CHECK-BE-NEXT:    xxperm vs7, vs9, vs8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f6
+; CHECK-BE-NEXT:    mffprwz r4, f8
+; CHECK-BE-NEXT:    mffprwz r5, f7
+; CHECK-BE-NEXT:    xscvdpsxws f7, f6
 ; CHECK-BE-NEXT:    xxswapd vs6, vs6
+; CHECK-BE-NEXT:    rlwimi r5, r4, 16, 0, 15
 ; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    mffprwz r4, f9
-; CHECK-BE-NEXT:    mtfprwz f9, r4
-; CHECK-BE-NEXT:    mffprwz r4, f6
-; CHECK-BE-NEXT:    mtfprwz f6, r4
-; CHECK-BE-NEXT:    xxperm vs6, vs9, vs8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f5
+; CHECK-BE-NEXT:    mffprwz r4, f7
+; CHECK-BE-NEXT:    mffprwz r6, f6
+; CHECK-BE-NEXT:    xscvdpsxws f6, f5
 ; CHECK-BE-NEXT:    xxswapd vs5, vs5
+; CHECK-BE-NEXT:    rlwimi r6, r4, 16, 0, 15
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xxmrghw vs6, vs6, vs7
-; CHECK-BE-NEXT:    mffprwz r4, f9
-; CHECK-BE-NEXT:    mtfprwz f9, r4
-; CHECK-BE-NEXT:    mffprwz r4, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r4
-; CHECK-BE-NEXT:    xxperm vs5, vs9, vs8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f4
+; CHECK-BE-NEXT:    rldimi r6, r5, 32, 0
+; CHECK-BE-NEXT:    mffprwz r4, f6
+; CHECK-BE-NEXT:    mffprwz r7, f5
+; CHECK-BE-NEXT:    xscvdpsxws f5, f4
 ; CHECK-BE-NEXT:    xxswapd vs4, vs4
+; CHECK-BE-NEXT:    rlwimi r7, r4, 16, 0, 15
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    mffprwz r4, f9
-; CHECK-BE-NEXT:    mtfprwz f9, r4
-; CHECK-BE-NEXT:    mffprwz r4, f4
-; CHECK-BE-NEXT:    mtfprwz f4, r4
-; CHECK-BE-NEXT:    xxperm vs4, vs9, vs8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f3
+; CHECK-BE-NEXT:    mffprwz r4, f5
+; CHECK-BE-NEXT:    mffprwz r8, f4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
+; CHECK-BE-NEXT:    rlwimi r8, r4, 16, 0, 15
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xxmrghw vs4, vs4, vs5
-; CHECK-BE-NEXT:    xscvdpsxws f5, f2
+; CHECK-BE-NEXT:    rldimi r8, r7, 32, 0
+; CHECK-BE-NEXT:    mffprwz r4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xxmrghd vs4, vs4, vs6
-; CHECK-BE-NEXT:    mffprwz r4, f9
-; CHECK-BE-NEXT:    mtfprwz f9, r4
-; CHECK-BE-NEXT:    stxv vs4, 0(r3)
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r4
-; CHECK-BE-NEXT:    mffprwz r4, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r4
-; CHECK-BE-NEXT:    mffprwz r4, f2
-; CHECK-BE-NEXT:    xxperm vs3, vs9, vs8
-; CHECK-BE-NEXT:    mtfprwz f2, r4
-; CHECK-BE-NEXT:    xxperm vs2, vs5, vs8
-; CHECK-BE-NEXT:    xxmrghw vs2, vs2, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f1
+; CHECK-BE-NEXT:    mffprwz r9, f3
+; CHECK-BE-NEXT:    mtvsrdd vs3, r8, r6
+; CHECK-BE-NEXT:    rlwimi r9, r4, 16, 0, 15
+; CHECK-BE-NEXT:    stxv vs3, 0(r3)
+; CHECK-BE-NEXT:    mffprwz r4, f4
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    rlwimi r5, r4, 16, 0, 15
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r4
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r4
-; CHECK-BE-NEXT:    xxperm vs1, vs3, vs8
-; CHECK-BE-NEXT:    xscvdpsxws f3, f0
+; CHECK-BE-NEXT:    rldimi r5, r9, 32, 0
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    mffprwz r6, f1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    rlwimi r6, r4, 16, 0, 15
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r4
-; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    mtfprwz f0, r4
-; CHECK-BE-NEXT:    xxperm vs0, vs3, vs8
-; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs2
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    mffprwz r7, f0
+; CHECK-BE-NEXT:    rlwimi r7, r4, 16, 0, 15
+; CHECK-BE-NEXT:    rldimi r7, r6, 32, 0
+; CHECK-BE-NEXT:    mtvsrdd vs0, r7, r5
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -559,47 +463,35 @@ entry:
 define i32 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpsxws f1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    vmrghh v2, v2, v3
-; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, v2
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    or r3, r4, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xscvdpsxws f0, v2
-; CHECK-P9-NEXT:    mffprwz r3, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
-; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    xscvdpsxws f0, v2
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    slwi r4, r4, 16
+; CHECK-P9-NEXT:    or r3, r4, r3
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxswapd vs2, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, v2
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    xxperm v2, vs1, vs0
-; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r3, f0
+; CHECK-BE-NEXT:    xscvdpsxws f0, v2
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    slwi r4, r4, 16
+; CHECK-BE-NEXT:    or r3, r4, r3
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = fptosi <2 x double> %a to <2 x i16>
@@ -610,26 +502,26 @@ entry:
 define i64 @test4elt_signed(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, vs2
 ; CHECK-P8-NEXT:    xxswapd vs1, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f2
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    or r3, r4, r3
+; CHECK-P8-NEXT:    mtfprwz f0, r3
+; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f3
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    xxswapd vs3, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mffprwz r3, f3
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghh v2, v3, v2
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    vmrghh v3, v4, v3
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    or r3, r4, r3
+; CHECK-P8-NEXT:    mtfprwz f1, r3
+; CHECK-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
@@ -638,23 +530,23 @@ define i64 @test4elt_signed(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-P9-NEXT:    lxv vs0, 16(r3)
-; CHECK-P9-NEXT:    xscvdpsxws f2, f1
-; CHECK-P9-NEXT:    xxswapd vs1, vs1
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    slwi r4, r4, 16
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xscvdpsxws f1, f0
-; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    xxswapd vs2, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghh v2, v2, v3
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    or r3, r4, r3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mtfprwz f1, r3
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    slwi r4, r4, 16
+; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    or r3, r4, r3
+; CHECK-P9-NEXT:    mtfprwz f0, r3
+; CHECK-P9-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
@@ -662,27 +554,24 @@ define i64 @test4elt_signed(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; CHECK-BE-NEXT:    lxv vs2, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f3, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    xxperm vs1, vs3, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f0
-; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtfprwz f0, r3
-; CHECK-BE-NEXT:    xxperm vs0, vs3, vs2
-; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-BE-NEXT:    mffprd r3, f0
+; CHECK-BE-NEXT:    slwi r4, r4, 16
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    or r3, r4, r3
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    slwi r4, r4, 16
+; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    or r3, r4, r3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    vmrgow v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <4 x double>, ptr %0, align 32
@@ -695,136 +584,123 @@ define <8 x i16> @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #2
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    lxvd2x vs6, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
-; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs7, vs6
 ; CHECK-P8-NEXT:    xxswapd vs3, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
 ; CHECK-P8-NEXT:    xxswapd vs2, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f0, f6
-; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    mtvsrd v5, r4
 ; CHECK-P8-NEXT:    xxswapd vs5, vs4
 ; CHECK-P8-NEXT:    xscvdpsxws f4, f4
 ; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    mffprwz r4, f5
-; CHECK-P8-NEXT:    xxswapd vs7, vs6
-; CHECK-P8-NEXT:    xscvdpsxws f7, f7
-; CHECK-P8-NEXT:    vmrghh v2, v5, v2
-; CHECK-P8-NEXT:    mtvsrd v5, r3
-; CHECK-P8-NEXT:    mffprwz r3, f7
-; CHECK-P8-NEXT:    mtvsrd v0, r3
+; CHECK-P8-NEXT:    xscvdpsxws f4, f5
+; CHECK-P8-NEXT:    mffprwz r4, f4
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    or r3, r4, r3
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f3
+; CHECK-P8-NEXT:    mffprwz r5, f1
+; CHECK-P8-NEXT:    slwi r5, r5, 16
+; CHECK-P8-NEXT:    or r4, r5, r4
+; CHECK-P8-NEXT:    rldimi r4, r3, 32, 0
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghh v3, v5, v3
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P8-NEXT:    vmrghh v4, v5, v4
-; CHECK-P8-NEXT:    mtvsrd v5, r3
-; CHECK-P8-NEXT:    vmrghh v5, v0, v5
-; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
-; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f2
+; CHECK-P8-NEXT:    mtfprd f1, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f6
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    or r3, r4, r3
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f7
+; CHECK-P8-NEXT:    mffprwz r5, f0
+; CHECK-P8-NEXT:    slwi r5, r5, 16
+; CHECK-P8-NEXT:    or r4, r5, r4
+; CHECK-P8-NEXT:    rldimi r4, r3, 32, 0
+; CHECK-P8-NEXT:    mtfprd f0, r4
+; CHECK-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs3, 0(r3)
-; CHECK-P9-NEXT:    lxv vs2, 16(r3)
-; CHECK-P9-NEXT:    lxv vs0, 48(r3)
-; CHECK-P9-NEXT:    lxv vs1, 32(r3)
-; CHECK-P9-NEXT:    xscvdpsxws f4, f3
-; CHECK-P9-NEXT:    xxswapd vs3, vs3
+; CHECK-P9-NEXT:    lxv vs3, 16(r3)
+; CHECK-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-P9-NEXT:    lxv vs1, 48(r3)
+; CHECK-P9-NEXT:    lxv vs0, 32(r3)
+; CHECK-P9-NEXT:    xxswapd vs4, vs3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    mffprwz r3, f4
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f2
-; CHECK-P9-NEXT:    xxswapd vs2, vs2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghh v2, v2, v3
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f1
-; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    slwi r4, r4, 16
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mffprwz r3, f4
+; CHECK-P9-NEXT:    or r3, r4, r3
+; CHECK-P9-NEXT:    mffprwz r5, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xscvdpsxws f1, f0
-; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    slwi r5, r5, 16
+; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    or r4, r5, r4
+; CHECK-P9-NEXT:    mffprwz r5, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghh v2, v2, v3
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs2
+; CHECK-P9-NEXT:    rldimi r4, r3, 32, 0
+; CHECK-P9-NEXT:    slwi r5, r5, 16
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    or r3, r5, r3
+; CHECK-P9-NEXT:    mffprwz r6, f0
+; CHECK-P9-NEXT:    slwi r6, r6, 16
+; CHECK-P9-NEXT:    mffprwz r5, f1
+; CHECK-P9-NEXT:    or r5, r6, r5
+; CHECK-P9-NEXT:    rldimi r5, r3, 32, 0
+; CHECK-P9-NEXT:    mtvsrdd v2, r5, r4
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
-; CHECK-BE-NEXT:    lxv vs4, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f5, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs3
+; CHECK-BE-NEXT:    lxv vs3, 32(r3)
+; CHECK-BE-NEXT:    lxv vs2, 48(r3)
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-NEXT:    lxv vs0, 16(r3)
+; CHECK-BE-NEXT:    xxswapd vs4, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    xxperm vs3, vs5, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f2
-; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtfprwz f2, r3
-; CHECK-BE-NEXT:    xxperm vs2, vs5, vs4
-; CHECK-BE-NEXT:    xxmrghw vs2, vs2, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    slwi r4, r4, 16
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    or r3, r4, r3
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    xxperm vs1, vs3, vs4
-; CHECK-BE-NEXT:    xscvdpsxws f3, f0
-; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    slwi r5, r5, 16
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    or r4, r5, r4
+; CHECK-BE-NEXT:    mffprwz r5, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtfprwz f0, r3
-; CHECK-BE-NEXT:    xxperm vs0, vs3, vs4
-; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs2
+; CHECK-BE-NEXT:    rldimi r4, r3, 32, 0
+; CHECK-BE-NEXT:    slwi r5, r5, 16
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    or r3, r5, r3
+; CHECK-BE-NEXT:    mffprwz r6, f0
+; CHECK-BE-NEXT:    slwi r6, r6, 16
+; CHECK-BE-NEXT:    mffprwz r5, f1
+; CHECK-BE-NEXT:    or r5, r6, r5
+; CHECK-BE-NEXT:    rldimi r5, r3, 32, 0
+; CHECK-BE-NEXT:    mtvsrdd v2, r5, r4
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x double>, ptr %0, align 64
@@ -835,265 +711,242 @@ entry:
 define void @test16elt_signed(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lxvd2x vs5, 0, r4
-; CHECK-P8-NEXT:    li r5, 80
-; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    lxvd2x vs3, r4, r5
+; CHECK-P8-NEXT:    li r11, 80
+; CHECK-P8-NEXT:    li r10, 64
+; CHECK-P8-NEXT:    li r9, 112
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lxvd2x vs9, r4, r6
-; CHECK-P8-NEXT:    li r6, 64
-; CHECK-P8-NEXT:    lxvd2x vs7, r4, r5
-; CHECK-P8-NEXT:    lxvd2x vs11, r4, r6
-; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    lxvd2x vs12, r4, r6
-; CHECK-P8-NEXT:    li r6, 96
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    li r8, 96
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs7, r4, r11
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r10
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r9
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
-; CHECK-P8-NEXT:    li r6, 112
-; CHECK-P8-NEXT:    lxvd2x vs0, r4, r6
-; CHECK-P8-NEXT:    xxswapd vs6, vs5
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r8
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxswapd vs11, vs7
+; CHECK-P8-NEXT:    xscvdpsxws f7, f7
+; CHECK-P8-NEXT:    xxswapd vs10, vs6
 ; CHECK-P8-NEXT:    xscvdpsxws f6, f6
-; CHECK-P8-NEXT:    mffprwz r4, f5
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    xxswapd vs10, vs9
-; CHECK-P8-NEXT:    xscvdpsxws f9, f9
-; CHECK-P8-NEXT:    xxswapd vs4, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    xscvdpsxws f10, f10
+; CHECK-P8-NEXT:    xxswapd vs9, vs5
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    xxswapd vs8, vs4
 ; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    xxswapd vs8, vs7
-; CHECK-P8-NEXT:    xscvdpsxws f7, f7
-; CHECK-P8-NEXT:    mffprwz r4, f7
-; CHECK-P8-NEXT:    xxswapd vs13, vs11
 ; CHECK-P8-NEXT:    xscvdpsxws f11, f11
+; CHECK-P8-NEXT:    xscvdpsxws f10, f10
+; CHECK-P8-NEXT:    xscvdpsxws f9, f9
 ; CHECK-P8-NEXT:    xscvdpsxws f8, f8
-; CHECK-P8-NEXT:    xscvdpsxws f13, f13
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    xxswapd v2, vs12
-; CHECK-P8-NEXT:    xscvdpsxws f12, f12
-; CHECK-P8-NEXT:    mffprwz r4, f9
+; CHECK-P8-NEXT:    xxswapd v2, vs3
 ; CHECK-P8-NEXT:    xscvdpsxws v2, v2
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    mffprwz r4, f12
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f11
-; CHECK-P8-NEXT:    xxswapd v3, vs2
-; CHECK-P8-NEXT:    xscvdpsxws v3, v3
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xxswapd vs13, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f13, f13
 ; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    mffprwz r4, f8
-; CHECK-P8-NEXT:    xxswapd vs1, vs0
+; CHECK-P8-NEXT:    xxswapd vs12, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f12, f12
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mffprwz r4, f10
-; CHECK-P8-NEXT:    mtvsrd v10, r4
-; CHECK-P8-NEXT:    mfvsrwz r4, v2
-; CHECK-P8-NEXT:    mtvsrd v2, r4
-; CHECK-P8-NEXT:    mffprwz r4, f13
-; CHECK-P8-NEXT:    vmrghh v4, v8, v4
-; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
-; CHECK-P8-NEXT:    vmrghh v5, v9, v5
-; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mfvsrwz r4, v3
-; CHECK-P8-NEXT:    vmrghh v0, v10, v0
-; CHECK-P8-NEXT:    vmrghh v2, v2, v1
-; CHECK-P8-NEXT:    vmrghh v3, v8, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r4
+; CHECK-P8-NEXT:    mffprwz r4, f7
+; CHECK-P8-NEXT:    mffprwz r6, f6
+; CHECK-P8-NEXT:    mffprwz r7, f5
+; CHECK-P8-NEXT:    mffprwz r8, f4
+; CHECK-P8-NEXT:    mffprwz r9, f3
+; CHECK-P8-NEXT:    mffprwz r10, f2
+; CHECK-P8-NEXT:    xscvdpsxws f2, v3
+; CHECK-P8-NEXT:    mffprwz r11, f1
+; CHECK-P8-NEXT:    mffprwz r12, f11
+; CHECK-P8-NEXT:    slwi r12, r12, 16
+; CHECK-P8-NEXT:    or r4, r12, r4
+; CHECK-P8-NEXT:    mffprwz r12, f10
+; CHECK-P8-NEXT:    slwi r12, r12, 16
+; CHECK-P8-NEXT:    or r6, r12, r6
+; CHECK-P8-NEXT:    mffprwz r12, f9
+; CHECK-P8-NEXT:    rldimi r6, r4, 32, 0
 ; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    vmrghh v1, v9, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xxmrglw vs2, v1, v3
-; CHECK-P8-NEXT:    xxmrglw vs1, v2, v0
-; CHECK-P8-NEXT:    vmrghh v6, v6, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xxmrglw vs0, v5, v4
-; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, v2
-; CHECK-P8-NEXT:    vmrghh v7, v7, v8
-; CHECK-P8-NEXT:    xxmrglw vs3, v7, v6
-; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
-; CHECK-P8-NEXT:    xxswapd vs0, v3
+; CHECK-P8-NEXT:    mtfprd f1, r6
+; CHECK-P8-NEXT:    mffprwz r6, f0
+; CHECK-P8-NEXT:    slwi r12, r12, 16
+; CHECK-P8-NEXT:    or r7, r12, r7
+; CHECK-P8-NEXT:    mffprwz r12, f8
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    or r4, r4, r6
+; CHECK-P8-NEXT:    slwi r12, r12, 16
+; CHECK-P8-NEXT:    or r8, r12, r8
+; CHECK-P8-NEXT:    mfvsrwz r12, v2
+; CHECK-P8-NEXT:    rldimi r8, r7, 32, 0
+; CHECK-P8-NEXT:    mtfprd f3, r8
+; CHECK-P8-NEXT:    slwi r12, r12, 16
+; CHECK-P8-NEXT:    or r9, r12, r9
+; CHECK-P8-NEXT:    mffprwz r12, f13
+; CHECK-P8-NEXT:    slwi r12, r12, 16
+; CHECK-P8-NEXT:    or r10, r12, r10
+; CHECK-P8-NEXT:    mffprwz r12, f12
+; CHECK-P8-NEXT:    rldimi r10, r9, 32, 0
+; CHECK-P8-NEXT:    mtfprd f2, r10
+; CHECK-P8-NEXT:    slwi r12, r12, 16
+; CHECK-P8-NEXT:    or r11, r12, r11
+; CHECK-P8-NEXT:    rldimi r4, r11, 32, 0
+; CHECK-P8-NEXT:    xxmrghd vs0, vs3, vs1
+; CHECK-P8-NEXT:    mtfprd f1, r4
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs1
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs2, 0(r4)
-; CHECK-P9-NEXT:    lxv vs1, 16(r4)
-; CHECK-P9-NEXT:    lxv vs0, 32(r4)
-; CHECK-P9-NEXT:    xscvdpsxws f3, f2
-; CHECK-P9-NEXT:    xscvdpsxws f4, f1
-; CHECK-P9-NEXT:    xxswapd vs2, vs2
-; CHECK-P9-NEXT:    xscvdpsxws f5, f0
-; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    lxv vs3, 64(r4)
-; CHECK-P9-NEXT:    mtvsrd v2, r5
-; CHECK-P9-NEXT:    mffprwz r5, f4
-; CHECK-P9-NEXT:    lxv vs4, 48(r4)
-; CHECK-P9-NEXT:    mtvsrd v3, r5
-; CHECK-P9-NEXT:    mffprwz r5, f5
-; CHECK-P9-NEXT:    xscvdpsxws f7, f3
-; CHECK-P9-NEXT:    xxswapd vs3, vs3
-; CHECK-P9-NEXT:    mtvsrd v4, r5
-; CHECK-P9-NEXT:    mffprwz r5, f2
-; CHECK-P9-NEXT:    lxv vs2, 80(r4)
-; CHECK-P9-NEXT:    xscvdpsxws f5, f4
-; CHECK-P9-NEXT:    xxswapd vs4, vs4
-; CHECK-P9-NEXT:    mtvsrd v5, r5
-; CHECK-P9-NEXT:    mffprwz r5, f1
+; CHECK-P9-NEXT:    lxv vs7, 16(r4)
+; CHECK-P9-NEXT:    lxv vs6, 0(r4)
+; CHECK-P9-NEXT:    lxv vs0, 96(r4)
+; CHECK-P9-NEXT:    lxv vs1, 112(r4)
+; CHECK-P9-NEXT:    xxswapd vs8, vs7
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    lxv vs2, 64(r4)
+; CHECK-P9-NEXT:    lxv vs3, 80(r4)
+; CHECK-P9-NEXT:    lxv vs4, 32(r4)
+; CHECK-P9-NEXT:    lxv vs5, 48(r4)
+; CHECK-P9-NEXT:    xscvdpsxws f8, f8
+; CHECK-P9-NEXT:    mffprwz r4, f7
+; CHECK-P9-NEXT:    xxswapd vs7, vs6
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    slwi r4, r4, 16
+; CHECK-P9-NEXT:    mffprwz r5, f8
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    or r4, r4, r5
+; CHECK-P9-NEXT:    mffprwz r5, f6
+; CHECK-P9-NEXT:    xxswapd vs6, vs5
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    slwi r5, r5, 16
+; CHECK-P9-NEXT:    mffprwz r6, f7
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    or r5, r5, r6
+; CHECK-P9-NEXT:    mffprwz r6, f5
+; CHECK-P9-NEXT:    xxswapd vs5, vs4
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    rldimi r5, r4, 32, 0
+; CHECK-P9-NEXT:    slwi r6, r6, 16
+; CHECK-P9-NEXT:    mffprwz r7, f6
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    or r6, r6, r7
+; CHECK-P9-NEXT:    mffprwz r7, f4
+; CHECK-P9-NEXT:    xxswapd vs4, vs3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    lxv vs1, 96(r4)
+; CHECK-P9-NEXT:    slwi r7, r7, 16
+; CHECK-P9-NEXT:    mffprwz r8, f5
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    vmrghh v2, v2, v5
-; CHECK-P9-NEXT:    mtvsrd v5, r5
-; CHECK-P9-NEXT:    mffprwz r5, f0
-; CHECK-P9-NEXT:    lxv vs0, 112(r4)
-; CHECK-P9-NEXT:    vmrghh v3, v3, v5
-; CHECK-P9-NEXT:    mtvsrd v5, r5
-; CHECK-P9-NEXT:    mffprwz r4, f5
-; CHECK-P9-NEXT:    vmrghh v4, v4, v5
-; CHECK-P9-NEXT:    xxmrglw vs6, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v2, r4
-; CHECK-P9-NEXT:    mffprwz r4, f4
-; CHECK-P9-NEXT:    mtvsrd v3, r4
-; CHECK-P9-NEXT:    mffprwz r4, f7
-; CHECK-P9-NEXT:    vmrghh v2, v2, v3
-; CHECK-P9-NEXT:    mtvsrd v3, r4
-; CHECK-P9-NEXT:    mffprwz r4, f3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f2
-; CHECK-P9-NEXT:    xxswapd vs2, vs2
+; CHECK-P9-NEXT:    or r7, r7, r8
+; CHECK-P9-NEXT:    mffprwz r8, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xxmrglw vs4, v2, v4
-; CHECK-P9-NEXT:    mtvsrd v2, r4
-; CHECK-P9-NEXT:    vmrghh v2, v3, v2
-; CHECK-P9-NEXT:    xxmrgld vs4, vs4, vs6
+; CHECK-P9-NEXT:    rldimi r7, r6, 32, 0
+; CHECK-P9-NEXT:    slwi r8, r8, 16
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mffprwz r9, f4
+; CHECK-P9-NEXT:    or r8, r8, r9
 ; CHECK-P9-NEXT:    mffprwz r4, f3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f1
-; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    mtvsrd v3, r4
-; CHECK-P9-NEXT:    stxv vs4, 0(r3)
-; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    mtvsrdd vs3, r7, r5
+; CHECK-P9-NEXT:    mffprwz r5, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r4
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r4, f3
-; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v2, r4
-; CHECK-P9-NEXT:    mffprwz r4, f1
-; CHECK-P9-NEXT:    xscvdpsxws f1, f0
-; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v3, r4
+; CHECK-P9-NEXT:    slwi r5, r5, 16
+; CHECK-P9-NEXT:    stxv vs3, 0(r3)
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    or r4, r5, r4
+; CHECK-P9-NEXT:    rldimi r4, r8, 32, 0
+; CHECK-P9-NEXT:    mffprwz r6, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghh v2, v2, v3
-; CHECK-P9-NEXT:    mffprwz r4, f1
-; CHECK-P9-NEXT:    mtvsrd v3, r4
-; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    mtvsrd v4, r4
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P9-NEXT:    xxmrgld vs0, vs0, vs2
+; CHECK-P9-NEXT:    slwi r6, r6, 16
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r5, f2
+; CHECK-P9-NEXT:    or r5, r6, r5
+; CHECK-P9-NEXT:    mffprwz r7, f0
+; CHECK-P9-NEXT:    slwi r7, r7, 16
+; CHECK-P9-NEXT:    mffprwz r6, f1
+; CHECK-P9-NEXT:    or r6, r7, r6
+; CHECK-P9-NEXT:    rldimi r6, r5, 32, 0
+; CHECK-P9-NEXT:    mtvsrdd vs0, r6, r4
 ; CHECK-P9-NEXT:    stxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs7, 48(r4)
-; CHECK-BE-NEXT:    lxv vs0, 64(r4)
-; CHECK-BE-NEXT:    lxv vs1, 80(r4)
-; CHECK-BE-NEXT:    lxv vs2, 96(r4)
-; CHECK-BE-NEXT:    xscvdpsxws f9, f7
-; CHECK-BE-NEXT:    xxswapd vs7, vs7
-; CHECK-BE-NEXT:    lxv vs3, 112(r4)
-; CHECK-BE-NEXT:    lxv vs4, 0(r4)
-; CHECK-BE-NEXT:    lxv vs5, 16(r4)
-; CHECK-BE-NEXT:    lxv vs6, 32(r4)
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_0@toc@l
-; CHECK-BE-NEXT:    lxv vs8, 0(r4)
+; CHECK-BE-NEXT:    lxv vs7, 32(r4)
+; CHECK-BE-NEXT:    lxv vs6, 48(r4)
+; CHECK-BE-NEXT:    lxv vs0, 80(r4)
+; CHECK-BE-NEXT:    lxv vs1, 64(r4)
+; CHECK-BE-NEXT:    xxswapd vs8, vs7
 ; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    mffprwz r4, f9
-; CHECK-BE-NEXT:    mtfprwz f9, r4
+; CHECK-BE-NEXT:    lxv vs2, 112(r4)
+; CHECK-BE-NEXT:    lxv vs3, 96(r4)
+; CHECK-BE-NEXT:    lxv vs4, 16(r4)
+; CHECK-BE-NEXT:    lxv vs5, 0(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f8, f8
 ; CHECK-BE-NEXT:    mffprwz r4, f7
-; CHECK-BE-NEXT:    mtfprwz f7, r4
-; CHECK-BE-NEXT:    xxperm vs7, vs9, vs8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f6
-; CHECK-BE-NEXT:    xxswapd vs6, vs6
+; CHECK-BE-NEXT:    xxswapd vs7, vs6
 ; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    mffprwz r4, f9
-; CHECK-BE-NEXT:    mtfprwz f9, r4
-; CHECK-BE-NEXT:    mffprwz r4, f6
-; CHECK-BE-NEXT:    mtfprwz f6, r4
-; CHECK-BE-NEXT:    xxperm vs6, vs9, vs8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f5
-; CHECK-BE-NEXT:    xxswapd vs5, vs5
+; CHECK-BE-NEXT:    slwi r4, r4, 16
+; CHECK-BE-NEXT:    mffprwz r5, f8
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    or r4, r4, r5
+; CHECK-BE-NEXT:    mffprwz r5, f6
+; CHECK-BE-NEXT:    xxswapd vs6, vs5
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xxmrghw vs6, vs6, vs7
-; CHECK-BE-NEXT:    mffprwz r4, f9
-; CHECK-BE-NEXT:    mtfprwz f9, r4
-; CHECK-BE-NEXT:    mffprwz r4, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r4
-; CHECK-BE-NEXT:    xxperm vs5, vs9, vs8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f4
-; CHECK-BE-NEXT:    xxswapd vs4, vs4
+; CHECK-BE-NEXT:    slwi r5, r5, 16
+; CHECK-BE-NEXT:    mffprwz r6, f7
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    or r5, r5, r6
+; CHECK-BE-NEXT:    mffprwz r6, f5
+; CHECK-BE-NEXT:    xxswapd vs5, vs4
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    mffprwz r4, f9
-; CHECK-BE-NEXT:    mtfprwz f9, r4
-; CHECK-BE-NEXT:    mffprwz r4, f4
-; CHECK-BE-NEXT:    mtfprwz f4, r4
-; CHECK-BE-NEXT:    xxperm vs4, vs9, vs8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs3
+; CHECK-BE-NEXT:    rldimi r5, r4, 32, 0
+; CHECK-BE-NEXT:    slwi r6, r6, 16
+; CHECK-BE-NEXT:    mffprwz r7, f6
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    or r6, r6, r7
+; CHECK-BE-NEXT:    mffprwz r7, f4
+; CHECK-BE-NEXT:    xxswapd vs4, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xxmrghw vs4, vs4, vs5
-; CHECK-BE-NEXT:    xscvdpsxws f5, f2
-; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    slwi r7, r7, 16
+; CHECK-BE-NEXT:    mffprwz r8, f5
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    or r7, r7, r8
+; CHECK-BE-NEXT:    mffprwz r8, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xxmrghd vs4, vs4, vs6
-; CHECK-BE-NEXT:    mffprwz r4, f9
-; CHECK-BE-NEXT:    mtfprwz f9, r4
-; CHECK-BE-NEXT:    stxv vs4, 0(r3)
+; CHECK-BE-NEXT:    rldimi r7, r6, 32, 0
+; CHECK-BE-NEXT:    slwi r8, r8, 16
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r9, f4
+; CHECK-BE-NEXT:    or r8, r8, r9
 ; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r4
-; CHECK-BE-NEXT:    mffprwz r4, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r4
-; CHECK-BE-NEXT:    mffprwz r4, f2
-; CHECK-BE-NEXT:    xxperm vs3, vs9, vs8
-; CHECK-BE-NEXT:    mtfprwz f2, r4
-; CHECK-BE-NEXT:    xxperm vs2, vs5, vs8
-; CHECK-BE-NEXT:    xxmrghw vs2, vs2, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    mtvsrdd vs3, r7, r5
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r4
-; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r4
-; CHECK-BE-NEXT:    xxperm vs1, vs3, vs8
-; CHECK-BE-NEXT:    xscvdpsxws f3, f0
-; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    slwi r5, r5, 16
+; CHECK-BE-NEXT:    stxv vs3, 0(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    or r4, r5, r4
+; CHECK-BE-NEXT:    rldimi r4, r8, 32, 0
+; CHECK-BE-NEXT:    mffprwz r6, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r4, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r4
-; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    mtfprwz f0, r4
-; CHECK-BE-NEXT:    xxperm vs0, vs3, vs8
-; CHECK-BE-NEXT:    xxmrghw vs0, vs0, vs1
-; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs2
+; CHECK-BE-NEXT:    slwi r6, r6, 16
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    or r5, r6, r5
+; CHECK-BE-NEXT:    mffprwz r7, f0
+; CHECK-BE-NEXT:    slwi r7, r7, 16
+; CHECK-BE-NEXT:    mffprwz r6, f1
+; CHECK-BE-NEXT:    or r6, r7, r6
+; CHECK-BE-NEXT:    rldimi r6, r5, 32, 0
+; CHECK-BE-NEXT:    mtvsrdd vs0, r6, r4
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
index 770689ba98049b1..8d0064666fdcaac 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
@@ -12,18 +12,13 @@
 define i16 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpsxws f1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpsxws f1, v2
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    vmrghb v2, v2, v3
-; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    mffprd r3, f0
-; CHECK-P8-NEXT:    clrldi r3, r3, 48
-; CHECK-P8-NEXT:    sth r3, -2(r1)
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    rlwimi r4, r3, 8, 0, 23
+; CHECK-P8-NEXT:    sth r4, -2(r1)
 ; CHECK-P8-NEXT:    lhz r3, -2(r1)
 ; CHECK-P8-NEXT:    blr
 ;
@@ -32,33 +27,17 @@ define i16 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    xscvdpsxws f0, v2
 ; CHECK-P9-NEXT:    mffprwz r3, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    addi r3, r1, -2
-; CHECK-P9-NEXT:    vmrghb v2, v3, v2
-; CHECK-P9-NEXT:    vsldoi v2, v2, v2, 8
-; CHECK-P9-NEXT:    stxsihx v2, 0, r3
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    rlwimi r4, r3, 8, 0, 23
+; CHECK-P9-NEXT:    sth r4, -2(r1)
 ; CHECK-P9-NEXT:    lhz r3, -2(r1)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxswapd vs2, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, v2
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    xxperm v2, vs1, vs0
-; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
-; CHECK-BE-NEXT:    stxsihx v2, 0, r3
+; CHECK-BE-NEXT:    li r3, -1
+; CHECK-BE-NEXT:    sth r3, -2(r1)
 ; CHECK-BE-NEXT:    lhz r3, -2(r1)
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -79,19 +58,13 @@ define i32 @test4elt(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-NEXT:    mffprwz r3, f0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f2
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    rlwimi r3, r4, 8, 16, 23
+; CHECK-P8-NEXT:    mffprwz r4, f0
 ; CHECK-P8-NEXT:    xxswapd vs3, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mffprwz r3, f3
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghb v2, v3, v2
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    vmrghb v3, v4, v3
-; CHECK-P8-NEXT:    vmrglh v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f3
+; CHECK-P8-NEXT:    rlwimi r3, r4, 16, 8, 15
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    rlwimi r3, r4, 24, 0, 7
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test4elt:
@@ -101,50 +74,35 @@ define i32 @test4elt(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f1
 ; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    mtvsrd v2, r3
+; CHECK-P9-NEXT:    mffprwz r4, f2
 ; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xscvdpsxws f1, f0
-; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    xxswapd vs1, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghb v2, v2, v3
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    rlwimi r3, r4, 8, 16, 23
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    rlwimi r3, r4, 16, 8, 15
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    rlwimi r3, r4, 24, 0, 7
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-NEXT:    lxv vs2, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f3, f1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    mffprwz r4, f2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f0
-; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xxperm v2, vs3, vs2
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    xxperm v3, vs1, vs2
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    rlwimi r3, r4, 8, 16, 23
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    rlwimi r3, r4, 16, 8, 15
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    rlwimi r3, r4, 24, 0, 7
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <4 x double>, ptr %0, align 32
@@ -156,48 +114,42 @@ entry:
 define i64 @test8elt(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r3
 ; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
-; CHECK-P8-NEXT:    xxswapd vs3, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxswapd vs3, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    xxswapd vs2, vs0
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvdpsxws f2, f6
+; CHECK-P8-NEXT:    mffprwz r3, f3
+; CHECK-P8-NEXT:    rlwimi r4, r3, 8, 16, 23
+; CHECK-P8-NEXT:    mffprwz r3, f2
+; CHECK-P8-NEXT:    xxswapd vs1, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f0, f6
-; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    rlwimi r4, r3, 16, 8, 15
 ; CHECK-P8-NEXT:    xxswapd vs5, vs4
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    mffprwz r4, f5
 ; CHECK-P8-NEXT:    xxswapd vs7, vs6
-; CHECK-P8-NEXT:    xscvdpsxws f7, f7
-; CHECK-P8-NEXT:    vmrghb v2, v5, v2
-; CHECK-P8-NEXT:    mtvsrd v5, r3
-; CHECK-P8-NEXT:    mffprwz r3, f7
-; CHECK-P8-NEXT:    mtvsrd v0, r3
+; CHECK-P8-NEXT:    xscvdpsxws f2, f7
+; CHECK-P8-NEXT:    mffprwz r3, f2
+; CHECK-P8-NEXT:    rlwimi r4, r3, 24, 0, 7
+; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    mtfprwz f2, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f4
+; CHECK-P8-NEXT:    rlwimi r4, r3, 8, 16, 23
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f5
+; CHECK-P8-NEXT:    rlwimi r4, r3, 16, 8, 15
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghb v3, v5, v3
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    vmrglh v2, v3, v2
-; CHECK-P8-NEXT:    vmrghb v4, v5, v4
-; CHECK-P8-NEXT:    mtvsrd v5, r3
-; CHECK-P8-NEXT:    vmrghb v5, v0, v5
-; CHECK-P8-NEXT:    vmrglh v3, v5, v4
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    rlwimi r4, r3, 24, 0, 7
+; CHECK-P8-NEXT:    mtfprwz f0, r4
+; CHECK-P8-NEXT:    xxmrghw vs0, vs0, vs2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
@@ -206,91 +158,76 @@ define i64 @test8elt(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    lxv vs3, 0(r3)
 ; CHECK-P9-NEXT:    lxv vs2, 16(r3)
-; CHECK-P9-NEXT:    lxv vs0, 48(r3)
 ; CHECK-P9-NEXT:    lxv vs1, 32(r3)
+; CHECK-P9-NEXT:    lxv vs0, 48(r3)
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f3
 ; CHECK-P9-NEXT:    xxswapd vs3, vs3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    mffprwz r3, f4
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f2
-; CHECK-P9-NEXT:    xxswapd vs2, vs2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghb v2, v2, v3
+; CHECK-P9-NEXT:    rlwimi r4, r3, 8, 16, 23
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xscvdpsxws f2, f1
+; CHECK-P9-NEXT:    xscvdpsxws f3, f1
 ; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
+; CHECK-P9-NEXT:    rlwimi r4, r3, 16, 8, 15
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xscvdpsxws f1, f0
-; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    rlwimi r4, r3, 24, 0, 7
+; CHECK-P9-NEXT:    mtfprwz f2, r4
+; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
+; CHECK-P9-NEXT:    rlwimi r4, r3, 8, 16, 23
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    rlwimi r4, r3, 16, 8, 15
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    rlwimi r4, r3, 24, 0, 7
+; CHECK-P9-NEXT:    mtfprwz f0, r4
+; CHECK-P9-NEXT:    xxmrghw vs0, vs0, vs2
 ; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-NEXT:    lxv vs4, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f5, f3
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f2
-; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xxperm v2, vs5, vs4
+; CHECK-BE-NEXT:    rlwimi r4, r3, 8, 16, 23
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
+; CHECK-BE-NEXT:    rlwimi r4, r3, 16, 8, 15
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    rlwimi r4, r3, 24, 0, 7
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xxperm v3, vs3, vs4
+; CHECK-BE-NEXT:    mtvsrwz v2, r4
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    mtfprwz f2, r3
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f0
-; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xxperm v3, vs2, vs4
+; CHECK-BE-NEXT:    rlwimi r4, r3, 8, 16, 23
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    rlwimi r4, r3, 16, 8, 15
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xxperm v4, vs1, vs4
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
-; CHECK-BE-NEXT:    mffprd r3, f0
+; CHECK-BE-NEXT:    rlwimi r4, r3, 24, 0, 7
+; CHECK-BE-NEXT:    mtvsrwz v3, r4
+; CHECK-BE-NEXT:    vmrgow v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x double>, ptr %0, align 64
@@ -302,261 +239,212 @@ entry:
 define <16 x i8> @test16elt(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r4, 80
-; CHECK-P8-NEXT:    lxvd2x vs4, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs7, r3, r4
-; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs9, r3, r4
 ; CHECK-P8-NEXT:    li r4, 64
-; CHECK-P8-NEXT:    lxvd2x vs12, r3, r4
-; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    li r5, 96
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    li r5, 80
 ; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs12, r3, r5
+; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    lxvd2x vs9, r3, r4
 ; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
-; CHECK-P8-NEXT:    xxswapd vs5, vs4
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    xxswapd vs13, vs3
+; CHECK-P8-NEXT:    lxvd2x vs13, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs5, vs3
 ; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    xscvdpsxws f13, f13
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    mffprwz r4, f3
+; CHECK-P8-NEXT:    xxswapd vs8, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f8, f8
+; CHECK-P8-NEXT:    xxswapd vs7, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxswapd vs4, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xxswapd v2, vs12
+; CHECK-P8-NEXT:    mffprwz r3, f2
+; CHECK-P8-NEXT:    mffprwz r5, f4
+; CHECK-P8-NEXT:    rlwimi r3, r5, 8, 16, 23
+; CHECK-P8-NEXT:    mffprwz r5, f5
 ; CHECK-P8-NEXT:    xxswapd vs10, vs6
 ; CHECK-P8-NEXT:    xscvdpsxws f6, f6
 ; CHECK-P8-NEXT:    xscvdpsxws f10, f10
-; CHECK-P8-NEXT:    xxswapd vs8, vs7
-; CHECK-P8-NEXT:    xscvdpsxws f7, f7
-; CHECK-P8-NEXT:    mffprwz r4, f7
-; CHECK-P8-NEXT:    xscvdpsxws f8, f8
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f3
+; CHECK-P8-NEXT:    rlwimi r4, r5, 8, 16, 23
 ; CHECK-P8-NEXT:    xxswapd vs11, vs9
 ; CHECK-P8-NEXT:    xscvdpsxws f9, f9
-; CHECK-P8-NEXT:    mffprwz r3, f9
-; CHECK-P8-NEXT:    mtvsrd v0, r3
+; CHECK-P8-NEXT:    mffprwz r5, f9
 ; CHECK-P8-NEXT:    xscvdpsxws f11, f11
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f8
-; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    xxswapd v2, vs12
-; CHECK-P8-NEXT:    xscvdpsxws f12, f12
-; CHECK-P8-NEXT:    mffprwz r3, f12
-; CHECK-P8-NEXT:    mtvsrd v6, r3
-; CHECK-P8-NEXT:    mffprwz r3, f5
-; CHECK-P8-NEXT:    xscvdpsxws v2, v2
-; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    mffprwz r3, f11
-; CHECK-P8-NEXT:    xxswapd v3, vs2
-; CHECK-P8-NEXT:    xscvdpsxws v3, v3
-; CHECK-P8-NEXT:    mffprwz r4, f10
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xxswapd vs1, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghb v4, v8, v4
-; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    mfvsrwz r3, v2
-; CHECK-P8-NEXT:    mtvsrd v2, r4
-; CHECK-P8-NEXT:    mffprwz r4, f13
-; CHECK-P8-NEXT:    vmrghb v5, v9, v5
-; CHECK-P8-NEXT:    vmrghb v0, v8, v0
-; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    mfvsrwz r3, v3
-; CHECK-P8-NEXT:    vmrglh v4, v5, v4
-; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    vmrghb v2, v2, v1
-; CHECK-P8-NEXT:    vmrghb v1, v8, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r3
-; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    vmrglh v2, v2, v0
-; CHECK-P8-NEXT:    vmrghb v3, v3, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r3
+; CHECK-P8-NEXT:    rlwimi r3, r5, 16, 8, 15
+; CHECK-P8-NEXT:    mffprwz r5, f6
+; CHECK-P8-NEXT:    rlwimi r4, r5, 16, 8, 15
+; CHECK-P8-NEXT:    mffprwz r5, f11
+; CHECK-P8-NEXT:    xxswapd v3, vs13
+; CHECK-P8-NEXT:    rlwimi r3, r5, 24, 0, 7
+; CHECK-P8-NEXT:    mffprwz r5, f10
+; CHECK-P8-NEXT:    rlwimi r4, r5, 24, 0, 7
+; CHECK-P8-NEXT:    mffprwz r5, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f12
+; CHECK-P8-NEXT:    rldimi r4, r3, 32, 0
+; CHECK-P8-NEXT:    mffprwz r3, f8
+; CHECK-P8-NEXT:    mtfprd f2, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f13
+; CHECK-P8-NEXT:    rlwimi r4, r3, 8, 16, 23
 ; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    vmrglh v3, v3, v1
-; CHECK-P8-NEXT:    vmrghb v6, v6, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r3
+; CHECK-P8-NEXT:    xscvdpsxws f1, v3
+; CHECK-P8-NEXT:    rlwimi r4, r3, 16, 8, 15
+; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f7
+; CHECK-P8-NEXT:    rlwimi r4, r3, 24, 0, 7
+; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    rlwimi r5, r3, 8, 16, 23
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, v2
+; CHECK-P8-NEXT:    rlwimi r5, r3, 16, 8, 15
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v4
-; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    vmrghb v7, v7, v8
-; CHECK-P8-NEXT:    vmrglh v5, v7, v6
-; CHECK-P8-NEXT:    xxmrglw vs1, v5, v3
-; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-P8-NEXT:    rlwimi r5, r3, 24, 0, 7
+; CHECK-P8-NEXT:    rldimi r5, r4, 32, 0
+; CHECK-P8-NEXT:    mtfprd f0, r5
+; CHECK-P8-NEXT:    xxmrghd v2, vs0, vs2
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs7, 0(r3)
-; CHECK-P9-NEXT:    lxv vs6, 16(r3)
-; CHECK-P9-NEXT:    lxv vs0, 112(r3)
-; CHECK-P9-NEXT:    lxv vs1, 96(r3)
+; CHECK-P9-NEXT:    lxv vs7, 32(r3)
+; CHECK-P9-NEXT:    lxv vs6, 48(r3)
+; CHECK-P9-NEXT:    lxv vs5, 0(r3)
+; CHECK-P9-NEXT:    lxv vs0, 80(r3)
 ; CHECK-P9-NEXT:    xscvdpsxws f8, f7
 ; CHECK-P9-NEXT:    xxswapd vs7, vs7
-; CHECK-P9-NEXT:    lxv vs2, 80(r3)
-; CHECK-P9-NEXT:    lxv vs3, 64(r3)
-; CHECK-P9-NEXT:    lxv vs4, 48(r3)
-; CHECK-P9-NEXT:    lxv vs5, 32(r3)
+; CHECK-P9-NEXT:    lxv vs1, 64(r3)
+; CHECK-P9-NEXT:    lxv vs2, 112(r3)
+; CHECK-P9-NEXT:    lxv vs3, 96(r3)
+; CHECK-P9-NEXT:    lxv vs4, 16(r3)
 ; CHECK-P9-NEXT:    xscvdpsxws f7, f7
 ; CHECK-P9-NEXT:    mffprwz r3, f8
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    mffprwz r3, f7
-; CHECK-P9-NEXT:    xscvdpsxws f7, f6
-; CHECK-P9-NEXT:    xxswapd vs6, vs6
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    mffprwz r4, f7
+; CHECK-P9-NEXT:    xxswapd vs7, vs6
 ; CHECK-P9-NEXT:    xscvdpsxws f6, f6
-; CHECK-P9-NEXT:    vmrghb v2, v2, v3
+; CHECK-P9-NEXT:    rlwimi r4, r3, 8, 16, 23
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
 ; CHECK-P9-NEXT:    mffprwz r3, f7
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    rlwimi r4, r3, 16, 8, 15
 ; CHECK-P9-NEXT:    mffprwz r3, f6
 ; CHECK-P9-NEXT:    xscvdpsxws f6, f5
 ; CHECK-P9-NEXT:    xxswapd vs5, vs5
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    rlwimi r4, r3, 24, 0, 7
 ; CHECK-P9-NEXT:    xscvdpsxws f5, f5
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r3, f6
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    mffprwz r5, f6
 ; CHECK-P9-NEXT:    mffprwz r3, f5
-; CHECK-P9-NEXT:    xscvdpsxws f5, f4
-; CHECK-P9-NEXT:    xxswapd vs4, vs4
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    xxswapd vs5, vs4
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r3, f5
-; CHECK-P9-NEXT:    xscvdpsxws f5, f3
+; CHECK-P9-NEXT:    rlwimi r3, r5, 8, 16, 23
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    mffprwz r5, f5
+; CHECK-P9-NEXT:    rlwimi r3, r5, 16, 8, 15
+; CHECK-P9-NEXT:    mffprwz r5, f4
+; CHECK-P9-NEXT:    xscvdpsxws f4, f3
 ; CHECK-P9-NEXT:    xxswapd vs3, vs3
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    mffprwz r3, f4
+; CHECK-P9-NEXT:    rlwimi r3, r5, 24, 0, 7
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
-; CHECK-P9-NEXT:    mffprwz r3, f5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    xxmrglw vs4, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f2
-; CHECK-P9-NEXT:    xxswapd vs2, vs2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    rldimi r3, r4, 32, 0
+; CHECK-P9-NEXT:    mffprwz r4, f4
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghb v2, v2, v3
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    rlwimi r5, r4, 8, 16, 23
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    rlwimi r5, r4, 16, 8, 15
+; CHECK-P9-NEXT:    mffprwz r4, f2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f1
 ; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    rlwimi r5, r4, 24, 0, 7
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xscvdpsxws f1, f0
-; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    mffprwz r6, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs4
+; CHECK-P9-NEXT:    rlwimi r6, r4, 8, 16, 23
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    rlwimi r6, r4, 16, 8, 15
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    rlwimi r6, r4, 24, 0, 7
+; CHECK-P9-NEXT:    rldimi r6, r5, 32, 0
+; CHECK-P9-NEXT:    mtvsrdd v2, r6, r3
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs7, 112(r3)
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f9, f7
+; CHECK-BE-NEXT:    lxv vs7, 80(r3)
+; CHECK-BE-NEXT:    lxv vs6, 64(r3)
+; CHECK-BE-NEXT:    lxv vs5, 112(r3)
+; CHECK-BE-NEXT:    lxv vs0, 32(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f8, f7
 ; CHECK-BE-NEXT:    xxswapd vs7, vs7
-; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    lxv vs4, 64(r3)
-; CHECK-BE-NEXT:    lxv vs5, 80(r3)
-; CHECK-BE-NEXT:    lxv vs6, 96(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-BE-NEXT:    lxv vs8, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 48(r3)
+; CHECK-BE-NEXT:    lxv vs2, 0(r3)
+; CHECK-BE-NEXT:    lxv vs3, 16(r3)
+; CHECK-BE-NEXT:    lxv vs4, 96(r3)
 ; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    mffprwz r3, f9
-; CHECK-BE-NEXT:    mtfprwz f9, r3
-; CHECK-BE-NEXT:    mffprwz r3, f7
-; CHECK-BE-NEXT:    xscvdpsxws f7, f6
-; CHECK-BE-NEXT:    xxswapd vs6, vs6
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    mffprwz r3, f8
+; CHECK-BE-NEXT:    mffprwz r4, f7
+; CHECK-BE-NEXT:    xxswapd vs7, vs6
 ; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xxperm v2, vs9, vs8
+; CHECK-BE-NEXT:    rlwimi r4, r3, 8, 16, 23
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
 ; CHECK-BE-NEXT:    mffprwz r3, f7
-; CHECK-BE-NEXT:    mtfprwz f7, r3
+; CHECK-BE-NEXT:    rlwimi r4, r3, 16, 8, 15
 ; CHECK-BE-NEXT:    mffprwz r3, f6
 ; CHECK-BE-NEXT:    xscvdpsxws f6, f5
 ; CHECK-BE-NEXT:    xxswapd vs5, vs5
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    rlwimi r4, r3, 24, 0, 7
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xxperm v3, vs7, vs8
-; CHECK-BE-NEXT:    mffprwz r3, f6
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    mtfprwz f6, r3
+; CHECK-BE-NEXT:    mffprwz r5, f6
 ; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    xscvdpsxws f5, f4
-; CHECK-BE-NEXT:    xxswapd vs4, vs4
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    xxswapd vs5, vs4
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xxperm v3, vs6, vs8
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r3
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xxperm v4, vs5, vs8
-; CHECK-BE-NEXT:    xscvdpsxws f5, f3
+; CHECK-BE-NEXT:    rlwimi r3, r5, 8, 16, 23
+; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    mffprwz r5, f5
+; CHECK-BE-NEXT:    rlwimi r3, r5, 16, 8, 15
+; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
+; CHECK-BE-NEXT:    rlwimi r3, r5, 24, 0, 7
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghw vs4, v3, v2
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r3
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f3, f2
-; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    rldimi r3, r4, 32, 0
+; CHECK-BE-NEXT:    mffprwz r4, f4
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xxperm v2, vs5, vs8
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    rlwimi r5, r4, 8, 16, 23
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    rlwimi r5, r4, 16, 8, 15
+; CHECK-BE-NEXT:    mffprwz r4, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    rlwimi r5, r4, 24, 0, 7
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xxperm v3, vs3, vs8
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    mtfprwz f2, r3
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f0
-; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    mffprwz r4, f2
+; CHECK-BE-NEXT:    mffprwz r6, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xxperm v3, vs2, vs8
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xxperm v4, vs1, vs8
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
-; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs4
+; CHECK-BE-NEXT:    rlwimi r6, r4, 8, 16, 23
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    rlwimi r6, r4, 16, 8, 15
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    rlwimi r6, r4, 24, 0, 7
+; CHECK-BE-NEXT:    rldimi r6, r5, 32, 0
+; CHECK-BE-NEXT:    mtvsrdd v2, r6, r3
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x double>, ptr %0, align 128
@@ -567,53 +455,34 @@ entry:
 define i16 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpsxws f1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    vmrghb v2, v2, v3
-; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    mffprd r3, f0
-; CHECK-P8-NEXT:    clrldi r3, r3, 48
+; CHECK-P8-NEXT:    xscvdpsxws f0, v2
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    slwi r4, r4, 8
+; CHECK-P8-NEXT:    or r3, r4, r3
 ; CHECK-P8-NEXT:    sth r3, -2(r1)
 ; CHECK-P8-NEXT:    lhz r3, -2(r1)
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xscvdpsxws f0, v2
-; CHECK-P9-NEXT:    mffprwz r3, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    addi r3, r1, -2
-; CHECK-P9-NEXT:    vmrghb v2, v3, v2
-; CHECK-P9-NEXT:    vsldoi v2, v2, v2, 8
-; CHECK-P9-NEXT:    stxsihx v2, 0, r3
+; CHECK-P9-NEXT:    xscvdpsxws f0, v2
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    slwi r4, r4, 8
+; CHECK-P9-NEXT:    or r3, r4, r3
+; CHECK-P9-NEXT:    sth r3, -2(r1)
 ; CHECK-P9-NEXT:    lhz r3, -2(r1)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxswapd vs2, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, v2
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    xxperm v2, vs1, vs0
-; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
-; CHECK-BE-NEXT:    stxsihx v2, 0, r3
+; CHECK-BE-NEXT:    li r3, -1
+; CHECK-BE-NEXT:    sth r3, -2(r1)
 ; CHECK-BE-NEXT:    lhz r3, -2(r1)
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -625,81 +494,69 @@ entry:
 define i32 @test4elt_signed(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, vs2
 ; CHECK-P8-NEXT:    xxswapd vs1, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f3
+; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    slwi r3, r3, 24
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f2
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    xxswapd vs3, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mffprwz r3, f3
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghb v2, v3, v2
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    vmrghb v3, v4, v3
-; CHECK-P8-NEXT:    vmrglh v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    slwi r4, r4, 8
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    or r3, r3, r4
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs1, 0(r3)
-; CHECK-P9-NEXT:    lxv vs0, 16(r3)
+; CHECK-P9-NEXT:    lxv vs1, 16(r3)
+; CHECK-P9-NEXT:    lxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f1
 ; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    mffprwz r3, f1
+; CHECK-P9-NEXT:    slwi r3, r3, 24
+; CHECK-P9-NEXT:    mffprwz r4, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    slwi r4, r4, 16
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghb v2, v2, v3
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    slwi r4, r4, 8
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    or r3, r3, r4
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; CHECK-BE-NEXT:    lxv vs2, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f3, f1
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-NEXT:    lxv vs0, 16(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    slwi r3, r3, 24
+; CHECK-BE-NEXT:    mffprwz r4, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    slwi r4, r4, 16
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xxperm v2, vs3, vs2
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    xxperm v3, vs1, vs2
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    slwi r4, r4, 8
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    or r3, r3, r4
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <4 x double>, ptr %0, align 32
@@ -711,141 +568,138 @@ entry:
 define i64 @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    lxvd2x vs6, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
 ; CHECK-P8-NEXT:    li r4, 32
 ; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
-; CHECK-P8-NEXT:    xxswapd vs3, vs1
+; CHECK-P8-NEXT:    xxswapd vs7, vs6
+; CHECK-P8-NEXT:    xxswapd vs1, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxswapd vs3, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvdpsxws f2, f7
+; CHECK-P8-NEXT:    mffprwz r3, f3
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    slwi r3, r3, 24
+; CHECK-P8-NEXT:    xxswapd vs5, vs4
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvdpsxws f2, f6
+; CHECK-P8-NEXT:    slwi r4, r4, 8
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f5
+; CHECK-P8-NEXT:    mtfprwz f2, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    xxswapd vs2, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    slwi r3, r3, 24
+; CHECK-P8-NEXT:    or r3, r3, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f0, f6
-; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    xxswapd vs5, vs4
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    mffprwz r4, f5
-; CHECK-P8-NEXT:    xxswapd vs7, vs6
-; CHECK-P8-NEXT:    xscvdpsxws f7, f7
-; CHECK-P8-NEXT:    vmrghb v2, v5, v2
-; CHECK-P8-NEXT:    mtvsrd v5, r3
-; CHECK-P8-NEXT:    mffprwz r3, f7
-; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    vmrghb v3, v5, v3
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    vmrglh v2, v3, v2
-; CHECK-P8-NEXT:    vmrghb v4, v5, v4
-; CHECK-P8-NEXT:    mtvsrd v5, r3
-; CHECK-P8-NEXT:    vmrghb v5, v0, v5
-; CHECK-P8-NEXT:    vmrglh v3, v5, v4
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f4
+; CHECK-P8-NEXT:    slwi r4, r4, 8
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    mtfprwz f0, r3
+; CHECK-P8-NEXT:    xxmrghw vs0, vs0, vs2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs3, 0(r3)
-; CHECK-P9-NEXT:    lxv vs2, 16(r3)
-; CHECK-P9-NEXT:    lxv vs0, 48(r3)
-; CHECK-P9-NEXT:    lxv vs1, 32(r3)
+; CHECK-P9-NEXT:    lxv vs3, 16(r3)
+; CHECK-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-P9-NEXT:    lxv vs1, 48(r3)
+; CHECK-P9-NEXT:    lxv vs0, 32(r3)
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f3
 ; CHECK-P9-NEXT:    xxswapd vs3, vs3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    mffprwz r3, f4
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    slwi r3, r3, 24
+; CHECK-P9-NEXT:    mffprwz r4, f3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f2
 ; CHECK-P9-NEXT:    xxswapd vs2, vs2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    slwi r4, r4, 16
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghb v2, v2, v3
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xscvdpsxws f2, f1
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f1
 ; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    slwi r4, r4, 8
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f1
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f2
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    mtfprwz f2, r3
+; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    mffprwz r4, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    slwi r3, r3, 24
+; CHECK-P9-NEXT:    slwi r4, r4, 16
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f1
+; CHECK-P9-NEXT:    slwi r4, r4, 8
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f0
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    mtfprwz f0, r3
+; CHECK-P9-NEXT:    xxmrghw vs0, vs0, vs2
 ; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI6_0 at toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI6_0 at toc@l
-; CHECK-BE-NEXT:    lxv vs4, 0(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f5, f3
+; CHECK-BE-NEXT:    lxv vs3, 32(r3)
+; CHECK-BE-NEXT:    lxv vs2, 48(r3)
+; CHECK-BE-NEXT:    lxv vs0, 16(r3)
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r3
-; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    slwi r3, r3, 24
+; CHECK-BE-NEXT:    mffprwz r4, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    slwi r4, r4, 16
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xxperm v2, vs5, vs4
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    slwi r4, r4, 8
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    mffprwz r4, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    or r3, r3, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xxperm v3, vs3, vs4
+; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    mtfprwz f2, r3
-; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    slwi r3, r3, 24
+; CHECK-BE-NEXT:    mffprwz r4, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    slwi r4, r4, 16
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xxperm v3, vs2, vs4
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xxperm v4, vs1, vs4
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
-; CHECK-BE-NEXT:    mffprd r3, f0
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    mffprwz r4, f1
+; CHECK-BE-NEXT:    slwi r4, r4, 8
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    mffprwz r4, f0
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    vmrgow v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x double>, ptr %0, align 64
@@ -858,260 +712,247 @@ define <16 x i8> @test16elt_signed(ptr nocapture readonly) local_unnamed_addr #2
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r4, 80
-; CHECK-P8-NEXT:    lxvd2x vs4, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r6, 16
+; CHECK-P8-NEXT:    li r5, 112
+; CHECK-P8-NEXT:    lxvd2x vs9, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs7, r3, r4
-; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs9, r3, r4
-; CHECK-P8-NEXT:    li r4, 64
-; CHECK-P8-NEXT:    lxvd2x vs12, r3, r4
-; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    li r6, 64
+; CHECK-P8-NEXT:    lxvd2x vs5, r3, r5
+; CHECK-P8-NEXT:    li r5, 32
 ; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
-; CHECK-P8-NEXT:    xxswapd vs5, vs4
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    xxswapd vs13, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    xscvdpsxws f13, f13
-; CHECK-P8-NEXT:    xxswapd vs10, vs6
-; CHECK-P8-NEXT:    xscvdpsxws f6, f6
-; CHECK-P8-NEXT:    xscvdpsxws f10, f10
-; CHECK-P8-NEXT:    xxswapd vs8, vs7
-; CHECK-P8-NEXT:    xscvdpsxws f7, f7
-; CHECK-P8-NEXT:    mffprwz r4, f7
-; CHECK-P8-NEXT:    xscvdpsxws f8, f8
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f3
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    lxvd2x vs8, r3, r5
+; CHECK-P8-NEXT:    lxvd2x vs12, r3, r6
+; CHECK-P8-NEXT:    lxvd2x vs13, r3, r4
 ; CHECK-P8-NEXT:    xxswapd vs11, vs9
 ; CHECK-P8-NEXT:    xscvdpsxws f9, f9
-; CHECK-P8-NEXT:    mffprwz r3, f9
-; CHECK-P8-NEXT:    mtvsrd v0, r3
 ; CHECK-P8-NEXT:    xscvdpsxws f11, f11
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f8
-; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    xxswapd v2, vs12
-; CHECK-P8-NEXT:    xscvdpsxws f12, f12
-; CHECK-P8-NEXT:    mffprwz r3, f12
-; CHECK-P8-NEXT:    mtvsrd v6, r3
-; CHECK-P8-NEXT:    mffprwz r3, f5
-; CHECK-P8-NEXT:    xscvdpsxws v2, v2
-; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    mffprwz r3, f11
-; CHECK-P8-NEXT:    xxswapd v3, vs2
-; CHECK-P8-NEXT:    xscvdpsxws v3, v3
-; CHECK-P8-NEXT:    mffprwz r4, f10
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xxswapd vs1, vs0
+; CHECK-P8-NEXT:    xxswapd vs3, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxswapd vs7, vs5
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    xxswapd vs6, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghb v4, v8, v4
-; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    mfvsrwz r3, v2
-; CHECK-P8-NEXT:    mtvsrd v2, r4
-; CHECK-P8-NEXT:    mffprwz r4, f13
-; CHECK-P8-NEXT:    vmrghb v5, v9, v5
-; CHECK-P8-NEXT:    vmrghb v0, v8, v0
-; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    mfvsrwz r3, v3
-; CHECK-P8-NEXT:    vmrglh v4, v5, v4
-; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    vmrghb v2, v2, v1
-; CHECK-P8-NEXT:    vmrghb v1, v8, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r3
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvdpsxws f7, f7
+; CHECK-P8-NEXT:    xxswapd vs4, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xxswapd vs10, vs8
+; CHECK-P8-NEXT:    xscvdpsxws f8, f8
+; CHECK-P8-NEXT:    xxswapd v2, vs12
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xscvdpsxws f10, f10
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    mffprwz r5, f5
+; CHECK-P8-NEXT:    xxswapd v3, vs13
 ; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    vmrglh v2, v2, v0
-; CHECK-P8-NEXT:    vmrghb v3, v3, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r3
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    vmrglh v3, v3, v1
-; CHECK-P8-NEXT:    vmrghb v6, v6, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r3
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v4
-; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    vmrghb v7, v7, v8
-; CHECK-P8-NEXT:    vmrglh v5, v7, v6
-; CHECK-P8-NEXT:    xxmrglw vs1, v5, v3
-; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f2, v3
+; CHECK-P8-NEXT:    mffprwz r6, f4
+; CHECK-P8-NEXT:    slwi r4, r4, 16
+; CHECK-P8-NEXT:    slwi r5, r5, 16
+; CHECK-P8-NEXT:    slwi r3, r3, 16
+; CHECK-P8-NEXT:    slwi r6, r6, 24
+; CHECK-P8-NEXT:    or r3, r6, r3
+; CHECK-P8-NEXT:    mffprwz r6, f3
+; CHECK-P8-NEXT:    slwi r6, r6, 24
+; CHECK-P8-NEXT:    or r4, r6, r4
+; CHECK-P8-NEXT:    mffprwz r6, f7
+; CHECK-P8-NEXT:    slwi r6, r6, 24
+; CHECK-P8-NEXT:    or r5, r6, r5
+; CHECK-P8-NEXT:    mffprwz r6, f10
+; CHECK-P8-NEXT:    slwi r6, r6, 8
+; CHECK-P8-NEXT:    or r3, r3, r6
+; CHECK-P8-NEXT:    mffprwz r6, f11
+; CHECK-P8-NEXT:    slwi r6, r6, 8
+; CHECK-P8-NEXT:    or r4, r4, r6
+; CHECK-P8-NEXT:    mffprwz r6, f8
+; CHECK-P8-NEXT:    or r3, r3, r6
+; CHECK-P8-NEXT:    mffprwz r6, f9
+; CHECK-P8-NEXT:    or r4, r4, r6
+; CHECK-P8-NEXT:    rldimi r4, r3, 32, 0
+; CHECK-P8-NEXT:    mffprwz r3, f2
+; CHECK-P8-NEXT:    xscvdpsxws f2, f13
+; CHECK-P8-NEXT:    mtfprd f1, r4
+; CHECK-P8-NEXT:    slwi r3, r3, 8
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvdpsxws f2, f6
+; CHECK-P8-NEXT:    or r3, r5, r3
+; CHECK-P8-NEXT:    mffprwz r5, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, v2
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    slwi r5, r5, 16
+; CHECK-P8-NEXT:    slwi r4, r4, 24
+; CHECK-P8-NEXT:    or r4, r4, r5
+; CHECK-P8-NEXT:    mffprwz r5, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f12
+; CHECK-P8-NEXT:    slwi r5, r5, 8
+; CHECK-P8-NEXT:    or r4, r4, r5
+; CHECK-P8-NEXT:    mffprwz r5, f0
+; CHECK-P8-NEXT:    or r4, r4, r5
+; CHECK-P8-NEXT:    rldimi r4, r3, 32, 0
+; CHECK-P8-NEXT:    mtfprd f0, r4
+; CHECK-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs7, 0(r3)
-; CHECK-P9-NEXT:    lxv vs6, 16(r3)
-; CHECK-P9-NEXT:    lxv vs0, 112(r3)
-; CHECK-P9-NEXT:    lxv vs1, 96(r3)
+; CHECK-P9-NEXT:    lxv vs7, 48(r3)
+; CHECK-P9-NEXT:    lxv vs6, 32(r3)
+; CHECK-P9-NEXT:    lxv vs0, 64(r3)
+; CHECK-P9-NEXT:    lxv vs1, 80(r3)
 ; CHECK-P9-NEXT:    xscvdpsxws f8, f7
 ; CHECK-P9-NEXT:    xxswapd vs7, vs7
-; CHECK-P9-NEXT:    lxv vs2, 80(r3)
-; CHECK-P9-NEXT:    lxv vs3, 64(r3)
-; CHECK-P9-NEXT:    lxv vs4, 48(r3)
-; CHECK-P9-NEXT:    lxv vs5, 32(r3)
+; CHECK-P9-NEXT:    lxv vs2, 96(r3)
+; CHECK-P9-NEXT:    lxv vs3, 112(r3)
+; CHECK-P9-NEXT:    lxv vs4, 0(r3)
+; CHECK-P9-NEXT:    lxv vs5, 16(r3)
 ; CHECK-P9-NEXT:    xscvdpsxws f7, f7
 ; CHECK-P9-NEXT:    mffprwz r3, f8
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    mffprwz r3, f7
+; CHECK-P9-NEXT:    slwi r3, r3, 24
+; CHECK-P9-NEXT:    mffprwz r4, f7
 ; CHECK-P9-NEXT:    xscvdpsxws f7, f6
 ; CHECK-P9-NEXT:    xxswapd vs6, vs6
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    slwi r4, r4, 16
 ; CHECK-P9-NEXT:    xscvdpsxws f6, f6
-; CHECK-P9-NEXT:    vmrghb v2, v2, v3
-; CHECK-P9-NEXT:    mffprwz r3, f7
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f6
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f7
+; CHECK-P9-NEXT:    slwi r4, r4, 8
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f6
 ; CHECK-P9-NEXT:    xscvdpsxws f6, f5
 ; CHECK-P9-NEXT:    xxswapd vs5, vs5
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    or r3, r3, r4
 ; CHECK-P9-NEXT:    xscvdpsxws f5, f5
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r3, f6
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f5
+; CHECK-P9-NEXT:    mffprwz r4, f6
+; CHECK-P9-NEXT:    slwi r4, r4, 24
+; CHECK-P9-NEXT:    mffprwz r5, f5
 ; CHECK-P9-NEXT:    xscvdpsxws f5, f4
 ; CHECK-P9-NEXT:    xxswapd vs4, vs4
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    slwi r5, r5, 16
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r3, f5
-; CHECK-P9-NEXT:    xscvdpsxws f5, f3
+; CHECK-P9-NEXT:    or r4, r4, r5
+; CHECK-P9-NEXT:    mffprwz r5, f5
+; CHECK-P9-NEXT:    slwi r5, r5, 8
+; CHECK-P9-NEXT:    or r4, r4, r5
+; CHECK-P9-NEXT:    mffprwz r5, f4
+; CHECK-P9-NEXT:    xscvdpsxws f4, f3
 ; CHECK-P9-NEXT:    xxswapd vs3, vs3
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    mffprwz r3, f4
+; CHECK-P9-NEXT:    or r4, r4, r5
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
-; CHECK-P9-NEXT:    mffprwz r3, f5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    xxmrglw vs4, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v2, r3
-; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    rldimi r4, r3, 32, 0
+; CHECK-P9-NEXT:    mffprwz r3, f4
+; CHECK-P9-NEXT:    slwi r3, r3, 24
+; CHECK-P9-NEXT:    mffprwz r5, f3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f2
 ; CHECK-P9-NEXT:    xxswapd vs2, vs2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    slwi r5, r5, 16
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghb v2, v2, v3
-; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    or r3, r3, r5
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    slwi r5, r5, 8
+; CHECK-P9-NEXT:    or r3, r3, r5
+; CHECK-P9-NEXT:    mffprwz r5, f2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f1
 ; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    or r3, r3, r5
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    mffprwz r3, f1
+; CHECK-P9-NEXT:    mffprwz r5, f2
+; CHECK-P9-NEXT:    slwi r5, r5, 24
+; CHECK-P9-NEXT:    mffprwz r6, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    slwi r6, r6, 16
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs4
+; CHECK-P9-NEXT:    or r5, r5, r6
+; CHECK-P9-NEXT:    mffprwz r6, f1
+; CHECK-P9-NEXT:    slwi r6, r6, 8
+; CHECK-P9-NEXT:    or r5, r5, r6
+; CHECK-P9-NEXT:    mffprwz r6, f0
+; CHECK-P9-NEXT:    or r5, r5, r6
+; CHECK-P9-NEXT:    rldimi r5, r3, 32, 0
+; CHECK-P9-NEXT:    mtvsrdd v2, r5, r4
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs7, 112(r3)
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    xscvdpsxws f9, f7
+; CHECK-BE-NEXT:    lxv vs7, 64(r3)
+; CHECK-BE-NEXT:    lxv vs6, 80(r3)
+; CHECK-BE-NEXT:    lxv vs0, 48(r3)
+; CHECK-BE-NEXT:    lxv vs1, 32(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f8, f7
 ; CHECK-BE-NEXT:    xxswapd vs7, vs7
-; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    lxv vs4, 64(r3)
-; CHECK-BE-NEXT:    lxv vs5, 80(r3)
-; CHECK-BE-NEXT:    lxv vs6, 96(r3)
-; CHECK-BE-NEXT:    addis r3, r2, .LCPI7_0 at toc@ha
-; CHECK-BE-NEXT:    addi r3, r3, .LCPI7_0 at toc@l
-; CHECK-BE-NEXT:    lxv vs8, 0(r3)
+; CHECK-BE-NEXT:    lxv vs2, 16(r3)
+; CHECK-BE-NEXT:    lxv vs3, 0(r3)
+; CHECK-BE-NEXT:    lxv vs4, 112(r3)
+; CHECK-BE-NEXT:    lxv vs5, 96(r3)
 ; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    mffprwz r3, f9
-; CHECK-BE-NEXT:    mtfprwz f9, r3
-; CHECK-BE-NEXT:    mffprwz r3, f7
+; CHECK-BE-NEXT:    mffprwz r3, f8
+; CHECK-BE-NEXT:    slwi r3, r3, 24
+; CHECK-BE-NEXT:    mffprwz r4, f7
 ; CHECK-BE-NEXT:    xscvdpsxws f7, f6
 ; CHECK-BE-NEXT:    xxswapd vs6, vs6
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    slwi r4, r4, 16
 ; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xxperm v2, vs9, vs8
-; CHECK-BE-NEXT:    mffprwz r3, f7
-; CHECK-BE-NEXT:    mtfprwz f7, r3
-; CHECK-BE-NEXT:    mffprwz r3, f6
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    mffprwz r4, f7
+; CHECK-BE-NEXT:    slwi r4, r4, 8
+; CHECK-BE-NEXT:    or r3, r3, r4
+; CHECK-BE-NEXT:    mffprwz r4, f6
 ; CHECK-BE-NEXT:    xscvdpsxws f6, f5
 ; CHECK-BE-NEXT:    xxswapd vs5, vs5
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    or r3, r3, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xxperm v3, vs7, vs8
-; CHECK-BE-NEXT:    mffprwz r3, f6
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    mtfprwz f6, r3
-; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    mffprwz r4, f6
+; CHECK-BE-NEXT:    slwi r4, r4, 24
+; CHECK-BE-NEXT:    mffprwz r5, f5
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f4
 ; CHECK-BE-NEXT:    xxswapd vs4, vs4
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    slwi r5, r5, 16
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xxperm v3, vs6, vs8
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r3
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xxperm v4, vs5, vs8
-; CHECK-BE-NEXT:    xscvdpsxws f5, f3
+; CHECK-BE-NEXT:    or r4, r4, r5
+; CHECK-BE-NEXT:    mffprwz r5, f5
+; CHECK-BE-NEXT:    slwi r5, r5, 8
+; CHECK-BE-NEXT:    or r4, r4, r5
+; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
 ; CHECK-BE-NEXT:    xxswapd vs3, vs3
+; CHECK-BE-NEXT:    or r4, r4, r5
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghw vs4, v3, v2
-; CHECK-BE-NEXT:    mffprwz r3, f5
-; CHECK-BE-NEXT:    mtfprwz f5, r3
-; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    rldimi r4, r3, 32, 0
+; CHECK-BE-NEXT:    mffprwz r3, f4
+; CHECK-BE-NEXT:    slwi r3, r3, 24
+; CHECK-BE-NEXT:    mffprwz r5, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-NEXT:    slwi r5, r5, 16
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xxperm v2, vs5, vs8
-; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtfprwz f3, r3
-; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    or r3, r3, r5
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    slwi r5, r5, 8
+; CHECK-BE-NEXT:    or r3, r3, r5
+; CHECK-BE-NEXT:    mffprwz r5, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    or r3, r3, r5
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xxperm v3, vs3, vs8
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    mtfprwz f2, r3
-; CHECK-BE-NEXT:    mffprwz r3, f1
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    slwi r5, r5, 24
+; CHECK-BE-NEXT:    mffprwz r6, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    slwi r6, r6, 16
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xxperm v3, vs2, vs8
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtfprwz f1, r3
-; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xxperm v4, vs1, vs8
-; CHECK-BE-NEXT:    vmrghh v3, v4, v3
-; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
-; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs4
+; CHECK-BE-NEXT:    or r5, r5, r6
+; CHECK-BE-NEXT:    mffprwz r6, f1
+; CHECK-BE-NEXT:    slwi r6, r6, 8
+; CHECK-BE-NEXT:    or r5, r5, r6
+; CHECK-BE-NEXT:    mffprwz r6, f0
+; CHECK-BE-NEXT:    or r5, r5, r6
+; CHECK-BE-NEXT:    rldimi r5, r3, 32, 0
+; CHECK-BE-NEXT:    mtvsrdd v2, r5, r4
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x double>, ptr %0, align 128
diff --git a/llvm/test/CodeGen/PowerPC/vec_int_ext.ll b/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
index be1dc57bbf1ff22..4b82d9ebb4c1342 100644
--- a/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
@@ -288,91 +288,80 @@ entry:
 define <8 x i16> @testInvalidExtend(<16 x i8> %a) {
 ; CHECK-LE-LABEL: testInvalidExtend:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    li 3, 0
 ; CHECK-LE-NEXT:    li 4, 2
-; CHECK-LE-NEXT:    li 5, 4
 ; CHECK-LE-NEXT:    li 6, 6
-; CHECK-LE-NEXT:    vextubrx 3, 3, 2
+; CHECK-LE-NEXT:    li 3, 0
+; CHECK-LE-NEXT:    li 5, 4
 ; CHECK-LE-NEXT:    vextubrx 4, 4, 2
-; CHECK-LE-NEXT:    vextubrx 5, 5, 2
 ; CHECK-LE-NEXT:    vextubrx 6, 6, 2
-; CHECK-LE-NEXT:    li 7, 8
+; CHECK-LE-NEXT:    vextubrx 3, 3, 2
+; CHECK-LE-NEXT:    vextubrx 5, 5, 2
 ; CHECK-LE-NEXT:    li 8, 10
-; CHECK-LE-NEXT:    li 9, 12
 ; CHECK-LE-NEXT:    li 10, 14
-; CHECK-LE-NEXT:    extsb 3, 3
+; CHECK-LE-NEXT:    li 7, 8
+; CHECK-LE-NEXT:    li 9, 12
 ; CHECK-LE-NEXT:    extsb 4, 4
-; CHECK-LE-NEXT:    extsb 5, 5
 ; CHECK-LE-NEXT:    extsb 6, 6
-; CHECK-LE-NEXT:    vextubrx 7, 7, 2
 ; CHECK-LE-NEXT:    vextubrx 8, 8, 2
-; CHECK-LE-NEXT:    extsb 7, 7
+; CHECK-LE-NEXT:    vextubrx 10, 10, 2
+; CHECK-LE-NEXT:    extsb 3, 3
+; CHECK-LE-NEXT:    extsb 5, 5
 ; CHECK-LE-NEXT:    extsb 8, 8
-; CHECK-LE-NEXT:    mtvsrd 35, 4
+; CHECK-LE-NEXT:    extsb 10, 10
+; CHECK-LE-NEXT:    slwi 6, 6, 16
+; CHECK-LE-NEXT:    slwi 4, 4, 16
+; CHECK-LE-NEXT:    vextubrx 7, 7, 2
 ; CHECK-LE-NEXT:    vextubrx 9, 9, 2
-; CHECK-LE-NEXT:    vextubrx 10, 10, 2
-; CHECK-LE-NEXT:    mtvsrd 34, 3
-; CHECK-LE-NEXT:    mtvsrd 36, 6
+; CHECK-LE-NEXT:    extsb 7, 7
 ; CHECK-LE-NEXT:    extsb 9, 9
-; CHECK-LE-NEXT:    extsb 10, 10
-; CHECK-LE-NEXT:    vmrghh 2, 3, 2
-; CHECK-LE-NEXT:    mtvsrd 35, 5
-; CHECK-LE-NEXT:    vmrghh 3, 4, 3
-; CHECK-LE-NEXT:    mtvsrd 36, 10
-; CHECK-LE-NEXT:    xxmrglw 0, 35, 34
-; CHECK-LE-NEXT:    mtvsrd 34, 7
-; CHECK-LE-NEXT:    mtvsrd 35, 8
-; CHECK-LE-NEXT:    vmrghh 2, 3, 2
-; CHECK-LE-NEXT:    mtvsrd 35, 9
-; CHECK-LE-NEXT:    vmrghh 3, 4, 3
-; CHECK-LE-NEXT:    xxmrglw 1, 35, 34
-; CHECK-LE-NEXT:    xxmrgld 34, 1, 0
+; CHECK-LE-NEXT:    or 5, 6, 5
+; CHECK-LE-NEXT:    or 3, 4, 3
+; CHECK-LE-NEXT:    slwi 4, 10, 16
+; CHECK-LE-NEXT:    rldimi 3, 5, 32, 0
+; CHECK-LE-NEXT:    slwi 5, 8, 16
+; CHECK-LE-NEXT:    or 4, 4, 9
+; CHECK-LE-NEXT:    or 5, 5, 7
+; CHECK-LE-NEXT:    rldimi 5, 4, 32, 0
+; CHECK-LE-NEXT:    mtvsrdd 34, 5, 3
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testInvalidExtend:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    li 9, 12
-; CHECK-BE-NEXT:    li 10, 14
+; CHECK-BE-NEXT:    li 3, 0
 ; CHECK-BE-NEXT:    li 7, 8
-; CHECK-BE-NEXT:    li 8, 10
-; CHECK-BE-NEXT:    vextublx 9, 9, 2
-; CHECK-BE-NEXT:    vextublx 10, 10, 2
-; CHECK-BE-NEXT:    vextublx 7, 7, 2
-; CHECK-BE-NEXT:    vextublx 8, 8, 2
+; CHECK-BE-NEXT:    li 4, 2
 ; CHECK-BE-NEXT:    li 5, 4
+; CHECK-BE-NEXT:    vextublx 3, 3, 2
+; CHECK-BE-NEXT:    vextublx 7, 7, 2
+; CHECK-BE-NEXT:    li 8, 10
+; CHECK-BE-NEXT:    li 9, 12
+; CHECK-BE-NEXT:    vextublx 4, 4, 2
 ; CHECK-BE-NEXT:    li 6, 6
-; CHECK-BE-NEXT:    li 3, 0
-; CHECK-BE-NEXT:    li 4, 2
-; CHECK-BE-NEXT:    extsb 9, 9
-; CHECK-BE-NEXT:    extsb 10, 10
-; CHECK-BE-NEXT:    extsb 7, 7
-; CHECK-BE-NEXT:    extsb 8, 8
+; CHECK-BE-NEXT:    li 10, 14
+; CHECK-BE-NEXT:    extsb 3, 3
 ; CHECK-BE-NEXT:    vextublx 5, 5, 2
+; CHECK-BE-NEXT:    vextublx 8, 8, 2
+; CHECK-BE-NEXT:    vextublx 9, 9, 2
+; CHECK-BE-NEXT:    extsb 7, 7
 ; CHECK-BE-NEXT:    vextublx 6, 6, 2
+; CHECK-BE-NEXT:    extsb 4, 4
 ; CHECK-BE-NEXT:    extsb 5, 5
+; CHECK-BE-NEXT:    extsb 8, 8
+; CHECK-BE-NEXT:    extsb 9, 9
 ; CHECK-BE-NEXT:    extsb 6, 6
-; CHECK-BE-NEXT:    mtfprwz 1, 9
-; CHECK-BE-NEXT:    addis 9, 2, .LCPI11_0 at toc@ha
-; CHECK-BE-NEXT:    mtfprwz 0, 10
-; CHECK-BE-NEXT:    mtfprwz 3, 7
-; CHECK-BE-NEXT:    vextublx 3, 3, 2
-; CHECK-BE-NEXT:    extsb 3, 3
-; CHECK-BE-NEXT:    mtfprwz 4, 3
-; CHECK-BE-NEXT:    addi 9, 9, .LCPI11_0 at toc@l
-; CHECK-BE-NEXT:    vextublx 4, 4, 2
-; CHECK-BE-NEXT:    extsb 4, 4
-; CHECK-BE-NEXT:    lxv 2, 0(9)
-; CHECK-BE-NEXT:    xxperm 0, 1, 2
-; CHECK-BE-NEXT:    mtfprwz 1, 8
-; CHECK-BE-NEXT:    xxperm 1, 3, 2
-; CHECK-BE-NEXT:    mtfprwz 3, 5
-; CHECK-BE-NEXT:    xxmrghw 0, 1, 0
-; CHECK-BE-NEXT:    mtfprwz 1, 6
-; CHECK-BE-NEXT:    xxperm 1, 3, 2
-; CHECK-BE-NEXT:    mtfprwz 3, 4
-; CHECK-BE-NEXT:    xxperm 3, 4, 2
-; CHECK-BE-NEXT:    xxmrghw 1, 3, 1
-; CHECK-BE-NEXT:    xxmrghd 34, 1, 0
+; CHECK-BE-NEXT:    slwi 7, 7, 16
+; CHECK-BE-NEXT:    vextublx 10, 10, 2
+; CHECK-BE-NEXT:    slwi 3, 3, 16
+; CHECK-BE-NEXT:    extsb 10, 10
+; CHECK-BE-NEXT:    or 7, 7, 8
+; CHECK-BE-NEXT:    slwi 8, 9, 16
+; CHECK-BE-NEXT:    or 3, 3, 4
+; CHECK-BE-NEXT:    slwi 4, 5, 16
+; CHECK-BE-NEXT:    or 8, 8, 10
+; CHECK-BE-NEXT:    or 4, 4, 6
+; CHECK-BE-NEXT:    rldimi 8, 7, 32, 0
+; CHECK-BE-NEXT:    rldimi 4, 3, 32, 0
+; CHECK-BE-NEXT:    mtvsrdd 34, 4, 8
 ; CHECK-BE-NEXT:    blr
 entry:
 



More information about the llvm-commits mailing list