[llvm] 335e8bf - [PowerPC] emit VSX instructions instead of VMX instructions for vector loads and stores

Quinn Pham via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 15 10:06:10 PDT 2022


Author: Quinn Pham
Date: 2022-06-15T12:06:04-05:00
New Revision: 335e8bf1008fff5723ba36f27350eb6d5e7f187c

URL: https://github.com/llvm/llvm-project/commit/335e8bf1008fff5723ba36f27350eb6d5e7f187c
DIFF: https://github.com/llvm/llvm-project/commit/335e8bf1008fff5723ba36f27350eb6d5e7f187c.diff

LOG: [PowerPC] emit VSX instructions instead of VMX instructions for vector loads and stores

This patch changes the PowerPC backend to generate VSX load/store instructions
for all vector loads/stores on Power8 and earlier  (LE) instead of VMX
load/store instructions. The reason for this change is because VMX instructions
require the vector to be 16-byte aligned. So, a vector load/store will fail with
VMX instructions if the vector is misaligned. Also, `gcc` generates VSX
instructions in this situation which allow for unaligned access but require a
swap instruction after loading/before storing. This is not an issue for BE
because we already emit VSX instructions since no swap is required. And this is
not an issue on Power9 and up since we have access to `lxv[x]`/`stxv[x]` which
allow for unaligned access and do not require swaps.

This patch also delays the VSX load/store for LE combines until after
LegalizeOps to prioritize other load/store combines.

Reviewed By: #powerpc, stefanp

Differential Revision: https://reviews.llvm.org/D127309

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll
    llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
    llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll
    llvm/test/CodeGen/PowerPC/build-vector-tests.ll
    llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
    llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
    llvm/test/CodeGen/PowerPC/extract-and-store.ll
    llvm/test/CodeGen/PowerPC/f128-aggregates.ll
    llvm/test/CodeGen/PowerPC/f128-arith.ll
    llvm/test/CodeGen/PowerPC/f128-compare.ll
    llvm/test/CodeGen/PowerPC/f128-conv.ll
    llvm/test/CodeGen/PowerPC/f128-fma.ll
    llvm/test/CodeGen/PowerPC/f128-passByValue.ll
    llvm/test/CodeGen/PowerPC/f128-rounding.ll
    llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll
    llvm/test/CodeGen/PowerPC/float-logic-ops.ll
    llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
    llvm/test/CodeGen/PowerPC/legalize-vaarg.ll
    llvm/test/CodeGen/PowerPC/load-and-splat.ll
    llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll
    llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
    llvm/test/CodeGen/PowerPC/non-debug-mi-search-frspxsrsp.ll
    llvm/test/CodeGen/PowerPC/pcrel_ldst.ll
    llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll
    llvm/test/CodeGen/PowerPC/pr25080.ll
    llvm/test/CodeGen/PowerPC/recipest.ll
    llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
    llvm/test/CodeGen/PowerPC/sat-add.ll
    llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
    llvm/test/CodeGen/PowerPC/signbit-shift.ll
    llvm/test/CodeGen/PowerPC/store_fptoi.ll
    llvm/test/CodeGen/PowerPC/test-vector-insert.ll
    llvm/test/CodeGen/PowerPC/toc-float.ll
    llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
    llvm/test/CodeGen/PowerPC/vavg.ll
    llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll
    llvm/test/CodeGen/PowerPC/vec-itofp.ll
    llvm/test/CodeGen/PowerPC/vec-trunc.ll
    llvm/test/CodeGen/PowerPC/vec-trunc2.ll
    llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll
    llvm/test/CodeGen/PowerPC/vec_constants.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll
    llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll
    llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
    llvm/test/CodeGen/PowerPC/vector-ldst.ll
    llvm/test/CodeGen/PowerPC/vector-rotates.ll
    llvm/test/CodeGen/PowerPC/vselect-constants.ll
    llvm/test/CodeGen/PowerPC/vsx-ldst.ll
    llvm/test/CodeGen/PowerPC/vsx.ll
    llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
    llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 86258a6ad2eeb..6d451f8499294 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14552,6 +14552,11 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
 // builtins) into loads with swaps.
 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
+  // Delay VSX load for LE combine until after LegalizeOps to prioritize other
+  // load combines.
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
   SelectionDAG &DAG = DCI.DAG;
   SDLoc dl(N);
   SDValue Chain;
@@ -14586,13 +14591,6 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
 
   MVT VecTy = N->getValueType(0).getSimpleVT();
 
-  // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
-  // aligned and the type is a vector with elements up to 4 bytes
-  if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
-      VecTy.getScalarSizeInBits() <= 32) {
-    return SDValue();
-  }
-
   SDValue LoadOps[] = { Chain, Base };
   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                          DAG.getVTList(MVT::v2f64, MVT::Other),
@@ -14620,6 +14618,11 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
 // builtins) into stores with swaps.
 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
+  // Delay VSX store for LE combine until after LegalizeOps to prioritize other
+  // store combines.
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
   SelectionDAG &DAG = DCI.DAG;
   SDLoc dl(N);
   SDValue Chain;
@@ -14657,13 +14660,6 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
   SDValue Src = N->getOperand(SrcOpnd);
   MVT VecTy = Src.getValueType().getSimpleVT();
 
-  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
-  // aligned and the type is a vector with elements up to 4 bytes
-  if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
-      VecTy.getScalarSizeInBits() <= 32) {
-    return SDValue();
-  }
-
   // All stores are done as v2f64 and possible bit cast.
   if (VecTy != MVT::v2f64) {
     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);

diff  --git a/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll b/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll
index a96916807ad42..6d64845e81d55 100644
--- a/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll
+++ b/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll
@@ -25,12 +25,12 @@ define dso_local signext i32 @test_32byte_vector() nounwind {
 ; CHECK-LE-NEXT:    addis r4, r2, .LCPI0_1 at toc@ha
 ; CHECK-LE-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
 ; CHECK-LE-NEXT:    addi r4, r4, .LCPI0_1 at toc@l
-; CHECK-LE-NEXT:    lvx v2, 0, r3
-; CHECK-LE-NEXT:    lvx v3, 0, r4
+; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-LE-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-LE-NEXT:    addi r4, r1, 48
 ; CHECK-LE-NEXT:    addi r3, r1, 32
-; CHECK-LE-NEXT:    stvx v2, 0, r4
-; CHECK-LE-NEXT:    stvx v3, 0, r3
+; CHECK-LE-NEXT:    stxvd2x vs0, 0, r4
+; CHECK-LE-NEXT:    stxvd2x vs1, 0, r3
 ; CHECK-LE-NEXT:    bl test
 ; CHECK-LE-NEXT:    nop
 ; CHECK-LE-NEXT:    lwa r3, 32(r1)
@@ -99,9 +99,9 @@ define dso_local signext i32 @test_32byte_aligned_vector() nounwind {
 ; CHECK-LE-NEXT:    stdux r1, r1, r0
 ; CHECK-LE-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
 ; CHECK-LE-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
-; CHECK-LE-NEXT:    lvx v2, 0, r3
+; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-NEXT:    addi r3, r1, 32
-; CHECK-LE-NEXT:    stvx v2, 0, r3
+; CHECK-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-NEXT:    bl test1
 ; CHECK-LE-NEXT:    nop
 ; CHECK-LE-NEXT:    lwa r3, 32(r1)
@@ -158,7 +158,69 @@ define dso_local void @test_Array() nounwind {
 ; CHECK-OPT-NEXT: entry:
 ; CHECK-OPT-NEXT: %Arr2 = alloca [64 x i16], align 2
 ; CHECK-OPT: store <16 x i16> [[TMP0:%.*]], <16 x i16>* [[TMP0:%.*]], align 2
-
+; CHECK-LE-LABEL: test_Array:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mflr r0
+; CHECK-LE-NEXT:    std r0, 16(r1)
+; CHECK-LE-NEXT:    stdu r1, -176(r1)
+; CHECK-LE-NEXT:    addis r4, r2, Arr1 at toc@ha
+; CHECK-LE-NEXT:    li r3, 0
+; CHECK-LE-NEXT:    li r6, 65
+; CHECK-LE-NEXT:    addi r5, r1, 46
+; CHECK-LE-NEXT:    addi r4, r4, Arr1 at toc@l
+; CHECK-LE-NEXT:    stw r3, 44(r1)
+; CHECK-LE-NEXT:    addi r4, r4, -1
+; CHECK-LE-NEXT:    mtctr r6
+; CHECK-LE-NEXT:    bdz .LBB2_2
+; CHECK-LE-NEXT:    .p2align 5
+; CHECK-LE-NEXT:  .LBB2_1: # %for.body
+; CHECK-LE-NEXT:    #
+; CHECK-LE-NEXT:    lbz r6, 1(r4)
+; CHECK-LE-NEXT:    addi r7, r5, 2
+; CHECK-LE-NEXT:    addi r4, r4, 1
+; CHECK-LE-NEXT:    addi r3, r3, 1
+; CHECK-LE-NEXT:    sth r6, 2(r5)
+; CHECK-LE-NEXT:    mr r5, r7
+; CHECK-LE-NEXT:    bdnz .LBB2_1
+; CHECK-LE-NEXT:  .LBB2_2: # %for.cond.cleanup
+; CHECK-LE-NEXT:    addi r3, r1, 48
+; CHECK-LE-NEXT:    bl test_arr
+; CHECK-LE-NEXT:    nop
+; CHECK-LE-NEXT:    addi r1, r1, 176
+; CHECK-LE-NEXT:    ld r0, 16(r1)
+; CHECK-LE-NEXT:    mtlr r0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_Array:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mflr r0
+; CHECK-BE-NEXT:    std r0, 16(r1)
+; CHECK-BE-NEXT:    stdu r1, -256(r1)
+; CHECK-BE-NEXT:    addis r5, r2, Arr1 at toc@ha
+; CHECK-BE-NEXT:    li r3, 0
+; CHECK-BE-NEXT:    addi r5, r5, Arr1 at toc@l
+; CHECK-BE-NEXT:    addi r4, r1, 126
+; CHECK-BE-NEXT:    li r6, 65
+; CHECK-BE-NEXT:    stw r3, 124(r1)
+; CHECK-BE-NEXT:    addi r5, r5, -1
+; CHECK-BE-NEXT:    mtctr r6
+; CHECK-BE-NEXT:    bdz .LBB2_2
+; CHECK-BE-NEXT:  .LBB2_1: # %for.body
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    lbz r6, 1(r5)
+; CHECK-BE-NEXT:    addi r5, r5, 1
+; CHECK-BE-NEXT:    addi r3, r3, 1
+; CHECK-BE-NEXT:    sth r6, 2(r4)
+; CHECK-BE-NEXT:    addi r4, r4, 2
+; CHECK-BE-NEXT:    bdnz .LBB2_1
+; CHECK-BE-NEXT:  .LBB2_2: # %for.cond.cleanup
+; CHECK-BE-NEXT:    addi r3, r1, 128
+; CHECK-BE-NEXT:    bl test_arr
+; CHECK-BE-NEXT:    nop
+; CHECK-BE-NEXT:    addi r1, r1, 256
+; CHECK-BE-NEXT:    ld r0, 16(r1)
+; CHECK-BE-NEXT:    mtlr r0
+; CHECK-BE-NEXT:    blr
 entry:
   %Arr2 = alloca [64 x i16], align 2
   %i = alloca i32, align 4

diff  --git a/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll b/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
index 45ca1fbe9e292..6bb952f1df99b 100644
--- a/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
+++ b/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
@@ -13,8 +13,9 @@ define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) {
 ; CHECK-P8:  # %bb.0: # %entry
 ; CHECK-P8:    lfiwzx f0, 0, r3
 ; CHECK-P8:    ld r4, .LC0 at toc@l(r4)
-; CHECK-P8:    xxspltw v2, vs0, 1
-; CHECK-P8:    stvx v2, 0, r4
+; CHECK-P8:    xxspltw vs0, vs0, 1
+; CHECK-P8:    xxswapd vs0, vs0
+; CHECK-P8;    stxvd2x vs0, 0, r4
 ; CHECK-P8:    lis r4, 1024
 ; CHECK-P8:    lfiwax f0, 0, r3
 ; CHECK-P8:    addis r3, r2, .LC1 at toc@ha

diff  --git a/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll b/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll
index 51d6ba429f60d..caaef14b52527 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll
@@ -47,11 +47,13 @@ define void @test_aix_splatimm(i32 %arg, i32 %arg1, i32 %arg2) {
 ; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    mullw 4, 4, 5
 ; CHECK-NEXT:    vsplth 2, 2, 3
-; CHECK-NEXT:    stvx 2, 0, 3
+; CHECK-NEXT:    xxswapd 0, 34
 ; CHECK-NEXT:    neg 4, 4
 ; CHECK-NEXT:    mtvsrd 35, 4
+; CHECK-NEXT:    stxvd2x 0, 0, 3
 ; CHECK-NEXT:    vsplth 3, 3, 3
-; CHECK-NEXT:    stvx 3, 0, 3
+; CHECK-NEXT:    xxswapd 1, 35
+; CHECK-NEXT:    stxvd2x 1, 0, 3
 bb:
   br i1 undef, label %bb22, label %bb3
 

diff  --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index edf42bf4a9b08..90364c65da734 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -879,7 +879,8 @@ define <4 x i32> @fromDiffConstsi() {
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r3, r2, .LCPI5_0 at toc@ha
 ; P8LE-NEXT:    addi r3, r3, .LCPI5_0 at toc@l
-; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
 ; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 242, i32 -113, i32 889, i32 19>
@@ -947,12 +948,13 @@ define <4 x i32> @fromDiffMemConsDi(i32* nocapture readonly %arr) {
 ;
 ; P8LE-LABEL: fromDiffMemConsDi:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    lxvd2x vs0, 0, r3
 ; P8LE-NEXT:    addis r4, r2, .LCPI7_0 at toc@ha
-; P8LE-NEXT:    addi r3, r4, .LCPI7_0 at toc@l
-; P8LE-NEXT:    lvx v2, 0, r3
-; P8LE-NEXT:    xxswapd v3, vs0
-; P8LE-NEXT:    vperm v2, v3, v3, v2
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    addi r4, r4, .LCPI7_0 at toc@l
+; P8LE-NEXT:    lxvd2x vs1, 0, r4
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    xxswapd v3, vs1
+; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3
@@ -1059,10 +1061,11 @@ define <4 x i32> @fromDiffMemVarDi(i32* nocapture readonly %arr, i32 signext %el
 ; P8LE-NEXT:    sldi r4, r4, 2
 ; P8LE-NEXT:    addis r5, r2, .LCPI9_0 at toc@ha
 ; P8LE-NEXT:    add r3, r3, r4
+; P8LE-NEXT:    addi r4, r5, .LCPI9_0 at toc@l
 ; P8LE-NEXT:    addi r3, r3, -12
+; P8LE-NEXT:    lxvd2x vs1, 0, r4
 ; P8LE-NEXT:    lxvd2x vs0, 0, r3
-; P8LE-NEXT:    addi r3, r5, .LCPI9_0 at toc@l
-; P8LE-NEXT:    lvx v3, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs1
 ; P8LE-NEXT:    xxswapd v2, vs0
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    blr
@@ -1404,7 +1407,8 @@ define <4 x i32> @fromDiffConstsConvftoi() {
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r3, r2, .LCPI16_0 at toc@ha
 ; P8LE-NEXT:    addi r3, r3, .LCPI16_0 at toc@l
-; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
 ; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
@@ -1475,12 +1479,13 @@ define <4 x i32> @fromDiffMemConsDConvftoi(float* nocapture readonly %ptr) {
 ;
 ; P8LE-LABEL: fromDiffMemConsDConvftoi:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    lxvd2x vs0, 0, r3
 ; P8LE-NEXT:    addis r4, r2, .LCPI18_0 at toc@ha
-; P8LE-NEXT:    addi r3, r4, .LCPI18_0 at toc@l
-; P8LE-NEXT:    lvx v2, 0, r3
-; P8LE-NEXT:    xxswapd v3, vs0
-; P8LE-NEXT:    vperm v2, v3, v3, v2
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    addi r4, r4, .LCPI18_0 at toc@l
+; P8LE-NEXT:    lxvd2x vs1, 0, r4
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    xxswapd v3, vs1
+; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    xvcvspsxws v2, v2
 ; P8LE-NEXT:    blr
 entry:
@@ -1858,7 +1863,8 @@ define <4 x i32> @fromDiffConstsConvdtoi() {
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r3, r2, .LCPI25_0 at toc@ha
 ; P8LE-NEXT:    addi r3, r3, .LCPI25_0 at toc@l
-; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
 ; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
@@ -2398,7 +2404,8 @@ define <4 x i32> @fromDiffConstsui() {
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r3, r2, .LCPI37_0 at toc@ha
 ; P8LE-NEXT:    addi r3, r3, .LCPI37_0 at toc@l
-; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
 ; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 242, i32 -113, i32 889, i32 19>
@@ -2466,12 +2473,13 @@ define <4 x i32> @fromDiffMemConsDui(i32* nocapture readonly %arr) {
 ;
 ; P8LE-LABEL: fromDiffMemConsDui:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    lxvd2x vs0, 0, r3
 ; P8LE-NEXT:    addis r4, r2, .LCPI39_0 at toc@ha
-; P8LE-NEXT:    addi r3, r4, .LCPI39_0 at toc@l
-; P8LE-NEXT:    lvx v2, 0, r3
-; P8LE-NEXT:    xxswapd v3, vs0
-; P8LE-NEXT:    vperm v2, v3, v3, v2
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    addi r4, r4, .LCPI39_0 at toc@l
+; P8LE-NEXT:    lxvd2x vs1, 0, r4
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    xxswapd v3, vs1
+; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3
@@ -2578,10 +2586,11 @@ define <4 x i32> @fromDiffMemVarDui(i32* nocapture readonly %arr, i32 signext %e
 ; P8LE-NEXT:    sldi r4, r4, 2
 ; P8LE-NEXT:    addis r5, r2, .LCPI41_0 at toc@ha
 ; P8LE-NEXT:    add r3, r3, r4
+; P8LE-NEXT:    addi r4, r5, .LCPI41_0 at toc@l
 ; P8LE-NEXT:    addi r3, r3, -12
+; P8LE-NEXT:    lxvd2x vs1, 0, r4
 ; P8LE-NEXT:    lxvd2x vs0, 0, r3
-; P8LE-NEXT:    addi r3, r5, .LCPI41_0 at toc@l
-; P8LE-NEXT:    lvx v3, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs1
 ; P8LE-NEXT:    xxswapd v2, vs0
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    blr
@@ -2923,7 +2932,8 @@ define <4 x i32> @fromDiffConstsConvftoui() {
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r3, r2, .LCPI48_0 at toc@ha
 ; P8LE-NEXT:    addi r3, r3, .LCPI48_0 at toc@l
-; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
 ; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
@@ -2994,12 +3004,13 @@ define <4 x i32> @fromDiffMemConsDConvftoui(float* nocapture readonly %ptr) {
 ;
 ; P8LE-LABEL: fromDiffMemConsDConvftoui:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    lxvd2x vs0, 0, r3
 ; P8LE-NEXT:    addis r4, r2, .LCPI50_0 at toc@ha
-; P8LE-NEXT:    addi r3, r4, .LCPI50_0 at toc@l
-; P8LE-NEXT:    lvx v2, 0, r3
-; P8LE-NEXT:    xxswapd v3, vs0
-; P8LE-NEXT:    vperm v2, v3, v3, v2
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    addi r4, r4, .LCPI50_0 at toc@l
+; P8LE-NEXT:    lxvd2x vs1, 0, r4
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    xxswapd v3, vs1
+; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    xvcvspuxws v2, v2
 ; P8LE-NEXT:    blr
 entry:
@@ -3378,7 +3389,8 @@ define <4 x i32> @fromDiffConstsConvdtoui() {
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r3, r2, .LCPI57_0 at toc@ha
 ; P8LE-NEXT:    addi r3, r3, .LCPI57_0 at toc@l
-; P8LE-NEXT:    lvx v2, 0, r3
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v2, vs0
 ; P8LE-NEXT:    blr
 entry:
   ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>

diff  --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 911aa7d992327..db41105676bfe 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -83,7 +83,8 @@ define dso_local <16 x i8> @testmrghb2(<16 x i8> %a, <16 x i8> %b) local_unnamed
 ; CHECK-P7:       # %bb.0: # %entry
 ; CHECK-P7-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
 ; CHECK-P7-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
-; CHECK-P7-NEXT:    lvx v4, 0, r3
+; CHECK-P7-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P7-NEXT:    xxswapd v4, vs0
 ; CHECK-P7-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-P7-NEXT:    blr
 ;
@@ -157,7 +158,8 @@ define dso_local <16 x i8> @testmrghh2(<16 x i8> %a, <16 x i8> %b) local_unnamed
 ; CHECK-P7:       # %bb.0: # %entry
 ; CHECK-P7-NEXT:    addis r3, r2, .LCPI3_0 at toc@ha
 ; CHECK-P7-NEXT:    addi r3, r3, .LCPI3_0 at toc@l
-; CHECK-P7-NEXT:    lvx v4, 0, r3
+; CHECK-P7-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P7-NEXT:    xxswapd v4, vs0
 ; CHECK-P7-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-P7-NEXT:    blr
 ;
@@ -231,7 +233,8 @@ define dso_local <16 x i8> @testmrglb2(<16 x i8> %a, <16 x i8> %b) local_unnamed
 ; CHECK-P7:       # %bb.0: # %entry
 ; CHECK-P7-NEXT:    addis r3, r2, .LCPI5_0 at toc@ha
 ; CHECK-P7-NEXT:    addi r3, r3, .LCPI5_0 at toc@l
-; CHECK-P7-NEXT:    lvx v4, 0, r3
+; CHECK-P7-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P7-NEXT:    xxswapd v4, vs0
 ; CHECK-P7-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-P7-NEXT:    blr
 ;
@@ -305,7 +308,8 @@ define dso_local <16 x i8> @testmrglh2(<16 x i8> %a, <16 x i8> %b) local_unnamed
 ; CHECK-P7:       # %bb.0: # %entry
 ; CHECK-P7-NEXT:    addis r3, r2, .LCPI7_0 at toc@ha
 ; CHECK-P7-NEXT:    addi r3, r3, .LCPI7_0 at toc@l
-; CHECK-P7-NEXT:    lvx v4, 0, r3
+; CHECK-P7-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P7-NEXT:    xxswapd v4, vs0
 ; CHECK-P7-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-P7-NEXT:    blr
 ;
@@ -379,7 +383,8 @@ define dso_local <16 x i8> @testmrghw2(<16 x i8> %a, <16 x i8> %b) local_unnamed
 ; CHECK-P7:       # %bb.0: # %entry
 ; CHECK-P7-NEXT:    addis r3, r2, .LCPI9_0 at toc@ha
 ; CHECK-P7-NEXT:    addi r3, r3, .LCPI9_0 at toc@l
-; CHECK-P7-NEXT:    lvx v4, 0, r3
+; CHECK-P7-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P7-NEXT:    xxswapd v4, vs0
 ; CHECK-P7-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-P7-NEXT:    blr
 ;
@@ -453,7 +458,8 @@ define dso_local <16 x i8> @testmrglw2(<16 x i8> %a, <16 x i8> %b) local_unnamed
 ; CHECK-P7:       # %bb.0: # %entry
 ; CHECK-P7-NEXT:    addis r3, r2, .LCPI11_0 at toc@ha
 ; CHECK-P7-NEXT:    addi r3, r3, .LCPI11_0 at toc@l
-; CHECK-P7-NEXT:    lvx v4, 0, r3
+; CHECK-P7-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P7-NEXT:    xxswapd v4, vs0
 ; CHECK-P7-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-P7-NEXT:    blr
 ;
@@ -511,10 +517,11 @@ define dso_local <8 x i16> @testmrglb3(<8 x i8>* nocapture readonly %a) local_un
 ; CHECK-P7-NEXT:    xxlxor v4, v4, v4
 ; CHECK-P7-NEXT:    std r3, -16(r1)
 ; CHECK-P7-NEXT:    addis r3, r2, .LCPI12_0 at toc@ha
-; CHECK-P7-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P7-NEXT:    addi r3, r3, .LCPI12_0 at toc@l
-; CHECK-P7-NEXT:    lvx v3, 0, r3
+; CHECK-P7-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P7-NEXT:    lxvd2x vs1, 0, r3
 ; CHECK-P7-NEXT:    xxswapd v2, vs0
+; CHECK-P7-NEXT:    xxswapd v3, vs1
 ; CHECK-P7-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-P7-NEXT:    blr
 ;
@@ -691,8 +698,10 @@ define dso_local <16 x i8> @no_crash_bitcast(i32 %a) {
 ; CHECK-P7-NEXT:    stw r3, -16(r1)
 ; CHECK-P7-NEXT:    addi r3, r1, -16
 ; CHECK-P7-NEXT:    addi r4, r4, .LCPI14_0 at toc@l
-; CHECK-P7-NEXT:    lvx v3, 0, r3
-; CHECK-P7-NEXT:    lvx v2, 0, r4
+; CHECK-P7-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P7-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P7-NEXT:    xxswapd v3, vs1
+; CHECK-P7-NEXT:    xxswapd v2, vs0
 ; CHECK-P7-NEXT:    vperm v2, v3, v3, v2
 ; CHECK-P7-NEXT:    blr
 ;
@@ -722,7 +731,8 @@ define dso_local <4 x i32> @replace_undefs_in_splat(<4 x i32> %a) local_unnamed_
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI15_0 at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI15_0 at toc@l
-; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vmrgow v2, v3, v2
 ; CHECK-P8-NEXT:    blr
 ;
@@ -761,9 +771,11 @@ define dso_local <4 x i32> @replace_undefs_in_splat(<4 x i32> %a) local_unnamed_
 ; CHECK-P7-NEXT:    addis r3, r2, .LCPI15_0 at toc@ha
 ; CHECK-P7-NEXT:    addis r4, r2, .LCPI15_1 at toc@ha
 ; CHECK-P7-NEXT:    addi r3, r3, .LCPI15_0 at toc@l
-; CHECK-P7-NEXT:    lvx v3, 0, r3
+; CHECK-P7-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P7-NEXT:    addi r3, r4, .LCPI15_1 at toc@l
-; CHECK-P7-NEXT:    lvx v4, 0, r3
+; CHECK-P7-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P7-NEXT:    xxswapd v3, vs0
+; CHECK-P7-NEXT:    xxswapd v4, vs1
 ; CHECK-P7-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-P7-NEXT:    blr
 ;
@@ -1096,8 +1108,8 @@ entry:
 define <2 x i64> @testSplati64_1(<2 x i64>* nocapture readonly %ptr) #0 {
 ; CHECK-P8-LABEL: testSplati64_1:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    xxspltd v2, vs0, 1
+; CHECK-P8-NEXT:    addi r3, r3, 8
+; CHECK-P8-NEXT:    lxvdsx v2, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testSplati64_1:
@@ -1128,8 +1140,8 @@ define <2 x i64> @testSplati64_1(<2 x i64>* nocapture readonly %ptr) #0 {
 ;
 ; CHECK-P7-LABEL: testSplati64_1:
 ; CHECK-P7:       # %bb.0: # %entry
-; CHECK-P7-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P7-NEXT:    xxspltd v2, vs0, 1
+; CHECK-P7-NEXT:    addi r3, r3, 8
+; CHECK-P7-NEXT:    lxvdsx v2, 0, r3
 ; CHECK-P7-NEXT:    blr
 ;
 ; P8-AIX-LABEL: testSplati64_1:
@@ -1149,7 +1161,8 @@ define dso_local void @testByteSplat() #0 {
 ; CHECK-P8-NEXT:    lbzx r3, 0, r3
 ; CHECK-P8-NEXT:    mtvsrwz v2, r3
 ; CHECK-P8-NEXT:    vspltb v2, v2, 7
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testByteSplat:
@@ -1182,7 +1195,8 @@ define dso_local void @testByteSplat() #0 {
 ; CHECK-P7-NEXT:    lvx v3, 0, r3
 ; CHECK-P7-NEXT:    vperm v2, v3, v3, v2
 ; CHECK-P7-NEXT:    vspltb v2, v2, 15
-; CHECK-P7-NEXT:    stvx v2, 0, r3
+; CHECK-P7-NEXT:    xxswapd vs0, v2
+; CHECK-P7-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P7-NEXT:    blr
 ;
 ; P8-AIX-LABEL: testByteSplat:

diff  --git a/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll b/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
index 153d0206a39c9..e2536d2e8e9ff 100644
--- a/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
+++ b/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
@@ -12,9 +12,11 @@ define <16 x i8> @test_vpermxorb() local_unnamed_addr {
 ; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI0_0 at toc@ha
 ; CHECK-LE-P8-NEXT:    addis 4, 2, .LCPI0_1 at toc@ha
 ; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI0_0 at toc@l
-; CHECK-LE-P8-NEXT:    lvx 2, 0, 3
-; CHECK-LE-P8-NEXT:    addi 3, 4, .LCPI0_1 at toc@l
-; CHECK-LE-P8-NEXT:    lvx 3, 0, 3
+; CHECK-LE-P8-NEXT:    addi 4, 4, .LCPI0_1 at toc@l
+; CHECK-LE-P8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-LE-P8-NEXT:    lxvd2x 1, 0, 4
+; CHECK-LE-P8-NEXT:    xxswapd 34, 0
+; CHECK-LE-P8-NEXT:    xxswapd 35, 1
 ; CHECK-LE-P8-NEXT:    vpermxor 2, 3, 2, 2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -52,9 +54,11 @@ define <8 x i16> @test_vpermxorh() local_unnamed_addr {
 ; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI1_0 at toc@ha
 ; CHECK-LE-P8-NEXT:    addis 4, 2, .LCPI1_1 at toc@ha
 ; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI1_0 at toc@l
-; CHECK-LE-P8-NEXT:    lvx 2, 0, 3
-; CHECK-LE-P8-NEXT:    addi 3, 4, .LCPI1_1 at toc@l
-; CHECK-LE-P8-NEXT:    lvx 3, 0, 3
+; CHECK-LE-P8-NEXT:    addi 4, 4, .LCPI1_1 at toc@l
+; CHECK-LE-P8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-LE-P8-NEXT:    lxvd2x 1, 0, 4
+; CHECK-LE-P8-NEXT:    xxswapd 34, 0
+; CHECK-LE-P8-NEXT:    xxswapd 35, 1
 ; CHECK-LE-P8-NEXT:    vpermxor 2, 3, 2, 2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -91,9 +95,11 @@ define <4 x i32> @test_vpermxorw() local_unnamed_addr {
 ; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI2_0 at toc@ha
 ; CHECK-LE-P8-NEXT:    addis 4, 2, .LCPI2_1 at toc@ha
 ; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI2_0 at toc@l
-; CHECK-LE-P8-NEXT:    lvx 2, 0, 3
-; CHECK-LE-P8-NEXT:    addi 3, 4, .LCPI2_1 at toc@l
-; CHECK-LE-P8-NEXT:    lvx 3, 0, 3
+; CHECK-LE-P8-NEXT:    addi 4, 4, .LCPI2_1 at toc@l
+; CHECK-LE-P8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-LE-P8-NEXT:    lxvd2x 1, 0, 4
+; CHECK-LE-P8-NEXT:    xxswapd 34, 0
+; CHECK-LE-P8-NEXT:    xxswapd 35, 1
 ; CHECK-LE-P8-NEXT:    vpermxor 2, 3, 2, 2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -130,9 +136,11 @@ define <2 x i64> @test_vpermxord() local_unnamed_addr {
 ; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI3_0 at toc@ha
 ; CHECK-LE-P8-NEXT:    addis 4, 2, .LCPI3_1 at toc@ha
 ; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI3_0 at toc@l
-; CHECK-LE-P8-NEXT:    lvx 2, 0, 3
-; CHECK-LE-P8-NEXT:    addi 3, 4, .LCPI3_1 at toc@l
-; CHECK-LE-P8-NEXT:    lvx 3, 0, 3
+; CHECK-LE-P8-NEXT:    addi 4, 4, .LCPI3_1 at toc@l
+; CHECK-LE-P8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-LE-P8-NEXT:    lxvd2x 1, 0, 4
+; CHECK-LE-P8-NEXT:    xxswapd 34, 0
+; CHECK-LE-P8-NEXT:    xxswapd 35, 1
 ; CHECK-LE-P8-NEXT:    vpermxor 2, 3, 2, 2
 ; CHECK-LE-P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/extract-and-store.ll b/llvm/test/CodeGen/PowerPC/extract-and-store.ll
index ae680115abc65..cd8598599b86a 100644
--- a/llvm/test/CodeGen/PowerPC/extract-and-store.ll
+++ b/llvm/test/CodeGen/PowerPC/extract-and-store.ll
@@ -573,8 +573,9 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, i32* nocapture
 ; CHECK-NEXT:    xxsldwi vs1, vs34, vs34, 1
 ; CHECK-NEXT:    li r4, 20
 ; CHECK-NEXT:    addi r3, r3, .LCPI16_0 at toc@l
-; CHECK-NEXT:    lvx v3, 0, r3
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-NEXT:    li r3, 16
+; CHECK-NEXT:    xxswapd vs35, vs0
 ; CHECK-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-NEXT:    xxswapd vs0, vs35
 ; CHECK-NEXT:    stxvd2x vs0, 0, r5

diff  --git a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll
index bac79e2639c07..a1daeeb66d99a 100644
--- a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll
@@ -31,7 +31,8 @@ define fp128 @testArray_01(fp128* nocapture readonly %sa) {
 ; CHECK-P8-LABEL: testArray_01:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addi r3, r3, 32
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    blr
 
 entry:
@@ -61,7 +62,8 @@ define fp128 @testArray_02() {
 ; CHECK-P8-NEXT:    addis r3, r2, .LC0 at toc@ha
 ; CHECK-P8-NEXT:    ld r3, .LC0 at toc@l(r3)
 ; CHECK-P8-NEXT:    addi r3, r3, 32
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    blr
 
 entry:
@@ -148,11 +150,12 @@ define fp128 @testStruct_03(%struct.With9fp128params* byval(%struct.With9fp128pa
 ; CHECK-P8-NEXT:    std r4, 40(r1)
 ; CHECK-P8-NEXT:    std r5, 48(r1)
 ; CHECK-P8-NEXT:    std r6, 56(r1)
-; CHECK-P8-NEXT:    lvx v2, r12, r11
+; CHECK-P8-NEXT:    lxvd2x vs0, r12, r11
 ; CHECK-P8-NEXT:    std r7, 64(r1)
 ; CHECK-P8-NEXT:    std r8, 72(r1)
 ; CHECK-P8-NEXT:    std r9, 80(r1)
 ; CHECK-P8-NEXT:    std r10, 88(r1)
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    blr
 
 entry:
@@ -288,7 +291,8 @@ define fp128 @testMixedAggregate([3 x i128] %a.coerce) {
 ; CHECK-P8-NEXT:    addi r3, r1, -16
 ; CHECK-P8-NEXT:    std r8, -8(r1)
 ; CHECK-P8-NEXT:    std r7, -16(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    blr
 
 entry:
@@ -314,7 +318,8 @@ define fp128 @testMixedAggregate_02([4 x i128] %a.coerce) {
 ; CHECK-P8-NEXT:    addi r3, r1, -16
 ; CHECK-P8-NEXT:    std r6, -8(r1)
 ; CHECK-P8-NEXT:    std r5, -16(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    blr
 
 entry:
@@ -364,7 +369,8 @@ define fp128 @testMixedAggregate_03([4 x i128] %sa.coerce) {
 ; CHECK-P8-NEXT:    addi r4, r1, 48
 ; CHECK-P8-NEXT:    std r6, 56(r1)
 ; CHECK-P8-NEXT:    std r5, 48(r1)
-; CHECK-P8-NEXT:    lvx v31, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v31, vs0
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v2
@@ -438,11 +444,12 @@ define fp128 @testNestedAggregate(%struct.MixedC* byval(%struct.MixedC) nocaptur
 ; CHECK-P8-NEXT:    std r9, 80(r1)
 ; CHECK-P8-NEXT:    std r10, 88(r1)
 ; CHECK-P8-NEXT:    addi r7, r1, 32
-; CHECK-P8-NEXT:    lvx v2, r7, r11
+; CHECK-P8-NEXT:    lxvd2x vs0, r7, r11
 ; CHECK-P8-NEXT:    std r3, 32(r1)
 ; CHECK-P8-NEXT:    std r4, 40(r1)
 ; CHECK-P8-NEXT:    std r5, 48(r1)
 ; CHECK-P8-NEXT:    std r6, 56(r1)
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    blr
 
 entry:
@@ -468,7 +475,8 @@ define fp128 @testUnion_01([1 x i128] %a.coerce) {
 ; CHECK-P8-NEXT:    addi r5, r1, -16
 ; CHECK-P8-NEXT:    std r4, -8(r1)
 ; CHECK-P8-NEXT:    std r3, -16(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    blr
 
 entry:
@@ -494,7 +502,8 @@ define fp128 @testUnion_02([1 x i128] %a.coerce) {
 ; CHECK-P8-NEXT:    addi r5, r1, -16
 ; CHECK-P8-NEXT:    std r4, -8(r1)
 ; CHECK-P8-NEXT:    std r3, -16(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    blr
 
 entry:
@@ -520,7 +529,8 @@ define fp128 @testUnion_03([4 x i128] %a.coerce) {
 ; CHECK-P8-NEXT:    addi r3, r1, -16
 ; CHECK-P8-NEXT:    std r8, -8(r1)
 ; CHECK-P8-NEXT:    std r7, -16(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    blr
 
 entry:
@@ -594,14 +604,15 @@ define fp128 @sum_float128(i32 signext %count, ...) {
 ; CHECK-P8-NEXT:    std r5, 112(r1)
 ; CHECK-P8-NEXT:    std r6, 120(r1)
 ; CHECK-P8-NEXT:    std r7, 128(r1)
-; CHECK-P8-NEXT:    addi r3, r11, .LCPI17_0 at toc@l
+; CHECK-P8-NEXT:    addi r11, r11, .LCPI17_0 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r11
 ; CHECK-P8-NEXT:    std r8, 136(r1)
 ; CHECK-P8-NEXT:    std r9, 144(r1)
 ; CHECK-P8-NEXT:    std r10, 152(r1)
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    blt cr0, .LBB17_2
 ; CHECK-P8-NEXT:  # %bb.1: # %if.end
 ; CHECK-P8-NEXT:    addi r30, r1, 104
-; CHECK-P8-NEXT:    lvx v3, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
@@ -613,10 +624,9 @@ define fp128 @sum_float128(i32 signext %count, ...) {
 ; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    b .LBB17_3
-; CHECK-P8-NEXT:  .LBB17_2:
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:  .LBB17_3: # %cleanup
+; CHECK-P8-NEXT:    vmr v3, v2
+; CHECK-P8-NEXT:  .LBB17_2: # %cleanup
+; CHECK-P8-NEXT:    vmr v2, v3
 ; CHECK-P8-NEXT:    addi r1, r1, 64
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload

diff  --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll b/llvm/test/CodeGen/PowerPC/f128-arith.ll
index dc67d48e871a5..63b35a8d4e4c1 100644
--- a/llvm/test/CodeGen/PowerPC/f128-arith.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll
@@ -23,12 +23,14 @@ define dso_local void @qpAdd(fp128* nocapture readonly %a, fp128* nocapture %res
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -59,12 +61,14 @@ define dso_local void @qpSub(fp128* nocapture readonly %a, fp128* nocapture %res
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    bl __subkf3
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -95,12 +99,14 @@ define dso_local void @qpMul(fp128* nocapture readonly %a, fp128* nocapture %res
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -131,12 +137,14 @@ define dso_local void @qpDiv(fp128* nocapture readonly %a, fp128* nocapture %res
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    bl __divkf3
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -161,9 +169,9 @@ define dso_local void @testLdNSt(i8* nocapture readonly %PtrC, fp128* nocapture
 ; CHECK-P8-LABEL: testLdNSt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addi r3, r3, 4
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    addi r3, r4, 8
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i8, i8* %PtrC, i64 4
@@ -193,11 +201,13 @@ define dso_local void @qpSqrt(fp128* nocapture readonly %a, fp128* nocapture %re
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl sqrtf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -223,18 +233,18 @@ define dso_local void @qpCpsgn(fp128* nocapture readonly %a, fp128* nocapture re
 ;
 ; CHECK-P8-LABEL: qpCpsgn:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
-; CHECK-P8-NEXT:    addi r3, r1, -16
-; CHECK-P8-NEXT:    addi r4, r1, -32
-; CHECK-P8-NEXT:    stvx v3, 0, r3
-; CHECK-P8-NEXT:    stvx v2, 0, r4
-; CHECK-P8-NEXT:    lbz r3, -1(r1)
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addi r4, r1, -16
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    addi r3, r1, -32
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lbz r4, -1(r1)
 ; CHECK-P8-NEXT:    lbz r6, -17(r1)
-; CHECK-P8-NEXT:    rlwimi r6, r3, 0, 0, 24
+; CHECK-P8-NEXT:    rlwimi r6, r4, 0, 0, 24
 ; CHECK-P8-NEXT:    stb r6, -17(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r4
-; CHECK-P8-NEXT:    stvx v2, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r5
 ; CHECK-P8-NEXT:    blr
                      fp128* nocapture %res) {
 entry:
@@ -257,14 +267,14 @@ define dso_local void @qpAbs(fp128* nocapture readonly %a, fp128* nocapture %res
 ;
 ; CHECK-P8-LABEL: qpAbs:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    addi r3, r1, -16
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lbz r5, -1(r1)
 ; CHECK-P8-NEXT:    clrlwi r5, r5, 25
 ; CHECK-P8-NEXT:    stb r5, -1(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    stvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    blr
 entry:
   %0 = load fp128, fp128* %a, align 16
@@ -285,20 +295,20 @@ define dso_local void @qpNAbs(fp128* nocapture readonly %a, fp128* nocapture %re
 ;
 ; CHECK-P8-LABEL: qpNAbs:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    addi r3, r1, -32
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lbz r5, -17(r1)
 ; CHECK-P8-NEXT:    clrlwi r5, r5, 25
 ; CHECK-P8-NEXT:    stb r5, -17(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    addi r3, r1, -16
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lbz r5, -1(r1)
 ; CHECK-P8-NEXT:    xori r5, r5, 128
 ; CHECK-P8-NEXT:    stb r5, -1(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    stvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    blr
 entry:
   %0 = load fp128, fp128* %a, align 16
@@ -319,14 +329,14 @@ define dso_local void @qpNeg(fp128* nocapture readonly %a, fp128* nocapture %res
 ;
 ; CHECK-P8-LABEL: qpNeg:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    addi r3, r1, -16
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lbz r5, -1(r1)
 ; CHECK-P8-NEXT:    xori r5, r5, 128
 ; CHECK-P8-NEXT:    stb r5, -1(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    stvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    blr
 entry:
   %0 = load fp128, fp128* %a, align 16
@@ -359,7 +369,8 @@ define fp128 @qp_sin(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl sinf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -396,7 +407,8 @@ define fp128 @qp_cos(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl cosf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -433,7 +445,8 @@ define fp128 @qp_log(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl logf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -470,7 +483,8 @@ define fp128 @qp_log10(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl log10f128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -507,7 +521,8 @@ define fp128 @qp_log2(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl log2f128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -545,8 +560,10 @@ define fp128 @qp_minnum(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl fminf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -586,8 +603,10 @@ define fp128 @qp_maxnum(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl fmaxf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -627,8 +646,10 @@ define fp128 @qp_pow(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl powf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -667,7 +688,8 @@ define fp128 @qp_exp(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl expf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -704,7 +726,8 @@ define fp128 @qp_exp2(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl exp2f128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -749,13 +772,15 @@ define dso_local void @qp_powi(fp128* nocapture readonly %a, i32* nocapture read
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lwz r3, 0(r4)
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    mr r5, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __powikf2
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -806,8 +831,10 @@ define fp128 @qp_frem() #0 {
 ; CHECK-P8-NEXT:    addis r4, r2, b at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a at toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, b at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl fmodf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -838,11 +865,13 @@ define dso_local void @qpCeil(fp128* nocapture readonly %a, fp128* nocapture %re
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl ceilf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -873,11 +902,13 @@ define dso_local void @qpFloor(fp128* nocapture readonly %a, fp128* nocapture %r
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl floorf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -908,11 +939,13 @@ define dso_local void @qpTrunc(fp128* nocapture readonly %a, fp128* nocapture %r
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl truncf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -943,11 +976,13 @@ define dso_local void @qpRound(fp128* nocapture readonly %a, fp128* nocapture %r
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl roundf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -991,8 +1026,9 @@ define dso_local void @qpLRound(fp128* nocapture readonly %a, i32* nocapture %re
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl lroundf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    stw r3, 0(r30)
@@ -1039,8 +1075,9 @@ define dso_local void @qpLLRound(fp128* nocapture readonly %a, i64* nocapture %r
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl llroundf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    std r3, 0(r30)
@@ -1074,11 +1111,13 @@ define dso_local void @qpRint(fp128* nocapture readonly %a, fp128* nocapture %re
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl rintf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -1122,8 +1161,9 @@ define dso_local void @qpLRint(fp128* nocapture readonly %a, i32* nocapture %res
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl lrintf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    stw r3, 0(r30)
@@ -1170,8 +1210,9 @@ define dso_local void @qpLLRint(fp128* nocapture readonly %a, i64* nocapture %re
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl llrintf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    std r3, 0(r30)
@@ -1205,11 +1246,13 @@ define dso_local void @qpNearByInt(fp128* nocapture readonly %a, fp128* nocaptur
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl nearbyintf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -1242,13 +1285,17 @@ define dso_local void @qpFMA(fp128* %a, fp128* %b, fp128* %c, fp128* %res) {
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
-; CHECK-P8-NEXT:    lvx v4, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    mr r30, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v4, vs2
 ; CHECK-P8-NEXT:    bl fmaf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload

diff  --git a/llvm/test/CodeGen/PowerPC/f128-compare.ll b/llvm/test/CodeGen/PowerPC/f128-compare.ll
index d57ee87ea919e..5ce417aa05719 100644
--- a/llvm/test/CodeGen/PowerPC/f128-compare.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-compare.ll
@@ -35,8 +35,10 @@ define dso_local signext i32 @greater_qp() {
 ; CHECK-P8-NEXT:    addis r4, r2, b_qp at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp at toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, b_qp at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __gtkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    extsw r3, r3
@@ -81,8 +83,10 @@ define dso_local signext i32 @less_qp() {
 ; CHECK-P8-NEXT:    addis r4, r2, b_qp at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp at toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, b_qp at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __ltkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -125,8 +129,10 @@ define dso_local signext i32 @greater_eq_qp() {
 ; CHECK-P8-NEXT:    addis r4, r2, b_qp at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp at toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, b_qp at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __gekf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -170,8 +176,10 @@ define dso_local signext i32 @less_eq_qp() {
 ; CHECK-P8-NEXT:    addis r4, r2, b_qp at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp at toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, b_qp at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __lekf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    extsw r3, r3
@@ -217,8 +225,10 @@ define dso_local signext i32 @equal_qp() {
 ; CHECK-P8-NEXT:    addis r4, r2, b_qp at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp at toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, b_qp at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __eqkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    cntlzw r3, r3
@@ -261,8 +271,10 @@ define dso_local signext i32 @not_greater_qp() {
 ; CHECK-P8-NEXT:    addis r4, r2, b_qp at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp at toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, b_qp at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __gtkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    extsw r3, r3
@@ -308,8 +320,10 @@ define dso_local signext i32 @not_less_qp() {
 ; CHECK-P8-NEXT:    addis r4, r2, b_qp at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp at toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, b_qp at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __ltkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -354,8 +368,10 @@ define dso_local signext i32 @not_greater_eq_qp() {
 ; CHECK-P8-NEXT:    addis r4, r2, b_qp at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp at toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, b_qp at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __gekf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -399,8 +415,10 @@ define dso_local signext i32 @not_less_eq_qp() {
 ; CHECK-P8-NEXT:    addis r4, r2, b_qp at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp at toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, b_qp at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __lekf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    extsw r3, r3
@@ -445,8 +463,10 @@ define dso_local signext i32 @not_equal_qp() {
 ; CHECK-P8-NEXT:    addis r4, r2, b_qp at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp at toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, b_qp at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __nekf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    cntlzw r3, r3
@@ -496,9 +516,11 @@ define fp128 @greater_sel_qp() {
 ; CHECK-P8-NEXT:    addi r4, r4, b_qp at toc@l
 ; CHECK-P8-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp at toc@ha
-; CHECK-P8-NEXT:    lvx v30, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp at toc@l
-; CHECK-P8-NEXT:    lvx v31, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v30, vs1
+; CHECK-P8-NEXT:    xxswapd v31, vs0
 ; CHECK-P8-NEXT:    vmr v3, v30
 ; CHECK-P8-NEXT:    vmr v2, v31
 ; CHECK-P8-NEXT:    bl __gtkf2
@@ -557,9 +579,11 @@ define fp128 @less_sel_qp() {
 ; CHECK-P8-NEXT:    addi r4, r4, b_qp at toc@l
 ; CHECK-P8-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp at toc@ha
-; CHECK-P8-NEXT:    lvx v30, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp at toc@l
-; CHECK-P8-NEXT:    lvx v31, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v30, vs1
+; CHECK-P8-NEXT:    xxswapd v31, vs0
 ; CHECK-P8-NEXT:    vmr v3, v30
 ; CHECK-P8-NEXT:    vmr v2, v31
 ; CHECK-P8-NEXT:    bl __ltkf2
@@ -619,9 +643,11 @@ define fp128 @greater_eq_sel_qp() {
 ; CHECK-P8-NEXT:    addi r4, r4, b_qp at toc@l
 ; CHECK-P8-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp at toc@ha
-; CHECK-P8-NEXT:    lvx v30, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp at toc@l
-; CHECK-P8-NEXT:    lvx v31, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v30, vs1
+; CHECK-P8-NEXT:    xxswapd v31, vs0
 ; CHECK-P8-NEXT:    vmr v3, v30
 ; CHECK-P8-NEXT:    vmr v2, v31
 ; CHECK-P8-NEXT:    bl __gekf2
@@ -681,9 +707,11 @@ define fp128 @less_eq_sel_qp() {
 ; CHECK-P8-NEXT:    addi r4, r4, b_qp at toc@l
 ; CHECK-P8-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp at toc@ha
-; CHECK-P8-NEXT:    lvx v30, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp at toc@l
-; CHECK-P8-NEXT:    lvx v31, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v30, vs1
+; CHECK-P8-NEXT:    xxswapd v31, vs0
 ; CHECK-P8-NEXT:    vmr v3, v30
 ; CHECK-P8-NEXT:    vmr v2, v31
 ; CHECK-P8-NEXT:    bl __lekf2
@@ -742,9 +770,11 @@ define fp128 @equal_sel_qp() {
 ; CHECK-P8-NEXT:    addi r4, r4, b_qp at toc@l
 ; CHECK-P8-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp at toc@ha
-; CHECK-P8-NEXT:    lvx v30, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp at toc@l
-; CHECK-P8-NEXT:    lvx v31, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v30, vs1
+; CHECK-P8-NEXT:    xxswapd v31, vs0
 ; CHECK-P8-NEXT:    vmr v3, v30
 ; CHECK-P8-NEXT:    vmr v2, v31
 ; CHECK-P8-NEXT:    bl __eqkf2

diff  --git a/llvm/test/CodeGen/PowerPC/f128-conv.ll b/llvm/test/CodeGen/PowerPC/f128-conv.ll
index 047993bf1ca90..fff30a11917a6 100644
--- a/llvm/test/CodeGen/PowerPC/f128-conv.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-conv.ll
@@ -35,7 +35,8 @@ define void @sdwConv2qp(fp128* nocapture %a, i64 %b) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatdikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -85,7 +86,8 @@ define void @sdwConv2qp_01(fp128* nocapture %a, i128 %b) {
 ; CHECK-P8-NEXT:    mr r4, r5
 ; CHECK-P8-NEXT:    bl __floattikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -125,7 +127,8 @@ define void @sdwConv2qp_02(fp128* nocapture %a) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatdikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -163,7 +166,8 @@ define void @sdwConv2qp_03(fp128* nocapture %a, i64* nocapture readonly %b) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatdikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -206,7 +210,8 @@ define void @sdwConv2qp_04(fp128* nocapture %a, i1 %b) {
 ; CHECK-P8-NEXT:    iselgt r3, r4, r3
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -241,7 +246,8 @@ define void @udwConv2qp(fp128* nocapture %a, i64 %b) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatundikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -291,7 +297,8 @@ define void @udwConv2qp_01(fp128* nocapture %a, i128 %b) {
 ; CHECK-P8-NEXT:    mr r4, r5
 ; CHECK-P8-NEXT:    bl __floatuntikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -331,7 +338,8 @@ define void @udwConv2qp_02(fp128* nocapture %a) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatundikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -369,7 +377,8 @@ define void @udwConv2qp_03(fp128* nocapture %a, i64* nocapture readonly %b) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatundikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -406,7 +415,8 @@ define void @udwConv2qp_04(fp128* nocapture %a, i1 %b) {
 ; CHECK-P8-NEXT:    clrldi r3, r4, 63
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -446,8 +456,9 @@ define fp128* @sdwConv2qp_testXForm(fp128* returned %sink,
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatdikf
 ; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mr r3, r30
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -491,8 +502,9 @@ define fp128* @udwConv2qp_testXForm(fp128* returned %sink,
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatundikf
 ; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mr r3, r30
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -531,7 +543,8 @@ define void @swConv2qp(fp128* nocapture %a, i32 signext %b) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -567,7 +580,8 @@ define void @swConv2qp_02(fp128* nocapture %a, i32* nocapture readonly %b) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -609,7 +623,8 @@ define void @swConv2qp_03(fp128* nocapture %a) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -646,7 +661,8 @@ define void @uwConv2qp(fp128* nocapture %a, i32 zeroext %b) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -682,7 +698,8 @@ define void @uwConv2qp_02(fp128* nocapture %a, i32* nocapture readonly %b) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -724,7 +741,8 @@ define void @uwConv2qp_03(fp128* nocapture %a) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -765,7 +783,8 @@ define void @uwConv2qp_04(fp128* nocapture %a,
 ; CHECK-P8-NEXT:    clrldi r3, r3, 32
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -803,7 +822,8 @@ define void @uhwConv2qp(fp128* nocapture %a, i16 zeroext %b) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -840,7 +860,8 @@ define void @uhwConv2qp_02(fp128* nocapture %a, i16* nocapture readonly %b) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -882,7 +903,8 @@ define void @uhwConv2qp_03(fp128* nocapture %a) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -923,7 +945,8 @@ define void @uhwConv2qp_04(fp128* nocapture %a, i16 zeroext %b,
 ; CHECK-P8-NEXT:    clrldi r3, r3, 32
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -963,7 +986,8 @@ define void @ubConv2qp(fp128* nocapture %a, i8 zeroext %b) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -999,7 +1023,8 @@ define void @ubConv2qp_02(fp128* nocapture %a, i8* nocapture readonly %b) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -1041,7 +1066,8 @@ define void @ubConv2qp_03(fp128* nocapture %a) {
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -1082,7 +1108,8 @@ define void @ubConv2qp_04(fp128* nocapture %a, i8 zeroext %b,
 ; CHECK-P8-NEXT:    clrldi r3, r3, 32
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -1125,7 +1152,8 @@ define double @qpConv2dp(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __trunckfdf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -1161,7 +1189,8 @@ define void @qpConv2dp_02(double* nocapture %res) {
 ; CHECK-P8-NEXT:    addis r4, r2, .LC6 at toc@ha
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC6 at toc@l(r4)
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __trunckfdf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    stfd f1, 0(r30)
@@ -1204,7 +1233,8 @@ define void @qpConv2dp_03(double* nocapture %res, i32 signext %idx) {
 ; CHECK-P8-NEXT:    addis r4, r2, .LC7 at toc@ha
 ; CHECK-P8-NEXT:    mr r29, r3
 ; CHECK-P8-NEXT:    ld r4, .LC7 at toc@l(r4)
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __trunckfdf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    sldi r3, r30, 3
@@ -1244,9 +1274,11 @@ define void @qpConv2dp_04(fp128* nocapture readonly %a, fp128* nocapture readonl
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    mr r30, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __trunckfdf2
@@ -1284,7 +1316,8 @@ define float @qpConv2sp(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __trunckfsf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -1321,7 +1354,8 @@ define void @qpConv2sp_02(float* nocapture %res) {
 ; CHECK-P8-NEXT:    addis r4, r2, .LC6 at toc@ha
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC6 at toc@l(r4)
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __trunckfsf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    stfs f1, 0(r30)
@@ -1366,7 +1400,8 @@ define void @qpConv2sp_03(float* nocapture %res, i32 signext %idx) {
 ; CHECK-P8-NEXT:    mr r29, r3
 ; CHECK-P8-NEXT:    ld r4, .LC7 at toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 48
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __trunckfsf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    sldi r3, r30, 2
@@ -1407,9 +1442,11 @@ define void @qpConv2sp_04(fp128* nocapture readonly %a, fp128* nocapture readonl
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    mr r30, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __trunckfsf2
@@ -1479,8 +1516,9 @@ define void @dpConv2qp_02(double* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    bl __extenddfkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addis r3, r2, .LC8 at toc@ha
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    ld r3, .LC8 at toc@l(r3)
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    addi r1, r1, 32
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    mtlr r0
@@ -1516,8 +1554,9 @@ define void @dpConv2qp_02b(double* nocapture readonly %a, i32 signext %idx) {
 ; CHECK-P8-NEXT:    bl __extenddfkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addis r3, r2, .LC8 at toc@ha
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    ld r3, .LC8 at toc@l(r3)
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    addi r1, r1, 32
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    mtlr r0
@@ -1556,8 +1595,9 @@ define void @dpConv2qp_03(fp128* nocapture %res, i32 signext %idx, double %a) {
 ; CHECK-P8-NEXT:    mr r29, r3
 ; CHECK-P8-NEXT:    bl __extenddfkf2
 ; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    sldi r3, r30, 4
-; CHECK-P8-NEXT:    stvx v2, r29, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, r29, r3
 ; CHECK-P8-NEXT:    addi r1, r1, 64
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -1593,7 +1633,8 @@ define void @dpConv2qp_04(double %a, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    bl __extenddfkf2
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -1653,8 +1694,9 @@ define void @spConv2qp_02(float* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    bl __extendsfkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addis r3, r2, .LC8 at toc@ha
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    ld r3, .LC8 at toc@l(r3)
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    addi r1, r1, 32
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    mtlr r0
@@ -1690,8 +1732,9 @@ define void @spConv2qp_02b(float* nocapture readonly %a, i32 signext %idx) {
 ; CHECK-P8-NEXT:    bl __extendsfkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addis r3, r2, .LC8 at toc@ha
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    ld r3, .LC8 at toc@l(r3)
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    addi r1, r1, 32
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    mtlr r0
@@ -1730,8 +1773,9 @@ define void @spConv2qp_03(fp128* nocapture %res, i32 signext %idx, float %a) {
 ; CHECK-P8-NEXT:    mr r29, r3
 ; CHECK-P8-NEXT:    bl __extendsfkf2
 ; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    sldi r3, r30, 4
-; CHECK-P8-NEXT:    stvx v2, r29, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, r29, r3
 ; CHECK-P8-NEXT:    addi r1, r1, 64
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -1767,7 +1811,8 @@ define void @spConv2qp_04(float %a, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    bl __extendsfkf2
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -1805,7 +1850,8 @@ define void @cvdp2sw2qp(double %val, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    extsw r3, r3
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -1841,7 +1887,8 @@ define void @cvdp2sdw2qp(double %val, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    bl __floatdikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -1879,7 +1926,8 @@ define void @cvsp2sw2qp(float %val, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    extsw r3, r3
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -1915,7 +1963,8 @@ define void @cvsp2sdw2qp(float %val, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    bl __floatdikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -1953,7 +2002,8 @@ define void @cvdp2uw2qp(double %val, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    clrldi r3, r3, 32
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -1989,7 +2039,8 @@ define void @cvdp2udw2qp(double %val, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    bl __floatundikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -2027,7 +2078,8 @@ define void @cvsp2uw2qp(float %val, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    clrldi r3, r3, 32
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -2063,7 +2115,8 @@ define void @cvsp2udw2qp(float %val, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    bl __floatundikf
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -2100,7 +2153,8 @@ define i128 @qpConv2i128(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfti
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -2137,7 +2191,8 @@ define i128 @qpConv2ui128(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixunskfti
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -2166,7 +2221,8 @@ define i1 @qpConv2ui1(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -2195,7 +2251,8 @@ define i1 @qpConv2si1(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32

diff  --git a/llvm/test/CodeGen/PowerPC/f128-fma.ll b/llvm/test/CodeGen/PowerPC/f128-fma.ll
index 048711820c0fb..9ca1f12733760 100644
--- a/llvm/test/CodeGen/PowerPC/f128-fma.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-fma.ll
@@ -24,22 +24,26 @@ define void @qpFmadd(fp128* nocapture readonly %a, fp128* nocapture %b,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    li r7, 48
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
 ; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    mr r30, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
 ; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
-; CHECK-P8-NEXT:    lvx v31, 0, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v31, vs2
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v31
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    li r3, 48
-; CHECK-P8-NEXT:    stvx v2, 0, r30
-; CHECK-P8-NEXT:    ld r30, 64(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-P8-NEXT:    ld r30, 64(r1) # 8-byte Folded Reload
 ; CHECK-P8-NEXT:    addi r1, r1, 80
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    mtlr r0
@@ -75,23 +79,27 @@ define void @qpFmadd_02(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
 ; CHECK-P8-NEXT:    li r7, 48
-; CHECK-P8-NEXT:    lvx v2, 0, r4
-; CHECK-P8-NEXT:    lvx v3, 0, r5
 ; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    mr r30, r6
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
-; CHECK-P8-NEXT:    lvx v31, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs1
+; CHECK-P8-NEXT:    xxswapd v3, vs2
+; CHECK-P8-NEXT:    xxswapd v31, vs0
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    vmr v2, v31
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    li r3, 48
-; CHECK-P8-NEXT:    stvx v2, 0, r30
-; CHECK-P8-NEXT:    ld r30, 64(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-P8-NEXT:    ld r30, 64(r1) # 8-byte Folded Reload
 ; CHECK-P8-NEXT:    addi r1, r1, 80
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    mtlr r0
@@ -130,16 +138,20 @@ define void @qpFmadd_03(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    mr r30, r6
 ; CHECK-P8-NEXT:    mr r29, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    lvx v3, 0, r29
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 64
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -178,28 +190,32 @@ define void @qpFnmadd(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
 ; CHECK-P8-NEXT:    li r7, 64
-; CHECK-P8-NEXT:    lvx v2, 0, r4
-; CHECK-P8-NEXT:    lvx v3, 0, r5
 ; CHECK-P8-NEXT:    std r30, 80(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    mr r30, r6
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
-; CHECK-P8-NEXT:    lvx v31, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs1
+; CHECK-P8-NEXT:    xxswapd v3, vs2
+; CHECK-P8-NEXT:    xxswapd v31, vs0
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    vmr v2, v31
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    addi r3, r1, 48
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lbz r4, 63(r1)
 ; CHECK-P8-NEXT:    xori r4, r4, 128
 ; CHECK-P8-NEXT:    stb r4, 63(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r3, 64
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
-; CHECK-P8-NEXT:    stvx v2, 0, r30
 ; CHECK-P8-NEXT:    ld r30, 80(r1) # 8-byte Folded Reload
 ; CHECK-P8-NEXT:    addi r1, r1, 96
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
@@ -240,22 +256,26 @@ define void @qpFnmadd_02(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    mr r30, r6
 ; CHECK-P8-NEXT:    mr r29, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    lvx v3, 0, r29
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    addi r3, r1, 32
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lbz r4, 47(r1)
 ; CHECK-P8-NEXT:    xori r4, r4, 128
 ; CHECK-P8-NEXT:    stb r4, 47(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 80
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -295,23 +315,27 @@ define void @qpFmsub(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
 ; CHECK-P8-NEXT:    li r7, 48
-; CHECK-P8-NEXT:    lvx v2, 0, r4
-; CHECK-P8-NEXT:    lvx v3, 0, r5
 ; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    mr r30, r6
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
-; CHECK-P8-NEXT:    lvx v31, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs1
+; CHECK-P8-NEXT:    xxswapd v3, vs2
+; CHECK-P8-NEXT:    xxswapd v31, vs0
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    vmr v2, v31
 ; CHECK-P8-NEXT:    bl __subkf3
 ; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    li r3, 48
-; CHECK-P8-NEXT:    stvx v2, 0, r30
-; CHECK-P8-NEXT:    ld r30, 64(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-P8-NEXT:    ld r30, 64(r1) # 8-byte Folded Reload
 ; CHECK-P8-NEXT:    addi r1, r1, 80
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    mtlr r0
@@ -350,16 +374,20 @@ define void @qpFmsub_02(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    mr r30, r6
 ; CHECK-P8-NEXT:    mr r29, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    lvx v3, 0, r29
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __subkf3
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 64
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -399,28 +427,32 @@ define void @qpFnmsub(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
 ; CHECK-P8-NEXT:    li r7, 64
-; CHECK-P8-NEXT:    lvx v2, 0, r4
-; CHECK-P8-NEXT:    lvx v3, 0, r5
 ; CHECK-P8-NEXT:    std r30, 80(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    mr r30, r6
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
-; CHECK-P8-NEXT:    lvx v31, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs1
+; CHECK-P8-NEXT:    xxswapd v3, vs2
+; CHECK-P8-NEXT:    xxswapd v31, vs0
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    vmr v2, v31
 ; CHECK-P8-NEXT:    bl __subkf3
 ; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    addi r3, r1, 48
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lbz r4, 63(r1)
 ; CHECK-P8-NEXT:    xori r4, r4, 128
 ; CHECK-P8-NEXT:    stb r4, 63(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r3, 64
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
-; CHECK-P8-NEXT:    stvx v2, 0, r30
 ; CHECK-P8-NEXT:    ld r30, 80(r1) # 8-byte Folded Reload
 ; CHECK-P8-NEXT:    addi r1, r1, 96
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
@@ -461,22 +493,26 @@ define void @qpFnmsub_02(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    mr r30, r6
 ; CHECK-P8-NEXT:    mr r29, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    lvx v3, 0, r29
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __subkf3
 ; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    addi r3, r1, 32
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lbz r4, 47(r1)
 ; CHECK-P8-NEXT:    xori r4, r4, 128
 ; CHECK-P8-NEXT:    stb r4, 47(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 80
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload

diff  --git a/llvm/test/CodeGen/PowerPC/f128-passByValue.ll b/llvm/test/CodeGen/PowerPC/f128-passByValue.ll
index c4783e8f2c12c..89e14b4be23ee 100644
--- a/llvm/test/CodeGen/PowerPC/f128-passByValue.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-passByValue.ll
@@ -18,7 +18,8 @@ define fp128 @loadConstant() {
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    blr
   entry:
     ret fp128 0xL00000000000000004001400000000000
@@ -46,7 +47,8 @@ define fp128 @loadConstant2(fp128 %a, fp128 %b) {
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
-; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -134,10 +136,12 @@ define fp128 @fp128Array(fp128* nocapture readonly %farray,
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    sldi r4, r4, 4
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    add r4, r3, r4
 ; CHECK-P8-NEXT:    addi r4, r4, -16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -197,40 +201,38 @@ define fp128 @maxVecParam(fp128 %p1, fp128 %p2, fp128 %p3, fp128 %p4, fp128 %p5,
 ; CHECK-P8-NEXT:    li r3, 48
 ; CHECK-P8-NEXT:    stvx v21, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    li r3, 64
-; CHECK-P8-NEXT:    vmr v21, v4
 ; CHECK-P8-NEXT:    stvx v22, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    li r3, 80
-; CHECK-P8-NEXT:    vmr v22, v5
+; CHECK-P8-NEXT:    vmr v22, v4
 ; CHECK-P8-NEXT:    stvx v23, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    li r3, 96
-; CHECK-P8-NEXT:    vmr v23, v6
+; CHECK-P8-NEXT:    vmr v23, v5
 ; CHECK-P8-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    li r3, 112
-; CHECK-P8-NEXT:    vmr v24, v7
+; CHECK-P8-NEXT:    vmr v24, v6
 ; CHECK-P8-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    li r3, 128
-; CHECK-P8-NEXT:    vmr v25, v8
+; CHECK-P8-NEXT:    vmr v25, v7
 ; CHECK-P8-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    li r3, 144
-; CHECK-P8-NEXT:    vmr v26, v9
+; CHECK-P8-NEXT:    vmr v26, v8
 ; CHECK-P8-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    li r3, 160
-; CHECK-P8-NEXT:    vmr v27, v10
+; CHECK-P8-NEXT:    vmr v27, v9
 ; CHECK-P8-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    li r3, 176
-; CHECK-P8-NEXT:    vmr v28, v11
+; CHECK-P8-NEXT:    vmr v28, v10
 ; CHECK-P8-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    li r3, 192
+; CHECK-P8-NEXT:    vmr v29, v11
 ; CHECK-P8-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    li r3, 208
 ; CHECK-P8-NEXT:    vmr v30, v12
 ; CHECK-P8-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    addi r3, r1, 448
 ; CHECK-P8-NEXT:    vmr v31, v13
-; CHECK-P8-NEXT:    lvx v29, 0, r3
-; CHECK-P8-NEXT:    bl __addkf3
-; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    vmr v3, v21
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v21, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v22
@@ -254,13 +256,16 @@ define fp128 @maxVecParam(fp128 %p1, fp128 %p2, fp128 %p3, fp128 %p4, fp128 %p5,
 ; CHECK-P8-NEXT:    vmr v3, v28
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    vmr v3, v29
+; CHECK-P8-NEXT:    bl __addkf3
+; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v30
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v31
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    vmr v3, v29
+; CHECK-P8-NEXT:    vmr v3, v21
 ; CHECK-P8-NEXT:    bl __subkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    li r3, 208
@@ -429,7 +434,7 @@ define fp128 @mixParam_02(fp128 %p1, double %p2, i64* nocapture %p3,
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
 ; CHECK-P8-NEXT:    li r3, 48
 ; CHECK-P8-NEXT:    add r4, r7, r9
-; CHECK-P8-NEXT:    vmr v4, v2
+; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    stfd f31, 72(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    lwz r3, 176(r1)
@@ -438,9 +443,9 @@ define fp128 @mixParam_02(fp128 %p1, double %p2, i64* nocapture %p3,
 ; CHECK-P8-NEXT:    add r3, r4, r3
 ; CHECK-P8-NEXT:    clrldi r3, r3, 32
 ; CHECK-P8-NEXT:    std r3, 0(r6)
-; CHECK-P8-NEXT:    lvx v3, 0, r8
-; CHECK-P8-NEXT:    vmr v2, v3
-; CHECK-P8-NEXT:    vmr v3, v4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r8
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxlor v2, vs0, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    fmr f1, f31
@@ -501,7 +506,7 @@ define fastcc fp128 @mixParam_02f(fp128 %p1, double %p2, i64* nocapture %p3,
 ; CHECK-P8-NEXT:    .cfi_offset f31, -8
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
 ; CHECK-P8-NEXT:    add r4, r4, r6
-; CHECK-P8-NEXT:    vmr v4, v2
+; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    li r9, 48
 ; CHECK-P8-NEXT:    stfd f31, 72(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    add r4, r4, r7
@@ -510,9 +515,9 @@ define fastcc fp128 @mixParam_02f(fp128 %p1, double %p2, i64* nocapture %p3,
 ; CHECK-P8-NEXT:    add r4, r4, r8
 ; CHECK-P8-NEXT:    clrldi r4, r4, 32
 ; CHECK-P8-NEXT:    std r4, 0(r3)
-; CHECK-P8-NEXT:    lvx v3, 0, r5
-; CHECK-P8-NEXT:    vmr v2, v3
-; CHECK-P8-NEXT:    vmr v3, v4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxlor v2, vs0, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    fmr f1, f31
@@ -572,15 +577,18 @@ define void @mixParam_03(fp128 %f1, double* nocapture %d1, <4 x i32> %vec1,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxswapd vs1, v3
 ; CHECK-P8-NEXT:    ld r4, 184(r1)
 ; CHECK-P8-NEXT:    li r3, 48
-; CHECK-P8-NEXT:    stvx v2, 0, r9
 ; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    mr r3, r10
-; CHECK-P8-NEXT:    stvx v3, 0, r4
-; CHECK-P8-NEXT:    lvx v31, 0, r9
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r9
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r9
+; CHECK-P8-NEXT:    xxswapd v31, vs0
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v2
@@ -633,14 +641,17 @@ define fastcc void @mixParam_03f(fp128 %f1, double* nocapture %d1, <4 x i32> %ve
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxswapd vs1, v3
 ; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    stvx v2, 0, r4
-; CHECK-P8-NEXT:    stvx v3, 0, r7
 ; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    mr r3, r5
 ; CHECK-P8-NEXT:    stvx v31, r1, r6 # 16-byte Folded Spill
-; CHECK-P8-NEXT:    lvx v31, 0, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r7
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v31, vs0
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v2

diff  --git a/llvm/test/CodeGen/PowerPC/f128-rounding.ll b/llvm/test/CodeGen/PowerPC/f128-rounding.ll
index 2913635408b4a..75887deeab7a6 100644
--- a/llvm/test/CodeGen/PowerPC/f128-rounding.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-rounding.ll
@@ -22,11 +22,13 @@ define void @qp_trunc(fp128* nocapture readonly %a, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl truncf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -57,11 +59,13 @@ define void @qp_rint(fp128* nocapture readonly %a, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl rintf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -92,11 +96,13 @@ define void @qp_nearbyint(fp128* nocapture readonly %a, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl nearbyintf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -127,11 +133,13 @@ define void @qp_round(fp128* nocapture readonly %a, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl roundf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -162,11 +170,13 @@ define void @qp_floor(fp128* nocapture readonly %a, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl floorf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
@@ -197,11 +207,13 @@ define void @qp_ceil(fp128* nocapture readonly %a, fp128* nocapture %res) {
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl ceilf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload

diff  --git a/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll b/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll
index 61c41dcef982a..04ea185e0b4ca 100644
--- a/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll
@@ -28,7 +28,8 @@ define i64 @qpConv2sdw(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfdi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -66,7 +67,8 @@ define void @qpConv2sdw_02(i64* nocapture %res) local_unnamed_addr #1 {
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfdi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    std r3, 0(r30)
@@ -106,10 +108,12 @@ define i64 @qpConv2sdw_03(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0 at toc@ha
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfdi
@@ -149,9 +153,11 @@ define void @qpConv2sdw_04(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    mr r30, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfdi
@@ -201,7 +207,8 @@ define void @qpConv2sdw_testXForm(i64* nocapture %res, i32 signext %idx) {
 ; CHECK-P8-NEXT:    mr r29, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfdi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    sldi r4, r30, 3
@@ -240,7 +247,8 @@ define i64 @qpConv2udw(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixunskfdi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -278,7 +286,8 @@ define void @qpConv2udw_02(i64* nocapture %res) {
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixunskfdi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    std r3, 0(r30)
@@ -318,10 +327,12 @@ define i64 @qpConv2udw_03(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0 at toc@ha
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixunskfdi
@@ -361,9 +372,11 @@ define void @qpConv2udw_04(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    mr r30, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixunskfdi
@@ -412,7 +425,8 @@ define void @qpConv2udw_testXForm(i64* nocapture %res, i32 signext %idx) {
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-P8-NEXT:    mr r29, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixunskfdi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    sldi r4, r30, 3
@@ -452,7 +466,8 @@ define signext i32 @qpConv2sw(fp128* nocapture readonly %a)  {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    extsw r3, r3
@@ -491,7 +506,8 @@ define void @qpConv2sw_02(i32* nocapture %res) {
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    stw r3, 0(r30)
@@ -532,10 +548,12 @@ define signext i32 @qpConv2sw_03(fp128* nocapture readonly %a)  {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0 at toc@ha
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -576,9 +594,11 @@ define void @qpConv2sw_04(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    mr r30, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -617,7 +637,8 @@ define zeroext i32 @qpConv2uw(fp128* nocapture readonly %a)  {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixunskfsi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -655,7 +676,8 @@ define void @qpConv2uw_02(i32* nocapture %res) {
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixunskfsi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    stw r3, 0(r30)
@@ -696,10 +718,12 @@ define zeroext i32 @qpConv2uw_03(fp128* nocapture readonly %a)  {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0 at toc@ha
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixunskfsi
@@ -739,9 +763,11 @@ define void @qpConv2uw_04(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    mr r30, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixunskfsi
@@ -781,7 +807,8 @@ define signext i16 @qpConv2shw(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    extsw r3, r3
@@ -819,7 +846,8 @@ define void @qpConv2shw_02(i16* nocapture %res) {
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    sth r3, 0(r30)
@@ -859,10 +887,12 @@ define signext i16 @qpConv2shw_03(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0 at toc@ha
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -902,9 +932,11 @@ define void @qpConv2shw_04(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    mr r30, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -942,7 +974,8 @@ define zeroext i16 @qpConv2uhw(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -979,7 +1012,8 @@ define void @qpConv2uhw_02(i16* nocapture %res) {
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    sth r3, 0(r30)
@@ -1019,10 +1053,12 @@ define zeroext i16 @qpConv2uhw_03(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0 at toc@ha
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -1061,9 +1097,11 @@ define void @qpConv2uhw_04(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    mr r30, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -1101,7 +1139,8 @@ define signext i8 @qpConv2sb(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    extsw r3, r3
@@ -1139,7 +1178,8 @@ define void @qpConv2sb_02(i8* nocapture %res) {
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    stb r3, 0(r30)
@@ -1179,10 +1219,12 @@ define signext i8 @qpConv2sb_03(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0 at toc@ha
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -1222,9 +1264,11 @@ define void @qpConv2sb_04(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    mr r30, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -1262,7 +1306,8 @@ define zeroext i8 @qpConv2ub(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -1299,7 +1344,8 @@ define void @qpConv2ub_02(i8* nocapture %res) {
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    stb r3, 0(r30)
@@ -1339,10 +1385,12 @@ define zeroext i8 @qpConv2ub_03(fp128* nocapture readonly %a) {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0 at toc@ha
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0 at toc@l(r4)
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -1381,9 +1429,11 @@ define void @qpConv2ub_04(fp128* nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    mr r30, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -1482,7 +1532,8 @@ define void @ppcf128Convqp(ppc_fp128 %src, fp128* %dst) {
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    bl __trunctfkf2
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    stvx v2, 0, r30
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload

diff  --git a/llvm/test/CodeGen/PowerPC/float-logic-ops.ll b/llvm/test/CodeGen/PowerPC/float-logic-ops.ll
index 30722631559c0..c5be349aee96b 100644
--- a/llvm/test/CodeGen/PowerPC/float-logic-ops.ll
+++ b/llvm/test/CodeGen/PowerPC/float-logic-ops.ll
@@ -55,7 +55,8 @@ define <4 x float> @absv4f32_invalid(<4 x float> %a) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis r3, r2, .LCPI4_0 at toc@ha
 ; CHECK-NEXT:    addi r3, r3, .LCPI4_0 at toc@l
-; CHECK-NEXT:    lvx v3, 0, r3
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    xxswapd vs35, vs0
 ; CHECK-NEXT:    xxland vs34, vs34, vs35
 ; CHECK-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
index e2c29f30cd3d6..acb623459d3fc 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
@@ -157,12 +157,14 @@ define fp128 @fmsub_f128(fp128 %f0, fp128 %f1, fp128 %f2) #0 {
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
+; CHECK-P8-NEXT:    xxswapd vs0, v4
 ; CHECK-P8-NEXT:    addi r3, r1, 32
-; CHECK-P8-NEXT:    stvx v4, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lbz r4, 47(r1)
 ; CHECK-P8-NEXT:    xori r4, r4, 128
 ; CHECK-P8-NEXT:    stb r4, 47(r1)
-; CHECK-P8-NEXT:    lvx v4, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v4, vs0
 ; CHECK-P8-NEXT:    bl fmaf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 48
@@ -193,12 +195,14 @@ define fp128 @fnmadd_f128(fp128 %f0, fp128 %f1, fp128 %f2) #0 {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    bl fmaf128
 ; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    addi r3, r1, 32
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lbz r4, 47(r1)
 ; CHECK-P8-NEXT:    xori r4, r4, 128
 ; CHECK-P8-NEXT:    stb r4, 47(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    addi r1, r1, 48
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    mtlr r0
@@ -225,20 +229,24 @@ define fp128 @fnmsub_f128(fp128 %f0, fp128 %f1, fp128 %f2) #0 {
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
+; CHECK-P8-NEXT:    xxswapd vs0, v4
 ; CHECK-P8-NEXT:    addi r3, r1, 32
-; CHECK-P8-NEXT:    stvx v4, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lbz r4, 47(r1)
 ; CHECK-P8-NEXT:    xori r4, r4, 128
 ; CHECK-P8-NEXT:    stb r4, 47(r1)
-; CHECK-P8-NEXT:    lvx v4, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v4, vs0
 ; CHECK-P8-NEXT:    bl fmaf128
 ; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    addi r3, r1, 48
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lbz r4, 63(r1)
 ; CHECK-P8-NEXT:    xori r4, r4, 128
 ; CHECK-P8-NEXT:    stb r4, 63(r1)
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    addi r1, r1, 64
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
 ; CHECK-P8-NEXT:    mtlr r0

diff  --git a/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll b/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll
index d937acc09c64d..d4efea67c260c 100644
--- a/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll
+++ b/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll
@@ -23,14 +23,17 @@ define <8 x i32> @test_large_vec_vaarg(i32 %n, ...) {
 ; LE-NEXT:    ld 3, -8(1)
 ; LE-NEXT:    addi 3, 3, 15
 ; LE-NEXT:    rldicr 3, 3, 0, 59
-; LE-NEXT:    addi 4, 3, 31
-; LE-NEXT:    addi 5, 3, 16
+; LE-NEXT:    addi 4, 3, 16
+; LE-NEXT:    std 4, -8(1)
+; LE-NEXT:    ld 4, -8(1)
+; LE-NEXT:    lxvd2x 0, 0, 3
+; LE-NEXT:    addi 4, 4, 15
 ; LE-NEXT:    rldicr 4, 4, 0, 59
-; LE-NEXT:    std 5, -8(1)
+; LE-NEXT:    xxswapd 34, 0
 ; LE-NEXT:    addi 5, 4, 16
-; LE-NEXT:    lvx 2, 0, 3
 ; LE-NEXT:    std 5, -8(1)
-; LE-NEXT:    lvx 3, 0, 4
+; LE-NEXT:    lxvd2x 1, 0, 4
+; LE-NEXT:    xxswapd 35, 1
 ; LE-NEXT:    blr
   %args = alloca i8*, align 4
   %x = va_arg i8** %args, <8 x i32>

diff  --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index cf8796ad1d709..c0df8a8878e3e 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -83,8 +83,9 @@ define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonl
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    addi r4, r4, 12
 ; P8-NEXT:    lfiwzx f0, 0, r4
-; P8-NEXT:    xxspltw v2, vs0, 1
-; P8-NEXT:    stvx v2, 0, r3
+; P8-NEXT:    xxspltw vs0, vs0, 1
+; P8-NEXT:    xxswapd vs0, vs0
+; P8-NEXT:    stxvd2x vs0, 0, r3
 ; P8-NEXT:    blr
 ;
 ; P7-LABEL: test2:
@@ -139,8 +140,9 @@ define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    addi r4, r4, 12
 ; P8-NEXT:    lfiwzx f0, 0, r4
-; P8-NEXT:    xxspltw v2, vs0, 1
-; P8-NEXT:    stvx v2, 0, r3
+; P8-NEXT:    xxspltw vs0, vs0, 1
+; P8-NEXT:    xxswapd vs0, vs0
+; P8-NEXT:    stxvd2x vs0, 0, r3
 ; P8-NEXT:    blr
 ;
 ; P7-LABEL: test3:
@@ -428,7 +430,8 @@ define void @test7(<8 x i16>* %a, i16* %in) {
 ; P8-NEXT:    lhzx r4, 0, r4
 ; P8-NEXT:    mtvsrwz v2, r4
 ; P8-NEXT:    vsplth v2, v2, 3
-; P8-NEXT:    stvx v2, 0, r3
+; P8-NEXT:    xxswapd vs0, v2
+; P8-NEXT:    stxvd2x vs0, 0, r3
 ; P8-NEXT:    blr
 ;
 ; P7-LABEL: test7:
@@ -489,7 +492,8 @@ define void @test8(<16 x i8>* %a, i8* %in) {
 ; P8-NEXT:    lbzx r4, 0, r4
 ; P8-NEXT:    mtvsrwz v2, r4
 ; P8-NEXT:    vspltb v2, v2, 7
-; P8-NEXT:    stvx v2, 0, r3
+; P8-NEXT:    xxswapd vs0, v2
+; P8-NEXT:    stxvd2x vs0, 0, r3
 ; P8-NEXT:    blr
 ;
 ; P7-LABEL: test8:
@@ -629,7 +633,8 @@ define <16 x i8> @unadjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
 ;
 ; P8-LABEL: unadjusted_lxvwsx_v16i8:
 ; P8:       # %bb.0: # %entry
-; P8-NEXT:    lvx v2, 0, r3
+; P8-NEXT:    lxvd2x vs0, 0, r3
+; P8-NEXT:    xxswapd v2, vs0
 ; P8-NEXT:    xxspltw v2, v2, 3
 ; P8-NEXT:    blr
 ;
@@ -670,7 +675,8 @@ define <16 x i8> @adjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
 ;
 ; P8-LABEL: adjusted_lxvwsx_v16i8:
 ; P8:       # %bb.0: # %entry
-; P8-NEXT:    lvx v2, 0, r3
+; P8-NEXT:    lxvd2x vs0, 0, r3
+; P8-NEXT:    xxswapd v2, vs0
 ; P8-NEXT:    xxspltw v2, v2, 2
 ; P8-NEXT:    blr
 ;
@@ -712,7 +718,8 @@ define <16 x i8> @adjusted_lxvwsx_v16i8_2(<16 x i8> *%s, <16 x i8> %t) {
 ;
 ; P8-LABEL: adjusted_lxvwsx_v16i8_2:
 ; P8:       # %bb.0: # %entry
-; P8-NEXT:    lvx v2, 0, r3
+; P8-NEXT:    lxvd2x vs0, 0, r3
+; P8-NEXT:    xxswapd v2, vs0
 ; P8-NEXT:    xxspltw v2, v2, 1
 ; P8-NEXT:    blr
 ;
@@ -754,7 +761,8 @@ define <16 x i8> @adjusted_lxvwsx_v16i8_3(<16 x i8> *%s, <16 x i8> %t) {
 ;
 ; P8-LABEL: adjusted_lxvwsx_v16i8_3:
 ; P8:       # %bb.0: # %entry
-; P8-NEXT:    lvx v2, 0, r3
+; P8-NEXT:    lxvd2x vs0, 0, r3
+; P8-NEXT:    xxswapd v2, vs0
 ; P8-NEXT:    xxspltw v2, v2, 0
 ; P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll b/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll
index 9838550bf06a7..8a04ca4ce3de8 100644
--- a/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll
+++ b/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll
@@ -71,10 +71,12 @@ define <4 x i32> @load_swap10(<4 x i32>* %vp1, <4 x i32>* %vp2) {
 ; CHECK-P8-LABEL: load_swap10:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_0 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r4
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: load_swap10:
@@ -109,10 +111,12 @@ define <4 x i32> @load_swap11(<4 x i32>* %vp1, <4 x i32>* %vp2) {
 ; CHECK-P8-LABEL: load_swap11:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI3_0 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI3_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: load_swap11:
@@ -147,10 +151,12 @@ define <8 x i16> @load_swap20(<8 x i16>* %vp1, <8 x i16>* %vp2){
 ; CHECK-P8-LABEL: load_swap20:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI4_0 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI4_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r4
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: load_swap20:
@@ -185,10 +191,12 @@ define <8 x i16> @load_swap21(<8 x i16>* %vp1, <8 x i16>* %vp2){
 ; CHECK-P8-LABEL: load_swap21:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI5_0 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI5_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: load_swap21:
@@ -223,10 +231,12 @@ define <16 x i8> @load_swap30(<16 x i8>* %vp1, <16 x i8>* %vp2){
 ; CHECK-P8-LABEL: load_swap30:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_0 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r4
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: load_swap30:
@@ -258,10 +268,12 @@ define <16 x i8> @load_swap31(<16 x i8>* %vp1, <16 x i8>* %vp2){
 ; CHECK-P8-LABEL: load_swap31:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI7_0 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI7_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: load_swap31:
@@ -321,10 +333,12 @@ define <4 x float> @load_swap50(<4 x float>* %vp1, <4 x float>* %vp2) {
 ; CHECK-P8-LABEL: load_swap50:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI9_0 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI9_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r4
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: load_swap50:
@@ -359,10 +373,12 @@ define <4 x float> @load_swap51(<4 x float>* %vp1, <4 x float>* %vp2) {
 ; CHECK-P8-LABEL: load_swap51:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI10_0 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI10_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: load_swap51:
@@ -452,9 +468,11 @@ define void @swap_store10(<4 x i32> %v1, <4 x i32> %v2, <4 x i32>* %vp) {
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI13_0 at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI13_0 at toc@l
-; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
-; CHECK-P8-NEXT:    stvx v2, 0, r7
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: swap_store10:
@@ -489,9 +507,11 @@ define void @swap_store11(<4 x i32> %v1, <4 x i32> %v2, <4 x i32>* %vp) {
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI14_0 at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI14_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
-; CHECK-P8-NEXT:    stvx v2, 0, r7
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: swap_store11:
@@ -526,9 +546,11 @@ define void @swap_store20(<8 x i16> %v1, <8 x i16> %v2, <8 x i16>* %vp) {
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI15_0 at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI15_0 at toc@l
-; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
-; CHECK-P8-NEXT:    stvx v2, 0, r7
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: swap_store20:
@@ -563,9 +585,11 @@ define void @swap_store21(<8 x i16> %v1, <8 x i16> %v2, <8 x i16>* %vp) {
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI16_0 at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI16_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
-; CHECK-P8-NEXT:    stvx v2, 0, r7
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: swap_store21:
@@ -600,9 +624,11 @@ define void @swap_store30(<16 x i8> %v1, <16 x i8> %v2, <16 x i8>* %vp) {
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI17_0 at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI17_0 at toc@l
-; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
-; CHECK-P8-NEXT:    stvx v2, 0, r7
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: swap_store30:
@@ -634,9 +660,11 @@ define void @swap_store31(<16 x i8> %v1, <16 x i8> %v2, <16 x i8>* %vp) {
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI18_0 at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI18_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
-; CHECK-P8-NEXT:    stvx v2, 0, r7
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: swap_store31:
@@ -722,9 +750,11 @@ define void @swap_store50(<4 x float> %v1, <4 x float> %v2, <4 x float>* %vp) {
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI21_0 at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI21_0 at toc@l
-; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
-; CHECK-P8-NEXT:    stvx v2, 0, r7
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: swap_store50:
@@ -759,9 +789,11 @@ define void @swap_store51(<4 x float> %v1, <4 x float> %v2, <4 x float>* %vp) {
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI22_0 at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI22_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
-; CHECK-P8-NEXT:    stvx v2, 0, r7
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: swap_store51:

diff  --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
index 5915862f4cbdb..f6a57e6fe19af 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
@@ -300,8 +300,8 @@ define dso_local void @testUnalignedLdSt() {
 ; LE-PWR8-LABEL: testUnalignedLdSt:
 ; LE-PWR8:       # %bb.0: # %entry
 ; LE-PWR8-NEXT:    addis r3, r2, f at toc@ha
-; LE-PWR8-NEXT:    li r4, 59
-; LE-PWR8-NEXT:    li r5, 43
+; LE-PWR8-NEXT:    li r4, 43
+; LE-PWR8-NEXT:    li r5, 59
 ; LE-PWR8-NEXT:    addi r3, r3, f at toc@l
 ; LE-PWR8-NEXT:    lxvd2x vs0, r3, r4
 ; LE-PWR8-NEXT:    li r4, 11
@@ -309,8 +309,8 @@ define dso_local void @testUnalignedLdSt() {
 ; LE-PWR8-NEXT:    li r5, 27
 ; LE-PWR8-NEXT:    lxvd2x vs2, r3, r4
 ; LE-PWR8-NEXT:    lxvd2x vs3, r3, r5
-; LE-PWR8-NEXT:    li r4, 51
-; LE-PWR8-NEXT:    li r5, 67
+; LE-PWR8-NEXT:    li r4, 67
+; LE-PWR8-NEXT:    li r5, 51
 ; LE-PWR8-NEXT:    stxvd2x vs1, r3, r4
 ; LE-PWR8-NEXT:    li r4, 35
 ; LE-PWR8-NEXT:    stxvd2x vs0, r3, r5
@@ -576,17 +576,15 @@ define dso_local void @testUnalignedLdStPair() {
 ; LE-PWR8-LABEL: testUnalignedLdStPair:
 ; LE-PWR8:       # %bb.0: # %entry
 ; LE-PWR8-NEXT:    addis r3, r2, g at toc@ha
-; LE-PWR8-NEXT:    li r4, 27
-; LE-PWR8-NEXT:    li r5, 11
-; LE-PWR8-NEXT:    li r6, 19
-; LE-PWR8-NEXT:    li r8, 35
+; LE-PWR8-NEXT:    li r4, 11
+; LE-PWR8-NEXT:    li r5, 27
 ; LE-PWR8-NEXT:    addi r3, r3, g at toc@l
 ; LE-PWR8-NEXT:    lxvd2x vs0, r3, r4
-; LE-PWR8-NEXT:    ldx r5, r3, r5
-; LE-PWR8-NEXT:    ldx r7, r3, r6
-; LE-PWR8-NEXT:    stdx r7, r3, r4
-; LE-PWR8-NEXT:    stdx r5, r3, r6
-; LE-PWR8-NEXT:    stxvd2x vs0, r3, r8
+; LE-PWR8-NEXT:    lxvd2x vs1, r3, r5
+; LE-PWR8-NEXT:    li r4, 35
+; LE-PWR8-NEXT:    li r5, 19
+; LE-PWR8-NEXT:    stxvd2x vs1, r3, r4
+; LE-PWR8-NEXT:    stxvd2x vs0, r3, r5
 ; LE-PWR8-NEXT:    blr
 ;
 ; BE-PWR9-LABEL: testUnalignedLdStPair:

diff  --git a/llvm/test/CodeGen/PowerPC/non-debug-mi-search-frspxsrsp.ll b/llvm/test/CodeGen/PowerPC/non-debug-mi-search-frspxsrsp.ll
index bcb35e751cfa7..1e906239e03b6 100644
--- a/llvm/test/CodeGen/PowerPC/non-debug-mi-search-frspxsrsp.ll
+++ b/llvm/test/CodeGen/PowerPC/non-debug-mi-search-frspxsrsp.ll
@@ -30,7 +30,8 @@ define dso_local void @test(float* nocapture readonly %Fptr, <4 x float>* nocapt
 ; CHECK-NEXT:          xvcvdpsp 35, 1
 ; CHECK-NEXT:          vmrgew 2, 2, 3
 ; CHECK-NEXT:          .loc    1 3 9 is_stmt 0
-; CHECK-NEXT:          stvx 2, 0, 4
+; CHECK-NEXT:          xxswapd	0, 34
+; CHECK-NEXT:          stxvd2x 0, 0, 4
 ; CHECK-NEXT:          .loc    1 4 1 is_stmt 1
 ; CHECK-NEXT:          blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll b/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll
index 418e656d9a083..1592c9e25f08a 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll
@@ -1733,10 +1733,10 @@ define dso_local void @testGlob11PtrPlus0() {
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobLd11 at toc@ha
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobLd11 at toc@l
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobSt11 at toc@ha
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobSt11 at toc@l
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: testGlob11PtrPlus0:
@@ -1788,10 +1788,10 @@ define dso_local void @testGlob11PtrPlus3() {
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobLd11 at toc@ha
 ; CHECK-P8-LE-NEXT:    li r4, 3
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobLd11 at toc@l
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobSt11 at toc@ha
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobSt11 at toc@l
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: testGlob11PtrPlus3:
@@ -1844,10 +1844,10 @@ define dso_local void @testGlob11PtrPlus4() {
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobLd11 at toc@ha
 ; CHECK-P8-LE-NEXT:    li r4, 4
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobLd11 at toc@l
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobSt11 at toc@ha
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobSt11 at toc@l
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: testGlob11PtrPlus4:
@@ -1899,10 +1899,10 @@ define dso_local void @testGlob11PtrPlus16() {
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobLd11 at toc@ha
 ; CHECK-P8-LE-NEXT:    li r4, 16
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobLd11 at toc@l
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobSt11 at toc@ha
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobSt11 at toc@l
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: testGlob11PtrPlus16:
@@ -1959,10 +1959,10 @@ define dso_local void @testGlob11PtrPlusVar(i64 %Idx) {
 ; CHECK-P8-LE-NEXT:    addis r4, r2, GlobLd11 at toc@ha
 ; CHECK-P8-LE-NEXT:    sldi r3, r3, 4
 ; CHECK-P8-LE-NEXT:    addi r4, r4, GlobLd11 at toc@l
-; CHECK-P8-LE-NEXT:    lvx v2, r4, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r4, r3
 ; CHECK-P8-LE-NEXT:    addis r4, r2, GlobSt11 at toc@ha
 ; CHECK-P8-LE-NEXT:    addi r4, r4, GlobSt11 at toc@l
-; CHECK-P8-LE-NEXT:    stvx v2, r4, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r4, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: testGlob11PtrPlusVar:
@@ -2015,10 +2015,10 @@ define dso_local void @testGlob12PtrPlus0() {
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobLd12 at toc@ha
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobLd12 at toc@l
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobSt12 at toc@ha
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobSt12 at toc@l
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: testGlob12PtrPlus0:
@@ -2070,10 +2070,10 @@ define dso_local void @testGlob12PtrPlus3() {
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobLd12 at toc@ha
 ; CHECK-P8-LE-NEXT:    li r4, 3
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobLd12 at toc@l
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobSt12 at toc@ha
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobSt12 at toc@l
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: testGlob12PtrPlus3:
@@ -2126,10 +2126,10 @@ define dso_local void @testGlob12PtrPlus4() {
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobLd12 at toc@ha
 ; CHECK-P8-LE-NEXT:    li r4, 4
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobLd12 at toc@l
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobSt12 at toc@ha
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobSt12 at toc@l
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: testGlob12PtrPlus4:
@@ -2181,10 +2181,10 @@ define dso_local void @testGlob12PtrPlus16() {
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobLd12 at toc@ha
 ; CHECK-P8-LE-NEXT:    li r4, 16
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobLd12 at toc@l
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    addis r3, r2, GlobSt12 at toc@ha
 ; CHECK-P8-LE-NEXT:    addi r3, r3, GlobSt12 at toc@l
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: testGlob12PtrPlus16:
@@ -2241,10 +2241,10 @@ define dso_local void @testGlob12PtrPlusVar(i64 %Idx) {
 ; CHECK-P8-LE-NEXT:    addis r4, r2, GlobLd12 at toc@ha
 ; CHECK-P8-LE-NEXT:    sldi r3, r3, 4
 ; CHECK-P8-LE-NEXT:    addi r4, r4, GlobLd12 at toc@l
-; CHECK-P8-LE-NEXT:    lvx v2, r4, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r4, r3
 ; CHECK-P8-LE-NEXT:    addis r4, r2, GlobSt12 at toc@ha
 ; CHECK-P8-LE-NEXT:    addi r4, r4, GlobSt12 at toc@l
-; CHECK-P8-LE-NEXT:    stvx v2, r4, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r4, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: testGlob12PtrPlusVar:

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll b/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll
index ad7f570d30740..b5333550977a7 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll
@@ -217,7 +217,8 @@ define <1 x i128> @call_v1i128_increment_by_one() nounwind {
        ret <1 x i128> %ret
 
 ; CHECK-LE-LABEL: @call_v1i128_increment_by_one
-; CHECK-LE: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE: lxvd2x [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE: xxswapd 34, [[VAL]]
 ; CHECK-LE: bl v1i128_increment_by_one
 ; CHECK-LE: blr
 
@@ -246,8 +247,10 @@ define <1 x i128> @call_v1i128_increment_by_val() nounwind {
        ret <1 x i128> %ret
 
 ; CHECK-LE-LABEL: @call_v1i128_increment_by_val
-; CHECK-LE: lvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK-LE: lvx 3, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE: lxvd2x [[VAL1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE-DAG: lxvd2x [[VAL2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE-DAG: xxswapd 34, [[VAL1]]
+; CHECK-LE: xxswapd 35, [[VAL2]]
 ; CHECK-LE: bl v1i128_increment_by_val
 ; CHECK-LE: blr
 

diff  --git a/llvm/test/CodeGen/PowerPC/pr25080.ll b/llvm/test/CodeGen/PowerPC/pr25080.ll
index a44c3d5e184d4..0346b765252e6 100644
--- a/llvm/test/CodeGen/PowerPC/pr25080.ll
+++ b/llvm/test/CodeGen/PowerPC/pr25080.ll
@@ -8,7 +8,8 @@ define <8 x i16> @pr25080(<8 x i32> %a) {
 ; LE-NEXT:    addis 3, 2, .LCPI0_0 at toc@ha
 ; LE-NEXT:    xxlxor 37, 37, 37
 ; LE-NEXT:    addi 3, 3, .LCPI0_0 at toc@l
-; LE-NEXT:    lvx 4, 0, 3
+; LE-NEXT:    lxvd2x 0, 0, 3
+; LE-NEXT:    xxswapd 36, 0
 ; LE-NEXT:    xxland 34, 34, 36
 ; LE-NEXT:    xxland 35, 35, 36
 ; LE-NEXT:    vcmpequw 2, 2, 5
@@ -40,11 +41,12 @@ define <8 x i16> @pr25080(<8 x i32> %a) {
 ; LE-NEXT:    vmrghh 4, 1, 4
 ; LE-NEXT:    addi 3, 3, .LCPI0_1 at toc@l
 ; LE-NEXT:    vmrghh 3, 3, 6
+; LE-NEXT:    lxvd2x 2, 0, 3
 ; LE-NEXT:    vmrghh 5, 0, 5
 ; LE-NEXT:    xxmrglw 0, 36, 34
 ; LE-NEXT:    vspltish 4, 15
 ; LE-NEXT:    xxmrglw 1, 37, 35
-; LE-NEXT:    lvx 3, 0, 3
+; LE-NEXT:    xxswapd 35, 2
 ; LE-NEXT:    xxmrgld 34, 1, 0
 ; LE-NEXT:    xxlor 34, 34, 35
 ; LE-NEXT:    vslh 2, 2, 4

diff  --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
index 2aef0e7e6d20a..257e9f4230833 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -487,13 +487,15 @@ define <4 x float> @hoo_fmf(<4 x float> %a, <4 x float> %b) nounwind {
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI12_0 at toc@ha
 ; CHECK-P8-NEXT:    addis 4, 2, .LCPI12_1 at toc@ha
 ; CHECK-P8-NEXT:    addi 3, 3, .LCPI12_0 at toc@l
-; CHECK-P8-NEXT:    xvmulsp 1, 35, 0
-; CHECK-P8-NEXT:    lvx 3, 0, 3
+; CHECK-P8-NEXT:    lxvd2x 1, 0, 3
 ; CHECK-P8-NEXT:    addi 3, 4, .LCPI12_1 at toc@l
-; CHECK-P8-NEXT:    lvx 4, 0, 3
-; CHECK-P8-NEXT:    xvmaddasp 35, 1, 0
-; CHECK-P8-NEXT:    xvmulsp 0, 0, 36
+; CHECK-P8-NEXT:    lxvd2x 3, 0, 3
+; CHECK-P8-NEXT:    xxswapd 1, 1
+; CHECK-P8-NEXT:    xvmulsp 2, 35, 0
+; CHECK-P8-NEXT:    xxswapd 35, 3
+; CHECK-P8-NEXT:    xvmaddasp 1, 2, 0
 ; CHECK-P8-NEXT:    xvmulsp 0, 0, 35
+; CHECK-P8-NEXT:    xvmulsp 0, 0, 1
 ; CHECK-P8-NEXT:    xvmulsp 34, 34, 0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -1044,13 +1046,15 @@ define <4 x float> @hoo3_fmf(<4 x float> %a) #1 {
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI25_0 at toc@ha
 ; CHECK-P8-NEXT:    addis 4, 2, .LCPI25_1 at toc@ha
 ; CHECK-P8-NEXT:    addi 3, 3, .LCPI25_0 at toc@l
-; CHECK-P8-NEXT:    xvmulsp 1, 34, 0
-; CHECK-P8-NEXT:    lvx 2, 0, 3
+; CHECK-P8-NEXT:    lxvd2x 1, 0, 3
 ; CHECK-P8-NEXT:    addi 3, 4, .LCPI25_1 at toc@l
-; CHECK-P8-NEXT:    lvx 3, 0, 3
-; CHECK-P8-NEXT:    xvmaddasp 34, 1, 0
-; CHECK-P8-NEXT:    xvmulsp 0, 1, 35
-; CHECK-P8-NEXT:    xvmulsp 34, 0, 34
+; CHECK-P8-NEXT:    lxvd2x 3, 0, 3
+; CHECK-P8-NEXT:    xxswapd 1, 1
+; CHECK-P8-NEXT:    xvmulsp 2, 34, 0
+; CHECK-P8-NEXT:    xxswapd 34, 3
+; CHECK-P8-NEXT:    xvmaddasp 1, 2, 0
+; CHECK-P8-NEXT:    xvmulsp 0, 2, 34
+; CHECK-P8-NEXT:    xvmulsp 34, 0, 1
 ; CHECK-P8-NEXT:    blr
 ; CHECK-P8-NEXT:  .LBB25_2:
 ; CHECK-P8-NEXT:    xvsqrtsp 34, 34

diff  --git a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
index 1185737fb0c96..17dafc1caef52 100644
--- a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
+++ b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
@@ -8,12 +8,14 @@ define <4 x float> @repeated_fp_divisor_noest(float %a, <4 x float> %b) {
 ; CHECK-NEXT:    xscvdpspn 0, 1
 ; CHECK-NEXT:    addis 3, 2, .LCPI0_1 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI0_1 at toc@l
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 1, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI0_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI0_0 at toc@l
+; CHECK-NEXT:    xxswapd 35, 1
+; CHECK-NEXT:    lxvd2x 1, 0, 3
 ; CHECK-NEXT:    xxspltw 0, 0, 0
 ; CHECK-NEXT:    xvdivsp 0, 35, 0
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    xxswapd 35, 1
 ; CHECK-NEXT:    xvmulsp 1, 34, 35
 ; CHECK-NEXT:    xvmulsp 34, 1, 0
 ; CHECK-NEXT:    blr
@@ -30,16 +32,18 @@ define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
 ; CHECK-NEXT:    xscvdpspn 0, 1
 ; CHECK-NEXT:    addis 3, 2, .LCPI1_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI1_0 at toc@l
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 1, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI1_1 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI1_1 at toc@l
-; CHECK-NEXT:    lvx 4, 0, 3
+; CHECK-NEXT:    xxswapd 1, 1
 ; CHECK-NEXT:    xxspltw 0, 0, 0
-; CHECK-NEXT:    xvresp 1, 0
-; CHECK-NEXT:    xvmaddasp 35, 0, 1
-; CHECK-NEXT:    xvmulsp 0, 34, 36
-; CHECK-NEXT:    xvnmsubasp 1, 1, 35
-; CHECK-NEXT:    xvmulsp 34, 0, 1
+; CHECK-NEXT:    xvresp 2, 0
+; CHECK-NEXT:    xvmaddasp 1, 0, 2
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
+; CHECK-NEXT:    xvnmsubasp 2, 2, 1
+; CHECK-NEXT:    xvmulsp 0, 34, 35
+; CHECK-NEXT:    xvmulsp 34, 0, 2
 ; CHECK-NEXT:    blr
   %ins = insertelement <4 x float> undef, float %a, i32 0
   %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer

diff  --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll
index 7cbd577682009..fef7de598fca3 100644
--- a/llvm/test/CodeGen/PowerPC/sat-add.ll
+++ b/llvm/test/CodeGen/PowerPC/sat-add.ll
@@ -379,11 +379,13 @@ define <16 x i8> @unsigned_sat_constant_v16i8_using_min(<16 x i8> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis 3, 2, .LCPI24_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI24_0 at toc@l
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI24_1 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI24_1 at toc@l
+; CHECK-NEXT:    xxswapd 35, 0
+; CHECK-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-NEXT:    vminub 2, 2, 3
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vaddubm 2, 2, 3
 ; CHECK-NEXT:    blr
   %c = icmp ult <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
@@ -397,7 +399,8 @@ define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis 3, 2, .LCPI25_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI25_0 at toc@l
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vaddubs 2, 2, 3
 ; CHECK-NEXT:    blr
   %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
@@ -411,7 +414,8 @@ define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_notval(<16 x i8> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis 3, 2, .LCPI26_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI26_0 at toc@l
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vaddubs 2, 2, 3
 ; CHECK-NEXT:    blr
   %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
@@ -425,11 +429,13 @@ define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis 3, 2, .LCPI27_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI27_0 at toc@l
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI27_1 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI27_1 at toc@l
+; CHECK-NEXT:    xxswapd 35, 0
+; CHECK-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-NEXT:    vminuh 2, 2, 3
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vadduhm 2, 2, 3
 ; CHECK-NEXT:    blr
   %c = icmp ult <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
@@ -443,7 +449,8 @@ define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis 3, 2, .LCPI28_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI28_0 at toc@l
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vadduhs 2, 2, 3
 ; CHECK-NEXT:    blr
   %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
@@ -457,7 +464,8 @@ define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_notval(<8 x i16> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis 3, 2, .LCPI29_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI29_0 at toc@l
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vadduhs 2, 2, 3
 ; CHECK-NEXT:    blr
   %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
@@ -471,11 +479,13 @@ define <4 x i32> @unsigned_sat_constant_v4i32_using_min(<4 x i32> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis 3, 2, .LCPI30_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI30_0 at toc@l
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI30_1 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI30_1 at toc@l
+; CHECK-NEXT:    xxswapd 35, 0
+; CHECK-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-NEXT:    vminuw 2, 2, 3
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vadduwm 2, 2, 3
 ; CHECK-NEXT:    blr
   %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
@@ -489,7 +499,8 @@ define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis 3, 2, .LCPI31_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI31_0 at toc@l
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vadduws 2, 2, 3
 ; CHECK-NEXT:    blr
   %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
@@ -503,7 +514,8 @@ define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval(<4 x i32> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis 3, 2, .LCPI32_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI32_0 at toc@l
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vadduws 2, 2, 3
 ; CHECK-NEXT:    blr
   %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>

diff  --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
index 87e4c7194d966..6a7d36e2cf3ef 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
@@ -40,10 +40,11 @@ define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec)  {
 ; P8LE-LABEL: s2v_test1:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r4, r2, .LCPI0_0 at toc@ha
-; P8LE-NEXT:    lxsiwzx v4, 0, r3
+; P8LE-NEXT:    lxsiwzx v3, 0, r3
 ; P8LE-NEXT:    addi r4, r4, .LCPI0_0 at toc@l
-; P8LE-NEXT:    lvx v3, 0, r4
-; P8LE-NEXT:    vperm v2, v2, v4, v3
+; P8LE-NEXT:    lxvd2x vs0, 0, r4
+; P8LE-NEXT:    xxswapd v4, vs0
+; P8LE-NEXT:    vperm v2, v2, v3, v4
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test1:
@@ -107,9 +108,10 @@ define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec)  {
 ; P8LE-NEXT:    addis r4, r2, .LCPI1_0 at toc@ha
 ; P8LE-NEXT:    addi r3, r3, 4
 ; P8LE-NEXT:    addi r4, r4, .LCPI1_0 at toc@l
-; P8LE-NEXT:    lxsiwzx v4, 0, r3
-; P8LE-NEXT:    lvx v3, 0, r4
-; P8LE-NEXT:    vperm v2, v2, v4, v3
+; P8LE-NEXT:    lxsiwzx v3, 0, r3
+; P8LE-NEXT:    lxvd2x vs0, 0, r4
+; P8LE-NEXT:    xxswapd v4, vs0
+; P8LE-NEXT:    vperm v2, v2, v3, v4
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test2:
@@ -176,10 +178,11 @@ define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32
 ; P8LE-LABEL: s2v_test3:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r4, r2, .LCPI2_0 at toc@ha
-; P8LE-NEXT:    sldi r5, r7, 2
 ; P8LE-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
-; P8LE-NEXT:    lxsiwzx v3, r3, r5
-; P8LE-NEXT:    lvx v4, 0, r4
+; P8LE-NEXT:    lxvd2x vs0, 0, r4
+; P8LE-NEXT:    sldi r4, r7, 2
+; P8LE-NEXT:    lxsiwzx v3, r3, r4
+; P8LE-NEXT:    xxswapd v4, vs0
 ; P8LE-NEXT:    vperm v2, v2, v3, v4
 ; P8LE-NEXT:    blr
 ;
@@ -258,9 +261,10 @@ define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec)  {
 ; P8LE-NEXT:    addis r4, r2, .LCPI3_0 at toc@ha
 ; P8LE-NEXT:    addi r3, r3, 4
 ; P8LE-NEXT:    addi r4, r4, .LCPI3_0 at toc@l
-; P8LE-NEXT:    lxsiwzx v4, 0, r3
-; P8LE-NEXT:    lvx v3, 0, r4
-; P8LE-NEXT:    vperm v2, v2, v4, v3
+; P8LE-NEXT:    lxsiwzx v3, 0, r3
+; P8LE-NEXT:    lxvd2x vs0, 0, r4
+; P8LE-NEXT:    xxswapd v4, vs0
+; P8LE-NEXT:    vperm v2, v2, v3, v4
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test4:
@@ -325,10 +329,11 @@ define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1)  {
 ; P8LE-LABEL: s2v_test5:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r3, r2, .LCPI4_0 at toc@ha
-; P8LE-NEXT:    lxsiwzx v4, 0, r5
+; P8LE-NEXT:    lxsiwzx v3, 0, r5
 ; P8LE-NEXT:    addi r3, r3, .LCPI4_0 at toc@l
-; P8LE-NEXT:    lvx v3, 0, r3
-; P8LE-NEXT:    vperm v2, v2, v4, v3
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v4, vs0
+; P8LE-NEXT:    vperm v2, v2, v3, v4
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test5:
@@ -390,10 +395,11 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec
 ; P8LE-LABEL: s2v_test_f1:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r4, r2, .LCPI5_0 at toc@ha
-; P8LE-NEXT:    lxsiwzx v4, 0, r3
+; P8LE-NEXT:    lxsiwzx v3, 0, r3
 ; P8LE-NEXT:    addi r4, r4, .LCPI5_0 at toc@l
-; P8LE-NEXT:    lvx v3, 0, r4
-; P8LE-NEXT:    vperm v2, v2, v4, v3
+; P8LE-NEXT:    lxvd2x vs0, 0, r4
+; P8LE-NEXT:    xxswapd v4, vs0
+; P8LE-NEXT:    vperm v2, v2, v3, v4
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test_f1:

diff  --git a/llvm/test/CodeGen/PowerPC/signbit-shift.ll b/llvm/test/CodeGen/PowerPC/signbit-shift.ll
index 20fa7436b336c..030892b14e2cc 100644
--- a/llvm/test/CodeGen/PowerPC/signbit-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/signbit-shift.ll
@@ -33,7 +33,8 @@ define <4 x i32> @add_zext_ifpos_vec_splat(<4 x i32> %x) {
 ; CHECK-NEXT:    addis 3, 2, .LCPI2_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI2_0 at toc@l
 ; CHECK-NEXT:    vcmpgtsw 2, 2, 3
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vsubuwm 2, 3, 2
 ; CHECK-NEXT:    blr
   %c = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -84,7 +85,8 @@ define <4 x i32> @add_sext_ifpos_vec_splat(<4 x i32> %x) {
 ; CHECK-NEXT:    addis 3, 2, .LCPI6_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI6_0 at toc@l
 ; CHECK-NEXT:    vcmpgtsw 2, 2, 3
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vadduwm 2, 2, 3
 ; CHECK-NEXT:    blr
   %c = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -192,9 +194,10 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
 ; CHECK-NEXT:    vspltisw 4, 15
 ; CHECK-NEXT:    addis 3, 2, .LCPI15_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI15_0 at toc@l
+; CHECK-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-NEXT:    vsubuwm 3, 4, 3
 ; CHECK-NEXT:    vsraw 2, 2, 3
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vadduwm 2, 2, 3
 ; CHECK-NEXT:    blr
   %c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -222,9 +225,10 @@ define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) {
 ; CHECK-NEXT:    vspltisw 4, 15
 ; CHECK-NEXT:    addis 3, 2, .LCPI17_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI17_0 at toc@l
+; CHECK-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-NEXT:    vsubuwm 3, 4, 3
 ; CHECK-NEXT:    vsrw 2, 2, 3
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vadduwm 2, 2, 3
 ; CHECK-NEXT:    blr
   %c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -276,9 +280,10 @@ define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) {
 ; CHECK-NEXT:    vspltisw 4, 15
 ; CHECK-NEXT:    addis 3, 2, .LCPI21_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI21_0 at toc@l
+; CHECK-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-NEXT:    vsubuwm 3, 4, 3
 ; CHECK-NEXT:    vsraw 2, 2, 3
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vadduwm 2, 2, 3
 ; CHECK-NEXT:    blr
   %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>

diff  --git a/llvm/test/CodeGen/PowerPC/store_fptoi.ll b/llvm/test/CodeGen/PowerPC/store_fptoi.ll
index 5015c9de0b299..eda44437bb1f4 100644
--- a/llvm/test/CodeGen/PowerPC/store_fptoi.ll
+++ b/llvm/test/CodeGen/PowerPC/store_fptoi.ll
@@ -26,8 +26,9 @@ define void @qpConv2sdw(fp128* nocapture readonly %a, i64* nocapture %b) {
 ; CHECK-PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    std 0, 16(1)
 ; CHECK-PWR8-NEXT:    stdu 1, -48(1)
-; CHECK-PWR8-NEXT:    lvx 2, 0, 3
+; CHECK-PWR8-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-PWR8-NEXT:    mr 30, 4
+; CHECK-PWR8-NEXT:    xxswapd 2, 0
 ; CHECK-PWR8-NEXT:    bl __fixkfdi
 ; CHECK-PWR8-NEXT:    nop
 ; CHECK-PWR8-NEXT:    std 3, 0(30)
@@ -63,8 +64,9 @@ define void @qpConv2sw(fp128* nocapture readonly %a, i32* nocapture %b) {
 ; CHECK-PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    std 0, 16(1)
 ; CHECK-PWR8-NEXT:    stdu 1, -48(1)
-; CHECK-PWR8-NEXT:    lvx 2, 0, 3
+; CHECK-PWR8-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-PWR8-NEXT:    mr 30, 4
+; CHECK-PWR8-NEXT:    xxswapd 2, 0
 ; CHECK-PWR8-NEXT:    bl __fixkfsi
 ; CHECK-PWR8-NEXT:    nop
 ; CHECK-PWR8-NEXT:    stw 3, 0(30)
@@ -100,8 +102,9 @@ define void @qpConv2udw(fp128* nocapture readonly %a, i64* nocapture %b) {
 ; CHECK-PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    std 0, 16(1)
 ; CHECK-PWR8-NEXT:    stdu 1, -48(1)
-; CHECK-PWR8-NEXT:    lvx 2, 0, 3
+; CHECK-PWR8-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-PWR8-NEXT:    mr 30, 4
+; CHECK-PWR8-NEXT:    xxswapd 2, 0
 ; CHECK-PWR8-NEXT:    bl __fixunskfdi
 ; CHECK-PWR8-NEXT:    nop
 ; CHECK-PWR8-NEXT:    std 3, 0(30)
@@ -137,8 +140,9 @@ define void @qpConv2uw(fp128* nocapture readonly %a, i32* nocapture %b) {
 ; CHECK-PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    std 0, 16(1)
 ; CHECK-PWR8-NEXT:    stdu 1, -48(1)
-; CHECK-PWR8-NEXT:    lvx 2, 0, 3
+; CHECK-PWR8-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-PWR8-NEXT:    mr 30, 4
+; CHECK-PWR8-NEXT:    xxswapd 2, 0
 ; CHECK-PWR8-NEXT:    bl __fixunskfsi
 ; CHECK-PWR8-NEXT:    nop
 ; CHECK-PWR8-NEXT:    stw 3, 0(30)

diff  --git a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
index 216152c1277fd..288894b8f29c1 100644
--- a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
+++ b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -27,21 +27,24 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-LE-P7-NEXT:    addis r4, r2, .LCPI0_0 at toc@ha
 ; CHECK-LE-P7-NEXT:    addi r4, r4, .LCPI0_0 at toc@l
-; CHECK-LE-P7-NEXT:    lvx v3, 0, r4
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
+; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P7-NEXT:    lwz r3, -4(r1)
 ; CHECK-LE-P7-NEXT:    stw r3, -32(r1)
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -32
-; CHECK-LE-P7-NEXT:    lvx v4, 0, r3
+; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-LE-P7-NEXT:    xxswapd v4, vs1
 ; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    xscvdpsxws v3, f1
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
+; CHECK-LE-P8-NEXT:    xscvdpsxws v3, f1
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
-; CHECK-LE-P8-NEXT:    lvx v4, 0, r3
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -93,21 +96,24 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-LE-P7-NEXT:    addis r4, r2, .LCPI1_0 at toc@ha
 ; CHECK-LE-P7-NEXT:    addi r4, r4, .LCPI1_0 at toc@l
-; CHECK-LE-P7-NEXT:    lvx v3, 0, r4
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
+; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P7-NEXT:    lwz r3, -4(r1)
 ; CHECK-LE-P7-NEXT:    stw r3, -32(r1)
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -32
-; CHECK-LE-P7-NEXT:    lvx v4, 0, r3
+; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-LE-P7-NEXT:    xxswapd v4, vs1
 ; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test2:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    xscvdpsxws v3, f1
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
+; CHECK-LE-P8-NEXT:    xscvdpsxws v3, f1
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
-; CHECK-LE-P8-NEXT:    lvx v4, 0, r3
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -159,21 +165,24 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-LE-P7-NEXT:    addis r4, r2, .LCPI2_0 at toc@ha
 ; CHECK-LE-P7-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
-; CHECK-LE-P7-NEXT:    lvx v3, 0, r4
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
+; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P7-NEXT:    lwz r3, -4(r1)
 ; CHECK-LE-P7-NEXT:    stw r3, -32(r1)
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -32
-; CHECK-LE-P7-NEXT:    lvx v4, 0, r3
+; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-LE-P7-NEXT:    xxswapd v4, vs1
 ; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test3:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    xscvdpuxws v3, f1
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI2_0 at toc@ha
+; CHECK-LE-P8-NEXT:    xscvdpuxws v3, f1
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI2_0 at toc@l
-; CHECK-LE-P8-NEXT:    lvx v4, 0, r3
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -225,21 +234,24 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -4
 ; CHECK-LE-P7-NEXT:    addis r4, r2, .LCPI3_0 at toc@ha
 ; CHECK-LE-P7-NEXT:    addi r4, r4, .LCPI3_0 at toc@l
-; CHECK-LE-P7-NEXT:    lvx v3, 0, r4
 ; CHECK-LE-P7-NEXT:    stfiwx f0, 0, r3
+; CHECK-LE-P7-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P7-NEXT:    lwz r3, -4(r1)
 ; CHECK-LE-P7-NEXT:    stw r3, -32(r1)
 ; CHECK-LE-P7-NEXT:    addi r3, r1, -32
-; CHECK-LE-P7-NEXT:    lvx v4, 0, r3
+; CHECK-LE-P7-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P7-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-LE-P7-NEXT:    xxswapd v4, vs1
 ; CHECK-LE-P7-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P7-NEXT:    blr
 ;
 ; CHECK-LE-P8-LABEL: test4:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    xscvdpuxws v3, f1
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI3_0 at toc@ha
+; CHECK-LE-P8-NEXT:    xscvdpuxws v3, f1
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI3_0 at toc@l
-; CHECK-LE-P8-NEXT:    lvx v4, 0, r3
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/toc-float.ll b/llvm/test/CodeGen/PowerPC/toc-float.ll
index 30f3ac030575d..6b251f281fb2c 100644
--- a/llvm/test/CodeGen/PowerPC/toc-float.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-float.ll
@@ -150,7 +150,8 @@ define <4 x i32> @vectorArray() #0 {
 ; CHECK-P8-NEXT:    addis 3, 2, .LC0 at toc@ha
 ; CHECK-P8-NEXT:    ld 3, .LC0 at toc@l(3)
 ; CHECK-P8-NEXT:    addi 3, 3, 32
-; CHECK-P8-NEXT:    lvx 2, 0, 3
+; CHECK-P8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-P8-NEXT:    xxswapd 34, 0
 ; CHECK-P8-NEXT:    blr
 entry:
   %0 = load <4 x i32>, <4 x i32>* getelementptr inbounds ([10 x <4 x i32>], [10 x <4 x i32>]* @vec_arr, i64 0, i64 2), align 16

diff  --git a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
index 27593cec1d6a6..54e9221d46fc2 100644
--- a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
@@ -163,36 +163,42 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    addis 6, 2, .LCPI4_0 at toc@ha
 ; PPC64LE-NEXT:    mtfprwz 0, 3
-; PPC64LE-NEXT:    mtfprwz 1, 4
-; PPC64LE-NEXT:    addi 3, 6, .LCPI4_0 at toc@l
-; PPC64LE-NEXT:    addis 4, 2, .LCPI4_2 at toc@ha
-; PPC64LE-NEXT:    lvx 2, 0, 3
-; PPC64LE-NEXT:    mtvsrwz 36, 5
 ; PPC64LE-NEXT:    addis 3, 2, .LCPI4_1 at toc@ha
-; PPC64LE-NEXT:    xxmrghw 35, 1, 0
+; PPC64LE-NEXT:    addi 6, 6, .LCPI4_0 at toc@l
+; PPC64LE-NEXT:    mtfprwz 2, 4
 ; PPC64LE-NEXT:    addi 3, 3, .LCPI4_1 at toc@l
-; PPC64LE-NEXT:    vperm 2, 4, 3, 2
-; PPC64LE-NEXT:    vspltisw 3, -11
-; PPC64LE-NEXT:    lvx 4, 0, 3
+; PPC64LE-NEXT:    addis 4, 2, .LCPI4_2 at toc@ha
+; PPC64LE-NEXT:    lxvd2x 1, 0, 6
+; PPC64LE-NEXT:    mtvsrwz 36, 5
+; PPC64LE-NEXT:    xxmrghw 34, 2, 0
+; PPC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PPC64LE-NEXT:    addi 3, 4, .LCPI4_2 at toc@l
 ; PPC64LE-NEXT:    addis 4, 2, .LCPI4_4 at toc@ha
-; PPC64LE-NEXT:    lvx 5, 0, 3
-; PPC64LE-NEXT:    addis 3, 2, .LCPI4_3 at toc@ha
 ; PPC64LE-NEXT:    addi 4, 4, .LCPI4_4 at toc@l
+; PPC64LE-NEXT:    xxswapd 35, 1
+; PPC64LE-NEXT:    lxvd2x 1, 0, 3
+; PPC64LE-NEXT:    addis 3, 2, .LCPI4_3 at toc@ha
 ; PPC64LE-NEXT:    addi 3, 3, .LCPI4_3 at toc@l
-; PPC64LE-NEXT:    vsrw 3, 3, 3
-; PPC64LE-NEXT:    vsubuwm 2, 2, 4
-; PPC64LE-NEXT:    lvx 4, 0, 3
+; PPC64LE-NEXT:    vperm 2, 4, 2, 3
+; PPC64LE-NEXT:    vspltisw 3, -11
+; PPC64LE-NEXT:    xxswapd 36, 0
+; PPC64LE-NEXT:    xxswapd 37, 1
+; PPC64LE-NEXT:    lxvd2x 0, 0, 3
+; PPC64LE-NEXT:    lxvd2x 1, 0, 4
 ; PPC64LE-NEXT:    addis 3, 2, .LCPI4_5 at toc@ha
 ; PPC64LE-NEXT:    addi 3, 3, .LCPI4_5 at toc@l
+; PPC64LE-NEXT:    vsrw 3, 3, 3
+; PPC64LE-NEXT:    vsubuwm 2, 2, 4
+; PPC64LE-NEXT:    xxswapd 36, 0
+; PPC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PPC64LE-NEXT:    vmuluwm 2, 2, 5
-; PPC64LE-NEXT:    lvx 5, 0, 4
+; PPC64LE-NEXT:    xxswapd 37, 1
 ; PPC64LE-NEXT:    xxland 32, 34, 35
 ; PPC64LE-NEXT:    vslw 2, 2, 4
 ; PPC64LE-NEXT:    vsrw 4, 0, 5
-; PPC64LE-NEXT:    xxlor 0, 36, 34
-; PPC64LE-NEXT:    lvx 2, 0, 3
-; PPC64LE-NEXT:    xxland 35, 0, 35
+; PPC64LE-NEXT:    xxlor 1, 36, 34
+; PPC64LE-NEXT:    xxswapd 34, 0
+; PPC64LE-NEXT:    xxland 35, 1, 35
 ; PPC64LE-NEXT:    vcmpgtuw 2, 3, 2
 ; PPC64LE-NEXT:    xxswapd 0, 34
 ; PPC64LE-NEXT:    xxsldwi 1, 34, 34, 1

diff  --git a/llvm/test/CodeGen/PowerPC/vavg.ll b/llvm/test/CodeGen/PowerPC/vavg.ll
index 57587b6edb34d..482e76c3b7d7e 100644
--- a/llvm/test/CodeGen/PowerPC/vavg.ll
+++ b/llvm/test/CodeGen/PowerPC/vavg.ll
@@ -150,22 +150,24 @@ define <8 x i16> @test_v8i16_sign_negative(<8 x i16> %m, <8 x i16> %n) {
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI6_0 at toc@ha
 ; CHECK-P8-NEXT:    vadduhm 2, 2, 3
-; CHECK-P8-NEXT:    vspltish 4, 1
+; CHECK-P8-NEXT:    vspltish 3, 1
 ; CHECK-P8-NEXT:    addi 3, 3, .LCPI6_0 at toc@l
-; CHECK-P8-NEXT:    lvx 3, 0, 3
-; CHECK-P8-NEXT:    vadduhm 2, 2, 3
-; CHECK-P8-NEXT:    vsrah 2, 2, 4
+; CHECK-P8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-P8-NEXT:    xxswapd 36, 0
+; CHECK-P8-NEXT:    vadduhm 2, 2, 4
+; CHECK-P8-NEXT:    vsrah 2, 2, 3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: test_v8i16_sign_negative:
 ; CHECK-P7:       # %bb.0: # %entry
 ; CHECK-P7-NEXT:    addis 3, 2, .LCPI6_0 at toc@ha
 ; CHECK-P7-NEXT:    vadduhm 2, 2, 3
-; CHECK-P7-NEXT:    vspltish 4, 1
+; CHECK-P7-NEXT:    vspltish 3, 1
 ; CHECK-P7-NEXT:    addi 3, 3, .LCPI6_0 at toc@l
-; CHECK-P7-NEXT:    lvx 3, 0, 3
-; CHECK-P7-NEXT:    vadduhm 2, 2, 3
-; CHECK-P7-NEXT:    vsrah 2, 2, 4
+; CHECK-P7-NEXT:    lxvd2x 0, 0, 3
+; CHECK-P7-NEXT:    xxswapd 36, 0
+; CHECK-P7-NEXT:    vadduhm 2, 2, 4
+; CHECK-P7-NEXT:    vsrah 2, 2, 3
 ; CHECK-P7-NEXT:    blr
 entry:
   %add = add <8 x i16> %m, <i16 1, i16 1, i16 1, i16 -1, i16 1, i16 1, i16 1, i16 1>

diff  --git a/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll
index e9d792c6a4408..4d791a0f10b3e 100644
--- a/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll
@@ -24,12 +24,14 @@ define i1 @shufeq(<2 x i64> %a) #0 {
 ; CHECK_LE-NEXT:    xxswapd 35, 34
 ; CHECK_LE-NEXT:    addis 3, 2, .LCPI0_0 at toc@ha
 ; CHECK_LE-NEXT:    addi 3, 3, .LCPI0_0 at toc@l
-; CHECK_LE-NEXT:    vcmpequw 2, 2, 3
-; CHECK_LE-NEXT:    lvx 3, 0, 3
+; CHECK_LE-NEXT:    lxvd2x 0, 0, 3
 ; CHECK_LE-NEXT:    addi 3, 1, -16
+; CHECK_LE-NEXT:    vcmpequw 2, 2, 3
+; CHECK_LE-NEXT:    xxswapd 35, 0
 ; CHECK_LE-NEXT:    vperm 3, 2, 2, 3
-; CHECK_LE-NEXT:    xxland 34, 35, 34
-; CHECK_LE-NEXT:    stvx 2, 0, 3
+; CHECK_LE-NEXT:    xxland 0, 35, 34
+; CHECK_LE-NEXT:    xxswapd 0, 0
+; CHECK_LE-NEXT:    stxvd2x 0, 0, 3
 ; CHECK_LE-NEXT:    ld 3, -16(1)
 ; CHECK_LE-NEXT:    blr
 ;
@@ -66,12 +68,14 @@ define i1 @shufne(<2 x i64> %a) #0 {
 ; CHECK_LE-NEXT:    xxswapd 35, 34
 ; CHECK_LE-NEXT:    addis 3, 2, .LCPI1_0 at toc@ha
 ; CHECK_LE-NEXT:    addi 3, 3, .LCPI1_0 at toc@l
-; CHECK_LE-NEXT:    vcmpequw 2, 2, 3
-; CHECK_LE-NEXT:    lvx 3, 0, 3
+; CHECK_LE-NEXT:    lxvd2x 0, 0, 3
 ; CHECK_LE-NEXT:    addi 3, 1, -16
+; CHECK_LE-NEXT:    vcmpequw 2, 2, 3
+; CHECK_LE-NEXT:    xxswapd 35, 0
 ; CHECK_LE-NEXT:    vperm 3, 2, 2, 3
-; CHECK_LE-NEXT:    xxland 34, 35, 34
-; CHECK_LE-NEXT:    stvx 2, 0, 3
+; CHECK_LE-NEXT:    xxland 0, 35, 34
+; CHECK_LE-NEXT:    xxswapd 0, 0
+; CHECK_LE-NEXT:    stxvd2x 0, 0, 3
 ; CHECK_LE-NEXT:    ld 3, -16(1)
 ; CHECK_LE-NEXT:    blr
 ;
@@ -101,10 +105,11 @@ define <2 x i64> @cmpeq(<2 x i64> noundef %a, <2 x i64> noundef %b) {
 ;
 ; CHECK_LE-LABEL: cmpeq:
 ; CHECK_LE:       # %bb.0: # %entry
-; CHECK_LE-NEXT:    vcmpequw 2, 2, 3
 ; CHECK_LE-NEXT:    addis 3, 2, .LCPI2_0 at toc@ha
+; CHECK_LE-NEXT:    vcmpequw 2, 2, 3
 ; CHECK_LE-NEXT:    addi 3, 3, .LCPI2_0 at toc@l
-; CHECK_LE-NEXT:    lvx 3, 0, 3
+; CHECK_LE-NEXT:    lxvd2x 0, 0, 3
+; CHECK_LE-NEXT:    xxswapd 35, 0
 ; CHECK_LE-NEXT:    vperm 3, 2, 2, 3
 ; CHECK_LE-NEXT:    xxland 34, 35, 34
 ; CHECK_LE-NEXT:    blr
@@ -132,10 +137,11 @@ define <2 x i64> @cmpne(<2 x i64> noundef %a, <2 x i64> noundef %b) {
 ;
 ; CHECK_LE-LABEL: cmpne:
 ; CHECK_LE:       # %bb.0: # %entry
-; CHECK_LE-NEXT:    vcmpequw 2, 2, 3
 ; CHECK_LE-NEXT:    addis 3, 2, .LCPI3_0 at toc@ha
+; CHECK_LE-NEXT:    vcmpequw 2, 2, 3
 ; CHECK_LE-NEXT:    addi 3, 3, .LCPI3_0 at toc@l
-; CHECK_LE-NEXT:    lvx 3, 0, 3
+; CHECK_LE-NEXT:    lxvd2x 0, 0, 3
+; CHECK_LE-NEXT:    xxswapd 35, 0
 ; CHECK_LE-NEXT:    xxlnor 34, 34, 34
 ; CHECK_LE-NEXT:    vperm 3, 2, 2, 3
 ; CHECK_LE-NEXT:    xxlor 34, 35, 34

diff  --git a/llvm/test/CodeGen/PowerPC/vec-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
index 8274c493d6a28..f28ec0e761b78 100644
--- a/llvm/test/CodeGen/PowerPC/vec-itofp.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
@@ -14,31 +14,36 @@ define void @test8(<8 x double>* nocapture %Sink, <8 x i16>* nocapture readonly
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI0_0 at toc@ha
 ; CHECK-P8-NEXT:    addis r6, r2, .LCPI0_2 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI0_1 at toc@ha
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI0_3 at toc@ha
 ; CHECK-P8-NEXT:    xxlxor v4, v4, v4
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI0_0 at toc@l
-; CHECK-P8-NEXT:    addi r6, r6, .LCPI0_2 at toc@l
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI0_1 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI0_3 at toc@ha
-; CHECK-P8-NEXT:    lvx v5, 0, r6
-; CHECK-P8-NEXT:    lvx v1, 0, r4
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI0_3 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    addi r5, r6, .LCPI0_2 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI0_3 at toc@l
-; CHECK-P8-NEXT:    lvx v0, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI0_1 at toc@ha
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI0_1 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs4, 0, r5
+; CHECK-P8-NEXT:    xxswapd v0, vs3
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    vperm v2, v4, v3, v2
-; CHECK-P8-NEXT:    vperm v5, v4, v3, v5
-; CHECK-P8-NEXT:    vperm v0, v4, v3, v0
-; CHECK-P8-NEXT:    vperm v3, v4, v3, v1
-; CHECK-P8-NEXT:    xvcvuxddp vs0, v2
-; CHECK-P8-NEXT:    xvcvuxddp vs1, v5
+; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    xxswapd v1, vs4
+; CHECK-P8-NEXT:    vperm v0, v4, v2, v0
+; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
+; CHECK-P8-NEXT:    vperm v2, v4, v2, v1
 ; CHECK-P8-NEXT:    xvcvuxddp vs2, v0
-; CHECK-P8-NEXT:    xvcvuxddp vs3, v3
+; CHECK-P8-NEXT:    xvcvuxddp vs0, v3
+; CHECK-P8-NEXT:    xvcvuxddp vs1, v5
+; CHECK-P8-NEXT:    xvcvuxddp vs3, v2
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
 ; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
 ; CHECK-P8-NEXT:    li r4, 16
@@ -118,17 +123,20 @@ define void @test4(<4 x double>* nocapture %Sink, <4 x i16>* nocapture readonly
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI1_0 at toc@ha
 ; CHECK-P8-NEXT:    addis r6, r2, .LCPI1_1 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI1_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r5
-; CHECK-P8-NEXT:    addi r5, r6, .LCPI1_1 at toc@l
-; CHECK-P8-NEXT:    lvx v5, 0, r5
-; CHECK-P8-NEXT:    vperm v2, v4, v3, v2
-; CHECK-P8-NEXT:    vperm v3, v4, v3, v5
-; CHECK-P8-NEXT:    xvcvuxddp vs0, v2
-; CHECK-P8-NEXT:    xvcvuxddp vs1, v3
+; CHECK-P8-NEXT:    addi r4, r6, .LCPI1_1 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P8-NEXT:    vperm v2, v4, v2, v5
+; CHECK-P8-NEXT:    xvcvuxddp vs0, v3
+; CHECK-P8-NEXT:    xvcvuxddp vs1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
@@ -181,11 +189,13 @@ define void @test2(<2 x double>* nocapture %Sink, <2 x i16>* nocapture readonly
 ; CHECK-P8-LABEL: test2:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_0 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    xxlxor v4, v4, v4
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r5
-; CHECK-P8-NEXT:    vperm v2, v4, v3, v2
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-P8-NEXT:    xvcvuxddp vs0, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
@@ -226,37 +236,42 @@ define void @stest8(<8 x double>* nocapture %Sink, <8 x i16>* nocapture readonly
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_0 at toc@ha
 ; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_2 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_1 at toc@ha
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_3 at toc@ha
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_0 at toc@l
-; CHECK-P8-NEXT:    addi r6, r6, .LCPI3_2 at toc@l
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_3 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    addi r5, r6, .LCPI3_2 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_1 at toc@ha
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_4 at toc@ha
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_1 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_3 at toc@ha
-; CHECK-P8-NEXT:    lvx v4, 0, r6
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_4 at toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_4 at toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_3 at toc@l
-; CHECK-P8-NEXT:    lvx v5, 0, r5
-; CHECK-P8-NEXT:    addi r5, r6, .LCPI3_4 at toc@l
-; CHECK-P8-NEXT:    lvx v0, 0, r5
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P8-NEXT:    lxvd2x vs4, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v5, vs3
 ; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    vperm v4, v3, v3, v4
-; CHECK-P8-NEXT:    vperm v5, v3, v3, v5
-; CHECK-P8-NEXT:    vperm v3, v3, v3, v0
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    xxswapd v0, vs4
+; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
+; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v0
 ; CHECK-P8-NEXT:    xxswapd v0, vs0
-; CHECK-P8-NEXT:    vsld v2, v2, v0
+; CHECK-P8-NEXT:    vsld v3, v3, v0
 ; CHECK-P8-NEXT:    vsld v4, v4, v0
 ; CHECK-P8-NEXT:    vsld v5, v5, v0
-; CHECK-P8-NEXT:    vsld v3, v3, v0
-; CHECK-P8-NEXT:    vsrad v2, v2, v0
+; CHECK-P8-NEXT:    vsld v2, v2, v0
 ; CHECK-P8-NEXT:    vsrad v3, v3, v0
+; CHECK-P8-NEXT:    vsrad v2, v2, v0
 ; CHECK-P8-NEXT:    vsrad v4, v4, v0
 ; CHECK-P8-NEXT:    vsrad v5, v5, v0
-; CHECK-P8-NEXT:    xvcvsxddp vs2, v3
-; CHECK-P8-NEXT:    xvcvsxddp vs0, v2
+; CHECK-P8-NEXT:    xvcvsxddp vs2, v2
+; CHECK-P8-NEXT:    xvcvsxddp vs0, v3
 ; CHECK-P8-NEXT:    xvcvsxddp vs1, v5
 ; CHECK-P8-NEXT:    xvcvsxddp vs3, v4
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
@@ -347,24 +362,27 @@ define void @stest4(<4 x double>* nocapture %Sink, <4 x i16>* nocapture readonly
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI4_0 at toc@ha
 ; CHECK-P8-NEXT:    addis r6, r2, .LCPI4_2 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI4_1 at toc@ha
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI4_0 at toc@l
+; CHECK-P8-NEXT:    addi r4, r6, .LCPI4_2 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI4_1 at toc@ha
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI4_1 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r5
-; CHECK-P8-NEXT:    addi r5, r6, .LCPI4_2 at toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lvx v4, 0, r5
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
-; CHECK-P8-NEXT:    vperm v3, v3, v3, v4
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v4
 ; CHECK-P8-NEXT:    xxswapd v4, vs0
-; CHECK-P8-NEXT:    vsld v2, v2, v4
 ; CHECK-P8-NEXT:    vsld v3, v3, v4
-; CHECK-P8-NEXT:    vsrad v2, v2, v4
+; CHECK-P8-NEXT:    vsld v2, v2, v4
 ; CHECK-P8-NEXT:    vsrad v3, v3, v4
-; CHECK-P8-NEXT:    xvcvsxddp vs0, v2
-; CHECK-P8-NEXT:    xvcvsxddp vs1, v3
+; CHECK-P8-NEXT:    vsrad v2, v2, v4
+; CHECK-P8-NEXT:    xvcvsxddp vs0, v3
+; CHECK-P8-NEXT:    xvcvsxddp vs1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
@@ -419,13 +437,15 @@ define void @stest2(<2 x double>* nocapture %Sink, <2 x i16>* nocapture readonly
 ; CHECK-P8-LABEL: stest2:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI5_0 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_1 at toc@ha
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI5_0 at toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_1 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vsld v2, v2, v3
 ; CHECK-P8-NEXT:    vsrad v2, v2, v3

diff  --git a/llvm/test/CodeGen/PowerPC/vec-trunc.ll b/llvm/test/CodeGen/PowerPC/vec-trunc.ll
index e8ca7bff71084..bfec8e0a7ec11 100644
--- a/llvm/test/CodeGen/PowerPC/vec-trunc.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-trunc.ll
@@ -9,7 +9,8 @@
 define void @test8i8(<8 x i8>* nocapture %Sink, <8 x i16>* nocapture readonly %SrcPtr) {
 ; CHECK-LABEL: test8i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lvx v2, 0, r4
+; CHECK-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-NEXT:    xxswapd v2, vs0
 ; CHECK-NEXT:    vpkuhum v2, v2, v2
 ; CHECK-NEXT:    xxswapd vs0, v2
 ; CHECK-NEXT:    stfdx f0, 0, r3
@@ -34,7 +35,8 @@ entry:
 define void @test4i8(<4 x i8>* nocapture %Sink, <4 x i16>* nocapture readonly %SrcPtr) {
 ; CHECK-LABEL: test4i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lvx v2, 0, r4
+; CHECK-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-NEXT:    xxswapd v2, vs0
 ; CHECK-NEXT:    vpkuhum v2, v2, v2
 ; CHECK-NEXT:    xxsldwi vs0, v2, v2, 2
 ; CHECK-NEXT:    stfiwx f0, 0, r3
@@ -60,10 +62,12 @@ define void @test4i8w(<4 x i8>* nocapture %Sink, <4 x i32>* nocapture readonly %
 ; CHECK-LABEL: test4i8w:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis r5, r2, .LCPI2_0 at toc@ha
-; CHECK-NEXT:    lvx v3, 0, r4
+; CHECK-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-NEXT:    addi r5, r5, .LCPI2_0 at toc@l
-; CHECK-NEXT:    lvx v2, 0, r5
-; CHECK-NEXT:    vperm v2, v3, v3, v2
+; CHECK-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-NEXT:    xxswapd v2, vs0
+; CHECK-NEXT:    xxswapd v3, vs1
+; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    xxsldwi vs0, v2, v2, 2
 ; CHECK-NEXT:    stfiwx f0, 0, r3
 ; CHECK-NEXT:    blr
@@ -90,7 +94,8 @@ entry:
 define void @test2i8(<2 x i8>* nocapture %Sink, <2 x i16>* nocapture readonly %SrcPtr) {
 ; CHECK-LABEL: test2i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lvx v2, 0, r4
+; CHECK-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-NEXT:    xxswapd v2, vs0
 ; CHECK-NEXT:    vpkuhum v2, v2, v2
 ; CHECK-NEXT:    xxswapd vs0, v2
 ; CHECK-NEXT:    mffprd r4, f0
@@ -117,7 +122,8 @@ entry:
 define void @test4i16(<4 x i16>* nocapture %Sink, <4 x i32>* nocapture readonly %SrcPtr) {
 ; CHECK-LABEL: test4i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lvx v2, 0, r4
+; CHECK-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-NEXT:    xxswapd v2, vs0
 ; CHECK-NEXT:    vpkuwum v2, v2, v2
 ; CHECK-NEXT:    xxswapd vs0, v2
 ; CHECK-NEXT:    stfdx f0, 0, r3
@@ -142,7 +148,8 @@ entry:
 define void @test2i16(<2 x i16>* nocapture %Sink, <2 x i32>* nocapture readonly %SrcPtr) {
 ; CHECK-LABEL: test2i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lvx v2, 0, r4
+; CHECK-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-NEXT:    xxswapd v2, vs0
 ; CHECK-NEXT:    vpkuwum v2, v2, v2
 ; CHECK-NEXT:    xxsldwi vs0, v2, v2, 2
 ; CHECK-NEXT:    stfiwx f0, 0, r3
@@ -167,11 +174,12 @@ entry:
 define void @test2i16d(<2 x i16>* nocapture %Sink, <2 x i64>* nocapture readonly %SrcPtr) {
 ; CHECK-LABEL: test2i16d:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-NEXT:    addis r5, r2, .LCPI6_0 at toc@ha
-; CHECK-NEXT:    addi r4, r5, .LCPI6_0 at toc@l
-; CHECK-NEXT:    lvx v3, 0, r4
+; CHECK-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-NEXT:    addi r5, r5, .LCPI6_0 at toc@l
+; CHECK-NEXT:    lxvd2x vs1, 0, r5
 ; CHECK-NEXT:    xxswapd v2, vs0
+; CHECK-NEXT:    xxswapd v3, vs1
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    xxsldwi vs0, v2, v2, 2
 ; CHECK-NEXT:    stfiwx f0, 0, r3

diff  --git a/llvm/test/CodeGen/PowerPC/vec-trunc2.ll b/llvm/test/CodeGen/PowerPC/vec-trunc2.ll
index 7f49208b9a292..f5f09b4e85a6d 100644
--- a/llvm/test/CodeGen/PowerPC/vec-trunc2.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-trunc2.ll
@@ -9,19 +9,20 @@
 define dso_local <8 x i8> @test8x32(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8) {
 ; CHECK-LABEL: test8x32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addis r11, r2, .LCPI0_0 at toc@ha
 ; CHECK-NEXT:    rldimi r3, r4, 32, 0
 ; CHECK-NEXT:    rldimi r5, r6, 32, 0
-; CHECK-NEXT:    addis r11, r2, .LCPI0_0 at toc@ha
-; CHECK-NEXT:    rldimi r7, r8, 32, 0
-; CHECK-NEXT:    rldimi r9, r10, 32, 0
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    addi r3, r11, .LCPI0_0 at toc@l
+; CHECK-NEXT:    rldimi r7, r8, 32, 0
+; CHECK-NEXT:    rldimi r9, r10, 32, 0
+; CHECK-NEXT:    lxvd2x vs3, 0, r3
 ; CHECK-NEXT:    mtfprd f1, r5
-; CHECK-NEXT:    lvx v4, 0, r3
 ; CHECK-NEXT:    mtfprd f2, r7
-; CHECK-NEXT:    mtfprd f3, r9
+; CHECK-NEXT:    mtfprd f4, r9
 ; CHECK-NEXT:    xxmrghd v2, vs1, vs0
-; CHECK-NEXT:    xxmrghd v3, vs3, vs2
+; CHECK-NEXT:    xxswapd v4, vs3
+; CHECK-NEXT:    xxmrghd v3, vs4, vs2
 ; CHECK-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-NEXT:    blr
 ;
@@ -79,13 +80,14 @@ define dso_local <4 x i16> @test4x64(i64 %i1, i64 %i2, i64 %i3, i64 %i4) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis r7, r2, .LCPI1_0 at toc@ha
 ; CHECK-NEXT:    mtfprd f0, r5
+; CHECK-NEXT:    addi r5, r7, .LCPI1_0 at toc@l
 ; CHECK-NEXT:    mtfprd f1, r6
+; CHECK-NEXT:    lxvd2x vs3, 0, r5
 ; CHECK-NEXT:    mtfprd f2, r3
-; CHECK-NEXT:    addi r3, r7, .LCPI1_0 at toc@l
-; CHECK-NEXT:    mtfprd f3, r4
+; CHECK-NEXT:    mtfprd f4, r4
 ; CHECK-NEXT:    xxmrghd v2, vs1, vs0
-; CHECK-NEXT:    lvx v4, 0, r3
-; CHECK-NEXT:    xxmrghd v3, vs3, vs2
+; CHECK-NEXT:    xxmrghd v3, vs4, vs2
+; CHECK-NEXT:    xxswapd v4, vs3
 ; CHECK-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll b/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll
index 3a7bfcfb19f1d..e3890faa388e7 100644
--- a/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll
@@ -9,10 +9,11 @@
 define <2 x i64> @v2si64_cmp(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
 ; CHECK-LABEL: v2si64_cmp:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcmpequw 2, 2, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI0_0 at toc@ha
+; CHECK-NEXT:    vcmpequw 2, 2, 3
 ; CHECK-NEXT:    addi 3, 3, .LCPI0_0 at toc@l
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vperm 3, 2, 2, 3
 ; CHECK-NEXT:    xxland 34, 35, 34
 ; CHECK-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/vec_constants.ll b/llvm/test/CodeGen/PowerPC/vec_constants.ll
index 1dd34becd5b91..339b0d1066473 100644
--- a/llvm/test/CodeGen/PowerPC/vec_constants.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_constants.ll
@@ -21,17 +21,23 @@ define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind {
 ;
 ; LE-LABEL: test1:
 ; LE:       # %bb.0:
-; LE-NEXT:    lvx 2, 0, 3
+; LE-NEXT:    lxvd2x 0, 0, 3
+; LE-NEXT:    xxswapd 34, 0
 ; LE-NEXT:    vspltisb 3, -1
 ; LE-NEXT:    vslw 3, 3, 3
-; LE-NEXT:    xxland 34, 34, 35
-; LE-NEXT:    stvx 2, 0, 3
-; LE-NEXT:    lvx 2, 0, 4
-; LE-NEXT:    xxlandc 34, 34, 35
-; LE-NEXT:    stvx 2, 0, 4
-; LE-NEXT:    lvx 2, 0, 5
-; LE-NEXT:    xvabssp 34, 34
-; LE-NEXT:    stvx 2, 0, 5
+; LE-NEXT:    xxland 0, 34, 35
+; LE-NEXT:    xxswapd 0, 0
+; LE-NEXT:    stxvd2x 0, 0, 3
+; LE-NEXT:    lxvd2x 0, 0, 4
+; LE-NEXT:    xxswapd 34, 0
+; LE-NEXT:    xxlandc 0, 34, 35
+; LE-NEXT:    xxswapd 0, 0
+; LE-NEXT:    stxvd2x 0, 0, 4
+; LE-NEXT:    lxvd2x 0, 0, 5
+; LE-NEXT:    xxswapd 34, 0
+; LE-NEXT:    xvabssp 0, 34
+; LE-NEXT:    xxswapd 0, 0
+; LE-NEXT:    stxvd2x 0, 0, 5
 ; LE-NEXT:    blr
 	%tmp = load <4 x i32>, <4 x i32>* %P1		; <<4 x i32>> [#uses=1]
 	%tmp4 = and <4 x i32> %tmp, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 >		; <<4 x i32>> [#uses=1]

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
index 2fec5286d1142..1ac409466ed77 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
@@ -166,53 +166,53 @@ entry:
 define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lvx v3, r3, r4
-; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT:    xxswapd vs1, v2
-; CHECK-P8-NEXT:    xscvspdpn f2, v2
-; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 1
-; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 3
-; CHECK-P8-NEXT:    xscvspdpn f3, v3
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 3
+; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f3, v2
+; CHECK-P8-NEXT:    xxsldwi vs6, v3, v3, 3
+; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f5, v3
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P8-NEXT:    xscvspdpn f4, vs4
-; CHECK-P8-NEXT:    xscvspdpn f5, vs5
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvspdpn f6, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvspdpn f7, vs7
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxswapd vs0, v3
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xxsldwi vs1, v3, v3, 1
+; CHECK-P8-NEXT:    mffprwz r4, f3
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    xscvdpsxws f2, f4
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f4, f5
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghh v2, v4, v2
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    mffprwz r3, f3
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    vmrghh v3, v3, v4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f6
+; CHECK-P8-NEXT:    xscvdpsxws f2, f7
+; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f4
 ; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    mtvsrd v5, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    vmrghh v5, v0, v5
+; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    vmrghh v2, v2, v4
+; CHECK-P8-NEXT:    vmrghh v3, v3, v5
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    mtvsrd v0, r3
+; CHECK-P8-NEXT:    mtvsrd v1, r4
 ; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P8-NEXT:    mtvsrd v1, r3
-; CHECK-P8-NEXT:    vmrghh v4, v4, v1
-; CHECK-P8-NEXT:    xxmrglw vs1, v4, v5
+; CHECK-P8-NEXT:    vmrghh v4, v4, v0
+; CHECK-P8-NEXT:    vmrghh v5, v5, v1
+; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -329,105 +329,107 @@ entry:
 define void @test16elt(<16 x i16>* noalias nocapture sret(<16 x i16>) %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lvx v5, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    lvx v3, r4, r5
-; CHECK-P8-NEXT:    lvx v2, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
 ; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    xxsldwi vs0, v5, v5, 3
-; CHECK-P8-NEXT:    xscvspdpn f1, v5
-; CHECK-P8-NEXT:    lvx v4, r4, r6
-; CHECK-P8-NEXT:    xxswapd vs3, v5
-; CHECK-P8-NEXT:    xxsldwi vs5, v5, v5, 1
-; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 3
-; CHECK-P8-NEXT:    xxswapd vs8, v3
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r6
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvspdpn f3, vs3
-; CHECK-P8-NEXT:    xscvspdpn f5, vs5
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvspdpn f7, vs7
-; CHECK-P8-NEXT:    xscvspdpn f8, vs8
+; CHECK-P8-NEXT:    xscvspdpn f4, vs1
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs2
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    xxsldwi vs6, v2, v2, 3
+; CHECK-P8-NEXT:    xxsldwi vs8, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f7, v2
+; CHECK-P8-NEXT:    xxsldwi vs9, v3, v3, 3
+; CHECK-P8-NEXT:    xscvspdpn f5, vs3
+; CHECK-P8-NEXT:    xxswapd v0, vs3
+; CHECK-P8-NEXT:    xscvspdpn f6, vs6
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvspdpn f8, vs8
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xscvspdpn f9, vs9
 ; CHECK-P8-NEXT:    xscvspdpn f2, v3
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f5
+; CHECK-P8-NEXT:    xscvdpsxws f6, f6
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f7
+; CHECK-P8-NEXT:    xxsldwi vs7, v0, v0, 3
+; CHECK-P8-NEXT:    mtvsrd v2, r4
+; CHECK-P8-NEXT:    mffprwz r4, f4
+; CHECK-P8-NEXT:    xscvdpsxws f4, f8
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r4, f6
+; CHECK-P8-NEXT:    xscvdpsxws f6, f9
+; CHECK-P8-NEXT:    xscvspdpn f10, v4
+; CHECK-P8-NEXT:    mtvsrd v1, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f0
 ; CHECK-P8-NEXT:    xxsldwi vs0, v3, v3, 1
-; CHECK-P8-NEXT:    xscvspdpn f4, v2
-; CHECK-P8-NEXT:    xscvdpsxws f5, f7
-; CHECK-P8-NEXT:    xxsldwi vs7, v4, v4, 3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvspdpn f3, v0
 ; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f6, v4
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f8
-; CHECK-P8-NEXT:    xxswapd vs8, v4
+; CHECK-P8-NEXT:    mffprwz r4, f4
+; CHECK-P8-NEXT:    xxsldwi vs4, v4, v4, 3
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    vmrghh v2, v2, v1
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    mffprwz r4, f6
+; CHECK-P8-NEXT:    xxsldwi vs6, v4, v4, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    vmrghh v3, v3, v1
 ; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f5
-; CHECK-P8-NEXT:    xxswapd vs5, v2
-; CHECK-P8-NEXT:    xscvspdpn f3, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    vmrghh v3, v0, v3
-; CHECK-P8-NEXT:    mtvsrd v0, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvdpsxws f6, f6
-; CHECK-P8-NEXT:    xscvspdpn f1, vs5
-; CHECK-P8-NEXT:    xxsldwi vs5, v2, v2, 1
-; CHECK-P8-NEXT:    mtvsrd v6, r4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f10
+; CHECK-P8-NEXT:    vmrghh v4, v5, v1
+; CHECK-P8-NEXT:    xscvspdpn f4, vs4
+; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mtvsrd v1, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xxsldwi vs2, v0, v0, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghh v2, v5, v1
-; CHECK-P8-NEXT:    vmrghh v5, v6, v0
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
-; CHECK-P8-NEXT:    xscvdpsxws f2, f3
-; CHECK-P8-NEXT:    xscvspdpn f5, vs5
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvspdpn f6, vs6
 ; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    mffprwz r4, f1
 ; CHECK-P8-NEXT:    xscvspdpn f7, vs7
+; CHECK-P8-NEXT:    xscvdpsxws f1, f4
 ; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xxsldwi vs2, v4, v4, 1
-; CHECK-P8-NEXT:    xscvspdpn f8, vs8
-; CHECK-P8-NEXT:    xscvdpsxws f0, f5
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f3, f7
-; CHECK-P8-NEXT:    mtvsrd v8, r4
+; CHECK-P8-NEXT:    mffprwz r4, f3
+; CHECK-P8-NEXT:    xscvdpsxws f3, f6
+; CHECK-P8-NEXT:    mtvsrd v0, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f8
+; CHECK-P8-NEXT:    xscvspdpn f0, vs2
+; CHECK-P8-NEXT:    mtvsrd v8, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f7
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    vmrghh v0, v0, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
-; CHECK-P8-NEXT:    vmrghh v4, v8, v4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    vmrghh v6, v6, v8
 ; CHECK-P8-NEXT:    mtvsrd v8, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    vmrghh v1, v1, v9
-; CHECK-P8-NEXT:    xxmrglw vs1, v0, v5
+; CHECK-P8-NEXT:    vmrghh v5, v5, v9
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    vmrghh v7, v8, v7
-; CHECK-P8-NEXT:    vmrghh v6, v6, v9
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    vmrghh v7, v7, v8
+; CHECK-P8-NEXT:    xxmrglw vs1, v6, v4
+; CHECK-P8-NEXT:    mtvsrd v8, r4
+; CHECK-P8-NEXT:    vmrghh v1, v1, v9
+; CHECK-P8-NEXT:    vmrghh v0, v0, v8
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
-; CHECK-P8-NEXT:    xxmrglw vs2, v1, v4
-; CHECK-P8-NEXT:    stvx v2, 0, r3
-; CHECK-P8-NEXT:    xxmrglw vs3, v6, v7
+; CHECK-P8-NEXT:    xxmrglw vs2, v7, v5
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxmrglw vs3, v0, v1
 ; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
-; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    xxswapd vs0, v3
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -796,53 +798,53 @@ entry:
 define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lvx v3, r3, r4
-; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT:    xxswapd vs1, v2
-; CHECK-P8-NEXT:    xscvspdpn f2, v2
-; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 1
-; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 3
-; CHECK-P8-NEXT:    xscvspdpn f3, v3
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 3
+; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f3, v2
+; CHECK-P8-NEXT:    xxsldwi vs6, v3, v3, 3
+; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f5, v3
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P8-NEXT:    xscvspdpn f4, vs4
-; CHECK-P8-NEXT:    xscvspdpn f5, vs5
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvspdpn f6, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvspdpn f7, vs7
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxswapd vs0, v3
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xxsldwi vs1, v3, v3, 1
+; CHECK-P8-NEXT:    mffprwz r4, f3
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    xscvdpsxws f2, f4
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f4, f5
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghh v2, v4, v2
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    mffprwz r3, f3
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    vmrghh v3, v3, v4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f6
+; CHECK-P8-NEXT:    xscvdpsxws f2, f7
+; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f4
 ; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    mtvsrd v5, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    vmrghh v5, v0, v5
+; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    vmrghh v2, v2, v4
+; CHECK-P8-NEXT:    vmrghh v3, v3, v5
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    mtvsrd v0, r3
+; CHECK-P8-NEXT:    mtvsrd v1, r4
 ; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P8-NEXT:    mtvsrd v1, r3
-; CHECK-P8-NEXT:    vmrghh v4, v4, v1
-; CHECK-P8-NEXT:    xxmrglw vs1, v4, v5
+; CHECK-P8-NEXT:    vmrghh v4, v4, v0
+; CHECK-P8-NEXT:    vmrghh v5, v5, v1
+; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -959,105 +961,107 @@ entry:
 define void @test16elt_signed(<16 x i16>* noalias nocapture sret(<16 x i16>) %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lvx v5, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    lvx v3, r4, r5
-; CHECK-P8-NEXT:    lvx v2, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
 ; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    xxsldwi vs0, v5, v5, 3
-; CHECK-P8-NEXT:    xscvspdpn f1, v5
-; CHECK-P8-NEXT:    lvx v4, r4, r6
-; CHECK-P8-NEXT:    xxswapd vs3, v5
-; CHECK-P8-NEXT:    xxsldwi vs5, v5, v5, 1
-; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 3
-; CHECK-P8-NEXT:    xxswapd vs8, v3
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r6
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvspdpn f3, vs3
-; CHECK-P8-NEXT:    xscvspdpn f5, vs5
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvspdpn f7, vs7
-; CHECK-P8-NEXT:    xscvspdpn f8, vs8
+; CHECK-P8-NEXT:    xscvspdpn f4, vs1
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs2
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    xxsldwi vs6, v2, v2, 3
+; CHECK-P8-NEXT:    xxsldwi vs8, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f7, v2
+; CHECK-P8-NEXT:    xxsldwi vs9, v3, v3, 3
+; CHECK-P8-NEXT:    xscvspdpn f5, vs3
+; CHECK-P8-NEXT:    xxswapd v0, vs3
+; CHECK-P8-NEXT:    xscvspdpn f6, vs6
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvspdpn f8, vs8
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xscvspdpn f9, vs9
 ; CHECK-P8-NEXT:    xscvspdpn f2, v3
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f5
+; CHECK-P8-NEXT:    xscvdpsxws f6, f6
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f7
+; CHECK-P8-NEXT:    xxsldwi vs7, v0, v0, 3
+; CHECK-P8-NEXT:    mtvsrd v2, r4
+; CHECK-P8-NEXT:    mffprwz r4, f4
+; CHECK-P8-NEXT:    xscvdpsxws f4, f8
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r4, f6
+; CHECK-P8-NEXT:    xscvdpsxws f6, f9
+; CHECK-P8-NEXT:    xscvspdpn f10, v4
+; CHECK-P8-NEXT:    mtvsrd v1, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f0
 ; CHECK-P8-NEXT:    xxsldwi vs0, v3, v3, 1
-; CHECK-P8-NEXT:    xscvspdpn f4, v2
-; CHECK-P8-NEXT:    xscvdpsxws f5, f7
-; CHECK-P8-NEXT:    xxsldwi vs7, v4, v4, 3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvspdpn f3, v0
 ; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f6, v4
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f8
-; CHECK-P8-NEXT:    xxswapd vs8, v4
+; CHECK-P8-NEXT:    mffprwz r4, f4
+; CHECK-P8-NEXT:    xxsldwi vs4, v4, v4, 3
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    vmrghh v2, v2, v1
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    mffprwz r4, f6
+; CHECK-P8-NEXT:    xxsldwi vs6, v4, v4, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    vmrghh v3, v3, v1
 ; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f5
-; CHECK-P8-NEXT:    xxswapd vs5, v2
-; CHECK-P8-NEXT:    xscvspdpn f3, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    vmrghh v3, v0, v3
-; CHECK-P8-NEXT:    mtvsrd v0, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvdpsxws f6, f6
-; CHECK-P8-NEXT:    xscvspdpn f1, vs5
-; CHECK-P8-NEXT:    xxsldwi vs5, v2, v2, 1
-; CHECK-P8-NEXT:    mtvsrd v6, r4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f10
+; CHECK-P8-NEXT:    vmrghh v4, v5, v1
+; CHECK-P8-NEXT:    xscvspdpn f4, vs4
+; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mtvsrd v1, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xxsldwi vs2, v0, v0, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghh v2, v5, v1
-; CHECK-P8-NEXT:    vmrghh v5, v6, v0
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
-; CHECK-P8-NEXT:    xscvdpsxws f2, f3
-; CHECK-P8-NEXT:    xscvspdpn f5, vs5
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvspdpn f6, vs6
 ; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    mffprwz r4, f1
 ; CHECK-P8-NEXT:    xscvspdpn f7, vs7
+; CHECK-P8-NEXT:    xscvdpsxws f1, f4
 ; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xxsldwi vs2, v4, v4, 1
-; CHECK-P8-NEXT:    xscvspdpn f8, vs8
-; CHECK-P8-NEXT:    xscvdpsxws f0, f5
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f3, f7
-; CHECK-P8-NEXT:    mtvsrd v8, r4
+; CHECK-P8-NEXT:    mffprwz r4, f3
+; CHECK-P8-NEXT:    xscvdpsxws f3, f6
+; CHECK-P8-NEXT:    mtvsrd v0, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f8
+; CHECK-P8-NEXT:    xscvspdpn f0, vs2
+; CHECK-P8-NEXT:    mtvsrd v8, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f7
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    vmrghh v0, v0, v7
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
-; CHECK-P8-NEXT:    vmrghh v4, v8, v4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    vmrghh v6, v6, v8
 ; CHECK-P8-NEXT:    mtvsrd v8, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    vmrghh v1, v1, v9
-; CHECK-P8-NEXT:    xxmrglw vs1, v0, v5
+; CHECK-P8-NEXT:    vmrghh v5, v5, v9
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    vmrghh v7, v8, v7
-; CHECK-P8-NEXT:    vmrghh v6, v6, v9
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    vmrghh v7, v7, v8
+; CHECK-P8-NEXT:    xxmrglw vs1, v6, v4
+; CHECK-P8-NEXT:    mtvsrd v8, r4
+; CHECK-P8-NEXT:    vmrghh v1, v1, v9
+; CHECK-P8-NEXT:    vmrghh v0, v0, v8
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
-; CHECK-P8-NEXT:    xxmrglw vs2, v1, v4
-; CHECK-P8-NEXT:    stvx v2, 0, r3
-; CHECK-P8-NEXT:    xxmrglw vs3, v6, v7
+; CHECK-P8-NEXT:    xxmrglw vs2, v7, v5
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    xxmrglw vs3, v0, v1
 ; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
-; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    xxswapd vs0, v3
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
index a94e1f2359145..5c4f54170eb90 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
@@ -90,10 +90,12 @@ define void @test8elt(<8 x i64>* noalias nocapture sret(<8 x i64>) %agg.result,
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxmrglw vs2, v3, v3
 ; CHECK-P8-NEXT:    xxmrghw vs3, v3, v3
 ; CHECK-P8-NEXT:    xxmrglw vs0, v2, v2
@@ -173,33 +175,37 @@ define void @test16elt(<16 x i64>* noalias nocapture sret(<16 x i64>) %agg.resul
 ; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    li r8, 64
-; CHECK-P8-NEXT:    lvx v4, r4, r7
-; CHECK-P8-NEXT:    lvx v2, r4, r5
-; CHECK-P8-NEXT:    lvx v3, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    xxmrghw vs3, v4, v4
+; CHECK-P8-NEXT:    xxmrglw vs1, v2, v2
+; CHECK-P8-NEXT:    xxmrghw vs2, v2, v2
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxmrglw vs5, v4, v4
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v2
-; CHECK-P8-NEXT:    xxmrghw vs1, v2, v2
-; CHECK-P8-NEXT:    lvx v2, 0, r4
-; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    xxmrglw vs2, v3, v3
-; CHECK-P8-NEXT:    xxmrghw vs4, v3, v3
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v3
 ; CHECK-P8-NEXT:    xvcvspdp vs3, vs3
+; CHECK-P8-NEXT:    xxmrghw vs4, v3, v3
 ; CHECK-P8-NEXT:    xxmrglw vs6, v2, v2
 ; CHECK-P8-NEXT:    xxmrghw vs7, v2, v2
 ; CHECK-P8-NEXT:    xvcvspdp vs5, vs5
-; CHECK-P8-NEXT:    xvcvspdp vs0, vs0
 ; CHECK-P8-NEXT:    xvcvspdp vs1, vs1
 ; CHECK-P8-NEXT:    xvcvspdp vs2, vs2
+; CHECK-P8-NEXT:    xvcvspdp vs0, vs0
 ; CHECK-P8-NEXT:    xvcvspdp vs4, vs4
 ; CHECK-P8-NEXT:    xvcvspdp vs6, vs6
 ; CHECK-P8-NEXT:    xvcvspdp vs7, vs7
 ; CHECK-P8-NEXT:    xvcvdpuxds v3, vs3
 ; CHECK-P8-NEXT:    xvcvdpuxds v5, vs5
-; CHECK-P8-NEXT:    xvcvdpuxds v2, vs0
-; CHECK-P8-NEXT:    xvcvdpuxds v4, vs1
+; CHECK-P8-NEXT:    xvcvdpuxds v2, vs1
+; CHECK-P8-NEXT:    xvcvdpuxds v4, vs2
 ; CHECK-P8-NEXT:    xvcvdpuxds v0, vs4
-; CHECK-P8-NEXT:    xvcvdpuxds v1, vs2
+; CHECK-P8-NEXT:    xvcvdpuxds v1, vs0
 ; CHECK-P8-NEXT:    xvcvdpuxds v6, vs6
 ; CHECK-P8-NEXT:    xxswapd vs0, v3
 ; CHECK-P8-NEXT:    xvcvdpuxds v7, vs7
@@ -389,10 +395,12 @@ define void @test8elt_signed(<8 x i64>* noalias nocapture sret(<8 x i64>) %agg.r
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxmrglw vs2, v3, v3
 ; CHECK-P8-NEXT:    xxmrghw vs3, v3, v3
 ; CHECK-P8-NEXT:    xxmrglw vs0, v2, v2
@@ -472,33 +480,37 @@ define void @test16elt_signed(<16 x i64>* noalias nocapture sret(<16 x i64>) %ag
 ; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    li r8, 64
-; CHECK-P8-NEXT:    lvx v4, r4, r7
-; CHECK-P8-NEXT:    lvx v2, r4, r5
-; CHECK-P8-NEXT:    lvx v3, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    xxmrghw vs3, v4, v4
+; CHECK-P8-NEXT:    xxmrglw vs1, v2, v2
+; CHECK-P8-NEXT:    xxmrghw vs2, v2, v2
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxmrglw vs5, v4, v4
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v2
-; CHECK-P8-NEXT:    xxmrghw vs1, v2, v2
-; CHECK-P8-NEXT:    lvx v2, 0, r4
-; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    xxmrglw vs2, v3, v3
-; CHECK-P8-NEXT:    xxmrghw vs4, v3, v3
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v3
 ; CHECK-P8-NEXT:    xvcvspdp vs3, vs3
+; CHECK-P8-NEXT:    xxmrghw vs4, v3, v3
 ; CHECK-P8-NEXT:    xxmrglw vs6, v2, v2
 ; CHECK-P8-NEXT:    xxmrghw vs7, v2, v2
 ; CHECK-P8-NEXT:    xvcvspdp vs5, vs5
-; CHECK-P8-NEXT:    xvcvspdp vs0, vs0
 ; CHECK-P8-NEXT:    xvcvspdp vs1, vs1
 ; CHECK-P8-NEXT:    xvcvspdp vs2, vs2
+; CHECK-P8-NEXT:    xvcvspdp vs0, vs0
 ; CHECK-P8-NEXT:    xvcvspdp vs4, vs4
 ; CHECK-P8-NEXT:    xvcvspdp vs6, vs6
 ; CHECK-P8-NEXT:    xvcvspdp vs7, vs7
 ; CHECK-P8-NEXT:    xvcvdpuxds v3, vs3
 ; CHECK-P8-NEXT:    xvcvdpuxds v5, vs5
-; CHECK-P8-NEXT:    xvcvdpuxds v2, vs0
-; CHECK-P8-NEXT:    xvcvdpuxds v4, vs1
+; CHECK-P8-NEXT:    xvcvdpuxds v2, vs1
+; CHECK-P8-NEXT:    xvcvdpuxds v4, vs2
 ; CHECK-P8-NEXT:    xvcvdpuxds v0, vs4
-; CHECK-P8-NEXT:    xvcvdpuxds v1, vs2
+; CHECK-P8-NEXT:    xvcvdpuxds v1, vs0
 ; CHECK-P8-NEXT:    xvcvdpuxds v6, vs6
 ; CHECK-P8-NEXT:    xxswapd vs0, v3
 ; CHECK-P8-NEXT:    xvcvdpuxds v7, vs7

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
index 5160ad2759918..bbdf928d51cc6 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
@@ -175,53 +175,53 @@ entry:
 define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lvx v3, r3, r4
-; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT:    xxswapd vs1, v2
-; CHECK-P8-NEXT:    xscvspdpn f2, v2
-; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 1
-; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 3
-; CHECK-P8-NEXT:    xscvspdpn f3, v3
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 3
+; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f3, v2
+; CHECK-P8-NEXT:    xxsldwi vs6, v3, v3, 3
+; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f5, v3
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P8-NEXT:    xscvspdpn f4, vs4
-; CHECK-P8-NEXT:    xscvspdpn f5, vs5
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvspdpn f6, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvspdpn f7, vs7
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxswapd vs0, v3
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xxsldwi vs1, v3, v3, 1
+; CHECK-P8-NEXT:    mffprwz r4, f3
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    xscvdpsxws f2, f4
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f4, f5
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghb v2, v4, v2
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    mffprwz r3, f3
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    vmrghb v3, v3, v4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f6
+; CHECK-P8-NEXT:    xscvdpsxws f2, f7
+; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f4
 ; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    mtvsrd v5, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    vmrghb v5, v0, v5
-; CHECK-P8-NEXT:    mtvsrd v1, r3
+; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    vmrghb v2, v2, v4
+; CHECK-P8-NEXT:    vmrghb v3, v3, v5
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    mtvsrd v0, r3
+; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    vmrghb v4, v4, v0
+; CHECK-P8-NEXT:    vmrghb v5, v5, v1
 ; CHECK-P8-NEXT:    vmrglh v2, v3, v2
-; CHECK-P8-NEXT:    vmrghb v4, v4, v1
-; CHECK-P8-NEXT:    vmrglh v3, v4, v5
+; CHECK-P8-NEXT:    vmrglh v3, v5, v4
 ; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -343,102 +343,102 @@ entry:
 define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lvx v4, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    lvx v3, r3, r4
-; CHECK-P8-NEXT:    lvx v2, r3, r5
-; CHECK-P8-NEXT:    xxsldwi vs0, v4, v4, 3
-; CHECK-P8-NEXT:    xxswapd vs2, v4
-; CHECK-P8-NEXT:    xxsldwi vs4, v4, v4, 1
-; CHECK-P8-NEXT:    xscvspdpn f1, v4
-; CHECK-P8-NEXT:    xscvspdpn f3, v3
-; CHECK-P8-NEXT:    xxsldwi vs6, v3, v3, 3
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xxswapd vs7, v3
+; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs2
+; CHECK-P8-NEXT:    xscvspdpn f5, vs2
+; CHECK-P8-NEXT:    xxswapd v5, vs3
+; CHECK-P8-NEXT:    xscvspdpn f6, vs3
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f8, v2
+; CHECK-P8-NEXT:    xxsldwi vs2, v3, v3, 3
+; CHECK-P8-NEXT:    xxsldwi vs9, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f3, v5
+; CHECK-P8-NEXT:    xxswapd v7, vs4
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xxsldwi vs8, v3, v3, 1
-; CHECK-P8-NEXT:    xscvspdpn f4, vs4
-; CHECK-P8-NEXT:    xxsldwi vs9, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f6, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    xscvdpsxws f8, f8
+; CHECK-P8-NEXT:    xscvspdpn f9, vs9
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvspdpn f7, vs7
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    xscvspdpn f8, vs8
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    xscvspdpn f9, vs9
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    mffprwz r5, f2
-; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mtvsrd v4, r3
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    mffprwz r3, f5
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    mtvsrd v4, r5
-; CHECK-P8-NEXT:    mffprwz r5, f4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f6
-; CHECK-P8-NEXT:    vmrghb v3, v4, v3
-; CHECK-P8-NEXT:    mtvsrd v4, r5
-; CHECK-P8-NEXT:    mffprwz r5, f3
-; CHECK-P8-NEXT:    xscvdpsxws f3, f7
-; CHECK-P8-NEXT:    xscvdpsxws f4, f8
+; CHECK-P8-NEXT:    xscvspdpn f1, v3
+; CHECK-P8-NEXT:    mtvsrd v0, r4
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    lvx v0, r3, r4
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 1
-; CHECK-P8-NEXT:    xscvspdpn f5, v2
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    xxsldwi vs3, v0, v0, 3
-; CHECK-P8-NEXT:    mtvsrd v1, r3
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    xxswapd vs4, v0
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    mtvsrd v7, r3
+; CHECK-P8-NEXT:    mffprwz r3, f8
+; CHECK-P8-NEXT:    vmrghb v2, v4, v0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f5, f9
+; CHECK-P8-NEXT:    mtvsrd v4, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxsldwi vs0, v0, v0, 1
-; CHECK-P8-NEXT:    xscvspdpn f2, v0
-; CHECK-P8-NEXT:    xscvspdpn f3, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f6, f9
-; CHECK-P8-NEXT:    xscvspdpn f4, vs4
+; CHECK-P8-NEXT:    xxsldwi vs0, v5, v5, 3
+; CHECK-P8-NEXT:    xscvspdpn f7, vs4
+; CHECK-P8-NEXT:    xxsldwi vs4, v7, v7, 3
+; CHECK-P8-NEXT:    mtvsrd v1, r3
+; CHECK-P8-NEXT:    mffprwz r4, f1
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mtvsrd v0, r4
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xxsldwi vs2, v5, v5, 1
+; CHECK-P8-NEXT:    mffprwz r3, f5
+; CHECK-P8-NEXT:    xscvdpsxws f5, f6
+; CHECK-P8-NEXT:    xxsldwi vs6, v7, v7, 1
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xscvspdpn f1, v7
 ; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xscvspdpn f4, vs4
+; CHECK-P8-NEXT:    xscvspdpn f6, vs6
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghb v2, v6, v1
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f5
-; CHECK-P8-NEXT:    mtvsrd v6, r3
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    vmrghb v4, v5, v4
-; CHECK-P8-NEXT:    mtvsrd v5, r5
-; CHECK-P8-NEXT:    vmrghb v0, v6, v1
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mtvsrd v6, r4
+; CHECK-P8-NEXT:    mtvsrd v8, r3
+; CHECK-P8-NEXT:    xscvdpsxws f7, f7
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mffprwz r3, f5
+; CHECK-P8-NEXT:    mffprwz r4, f3
+; CHECK-P8-NEXT:    vmrghb v4, v4, v1
+; CHECK-P8-NEXT:    vmrghb v5, v0, v8
+; CHECK-P8-NEXT:    mtvsrd v0, r3
 ; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    mffprwz r3, f0
 ; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f4
+; CHECK-P8-NEXT:    xscvdpsxws f2, f6
+; CHECK-P8-NEXT:    vmrghb v3, v3, v6
 ; CHECK-P8-NEXT:    mtvsrd v6, r3
-; CHECK-P8-NEXT:    mffprwz r3, f3
-; CHECK-P8-NEXT:    vmrghb v5, v5, v7
-; CHECK-P8-NEXT:    vmrghb v1, v1, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
-; CHECK-P8-NEXT:    mtvsrd v7, r3
+; CHECK-P8-NEXT:    mtvsrd v7, r4
+; CHECK-P8-NEXT:    mffprwz r3, f7
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    vmrghb v0, v0, v6
+; CHECK-P8-NEXT:    vmrghb v1, v1, v7
+; CHECK-P8-NEXT:    mtvsrd v6, r3
+; CHECK-P8-NEXT:    mtvsrd v7, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    mtvsrd v9, r3
-; CHECK-P8-NEXT:    vmrghb v7, v8, v7
-; CHECK-P8-NEXT:    vmrghb v6, v6, v9
-; CHECK-P8-NEXT:    vmrglh v3, v4, v3
-; CHECK-P8-NEXT:    vmrglh v2, v5, v2
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    mtvsrd v8, r3
+; CHECK-P8-NEXT:    mtvsrd v9, r4
+; CHECK-P8-NEXT:    vmrghb v6, v6, v8
+; CHECK-P8-NEXT:    vmrghb v7, v7, v9
+; CHECK-P8-NEXT:    vmrglh v2, v4, v2
+; CHECK-P8-NEXT:    vmrglh v3, v5, v3
 ; CHECK-P8-NEXT:    vmrglh v4, v1, v0
-; CHECK-P8-NEXT:    vmrglh v5, v6, v7
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P8-NEXT:    vmrglh v5, v7, v6
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
 ; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
@@ -815,53 +815,53 @@ entry:
 define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lvx v2, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lvx v3, r3, r4
-; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT:    xxswapd vs1, v2
-; CHECK-P8-NEXT:    xscvspdpn f2, v2
-; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 1
-; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 3
-; CHECK-P8-NEXT:    xscvspdpn f3, v3
+; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 3
+; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 1
+; CHECK-P8-NEXT:    xscvspdpn f3, v2
+; CHECK-P8-NEXT:    xxsldwi vs6, v3, v3, 3
+; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f5, v3
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P8-NEXT:    xscvspdpn f4, vs4
-; CHECK-P8-NEXT:    xscvspdpn f5, vs5
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvspdpn f6, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvspdpn f7, vs7
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxswapd vs0, v3
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xxsldwi vs1, v3, v3, 1
+; CHECK-P8-NEXT:    mffprwz r4, f3
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    xscvdpsxws f2, f4
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f4, f5
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghb v2, v4, v2
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    mffprwz r3, f3
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    vmrghb v3, v3, v4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f6
+; CHECK-P8-NEXT:    xscvdpsxws f2, f7
+; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f4
 ; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    mtvsrd v5, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    vmrghb v5, v0, v5
-; CHECK-P8-NEXT:    mtvsrd v1, r3
+; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    vmrghb v2, v2, v4
+; CHECK-P8-NEXT:    vmrghb v3, v3, v5
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    mtvsrd v0, r3
+; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    vmrghb v4, v4, v0
+; CHECK-P8-NEXT:    vmrghb v5, v5, v1
 ; CHECK-P8-NEXT:    vmrglh v2, v3, v2
-; CHECK-P8-NEXT:    vmrghb v4, v4, v1
-; CHECK-P8-NEXT:    vmrglh v3, v4, v5
+; CHECK-P8-NEXT:    vmrglh v3, v5, v4
 ; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -983,102 +983,102 @@ entry:
 define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lvx v4, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    lvx v3, r3, r4
-; CHECK-P8-NEXT:    lvx v2, r3, r5
-; CHECK-P8-NEXT:    xxsldwi vs0, v4, v4, 3
-; CHECK-P8-NEXT:    xxswapd vs2, v4
-; CHECK-P8-NEXT:    xxsldwi vs4, v4, v4, 1
-; CHECK-P8-NEXT:    xscvspdpn f1, v4
-; CHECK-P8-NEXT:    xscvspdpn f3, v3
-; CHECK-P8-NEXT:    xxsldwi vs6, v3, v3, 3
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xxswapd vs7, v3
+; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs2
+; CHECK-P8-NEXT:    xscvspdpn f5, vs2
+; CHECK-P8-NEXT:    xxswapd v5, vs3
+; CHECK-P8-NEXT:    xscvspdpn f6, vs3
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f8, v2
+; CHECK-P8-NEXT:    xxsldwi vs2, v3, v3, 3
+; CHECK-P8-NEXT:    xxsldwi vs9, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f3, v5
+; CHECK-P8-NEXT:    xxswapd v7, vs4
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xxsldwi vs8, v3, v3, 1
-; CHECK-P8-NEXT:    xscvspdpn f4, vs4
-; CHECK-P8-NEXT:    xxsldwi vs9, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f6, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    xscvdpsxws f8, f8
+; CHECK-P8-NEXT:    xscvspdpn f9, vs9
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvspdpn f7, vs7
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    xscvspdpn f8, vs8
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    xscvspdpn f9, vs9
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    mffprwz r5, f2
-; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mtvsrd v4, r3
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    mffprwz r3, f5
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    mtvsrd v4, r5
-; CHECK-P8-NEXT:    mffprwz r5, f4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f6
-; CHECK-P8-NEXT:    vmrghb v3, v4, v3
-; CHECK-P8-NEXT:    mtvsrd v4, r5
-; CHECK-P8-NEXT:    mffprwz r5, f3
-; CHECK-P8-NEXT:    xscvdpsxws f3, f7
-; CHECK-P8-NEXT:    xscvdpsxws f4, f8
+; CHECK-P8-NEXT:    xscvspdpn f1, v3
+; CHECK-P8-NEXT:    mtvsrd v0, r4
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    lvx v0, r3, r4
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 1
-; CHECK-P8-NEXT:    xscvspdpn f5, v2
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    xxsldwi vs3, v0, v0, 3
-; CHECK-P8-NEXT:    mtvsrd v1, r3
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    xxswapd vs4, v0
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    mtvsrd v7, r3
+; CHECK-P8-NEXT:    mffprwz r3, f8
+; CHECK-P8-NEXT:    vmrghb v2, v4, v0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvdpsxws f5, f9
+; CHECK-P8-NEXT:    mtvsrd v4, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxsldwi vs0, v0, v0, 1
-; CHECK-P8-NEXT:    xscvspdpn f2, v0
-; CHECK-P8-NEXT:    xscvspdpn f3, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f6, f9
-; CHECK-P8-NEXT:    xscvspdpn f4, vs4
+; CHECK-P8-NEXT:    xxsldwi vs0, v5, v5, 3
+; CHECK-P8-NEXT:    xscvspdpn f7, vs4
+; CHECK-P8-NEXT:    xxsldwi vs4, v7, v7, 3
+; CHECK-P8-NEXT:    mtvsrd v1, r3
+; CHECK-P8-NEXT:    mffprwz r4, f1
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mtvsrd v0, r4
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xxsldwi vs2, v5, v5, 1
+; CHECK-P8-NEXT:    mffprwz r3, f5
+; CHECK-P8-NEXT:    xscvdpsxws f5, f6
+; CHECK-P8-NEXT:    xxsldwi vs6, v7, v7, 1
+; CHECK-P8-NEXT:    xscvspdpn f2, vs2
+; CHECK-P8-NEXT:    xscvspdpn f1, v7
 ; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    xscvspdpn f4, vs4
+; CHECK-P8-NEXT:    xscvspdpn f6, vs6
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghb v2, v6, v1
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f5
-; CHECK-P8-NEXT:    mtvsrd v6, r3
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    vmrghb v4, v5, v4
-; CHECK-P8-NEXT:    mtvsrd v5, r5
-; CHECK-P8-NEXT:    vmrghb v0, v6, v1
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mtvsrd v6, r4
+; CHECK-P8-NEXT:    mtvsrd v8, r3
+; CHECK-P8-NEXT:    xscvdpsxws f7, f7
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mffprwz r3, f5
+; CHECK-P8-NEXT:    mffprwz r4, f3
+; CHECK-P8-NEXT:    vmrghb v4, v4, v1
+; CHECK-P8-NEXT:    vmrghb v5, v0, v8
+; CHECK-P8-NEXT:    mtvsrd v0, r3
 ; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    mffprwz r3, f0
 ; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f4
+; CHECK-P8-NEXT:    xscvdpsxws f2, f6
+; CHECK-P8-NEXT:    vmrghb v3, v3, v6
 ; CHECK-P8-NEXT:    mtvsrd v6, r3
-; CHECK-P8-NEXT:    mffprwz r3, f3
-; CHECK-P8-NEXT:    vmrghb v5, v5, v7
-; CHECK-P8-NEXT:    vmrghb v1, v1, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
-; CHECK-P8-NEXT:    mtvsrd v7, r3
+; CHECK-P8-NEXT:    mtvsrd v7, r4
+; CHECK-P8-NEXT:    mffprwz r3, f7
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    vmrghb v0, v0, v6
+; CHECK-P8-NEXT:    vmrghb v1, v1, v7
+; CHECK-P8-NEXT:    mtvsrd v6, r3
+; CHECK-P8-NEXT:    mtvsrd v7, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    mtvsrd v9, r3
-; CHECK-P8-NEXT:    vmrghb v7, v8, v7
-; CHECK-P8-NEXT:    vmrghb v6, v6, v9
-; CHECK-P8-NEXT:    vmrglh v3, v4, v3
-; CHECK-P8-NEXT:    vmrglh v2, v5, v2
+; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    mtvsrd v8, r3
+; CHECK-P8-NEXT:    mtvsrd v9, r4
+; CHECK-P8-NEXT:    vmrghb v6, v6, v8
+; CHECK-P8-NEXT:    vmrghb v7, v7, v9
+; CHECK-P8-NEXT:    vmrglh v2, v4, v2
+; CHECK-P8-NEXT:    vmrglh v3, v5, v3
 ; CHECK-P8-NEXT:    vmrglh v4, v1, v0
-; CHECK-P8-NEXT:    vmrglh v5, v6, v7
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P8-NEXT:    vmrglh v5, v7, v6
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
 ; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
index d0f75b84f07a3..db631458937c8 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
@@ -371,10 +371,12 @@ define void @test16elt(<16 x i16>* noalias nocapture sret(<16 x i16>) %agg.resul
 ; CHECK-P8-NEXT:    vmrghh v7, v9, v7
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    xxmrglw vs2, v1, v0
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    xxswapd vs1, v2
 ; CHECK-P8-NEXT:    xxmrglw vs3, v7, v6
 ; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
-; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    xxswapd vs0, v3
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -916,10 +918,12 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret(<16 x i16>) %ag
 ; CHECK-P8-NEXT:    vmrghh v7, v9, v7
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    xxmrglw vs2, v1, v0
-; CHECK-P8-NEXT:    stvx v2, 0, r3
+; CHECK-P8-NEXT:    xxswapd vs1, v2
 ; CHECK-P8-NEXT:    xxmrglw vs3, v7, v6
 ; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
-; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    xxswapd vs0, v3
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
index 959f3ce289989..e6ca1109b0876 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -95,22 +95,18 @@ define void @test8elt(<8 x i32>* noalias nocapture sret(<8 x i32>) %agg.result,
 ; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xxmrgld vs4, vs1, vs0
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxmrgld vs1, vs2, vs3
-; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs3
+; CHECK-P8-NEXT:    xxmrghd vs4, vs0, vs1
+; CHECK-P8-NEXT:    xxmrgld vs0, vs0, vs1
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    xxmrgld vs2, vs3, vs2
 ; CHECK-P8-NEXT:    xvcvdpuxws v2, vs4
 ; CHECK-P8-NEXT:    xvcvdpuxws v3, vs0
 ; CHECK-P8-NEXT:    xvcvdpuxws v4, vs1
 ; CHECK-P8-NEXT:    xvcvdpuxws v5, vs2
 ; CHECK-P8-NEXT:    vmrgew v2, v3, v2
 ; CHECK-P8-NEXT:    vmrgew v3, v5, v4
-; CHECK-P8-NEXT:    stvx v2, r3, r5
-; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    stxvd2x v2, r3, r5
+; CHECK-P8-NEXT:    stxvd2x v3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
@@ -164,51 +160,43 @@ define void @test16elt(<16 x i32>* noalias nocapture sret(<16 x i32>) %agg.resul
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 32
 ; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    li r8, 64
-; CHECK-P8-NEXT:    li r7, 16
-; CHECK-P8-NEXT:    li r9, 80
-; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
+; CHECK-P8-NEXT:    li r7, 64
+; CHECK-P8-NEXT:    li r8, 80
+; CHECK-P8-NEXT:    lxvd2x vs8, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
-; CHECK-P8-NEXT:    lxvd2x vs3, r4, r8
-; CHECK-P8-NEXT:    li r8, 96
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    lxvd2x vs5, r4, r8
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r8
+; CHECK-P8-NEXT:    li r7, 96
 ; CHECK-P8-NEXT:    li r8, 112
-; CHECK-P8-NEXT:    lxvd2x vs4, r4, r9
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r7
+; CHECK-P8-NEXT:    li r7, 16
 ; CHECK-P8-NEXT:    lxvd2x vs6, r4, r8
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xxswapd vs4, vs4
-; CHECK-P8-NEXT:    xxswapd vs5, vs5
-; CHECK-P8-NEXT:    xxmrgld vs8, vs1, vs0
-; CHECK-P8-NEXT:    xxswapd vs6, vs6
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, vs7
-; CHECK-P8-NEXT:    xxmrgld vs7, vs4, vs3
-; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
-; CHECK-P8-NEXT:    xxmrgld vs4, vs6, vs5
-; CHECK-P8-NEXT:    xvcvdpuxws v2, vs8
+; CHECK-P8-NEXT:    xxmrghd vs4, vs0, vs1
+; CHECK-P8-NEXT:    xxmrgld vs0, vs0, vs1
+; CHECK-P8-NEXT:    lxvd2x vs7, r4, r7
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT:    xxmrgld vs2, vs2, vs3
+; CHECK-P8-NEXT:    xxmrghd vs3, vs5, vs6
 ; CHECK-P8-NEXT:    xvcvdpuxws v3, vs0
-; CHECK-P8-NEXT:    xxmrghd vs0, vs6, vs5
-; CHECK-P8-NEXT:    xxmrgld vs5, vs2, vs1
-; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs1
-; CHECK-P8-NEXT:    xvcvdpuxws v4, vs7
-; CHECK-P8-NEXT:    xvcvdpuxws v5, vs3
-; CHECK-P8-NEXT:    xvcvdpuxws v0, vs4
+; CHECK-P8-NEXT:    xxmrgld vs0, vs5, vs6
+; CHECK-P8-NEXT:    xvcvdpuxws v4, vs1
+; CHECK-P8-NEXT:    xxmrghd vs1, vs8, vs7
+; CHECK-P8-NEXT:    xvcvdpuxws v5, vs2
+; CHECK-P8-NEXT:    xxmrgld vs2, vs8, vs7
+; CHECK-P8-NEXT:    xvcvdpuxws v2, vs4
+; CHECK-P8-NEXT:    xvcvdpuxws v0, vs3
 ; CHECK-P8-NEXT:    xvcvdpuxws v1, vs0
-; CHECK-P8-NEXT:    xvcvdpuxws v6, vs5
-; CHECK-P8-NEXT:    xvcvdpuxws v7, vs1
+; CHECK-P8-NEXT:    xvcvdpuxws v6, vs1
+; CHECK-P8-NEXT:    xvcvdpuxws v7, vs2
 ; CHECK-P8-NEXT:    vmrgew v2, v3, v2
 ; CHECK-P8-NEXT:    vmrgew v3, v5, v4
 ; CHECK-P8-NEXT:    vmrgew v4, v1, v0
 ; CHECK-P8-NEXT:    vmrgew v5, v7, v6
-; CHECK-P8-NEXT:    stvx v2, r3, r7
-; CHECK-P8-NEXT:    stvx v3, r3, r5
-; CHECK-P8-NEXT:    stvx v4, r3, r6
-; CHECK-P8-NEXT:    stvx v5, 0, r3
+; CHECK-P8-NEXT:    stxvd2x v4, r3, r6
+; CHECK-P8-NEXT:    stxvd2x v3, r3, r5
+; CHECK-P8-NEXT:    stxvd2x v2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x v5, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -375,22 +363,18 @@ define void @test8elt_signed(<8 x i32>* noalias nocapture sret(<8 x i32>) %agg.r
 ; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xxmrgld vs4, vs1, vs0
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxmrgld vs1, vs2, vs3
-; CHECK-P8-NEXT:    xxmrghd vs2, vs2, vs3
+; CHECK-P8-NEXT:    xxmrghd vs4, vs0, vs1
+; CHECK-P8-NEXT:    xxmrgld vs0, vs0, vs1
+; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P8-NEXT:    xxmrgld vs2, vs3, vs2
 ; CHECK-P8-NEXT:    xvcvdpsxws v2, vs4
 ; CHECK-P8-NEXT:    xvcvdpsxws v3, vs0
 ; CHECK-P8-NEXT:    xvcvdpsxws v4, vs1
 ; CHECK-P8-NEXT:    xvcvdpsxws v5, vs2
 ; CHECK-P8-NEXT:    vmrgew v2, v3, v2
 ; CHECK-P8-NEXT:    vmrgew v3, v5, v4
-; CHECK-P8-NEXT:    stvx v2, r3, r5
-; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    stxvd2x v2, r3, r5
+; CHECK-P8-NEXT:    stxvd2x v3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
@@ -444,51 +428,43 @@ define void @test16elt_signed(<16 x i32>* noalias nocapture sret(<16 x i32>) %ag
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 32
 ; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    li r8, 64
-; CHECK-P8-NEXT:    li r7, 16
-; CHECK-P8-NEXT:    li r9, 80
-; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
+; CHECK-P8-NEXT:    li r7, 64
+; CHECK-P8-NEXT:    li r8, 80
+; CHECK-P8-NEXT:    lxvd2x vs8, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
-; CHECK-P8-NEXT:    lxvd2x vs3, r4, r8
-; CHECK-P8-NEXT:    li r8, 96
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    lxvd2x vs5, r4, r8
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r8
+; CHECK-P8-NEXT:    li r7, 96
 ; CHECK-P8-NEXT:    li r8, 112
-; CHECK-P8-NEXT:    lxvd2x vs4, r4, r9
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r7
+; CHECK-P8-NEXT:    li r7, 16
 ; CHECK-P8-NEXT:    lxvd2x vs6, r4, r8
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xxswapd vs4, vs4
-; CHECK-P8-NEXT:    xxswapd vs5, vs5
-; CHECK-P8-NEXT:    xxmrgld vs8, vs1, vs0
-; CHECK-P8-NEXT:    xxswapd vs6, vs6
-; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, vs7
-; CHECK-P8-NEXT:    xxmrgld vs7, vs4, vs3
-; CHECK-P8-NEXT:    xxmrghd vs3, vs4, vs3
-; CHECK-P8-NEXT:    xxmrgld vs4, vs6, vs5
-; CHECK-P8-NEXT:    xvcvdpsxws v2, vs8
+; CHECK-P8-NEXT:    xxmrghd vs4, vs0, vs1
+; CHECK-P8-NEXT:    xxmrgld vs0, vs0, vs1
+; CHECK-P8-NEXT:    lxvd2x vs7, r4, r7
+; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT:    xxmrgld vs2, vs2, vs3
+; CHECK-P8-NEXT:    xxmrghd vs3, vs5, vs6
 ; CHECK-P8-NEXT:    xvcvdpsxws v3, vs0
-; CHECK-P8-NEXT:    xxmrghd vs0, vs6, vs5
-; CHECK-P8-NEXT:    xxmrgld vs5, vs2, vs1
-; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs1
-; CHECK-P8-NEXT:    xvcvdpsxws v4, vs7
-; CHECK-P8-NEXT:    xvcvdpsxws v5, vs3
-; CHECK-P8-NEXT:    xvcvdpsxws v0, vs4
+; CHECK-P8-NEXT:    xxmrgld vs0, vs5, vs6
+; CHECK-P8-NEXT:    xvcvdpsxws v4, vs1
+; CHECK-P8-NEXT:    xxmrghd vs1, vs8, vs7
+; CHECK-P8-NEXT:    xvcvdpsxws v5, vs2
+; CHECK-P8-NEXT:    xxmrgld vs2, vs8, vs7
+; CHECK-P8-NEXT:    xvcvdpsxws v2, vs4
+; CHECK-P8-NEXT:    xvcvdpsxws v0, vs3
 ; CHECK-P8-NEXT:    xvcvdpsxws v1, vs0
-; CHECK-P8-NEXT:    xvcvdpsxws v6, vs5
-; CHECK-P8-NEXT:    xvcvdpsxws v7, vs1
+; CHECK-P8-NEXT:    xvcvdpsxws v6, vs1
+; CHECK-P8-NEXT:    xvcvdpsxws v7, vs2
 ; CHECK-P8-NEXT:    vmrgew v2, v3, v2
 ; CHECK-P8-NEXT:    vmrgew v3, v5, v4
 ; CHECK-P8-NEXT:    vmrgew v4, v1, v0
 ; CHECK-P8-NEXT:    vmrgew v5, v7, v6
-; CHECK-P8-NEXT:    stvx v2, r3, r7
-; CHECK-P8-NEXT:    stvx v3, r3, r5
-; CHECK-P8-NEXT:    stvx v4, r3, r6
-; CHECK-P8-NEXT:    stvx v5, 0, r3
+; CHECK-P8-NEXT:    stxvd2x v4, r3, r6
+; CHECK-P8-NEXT:    stxvd2x v3, r3, r5
+; CHECK-P8-NEXT:    stxvd2x v2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x v5, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll
index a09fbf2000b74..84f3f231e6969 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll
@@ -64,12 +64,12 @@ define void @test8elt(<8 x i32>* noalias nocapture sret(<8 x i32>) %agg.result,
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
-; CHECK-P8-NEXT:    lvx v2, r4, r5
-; CHECK-P8-NEXT:    xvcvspuxws v3, v3
-; CHECK-P8-NEXT:    xvcvspuxws v2, v2
-; CHECK-P8-NEXT:    stvx v3, 0, r3
-; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    xvcvspuxws vs1, vs1
+; CHECK-P8-NEXT:    xvcvspuxws vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
@@ -104,18 +104,18 @@ define void @test16elt(<16 x i32>* noalias nocapture sret(<16 x i32>) %agg.resul
 ; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    li r7, 48
-; CHECK-P8-NEXT:    lvx v5, 0, r4
-; CHECK-P8-NEXT:    lvx v2, r4, r5
-; CHECK-P8-NEXT:    lvx v3, r4, r6
-; CHECK-P8-NEXT:    lvx v4, r4, r7
-; CHECK-P8-NEXT:    xvcvspuxws v5, v5
-; CHECK-P8-NEXT:    xvcvspuxws v2, v2
-; CHECK-P8-NEXT:    xvcvspuxws v3, v3
-; CHECK-P8-NEXT:    xvcvspuxws v4, v4
-; CHECK-P8-NEXT:    stvx v5, 0, r3
-; CHECK-P8-NEXT:    stvx v2, r3, r5
-; CHECK-P8-NEXT:    stvx v3, r3, r6
-; CHECK-P8-NEXT:    stvx v4, r3, r7
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    xvcvspuxws vs3, vs3
+; CHECK-P8-NEXT:    xvcvspuxws vs0, vs0
+; CHECK-P8-NEXT:    xvcvspuxws vs1, vs1
+; CHECK-P8-NEXT:    xvcvspuxws vs2, vs2
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -211,12 +211,12 @@ define void @test8elt_signed(<8 x i32>* noalias nocapture sret(<8 x i32>) %agg.r
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
-; CHECK-P8-NEXT:    lvx v2, r4, r5
-; CHECK-P8-NEXT:    xvcvspsxws v3, v3
-; CHECK-P8-NEXT:    xvcvspsxws v2, v2
-; CHECK-P8-NEXT:    stvx v3, 0, r3
-; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    xvcvspsxws vs1, vs1
+; CHECK-P8-NEXT:    xvcvspsxws vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
@@ -251,18 +251,18 @@ define void @test16elt_signed(<16 x i32>* noalias nocapture sret(<16 x i32>) %ag
 ; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    li r7, 48
-; CHECK-P8-NEXT:    lvx v5, 0, r4
-; CHECK-P8-NEXT:    lvx v2, r4, r5
-; CHECK-P8-NEXT:    lvx v3, r4, r6
-; CHECK-P8-NEXT:    lvx v4, r4, r7
-; CHECK-P8-NEXT:    xvcvspsxws v5, v5
-; CHECK-P8-NEXT:    xvcvspsxws v2, v2
-; CHECK-P8-NEXT:    xvcvspsxws v3, v3
-; CHECK-P8-NEXT:    xvcvspsxws v4, v4
-; CHECK-P8-NEXT:    stvx v5, 0, r3
-; CHECK-P8-NEXT:    stvx v2, r3, r5
-; CHECK-P8-NEXT:    stvx v3, r3, r6
-; CHECK-P8-NEXT:    stvx v4, r3, r7
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    xvcvspsxws vs3, vs3
+; CHECK-P8-NEXT:    xvcvspsxws vs0, vs0
+; CHECK-P8-NEXT:    xvcvspsxws vs1, vs1
+; CHECK-P8-NEXT:    xvcvspsxws vs2, vs2
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
index a6a8d36a400d5..e7306daa82bc2 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
@@ -101,10 +101,12 @@ define void @test8elt(<8 x float>* noalias nocapture sret(<8 x float>) %agg.resu
 ; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    vmrglh v4, v3, v2
 ; CHECK-P8-NEXT:    vmrghh v2, v3, v2
-; CHECK-P8-NEXT:    xvcvuxwsp v3, v4
-; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
-; CHECK-P8-NEXT:    stvx v3, 0, r3
-; CHECK-P8-NEXT:    stvx v2, r3, r4
+; CHECK-P8-NEXT:    xvcvuxwsp vs0, v4
+; CHECK-P8-NEXT:    xvcvuxwsp vs1, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
@@ -138,29 +140,37 @@ define void @test16elt(<16 x float>* noalias nocapture sret(<16 x float>) %agg.r
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_0 at toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_1 at toc@ha
-; CHECK-P8-NEXT:    xxlxor v3, v3, v3
-; CHECK-P8-NEXT:    lvx v4, 0, r4
+; CHECK-P8-NEXT:    li r6, 16
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxlxor v4, v4, v4
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_0 at toc@l
-; CHECK-P8-NEXT:    addi r6, r6, .LCPI3_1 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r5
-; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lvx v0, 0, r6
-; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    lvx v5, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_1 at toc@ha
+; CHECK-P8-NEXT:    addi r4, r5, .LCPI3_1 at toc@l
+; CHECK-P8-NEXT:    xxswapd v2, vs1
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    xxswapd v5, vs2
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    vperm v1, v3, v4, v2
-; CHECK-P8-NEXT:    vperm v2, v3, v5, v2
-; CHECK-P8-NEXT:    vperm v5, v3, v5, v0
-; CHECK-P8-NEXT:    vperm v3, v3, v4, v0
-; CHECK-P8-NEXT:    xvcvuxwsp v4, v1
-; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
-; CHECK-P8-NEXT:    xvcvuxwsp v5, v5
-; CHECK-P8-NEXT:    xvcvuxwsp v3, v3
-; CHECK-P8-NEXT:    stvx v4, 0, r3
-; CHECK-P8-NEXT:    stvx v2, r3, r6
-; CHECK-P8-NEXT:    stvx v5, r3, r4
-; CHECK-P8-NEXT:    stvx v3, r3, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxswapd v0, vs3
+; CHECK-P8-NEXT:    vperm v1, v4, v2, v3
+; CHECK-P8-NEXT:    vperm v3, v4, v5, v3
+; CHECK-P8-NEXT:    vperm v5, v4, v5, v0
+; CHECK-P8-NEXT:    vperm v2, v4, v2, v0
+; CHECK-P8-NEXT:    xvcvuxwsp vs0, v1
+; CHECK-P8-NEXT:    xvcvuxwsp vs1, v3
+; CHECK-P8-NEXT:    xvcvuxwsp vs2, v5
+; CHECK-P8-NEXT:    xvcvuxwsp vs3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -317,10 +327,12 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret(<8 x float>) %a
 ; CHECK-P8-NEXT:    vslw v2, v2, v3
 ; CHECK-P8-NEXT:    vsraw v4, v4, v3
 ; CHECK-P8-NEXT:    vsraw v2, v2, v3
-; CHECK-P8-NEXT:    xvcvsxwsp v3, v4
-; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
-; CHECK-P8-NEXT:    stvx v3, 0, r3
-; CHECK-P8-NEXT:    stvx v2, r3, r4
+; CHECK-P8-NEXT:    xvcvsxwsp vs0, v4
+; CHECK-P8-NEXT:    xvcvsxwsp vs1, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
@@ -356,16 +368,18 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret(<16 x float>)
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    vspltisw v5, 8
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    lvx v3, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r5
 ; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    vadduwm v5, v5, v5
+; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    vmrglh v4, v2, v2
 ; CHECK-P8-NEXT:    vmrglh v0, v3, v3
 ; CHECK-P8-NEXT:    vmrghh v3, v3, v3
 ; CHECK-P8-NEXT:    vmrghh v2, v2, v2
-; CHECK-P8-NEXT:    vadduwm v5, v5, v5
 ; CHECK-P8-NEXT:    vslw v4, v4, v5
 ; CHECK-P8-NEXT:    vslw v0, v0, v5
 ; CHECK-P8-NEXT:    vslw v3, v3, v5
@@ -374,14 +388,18 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret(<16 x float>)
 ; CHECK-P8-NEXT:    vsraw v0, v0, v5
 ; CHECK-P8-NEXT:    vsraw v3, v3, v5
 ; CHECK-P8-NEXT:    vsraw v2, v2, v5
-; CHECK-P8-NEXT:    xvcvsxwsp v4, v4
-; CHECK-P8-NEXT:    xvcvsxwsp v5, v0
-; CHECK-P8-NEXT:    xvcvsxwsp v3, v3
-; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
-; CHECK-P8-NEXT:    stvx v4, 0, r3
-; CHECK-P8-NEXT:    stvx v5, r3, r6
-; CHECK-P8-NEXT:    stvx v3, r3, r4
-; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    xvcvsxwsp vs0, v4
+; CHECK-P8-NEXT:    xvcvsxwsp vs1, v0
+; CHECK-P8-NEXT:    xvcvsxwsp vs2, v3
+; CHECK-P8-NEXT:    xvcvsxwsp vs3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
index f3454e9282e54..e8d4c7335be09 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
@@ -16,7 +16,8 @@ define <2 x double> @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    mtvsrwz v2, r3
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI0_0 at toc@l
 ; CHECK-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-P8-NEXT:    xvcvuxddp v2, v2
 ; CHECK-P8-NEXT:    blr
@@ -53,15 +54,17 @@ define void @test4elt(<4 x double>* noalias nocapture sret(<4 x double>) %agg.re
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI1_0 at toc@ha
 ; CHECK-P8-NEXT:    addis r6, r2, .LCPI1_1 at toc@ha
-; CHECK-P8-NEXT:    mtvsrd v2, r4
+; CHECK-P8-NEXT:    xxlxor v2, v2, v2
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI1_0 at toc@l
-; CHECK-P8-NEXT:    addi r4, r6, .LCPI1_1 at toc@l
-; CHECK-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-P8-NEXT:    lvx v3, 0, r5
-; CHECK-P8-NEXT:    lvx v5, 0, r4
+; CHECK-P8-NEXT:    mtvsrd v4, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
-; CHECK-P8-NEXT:    vperm v2, v4, v2, v5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    addi r5, r6, .LCPI1_1 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxswapd v5, vs1
+; CHECK-P8-NEXT:    vperm v3, v2, v4, v3
+; CHECK-P8-NEXT:    vperm v2, v2, v4, v5
 ; CHECK-P8-NEXT:    xvcvuxddp vs0, v3
 ; CHECK-P8-NEXT:    xvcvuxddp vs1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
@@ -120,16 +123,20 @@ define void @test8elt(<8 x double>* noalias nocapture sret(<8 x double>) %agg.re
 ; CHECK-P8-NEXT:    xxlxor v4, v4, v4
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_2 at toc@l
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_3 at toc@ha
-; CHECK-P8-NEXT:    lvx v5, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_1 at toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_3 at toc@l
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_1 at toc@l
-; CHECK-P8-NEXT:    lvx v0, 0, r4
-; CHECK-P8-NEXT:    lvx v1, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
 ; CHECK-P8-NEXT:    li r4, 48
 ; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxswapd v5, vs1
+; CHECK-P8-NEXT:    xxswapd v0, vs2
+; CHECK-P8-NEXT:    xxswapd v1, vs3
 ; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
 ; CHECK-P8-NEXT:    vperm v0, v4, v2, v0
@@ -215,38 +222,44 @@ entry:
 define void @test16elt(<16 x double>* noalias nocapture sret(<16 x double>) %agg.result, <16 x i16>* nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_2 at toc@ha
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_0 at toc@ha
-; CHECK-P8-NEXT:    lvx v4, 0, r4
-; CHECK-P8-NEXT:    xxlxor v3, v3, v3
-; CHECK-P8-NEXT:    addi r6, r6, .LCPI3_2 at toc@l
+; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_2 at toc@ha
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxlxor v4, v4, v4
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_0 at toc@l
-; CHECK-P8-NEXT:    lvx v5, 0, r6
-; CHECK-P8-NEXT:    li r6, 16
-; CHECK-P8-NEXT:    lvx v2, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_1 at toc@ha
-; CHECK-P8-NEXT:    lvx v0, r4, r6
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_3 at toc@ha
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_1 at toc@l
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_3 at toc@l
-; CHECK-P8-NEXT:    lvx v1, 0, r5
-; CHECK-P8-NEXT:    li r5, 96
-; CHECK-P8-NEXT:    lvx v8, 0, r4
-; CHECK-P8-NEXT:    vperm v6, v3, v4, v2
+; CHECK-P8-NEXT:    addi r6, r6, .LCPI3_2 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r6
+; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_1 at toc@ha
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r5
+; CHECK-P8-NEXT:    addi r4, r6, .LCPI3_1 at toc@l
+; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_3 at toc@ha
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs4, 0, r4
+; CHECK-P8-NEXT:    addi r4, r6, .LCPI3_3 at toc@l
+; CHECK-P8-NEXT:    li r6, 96
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v5, vs2
 ; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    vperm v7, v3, v4, v5
-; CHECK-P8-NEXT:    vperm v2, v3, v0, v2
-; CHECK-P8-NEXT:    vperm v9, v3, v0, v1
-; CHECK-P8-NEXT:    vperm v5, v3, v0, v5
-; CHECK-P8-NEXT:    vperm v0, v3, v0, v8
-; CHECK-P8-NEXT:    vperm v1, v3, v4, v1
-; CHECK-P8-NEXT:    vperm v3, v3, v4, v8
-; CHECK-P8-NEXT:    xvcvuxddp vs1, v2
+; CHECK-P8-NEXT:    xxswapd v0, vs3
+; CHECK-P8-NEXT:    xxswapd v1, vs4
+; CHECK-P8-NEXT:    xxswapd v8, vs0
+; CHECK-P8-NEXT:    vperm v6, v4, v2, v3
+; CHECK-P8-NEXT:    vperm v7, v4, v2, v5
+; CHECK-P8-NEXT:    vperm v3, v4, v0, v3
+; CHECK-P8-NEXT:    vperm v9, v4, v0, v1
+; CHECK-P8-NEXT:    vperm v5, v4, v0, v5
+; CHECK-P8-NEXT:    vperm v0, v4, v0, v8
+; CHECK-P8-NEXT:    vperm v1, v4, v2, v1
+; CHECK-P8-NEXT:    vperm v2, v4, v2, v8
+; CHECK-P8-NEXT:    xvcvuxddp vs1, v3
 ; CHECK-P8-NEXT:    xvcvuxddp vs4, v9
 ; CHECK-P8-NEXT:    xvcvuxddp vs2, v5
 ; CHECK-P8-NEXT:    xvcvuxddp vs3, v0
 ; CHECK-P8-NEXT:    xvcvuxddp vs0, v7
-; CHECK-P8-NEXT:    xvcvuxddp vs5, v3
+; CHECK-P8-NEXT:    xvcvuxddp vs5, v2
 ; CHECK-P8-NEXT:    xvcvuxddp vs6, v6
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    xvcvuxddp vs7, v1
@@ -256,18 +269,18 @@ define void @test16elt(<16 x double>* noalias nocapture sret(<16 x double>) %agg
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs5, vs5
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r6
 ; CHECK-P8-NEXT:    li r4, 80
-; CHECK-P8-NEXT:    li r5, 64
+; CHECK-P8-NEXT:    li r6, 64
 ; CHECK-P8-NEXT:    xxswapd vs2, vs7
 ; CHECK-P8-NEXT:    xxswapd vs3, vs6
 ; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
-; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    stxvd2x vs5, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
 ; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -371,7 +384,8 @@ define <2 x double> @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI4_1 at toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI4_0 at toc@l
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI4_1 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
 ; CHECK-P8-NEXT:    xxswapd v3, vs0
@@ -416,11 +430,13 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret(<4 x double>)
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_1 at toc@ha
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI5_0 at toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_1 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
 ; CHECK-P8-NEXT:    addi r5, r6, .LCPI5_2 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lvx v4, 0, r5
+; CHECK-P8-NEXT:    xxswapd v4, vs1
 ; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
 ; CHECK-P8-NEXT:    vperm v3, v3, v3, v4
 ; CHECK-P8-NEXT:    xxswapd v4, vs0
@@ -483,23 +499,27 @@ entry:
 define void @test8elt_signed(<8 x double>* noalias nocapture sret(<8 x double>) %agg.result, <8 x i16> %a) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_2 at toc@ha
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_0 at toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI6_3 at toc@ha
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_2 at toc@l
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_2 at toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_0 at toc@l
-; CHECK-P8-NEXT:    addi r6, r6, .LCPI6_3 at toc@l
-; CHECK-P8-NEXT:    lvx v4, 0, r5
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_2 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_3 at toc@ha
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_4 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r4
-; CHECK-P8-NEXT:    lvx v5, 0, r6
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_1 at toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_3 at toc@l
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_4 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_1 at toc@ha
+; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxswapd v4, vs1
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_1 at toc@l
-; CHECK-P8-NEXT:    lvx v0, 0, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    xxswapd v0, vs3
 ; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
 ; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
@@ -602,63 +622,69 @@ define void @test16elt_signed(<16 x double>* noalias nocapture sret(<16 x double
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_0 at toc@ha
 ; CHECK-P8-NEXT:    addis r6, r2, .LCPI7_2 at toc@ha
-; CHECK-P8-NEXT:    lvx v4, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_0 at toc@l
 ; CHECK-P8-NEXT:    addi r6, r6, .LCPI7_2 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_3 at toc@ha
-; CHECK-P8-NEXT:    lvx v3, 0, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r6
 ; CHECK-P8-NEXT:    addis r6, r2, .LCPI7_4 at toc@ha
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_3 at toc@l
 ; CHECK-P8-NEXT:    addi r6, r6, .LCPI7_4 at toc@l
-; CHECK-P8-NEXT:    lvx v5, 0, r5
-; CHECK-P8-NEXT:    lvx v0, 0, r6
-; CHECK-P8-NEXT:    li r6, 16
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_1 at toc@ha
-; CHECK-P8-NEXT:    lvx v7, r4, r6
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_1 at toc@l
-; CHECK-P8-NEXT:    vperm v1, v4, v4, v2
+; CHECK-P8-NEXT:    xxswapd v2, vs1
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs4, 0, r6
+; CHECK-P8-NEXT:    li r6, 96
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_1 at toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_1 at toc@l
+; CHECK-P8-NEXT:    xxswapd v5, vs3
+; CHECK-P8-NEXT:    xxswapd v0, vs4
+; CHECK-P8-NEXT:    xxswapd v6, vs0
+; CHECK-P8-NEXT:    vperm v1, v2, v2, v3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    vperm v6, v4, v4, v3
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-P8-NEXT:    li r5, 96
-; CHECK-P8-NEXT:    vperm v8, v4, v4, v5
-; CHECK-P8-NEXT:    vperm v4, v4, v4, v0
-; CHECK-P8-NEXT:    vperm v5, v7, v7, v5
+; CHECK-P8-NEXT:    vperm v7, v2, v2, v4
+; CHECK-P8-NEXT:    vperm v8, v2, v2, v5
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v0
 ; CHECK-P8-NEXT:    xxswapd v9, vs0
-; CHECK-P8-NEXT:    vperm v0, v7, v7, v0
-; CHECK-P8-NEXT:    vperm v2, v7, v7, v2
-; CHECK-P8-NEXT:    vperm v3, v7, v7, v3
+; CHECK-P8-NEXT:    vperm v5, v6, v6, v5
+; CHECK-P8-NEXT:    vperm v0, v6, v6, v0
+; CHECK-P8-NEXT:    vperm v3, v6, v6, v3
+; CHECK-P8-NEXT:    vperm v4, v6, v6, v4
 ; CHECK-P8-NEXT:    vsld v1, v1, v9
-; CHECK-P8-NEXT:    vsld v6, v6, v9
+; CHECK-P8-NEXT:    vsld v6, v7, v9
 ; CHECK-P8-NEXT:    vsld v5, v5, v9
 ; CHECK-P8-NEXT:    vsld v0, v0, v9
-; CHECK-P8-NEXT:    vsld v2, v2, v9
 ; CHECK-P8-NEXT:    vsld v3, v3, v9
+; CHECK-P8-NEXT:    vsld v4, v4, v9
 ; CHECK-P8-NEXT:    vsrad v5, v5, v9
 ; CHECK-P8-NEXT:    vsrad v0, v0, v9
 ; CHECK-P8-NEXT:    vsld v7, v8, v9
-; CHECK-P8-NEXT:    vsld v4, v4, v9
-; CHECK-P8-NEXT:    vsrad v2, v2, v9
+; CHECK-P8-NEXT:    vsld v2, v2, v9
 ; CHECK-P8-NEXT:    vsrad v3, v3, v9
+; CHECK-P8-NEXT:    vsrad v4, v4, v9
 ; CHECK-P8-NEXT:    xvcvsxddp vs2, v5
 ; CHECK-P8-NEXT:    xvcvsxddp vs3, v0
 ; CHECK-P8-NEXT:    vsrad v1, v1, v9
 ; CHECK-P8-NEXT:    vsrad v6, v6, v9
 ; CHECK-P8-NEXT:    vsrad v7, v7, v9
-; CHECK-P8-NEXT:    vsrad v4, v4, v9
-; CHECK-P8-NEXT:    xvcvsxddp vs1, v2
+; CHECK-P8-NEXT:    vsrad v2, v2, v9
+; CHECK-P8-NEXT:    xvcvsxddp vs1, v3
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xvcvsxddp vs4, v3
+; CHECK-P8-NEXT:    xvcvsxddp vs4, v4
 ; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    xvcvsxddp vs0, v7
-; CHECK-P8-NEXT:    xvcvsxddp vs5, v4
+; CHECK-P8-NEXT:    xvcvsxddp vs5, v2
 ; CHECK-P8-NEXT:    xvcvsxddp vs6, v1
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
 ; CHECK-P8-NEXT:    li r4, 80
 ; CHECK-P8-NEXT:    xvcvsxddp vs7, v6
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
-; CHECK-P8-NEXT:    li r5, 64
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r6
+; CHECK-P8-NEXT:    li r6, 64
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    xxswapd vs4, vs4
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
@@ -667,11 +693,11 @@ define void @test16elt_signed(<16 x double>* noalias nocapture sret(<16 x double
 ; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
 ; CHECK-P8-NEXT:    xxswapd vs2, vs7
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
-; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    stxvd2x vs5, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
 ; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll
index a96a0e55b4e22..9a9ecea962144 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll
@@ -81,10 +81,12 @@ define void @test8elt(<8 x double>* noalias nocapture sret(<8 x double>) %agg.re
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxmrglw v5, v3, v3
 ; CHECK-P8-NEXT:    xxmrghw v3, v3, v3
 ; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
@@ -152,25 +154,29 @@ define void @test16elt(<16 x double>* noalias nocapture sret(<16 x double>) %agg
 ; CHECK-P8-NEXT:    li r6, 48
 ; CHECK-P8-NEXT:    li r7, 32
 ; CHECK-P8-NEXT:    li r8, 64
-; CHECK-P8-NEXT:    lvx v2, r4, r5
-; CHECK-P8-NEXT:    lvx v3, r4, r6
-; CHECK-P8-NEXT:    lvx v0, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    li r4, 112
 ; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
-; CHECK-P8-NEXT:    xxmrghw v5, v3, v3
+; CHECK-P8-NEXT:    xxmrghw v0, v3, v3
+; CHECK-P8-NEXT:    xxswapd v5, vs0
 ; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
 ; CHECK-P8-NEXT:    xxmrglw v3, v3, v3
 ; CHECK-P8-NEXT:    xvcvuxwdp vs0, v4
-; CHECK-P8-NEXT:    lvx v4, 0, r4
-; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    xvcvuxwdp vs1, v5
-; CHECK-P8-NEXT:    xxmrghw v5, v0, v0
-; CHECK-P8-NEXT:    xxmrglw v0, v0, v0
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    xvcvuxwdp vs1, v0
+; CHECK-P8-NEXT:    xxmrghw v0, v5, v5
+; CHECK-P8-NEXT:    xxmrglw v5, v5, v5
 ; CHECK-P8-NEXT:    xvcvuxwdp vs2, v2
 ; CHECK-P8-NEXT:    xxmrglw v2, v4, v4
 ; CHECK-P8-NEXT:    xvcvuxwdp vs3, v3
 ; CHECK-P8-NEXT:    xxmrghw v3, v4, v4
-; CHECK-P8-NEXT:    xvcvuxwdp vs4, v5
-; CHECK-P8-NEXT:    xvcvuxwdp vs5, v0
+; CHECK-P8-NEXT:    xvcvuxwdp vs4, v0
+; CHECK-P8-NEXT:    xvcvuxwdp vs5, v5
 ; CHECK-P8-NEXT:    xvcvuxwdp vs6, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xvcvuxwdp vs7, v3
@@ -335,10 +341,12 @@ define void @test8elt_signed(<8 x double>* noalias nocapture sret(<8 x double>)
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    lvx v2, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxmrglw v5, v3, v3
 ; CHECK-P8-NEXT:    xxmrghw v3, v3, v3
 ; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
@@ -406,25 +414,29 @@ define void @test16elt_signed(<16 x double>* noalias nocapture sret(<16 x double
 ; CHECK-P8-NEXT:    li r6, 48
 ; CHECK-P8-NEXT:    li r7, 32
 ; CHECK-P8-NEXT:    li r8, 64
-; CHECK-P8-NEXT:    lvx v2, r4, r5
-; CHECK-P8-NEXT:    lvx v3, r4, r6
-; CHECK-P8-NEXT:    lvx v0, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    li r4, 112
 ; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
-; CHECK-P8-NEXT:    xxmrghw v5, v3, v3
+; CHECK-P8-NEXT:    xxmrghw v0, v3, v3
+; CHECK-P8-NEXT:    xxswapd v5, vs0
 ; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
 ; CHECK-P8-NEXT:    xxmrglw v3, v3, v3
 ; CHECK-P8-NEXT:    xvcvsxwdp vs0, v4
-; CHECK-P8-NEXT:    lvx v4, 0, r4
-; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    xvcvsxwdp vs1, v5
-; CHECK-P8-NEXT:    xxmrghw v5, v0, v0
-; CHECK-P8-NEXT:    xxmrglw v0, v0, v0
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    xvcvsxwdp vs1, v0
+; CHECK-P8-NEXT:    xxmrghw v0, v5, v5
+; CHECK-P8-NEXT:    xxmrglw v5, v5, v5
 ; CHECK-P8-NEXT:    xvcvsxwdp vs2, v2
 ; CHECK-P8-NEXT:    xxmrglw v2, v4, v4
 ; CHECK-P8-NEXT:    xvcvsxwdp vs3, v3
 ; CHECK-P8-NEXT:    xxmrghw v3, v4, v4
-; CHECK-P8-NEXT:    xvcvsxwdp vs4, v5
-; CHECK-P8-NEXT:    xvcvsxwdp vs5, v0
+; CHECK-P8-NEXT:    xvcvsxwdp vs4, v0
+; CHECK-P8-NEXT:    xvcvsxwdp vs5, v5
 ; CHECK-P8-NEXT:    xvcvsxwdp vs6, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xvcvsxwdp vs7, v3

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
index c68a0dc7b613f..23b370f677560 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
@@ -115,8 +115,10 @@ define void @test8elt(<8 x float>* noalias nocapture sret(<8 x float>) %agg.resu
 ; CHECK-P8-NEXT:    xxsldwi v4, vs2, vs2, 3
 ; CHECK-P8-NEXT:    vpkudum v2, v3, v2
 ; CHECK-P8-NEXT:    vpkudum v3, v4, v5
-; CHECK-P8-NEXT:    stvx v2, r3, r5
-; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxswapd vs1, v3
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
@@ -211,10 +213,14 @@ define void @test16elt(<16 x float>* noalias nocapture sret(<16 x float>) %agg.r
 ; CHECK-P8-NEXT:    xxsldwi v7, vs7, vs7, 3
 ; CHECK-P8-NEXT:    vpkudum v4, v1, v0
 ; CHECK-P8-NEXT:    vpkudum v5, v6, v7
-; CHECK-P8-NEXT:    stvx v2, r3, r7
-; CHECK-P8-NEXT:    stvx v3, r3, r5
-; CHECK-P8-NEXT:    stvx v4, r3, r6
-; CHECK-P8-NEXT:    stvx v5, 0, r3
+; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    xxswapd vs1, v3
+; CHECK-P8-NEXT:    xxswapd vs0, v4
+; CHECK-P8-NEXT:    xxswapd vs3, v5
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -401,8 +407,10 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret(<8 x float>) %a
 ; CHECK-P8-NEXT:    xxsldwi v4, vs2, vs2, 3
 ; CHECK-P8-NEXT:    vpkudum v2, v3, v2
 ; CHECK-P8-NEXT:    vpkudum v3, v4, v5
-; CHECK-P8-NEXT:    stvx v2, r3, r5
-; CHECK-P8-NEXT:    stvx v3, 0, r3
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxswapd vs1, v3
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
@@ -497,10 +505,14 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret(<16 x float>)
 ; CHECK-P8-NEXT:    xxsldwi v7, vs7, vs7, 3
 ; CHECK-P8-NEXT:    vpkudum v4, v1, v0
 ; CHECK-P8-NEXT:    vpkudum v5, v6, v7
-; CHECK-P8-NEXT:    stvx v2, r3, r7
-; CHECK-P8-NEXT:    stvx v3, r3, r5
-; CHECK-P8-NEXT:    stvx v4, r3, r6
-; CHECK-P8-NEXT:    stvx v5, 0, r3
+; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    xxswapd vs1, v3
+; CHECK-P8-NEXT:    xxswapd vs0, v4
+; CHECK-P8-NEXT:    xxswapd vs3, v5
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
index 81faab9defbca..7ba0f8d444f69 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
@@ -68,7 +68,8 @@ define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-P8-NEXT:    mtvsrwz v2, r3
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI1_0 at toc@l
 ; CHECK-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-P8-NEXT:    blr
@@ -105,19 +106,23 @@ define void @test8elt(<8 x float>* noalias nocapture sret(<8 x float>) %agg.resu
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_0 at toc@ha
 ; CHECK-P8-NEXT:    addis r6, r2, .LCPI2_1 at toc@ha
-; CHECK-P8-NEXT:    mtvsrd v2, r4
+; CHECK-P8-NEXT:    xxlxor v2, v2, v2
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_0 at toc@l
-; CHECK-P8-NEXT:    addi r4, r6, .LCPI2_1 at toc@l
-; CHECK-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-P8-NEXT:    lvx v3, 0, r5
-; CHECK-P8-NEXT:    lvx v5, 0, r4
+; CHECK-P8-NEXT:    mtvsrd v4, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
-; CHECK-P8-NEXT:    vperm v2, v4, v2, v5
-; CHECK-P8-NEXT:    xvcvuxwsp v3, v3
-; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
-; CHECK-P8-NEXT:    stvx v3, 0, r3
-; CHECK-P8-NEXT:    stvx v2, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    addi r5, r6, .LCPI2_1 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxswapd v5, vs1
+; CHECK-P8-NEXT:    vperm v3, v2, v4, v3
+; CHECK-P8-NEXT:    vperm v2, v2, v4, v5
+; CHECK-P8-NEXT:    xvcvuxwsp vs0, v3
+; CHECK-P8-NEXT:    xvcvuxwsp vs1, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
@@ -170,29 +175,37 @@ define void @test16elt(<16 x float>* noalias nocapture sret(<16 x float>) %agg.r
 ; CHECK-P8-NEXT:    xxlxor v4, v4, v4
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_0 at toc@l
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_2 at toc@l
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_3 at toc@ha
-; CHECK-P8-NEXT:    lvx v5, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_1 at toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_3 at toc@l
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_1 at toc@l
-; CHECK-P8-NEXT:    lvx v0, 0, r4
-; CHECK-P8-NEXT:    lvx v1, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
 ; CHECK-P8-NEXT:    li r4, 48
 ; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxswapd v5, vs1
+; CHECK-P8-NEXT:    xxswapd v0, vs2
+; CHECK-P8-NEXT:    xxswapd v1, vs3
 ; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
 ; CHECK-P8-NEXT:    vperm v0, v4, v2, v0
 ; CHECK-P8-NEXT:    vperm v2, v4, v2, v1
-; CHECK-P8-NEXT:    xvcvuxwsp v4, v5
-; CHECK-P8-NEXT:    xvcvuxwsp v3, v3
-; CHECK-P8-NEXT:    xvcvuxwsp v5, v0
-; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
-; CHECK-P8-NEXT:    stvx v4, r3, r5
-; CHECK-P8-NEXT:    stvx v3, 0, r3
-; CHECK-P8-NEXT:    stvx v5, r3, r4
+; CHECK-P8-NEXT:    xvcvuxwsp vs0, v3
+; CHECK-P8-NEXT:    xvcvuxwsp vs1, v5
+; CHECK-P8-NEXT:    xvcvuxwsp vs2, v0
+; CHECK-P8-NEXT:    xvcvuxwsp vs3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stvx v2, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -320,7 +333,8 @@ define <4 x float> @test4elt_signed(i32 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_0 at toc@ha
 ; CHECK-P8-NEXT:    mtvsrwz v3, r3
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
 ; CHECK-P8-NEXT:    vspltisw v3, 12
 ; CHECK-P8-NEXT:    vadduwm v3, v3, v3
@@ -362,23 +376,27 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret(<8 x float>) %a
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_0 at toc@ha
 ; CHECK-P8-NEXT:    addis r6, r2, .LCPI6_1 at toc@ha
 ; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    vspltisw v5, 12
 ; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_0 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
 ; CHECK-P8-NEXT:    addi r5, r6, .LCPI6_1 at toc@l
-; CHECK-P8-NEXT:    lvx v4, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v4, vs1
 ; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
 ; CHECK-P8-NEXT:    vperm v3, v3, v3, v4
-; CHECK-P8-NEXT:    vadduwm v4, v5, v5
+; CHECK-P8-NEXT:    vspltisw v4, 12
+; CHECK-P8-NEXT:    vadduwm v4, v4, v4
 ; CHECK-P8-NEXT:    vslw v2, v2, v4
 ; CHECK-P8-NEXT:    vslw v3, v3, v4
 ; CHECK-P8-NEXT:    vsraw v2, v2, v4
 ; CHECK-P8-NEXT:    vsraw v3, v3, v4
-; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
-; CHECK-P8-NEXT:    xvcvsxwsp v3, v3
-; CHECK-P8-NEXT:    stvx v2, 0, r3
-; CHECK-P8-NEXT:    stvx v3, r3, r4
+; CHECK-P8-NEXT:    xvcvsxwsp vs0, v2
+; CHECK-P8-NEXT:    xvcvsxwsp vs1, v3
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
@@ -433,16 +451,20 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret(<16 x float>)
 ; CHECK-P8-NEXT:    vspltisw v1, 12
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_0 at toc@l
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_2 at toc@l
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_3 at toc@ha
-; CHECK-P8-NEXT:    lvx v4, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_1 at toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_3 at toc@l
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_1 at toc@l
-; CHECK-P8-NEXT:    lvx v5, 0, r4
-; CHECK-P8-NEXT:    lvx v0, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
 ; CHECK-P8-NEXT:    li r4, 48
 ; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    xxswapd v0, vs3
 ; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
 ; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
@@ -456,15 +478,19 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret(<16 x float>)
 ; CHECK-P8-NEXT:    vsraw v4, v4, v0
 ; CHECK-P8-NEXT:    vsraw v5, v5, v0
 ; CHECK-P8-NEXT:    vsraw v2, v2, v0
-; CHECK-P8-NEXT:    xvcvsxwsp v3, v3
-; CHECK-P8-NEXT:    xvcvsxwsp v4, v4
-; CHECK-P8-NEXT:    xvcvsxwsp v5, v5
-; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
-; CHECK-P8-NEXT:    stvx v3, 0, r3
-; CHECK-P8-NEXT:    stvx v4, r3, r5
-; CHECK-P8-NEXT:    stvx v5, r3, r4
+; CHECK-P8-NEXT:    xvcvsxwsp vs0, v3
+; CHECK-P8-NEXT:    xvcvsxwsp vs1, v4
+; CHECK-P8-NEXT:    xvcvsxwsp vs2, v5
+; CHECK-P8-NEXT:    xvcvsxwsp vs3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stvx v2, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
index 23adab16eda91..9e518fc03dfc4 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
@@ -16,7 +16,8 @@ define <2 x double> @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    mtvsrwz v2, r3
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI0_0 at toc@l
 ; CHECK-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-P8-NEXT:    xvcvuxddp v2, v2
 ; CHECK-P8-NEXT:    blr
@@ -53,15 +54,17 @@ define void @test4elt(<4 x double>* noalias nocapture sret(<4 x double>) %agg.re
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI1_0 at toc@ha
 ; CHECK-P8-NEXT:    addis r6, r2, .LCPI1_1 at toc@ha
-; CHECK-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-P8-NEXT:    xxlxor v2, v2, v2
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI1_0 at toc@l
-; CHECK-P8-NEXT:    addi r4, r6, .LCPI1_1 at toc@l
-; CHECK-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-P8-NEXT:    lvx v3, 0, r5
-; CHECK-P8-NEXT:    lvx v5, 0, r4
+; CHECK-P8-NEXT:    mtvsrwz v4, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
-; CHECK-P8-NEXT:    vperm v2, v4, v2, v5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    addi r5, r6, .LCPI1_1 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxswapd v5, vs1
+; CHECK-P8-NEXT:    vperm v3, v2, v4, v3
+; CHECK-P8-NEXT:    vperm v2, v2, v4, v5
 ; CHECK-P8-NEXT:    xvcvuxddp vs0, v3
 ; CHECK-P8-NEXT:    xvcvuxddp vs1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
@@ -117,31 +120,35 @@ define void @test8elt(<8 x double>* noalias nocapture sret(<8 x double>) %agg.re
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_0 at toc@ha
 ; CHECK-P8-NEXT:    addis r6, r2, .LCPI2_2 at toc@ha
-; CHECK-P8-NEXT:    mtvsrd v2, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_3 at toc@ha
+; CHECK-P8-NEXT:    xxlxor v2, v2, v2
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_0 at toc@l
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_3 at toc@l
-; CHECK-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-P8-NEXT:    lvx v3, 0, r5
-; CHECK-P8-NEXT:    addi r5, r6, .LCPI2_2 at toc@l
-; CHECK-P8-NEXT:    lvx v0, 0, r4
+; CHECK-P8-NEXT:    addi r6, r6, .LCPI2_2 at toc@l
+; CHECK-P8-NEXT:    mtvsrd v4, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    lvx v5, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_1 at toc@ha
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_1 at toc@l
-; CHECK-P8-NEXT:    lvx v1, 0, r5
-; CHECK-P8-NEXT:    vperm v0, v4, v2, v0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_3 at toc@ha
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r6
+; CHECK-P8-NEXT:    addis r6, r2, .LCPI2_1 at toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_3 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
+; CHECK-P8-NEXT:    addi r5, r6, .LCPI2_1 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxswapd v5, vs1
 ; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
-; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
-; CHECK-P8-NEXT:    vperm v2, v4, v2, v1
-; CHECK-P8-NEXT:    xvcvuxddp vs2, v0
+; CHECK-P8-NEXT:    xxswapd v0, vs2
+; CHECK-P8-NEXT:    xxswapd v1, vs3
+; CHECK-P8-NEXT:    vperm v3, v2, v4, v3
+; CHECK-P8-NEXT:    vperm v5, v2, v4, v5
+; CHECK-P8-NEXT:    vperm v0, v2, v4, v0
+; CHECK-P8-NEXT:    vperm v2, v2, v4, v1
 ; CHECK-P8-NEXT:    xvcvuxddp vs0, v3
 ; CHECK-P8-NEXT:    xvcvuxddp vs1, v5
+; CHECK-P8-NEXT:    xvcvuxddp vs2, v0
 ; CHECK-P8-NEXT:    xvcvuxddp vs3, v2
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
 ; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
 ; CHECK-P8-NEXT:    li r4, 16
@@ -224,58 +231,66 @@ define void @test16elt(<16 x double>* noalias nocapture sret(<16 x double>) %agg
 ; CHECK-P8-NEXT:    xxlxor v4, v4, v4
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_0 at toc@l
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_1 at toc@l
-; CHECK-P8-NEXT:    lvx v3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_2 at toc@ha
-; CHECK-P8-NEXT:    lvx v5, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_4 at toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_2 at toc@l
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_4 at toc@l
-; CHECK-P8-NEXT:    lvx v0, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_6 at toc@ha
-; CHECK-P8-NEXT:    lvx v1, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_7 at toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_6 at toc@l
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_7 at toc@l
-; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
-; CHECK-P8-NEXT:    lvx v6, 0, r4
+; CHECK-P8-NEXT:    xxswapd v5, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_5 at toc@ha
-; CHECK-P8-NEXT:    lvx v7, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_3 at toc@ha
-; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_5 at toc@l
+; CHECK-P8-NEXT:    xxswapd v0, vs2
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_3 at toc@l
-; CHECK-P8-NEXT:    vperm v0, v4, v2, v0
-; CHECK-P8-NEXT:    lvx v8, 0, r4
-; CHECK-P8-NEXT:    lvx v9, 0, r5
-; CHECK-P8-NEXT:    vperm v1, v4, v2, v1
+; CHECK-P8-NEXT:    xxswapd v1, vs3
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-P8-NEXT:    li r4, 112
 ; CHECK-P8-NEXT:    li r5, 96
+; CHECK-P8-NEXT:    xxswapd v6, vs0
+; CHECK-P8-NEXT:    xxswapd v7, vs1
+; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
+; CHECK-P8-NEXT:    xxswapd v8, vs2
+; CHECK-P8-NEXT:    xxswapd v9, vs3
 ; CHECK-P8-NEXT:    vperm v6, v4, v2, v6
 ; CHECK-P8-NEXT:    vperm v7, v4, v2, v7
 ; CHECK-P8-NEXT:    vperm v8, v4, v2, v8
+; CHECK-P8-NEXT:    vperm v0, v4, v2, v0
+; CHECK-P8-NEXT:    vperm v1, v4, v2, v1
 ; CHECK-P8-NEXT:    vperm v2, v4, v2, v9
-; CHECK-P8-NEXT:    xvcvuxddp vs0, v0
-; CHECK-P8-NEXT:    xvcvuxddp vs1, v1
 ; CHECK-P8-NEXT:    xvcvuxddp vs2, v6
 ; CHECK-P8-NEXT:    xvcvuxddp vs3, v7
 ; CHECK-P8-NEXT:    xvcvuxddp vs4, v8
+; CHECK-P8-NEXT:    xvcvuxddp vs0, v0
+; CHECK-P8-NEXT:    xvcvuxddp vs1, v1
 ; CHECK-P8-NEXT:    xvcvuxddp vs5, v2
 ; CHECK-P8-NEXT:    xvcvuxddp vs6, v3
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xvcvuxddp vs7, v5
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xvcvuxddp vs7, v5
 ; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    xxswapd vs4, vs4
-; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
 ; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
 ; CHECK-P8-NEXT:    li r5, 64
-; CHECK-P8-NEXT:    xxswapd vs2, vs7
 ; CHECK-P8-NEXT:    xxswapd vs3, vs6
 ; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxswapd vs2, vs7
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
 ; CHECK-P8-NEXT:    li r5, 32
 ; CHECK-P8-NEXT:    stxvd2x vs5, r3, r4
@@ -404,7 +419,8 @@ define <2 x double> @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI4_1 at toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI4_0 at toc@l
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI4_1 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
 ; CHECK-P8-NEXT:    xxswapd v3, vs0
@@ -449,11 +465,13 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret(<4 x double>)
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_1 at toc@ha
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI5_0 at toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_1 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
 ; CHECK-P8-NEXT:    addi r5, r6, .LCPI5_2 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lvx v4, 0, r5
+; CHECK-P8-NEXT:    xxswapd v4, vs1
 ; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
 ; CHECK-P8-NEXT:    vperm v3, v3, v3, v4
 ; CHECK-P8-NEXT:    xxswapd v4, vs0
@@ -519,22 +537,26 @@ define void @test8elt_signed(<8 x double>* noalias nocapture sret(<8 x double>)
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_0 at toc@ha
 ; CHECK-P8-NEXT:    addis r6, r2, .LCPI6_2 at toc@ha
 ; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_1 at toc@ha
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_0 at toc@l
 ; CHECK-P8-NEXT:    addi r6, r6, .LCPI6_2 at toc@l
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_1 at toc@l
-; CHECK-P8-NEXT:    lvx v2, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_3 at toc@ha
-; CHECK-P8-NEXT:    lvx v4, 0, r6
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r6
 ; CHECK-P8-NEXT:    addis r6, r2, .LCPI6_4 at toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT:    li r4, 48
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_3 at toc@l
-; CHECK-P8-NEXT:    lvx v5, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
 ; CHECK-P8-NEXT:    addi r5, r6, .LCPI6_4 at toc@l
-; CHECK-P8-NEXT:    lvx v0, 0, r5
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_1 at toc@ha
+; CHECK-P8-NEXT:    addi r4, r5, .LCPI6_1 at toc@l
 ; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxswapd v0, vs3
+; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
 ; CHECK-P8-NEXT:    vperm v4, v3, v3, v4
 ; CHECK-P8-NEXT:    vperm v5, v3, v3, v5
 ; CHECK-P8-NEXT:    vperm v3, v3, v3, v0
@@ -639,38 +661,46 @@ define void @test16elt_signed(<16 x double>* noalias nocapture sret(<16 x double
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_0 at toc@ha
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_2 at toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI7_3 at toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_0 at toc@l
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_2 at toc@l
-; CHECK-P8-NEXT:    addi r6, r6, .LCPI7_3 at toc@l
-; CHECK-P8-NEXT:    lvx v3, 0, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_4 at toc@ha
-; CHECK-P8-NEXT:    lvx v4, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_5 at toc@ha
-; CHECK-P8-NEXT:    lvx v5, 0, r6
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI7_1 at toc@ha
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_4 at toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_5 at toc@l
-; CHECK-P8-NEXT:    addi r6, r6, .LCPI7_1 at toc@l
-; CHECK-P8-NEXT:    lvx v0, 0, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_6 at toc@ha
-; CHECK-P8-NEXT:    lvx v1, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_7 at toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r6
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_6 at toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_7 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_3 at toc@ha
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_4 at toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_3 at toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_4 at toc@l
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_5 at toc@ha
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_6 at toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_5 at toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_6 at toc@l
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_7 at toc@ha
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_8 at toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_7 at toc@l
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_8 at toc@l
+; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    xxswapd v0, vs3
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_1 at toc@ha
 ; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
-; CHECK-P8-NEXT:    lvx v6, 0, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_8 at toc@ha
-; CHECK-P8-NEXT:    lvx v7, 0, r5
-; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
 ; CHECK-P8-NEXT:    li r5, 96
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_8 at toc@l
+; CHECK-P8-NEXT:    xxswapd v1, vs0
+; CHECK-P8-NEXT:    xxswapd v6, vs1
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_1 at toc@l
+; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    xxswapd v7, vs2
+; CHECK-P8-NEXT:    xxswapd v8, vs3
 ; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
-; CHECK-P8-NEXT:    xxswapd v9, vs0
-; CHECK-P8-NEXT:    lvx v8, 0, r4
 ; CHECK-P8-NEXT:    vperm v0, v2, v2, v0
-; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    xxswapd v9, vs0
 ; CHECK-P8-NEXT:    vperm v1, v2, v2, v1
 ; CHECK-P8-NEXT:    vperm v6, v2, v2, v6
 ; CHECK-P8-NEXT:    vperm v7, v2, v2, v7

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll
index 7ec148a029067..8177fe9df1edb 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll
@@ -64,12 +64,12 @@ define void @test8elt(<8 x float>* noalias nocapture sret(<8 x float>) %agg.resu
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
-; CHECK-P8-NEXT:    lvx v2, r4, r5
-; CHECK-P8-NEXT:    xvcvuxwsp v3, v3
-; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
-; CHECK-P8-NEXT:    stvx v3, 0, r3
-; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    xvcvuxwsp vs1, vs1
+; CHECK-P8-NEXT:    xvcvuxwsp vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
@@ -104,18 +104,18 @@ define void @test16elt(<16 x float>* noalias nocapture sret(<16 x float>) %agg.r
 ; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    li r7, 48
-; CHECK-P8-NEXT:    lvx v5, 0, r4
-; CHECK-P8-NEXT:    lvx v2, r4, r5
-; CHECK-P8-NEXT:    lvx v3, r4, r6
-; CHECK-P8-NEXT:    lvx v4, r4, r7
-; CHECK-P8-NEXT:    xvcvuxwsp v5, v5
-; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
-; CHECK-P8-NEXT:    xvcvuxwsp v3, v3
-; CHECK-P8-NEXT:    xvcvuxwsp v4, v4
-; CHECK-P8-NEXT:    stvx v5, 0, r3
-; CHECK-P8-NEXT:    stvx v2, r3, r5
-; CHECK-P8-NEXT:    stvx v3, r3, r6
-; CHECK-P8-NEXT:    stvx v4, r3, r7
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    xvcvuxwsp vs3, vs3
+; CHECK-P8-NEXT:    xvcvuxwsp vs0, vs0
+; CHECK-P8-NEXT:    xvcvuxwsp vs1, vs1
+; CHECK-P8-NEXT:    xvcvuxwsp vs2, vs2
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -211,12 +211,12 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret(<8 x float>) %a
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lvx v3, 0, r4
-; CHECK-P8-NEXT:    lvx v2, r4, r5
-; CHECK-P8-NEXT:    xvcvsxwsp v3, v3
-; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
-; CHECK-P8-NEXT:    stvx v3, 0, r3
-; CHECK-P8-NEXT:    stvx v2, r3, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    xvcvsxwsp vs1, vs1
+; CHECK-P8-NEXT:    xvcvsxwsp vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
@@ -251,18 +251,18 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret(<16 x float>)
 ; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    li r7, 48
-; CHECK-P8-NEXT:    lvx v5, 0, r4
-; CHECK-P8-NEXT:    lvx v2, r4, r5
-; CHECK-P8-NEXT:    lvx v3, r4, r6
-; CHECK-P8-NEXT:    lvx v4, r4, r7
-; CHECK-P8-NEXT:    xvcvsxwsp v5, v5
-; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
-; CHECK-P8-NEXT:    xvcvsxwsp v3, v3
-; CHECK-P8-NEXT:    xvcvsxwsp v4, v4
-; CHECK-P8-NEXT:    stvx v5, 0, r3
-; CHECK-P8-NEXT:    stvx v2, r3, r5
-; CHECK-P8-NEXT:    stvx v3, r3, r6
-; CHECK-P8-NEXT:    stvx v4, r3, r7
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    xvcvsxwsp vs3, vs3
+; CHECK-P8-NEXT:    xvcvsxwsp vs0, vs0
+; CHECK-P8-NEXT:    xvcvsxwsp vs1, vs1
+; CHECK-P8-NEXT:    xvcvsxwsp vs2, vs2
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll b/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll
index 65200298e3a1e..7af2946fb18c5 100644
--- a/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll
@@ -4,9 +4,11 @@
 define void @VPKUDUM_unary(<2 x i64>* %A) {
 ; CHECK-LABEL: VPKUDUM_unary:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lvx 2, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 34, 0
 ; CHECK-NEXT:    vpkudum 2, 2, 2
-; CHECK-NEXT:    stvx 2, 0, 3
+; CHECK-NEXT:    xxswapd 0, 34
+; CHECK-NEXT:    stxvd2x 0, 0, 3
 ; CHECK-NEXT:    blr
 entry:
 	%tmp = load <2 x i64>, <2 x i64>* %A
@@ -25,10 +27,13 @@ entry:
 define void @VPKUDUM(<2 x i64>* %A, <2 x i64>* %B) {
 ; CHECK-LABEL: VPKUDUM:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lvx 2, 0, 3
-; CHECK-NEXT:    lvx 3, 0, 4
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    lxvd2x 1, 0, 4
+; CHECK-NEXT:    xxswapd 34, 0
+; CHECK-NEXT:    xxswapd 35, 1
 ; CHECK-NEXT:    vpkudum 2, 3, 2
-; CHECK-NEXT:    stvx 2, 0, 3
+; CHECK-NEXT:    xxswapd 0, 34
+; CHECK-NEXT:    stxvd2x 0, 0, 3
 ; CHECK-NEXT:    blr
 entry:
 	%tmp = load <2 x i64>, <2 x i64>* %A

diff  --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 96eebccf13b1c..90880c5773357 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -51,7 +51,6 @@ define <3 x float> @constrained_vector_fdiv_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-NEXT:    addi 3, 3, .LCPI2_0 at toc@l
 ; PC64LE-NEXT:    xxsldwi 4, 35, 35, 1
 ; PC64LE-NEXT:    xxsldwi 5, 34, 34, 1
-; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
@@ -64,6 +63,8 @@ define <3 x float> @constrained_vector_fdiv_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
@@ -313,12 +314,13 @@ define <3 x float> @constrained_vector_frem_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI7_0 at toc@l
 ; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 96
@@ -646,7 +648,6 @@ define <3 x float> @constrained_vector_fmul_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-NEXT:    addi 3, 3, .LCPI12_0 at toc@l
 ; PC64LE-NEXT:    xxsldwi 4, 35, 35, 1
 ; PC64LE-NEXT:    xxsldwi 5, 34, 34, 1
-; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
@@ -659,6 +660,8 @@ define <3 x float> @constrained_vector_fmul_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
@@ -807,7 +810,6 @@ define <3 x float> @constrained_vector_fadd_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-NEXT:    addi 3, 3, .LCPI17_0 at toc@l
 ; PC64LE-NEXT:    xxsldwi 4, 35, 35, 1
 ; PC64LE-NEXT:    xxsldwi 5, 34, 34, 1
-; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
@@ -820,6 +822,8 @@ define <3 x float> @constrained_vector_fadd_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
@@ -968,7 +972,6 @@ define <3 x float> @constrained_vector_fsub_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-NEXT:    addi 3, 3, .LCPI22_0 at toc@l
 ; PC64LE-NEXT:    xxsldwi 4, 35, 35, 1
 ; PC64LE-NEXT:    xxsldwi 5, 34, 34, 1
-; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
@@ -981,6 +984,8 @@ define <3 x float> @constrained_vector_fsub_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
@@ -1124,10 +1129,11 @@ define <3 x float> @constrained_vector_sqrt_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    addis 3, 2, .LCPI27_0 at toc@ha
 ; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI27_0 at toc@l
-; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    lxvd2x 3, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    xxswapd 36, 3
 ; PC64LE-NEXT:    xssqrtsp 0, 0
 ; PC64LE-NEXT:    xssqrtsp 1, 1
 ; PC64LE-NEXT:    xssqrtsp 2, 2
@@ -1368,12 +1374,13 @@ define <3 x float> @constrained_vector_pow_v3f32(<3 x float> %x, <3 x float> %y)
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI32_0 at toc@l
 ; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 96
@@ -1800,10 +1807,11 @@ define <3 x float> @constrained_vector_powi_v3f32(<3 x float> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI37_0 at toc@l
 ; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 96
@@ -2184,11 +2192,12 @@ define <3 x float> @constrained_vector_sin_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI42_0 at toc@l
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
@@ -2534,11 +2543,12 @@ define <3 x float> @constrained_vector_cos_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI47_0 at toc@l
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
@@ -2884,11 +2894,12 @@ define <3 x float> @constrained_vector_exp_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI52_0 at toc@l
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
@@ -3234,11 +3245,12 @@ define <3 x float> @constrained_vector_exp2_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI57_0 at toc@l
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
@@ -3584,11 +3596,12 @@ define <3 x float> @constrained_vector_log_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI62_0 at toc@l
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
@@ -3934,11 +3947,12 @@ define <3 x float> @constrained_vector_log10_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI67_0 at toc@l
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
@@ -4284,11 +4298,12 @@ define <3 x float> @constrained_vector_log2_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI72_0 at toc@l
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
@@ -4554,10 +4569,11 @@ define <3 x float> @constrained_vector_rint_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    addis 3, 2, .LCPI77_0 at toc@ha
 ; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI77_0 at toc@l
-; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    lxvd2x 3, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    xxswapd 36, 3
 ; PC64LE-NEXT:    xsrdpic 0, 0
 ; PC64LE-NEXT:    xsrdpic 1, 1
 ; PC64LE-NEXT:    xsrdpic 2, 2
@@ -4772,11 +4788,12 @@ define <3 x float> @constrained_vector_nearbyint_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI82_0 at toc@l
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
@@ -5087,12 +5104,13 @@ define <3 x float> @constrained_vector_maxnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI87_0 at toc@l
 ; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 96
@@ -5328,12 +5346,13 @@ define <3 x float> @constrained_vector_minnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI92_0 at toc@l
 ; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 0, 1
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 96
@@ -5544,7 +5563,6 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    addis 3, 2, .LCPI97_0 at toc@ha
 ; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI97_0 at toc@l
-; PC64LE-NEXT:    lvx 2, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
@@ -5552,13 +5570,15 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvdpsxws 1, 1
 ; PC64LE-NEXT:    xscvdpsxws 2, 2
 ; PC64LE-NEXT:    mffprwz 4, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtfprwz 0, 4
-; PC64LE-NEXT:    mtfprwz 1, 5
-; PC64LE-NEXT:    mffprwz 4, 2
-; PC64LE-NEXT:    xxmrghw 35, 1, 0
-; PC64LE-NEXT:    mtvsrwz 36, 4
-; PC64LE-NEXT:    vperm 2, 4, 3, 2
+; PC64LE-NEXT:    mtfprwz 1, 4
+; PC64LE-NEXT:    mtfprwz 3, 5
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    mffprwz 3, 2
+; PC64LE-NEXT:    xxmrghw 34, 3, 1
+; PC64LE-NEXT:    mtvsrwz 36, 3
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f32:
@@ -5814,15 +5834,16 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    xscvdpsxws 1, 2
 ; PC64LE-NEXT:    addi 3, 3, .LCPI105_0 at toc@l
 ; PC64LE-NEXT:    xscvdpsxws 2, 3
-; PC64LE-NEXT:    lvx 2, 0, 3
 ; PC64LE-NEXT:    mffprwz 4, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtfprwz 0, 4
-; PC64LE-NEXT:    mtfprwz 1, 5
-; PC64LE-NEXT:    mffprwz 4, 2
-; PC64LE-NEXT:    xxmrghw 35, 1, 0
-; PC64LE-NEXT:    mtvsrwz 36, 4
-; PC64LE-NEXT:    vperm 2, 4, 3, 2
+; PC64LE-NEXT:    mtfprwz 1, 4
+; PC64LE-NEXT:    mtfprwz 3, 5
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    mffprwz 3, 2
+; PC64LE-NEXT:    xxmrghw 34, 3, 1
+; PC64LE-NEXT:    mtvsrwz 36, 3
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f64:
@@ -6038,7 +6059,6 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    addis 3, 2, .LCPI113_0 at toc@ha
 ; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI113_0 at toc@l
-; PC64LE-NEXT:    lvx 2, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
@@ -6046,13 +6066,15 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvdpuxws 1, 1
 ; PC64LE-NEXT:    xscvdpuxws 2, 2
 ; PC64LE-NEXT:    mffprwz 4, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtfprwz 0, 4
-; PC64LE-NEXT:    mtfprwz 1, 5
-; PC64LE-NEXT:    mffprwz 4, 2
-; PC64LE-NEXT:    xxmrghw 35, 1, 0
-; PC64LE-NEXT:    mtvsrwz 36, 4
-; PC64LE-NEXT:    vperm 2, 4, 3, 2
+; PC64LE-NEXT:    mtfprwz 1, 4
+; PC64LE-NEXT:    mtfprwz 3, 5
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    mffprwz 3, 2
+; PC64LE-NEXT:    xxmrghw 34, 3, 1
+; PC64LE-NEXT:    mtvsrwz 36, 3
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f32:
@@ -6307,15 +6329,16 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    xscvdpuxws 1, 2
 ; PC64LE-NEXT:    addi 3, 3, .LCPI121_0 at toc@l
 ; PC64LE-NEXT:    xscvdpuxws 2, 3
-; PC64LE-NEXT:    lvx 2, 0, 3
 ; PC64LE-NEXT:    mffprwz 4, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtfprwz 0, 4
-; PC64LE-NEXT:    mtfprwz 1, 5
-; PC64LE-NEXT:    mffprwz 4, 2
-; PC64LE-NEXT:    xxmrghw 35, 1, 0
-; PC64LE-NEXT:    mtvsrwz 36, 4
-; PC64LE-NEXT:    vperm 2, 4, 3, 2
+; PC64LE-NEXT:    mtfprwz 1, 4
+; PC64LE-NEXT:    mtfprwz 3, 5
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    mffprwz 3, 2
+; PC64LE-NEXT:    xxmrghw 34, 3, 1
+; PC64LE-NEXT:    mtvsrwz 36, 3
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f64:
@@ -6521,10 +6544,11 @@ define <3 x float> @constrained_vector_fptrunc_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    xsrsp 1, 2
 ; PC64LE-NEXT:    addi 3, 3, .LCPI129_0 at toc@l
 ; PC64LE-NEXT:    xsrsp 2, 3
-; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
@@ -6720,10 +6744,11 @@ define <3 x float> @constrained_vector_ceil_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    addis 3, 2, .LCPI137_0 at toc@ha
 ; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI137_0 at toc@l
-; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    lxvd2x 3, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    xxswapd 36, 3
 ; PC64LE-NEXT:    xsrdpip 0, 0
 ; PC64LE-NEXT:    xsrdpip 1, 1
 ; PC64LE-NEXT:    xsrdpip 2, 2
@@ -6835,10 +6860,11 @@ define <3 x float> @constrained_vector_floor_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    addis 3, 2, .LCPI141_0 at toc@ha
 ; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI141_0 at toc@l
-; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    lxvd2x 3, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    xxswapd 36, 3
 ; PC64LE-NEXT:    xsrdpim 0, 0
 ; PC64LE-NEXT:    xsrdpim 1, 1
 ; PC64LE-NEXT:    xsrdpim 2, 2
@@ -6949,10 +6975,11 @@ define <3 x float> @constrained_vector_round_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    addis 3, 2, .LCPI145_0 at toc@ha
 ; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI145_0 at toc@l
-; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    lxvd2x 3, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    xxswapd 36, 3
 ; PC64LE-NEXT:    xsrdpi 0, 0
 ; PC64LE-NEXT:    xsrdpi 1, 1
 ; PC64LE-NEXT:    xsrdpi 2, 2
@@ -7064,10 +7091,11 @@ define <3 x float> @constrained_vector_trunc_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    addis 3, 2, .LCPI149_0 at toc@ha
 ; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI149_0 at toc@l
-; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    lxvd2x 3, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    xxswapd 36, 3
 ; PC64LE-NEXT:    xsrdpiz 0, 0
 ; PC64LE-NEXT:    xsrdpiz 1, 1
 ; PC64LE-NEXT:    xsrdpiz 2, 2
@@ -7221,9 +7249,10 @@ define <2 x double> @constrained_vector_sitofp_v2f64_v2i16(<2 x i16> %x) #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    addis 3, 2, .LCPI155_0 at toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI155_0 at toc@l
-; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI155_1 at toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI155_1 at toc@l
+; PC64LE-NEXT:    xxswapd 35, 0
 ; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    vperm 2, 2, 2, 3
 ; PC64LE-NEXT:    xxswapd 35, 0
@@ -7419,10 +7448,11 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
 ; PC64LE-NEXT:    addis 3, 2, .LCPI161_0 at toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI161_0 at toc@l
-; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    lxvd2x 3, 0, 3
 ; PC64LE-NEXT:    mffprwz 4, 0
 ; PC64LE-NEXT:    mffprwz 5, 1
 ; PC64LE-NEXT:    mtfprwa 0, 4
+; PC64LE-NEXT:    xxswapd 36, 3
 ; PC64LE-NEXT:    mtfprwa 1, 5
 ; PC64LE-NEXT:    mfvsrwz 4, 34
 ; PC64LE-NEXT:    xscvsxdsp 0, 0
@@ -7502,14 +7532,15 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE-NEXT:    mtfprd 1, 4
 ; PC64LE-NEXT:    addi 3, 6, .LCPI163_0 at toc@l
 ; PC64LE-NEXT:    xscvsxdsp 0, 0
-; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
 ; PC64LE-NEXT:    mtfprd 2, 5
 ; PC64LE-NEXT:    xscvsxdsp 2, 2
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
@@ -7790,7 +7821,8 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i16(<2 x i16> %x) #0 {
 ; PC64LE-NEXT:    addis 3, 2, .LCPI173_0 at toc@ha
 ; PC64LE-NEXT:    xxlxor 36, 36, 36
 ; PC64LE-NEXT:    addi 3, 3, .LCPI173_0 at toc@l
-; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 35, 0
 ; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    xvcvuxddp 34, 34
 ; PC64LE-NEXT:    blr
@@ -7982,10 +8014,11 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
 ; PC64LE-NEXT:    addis 3, 2, .LCPI179_0 at toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI179_0 at toc@l
-; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    lxvd2x 3, 0, 3
 ; PC64LE-NEXT:    mffprwz 4, 0
 ; PC64LE-NEXT:    mffprwz 5, 1
 ; PC64LE-NEXT:    mtfprwz 0, 4
+; PC64LE-NEXT:    xxswapd 36, 3
 ; PC64LE-NEXT:    mtfprwz 1, 5
 ; PC64LE-NEXT:    mfvsrwz 4, 34
 ; PC64LE-NEXT:    xscvuxdsp 0, 0
@@ -8065,14 +8098,15 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE-NEXT:    mtfprd 1, 4
 ; PC64LE-NEXT:    addi 3, 6, .LCPI181_0 at toc@l
 ; PC64LE-NEXT:    xscvuxdsp 0, 0
-; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xscvuxdsp 1, 1
 ; PC64LE-NEXT:    mtfprd 2, 5
 ; PC64LE-NEXT:    xscvuxdsp 2, 2
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    xxswapd 36, 2
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vector-ldst.ll b/llvm/test/CodeGen/PowerPC/vector-ldst.ll
index 0c75f67743563..f860fea56e7a1 100644
--- a/llvm/test/CodeGen/PowerPC/vector-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-ldst.ll
@@ -27,7 +27,8 @@ define dso_local <16 x i8> @ld_0_vector(i64 %ptr) {
 ;
 ; CHECK-P8-LE-LABEL: ld_0_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_0_vector:
@@ -56,7 +57,8 @@ define dso_local <16 x i8> @ld_unalign16_vector(i8* nocapture readonly %ptr) {
 ; CHECK-P8-LE-LABEL: ld_unalign16_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    addi r3, r3, 1
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_unalign16_vector:
@@ -87,7 +89,8 @@ define dso_local <16 x i8> @ld_align16_vector(i8* nocapture readonly %ptr) {
 ; CHECK-P8-LE-LABEL: ld_align16_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    addi r3, r3, 8
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_align16_vector:
@@ -120,7 +123,8 @@ define dso_local <16 x i8> @ld_unalign32_vector(i8* nocapture readonly %ptr) {
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, 1
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 34463
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_unalign32_vector:
@@ -154,7 +158,8 @@ define dso_local <16 x i8> @ld_align32_vector(i8* nocapture readonly %ptr) {
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, 1525
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 56600
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_align32_vector:
@@ -195,7 +200,8 @@ define dso_local <16 x i8> @ld_unalign64_vector(i8* nocapture readonly %ptr) {
 ; CHECK-P8-LE-NEXT:    rldic r4, r4, 35, 24
 ; CHECK-P8-LE-NEXT:    oris r4, r4, 54437
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 4097
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_unalign64_vector:
@@ -235,7 +241,8 @@ define dso_local <16 x i8> @ld_align64_vector(i8* nocapture readonly %ptr) {
 ; CHECK-P8-LE-NEXT:    lis r4, 3725
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-LE-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_align64_vector:
@@ -261,7 +268,8 @@ define dso_local <16 x i8> @ld_reg_vector(i8* nocapture readonly %ptr, i64 %off)
 ;
 ; CHECK-P8-LE-LABEL: ld_reg_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_reg_vector:
@@ -286,7 +294,8 @@ define dso_local <16 x i8> @ld_or_vector(i64 %ptr, i8 zeroext %off) {
 ; CHECK-P8-LE-LABEL: ld_or_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    or r3, r4, r3
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_or_vector:
@@ -313,7 +322,8 @@ define dso_local <16 x i8> @ld_or2_vector(i64 %ptr, i8 zeroext %off) {
 ; CHECK-P8-LE-LABEL: ld_or2_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    rldicr r3, r3, 0, 51
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_or2_vector:
@@ -341,7 +351,8 @@ define dso_local <16 x i8> @ld_not_disjoint16_vector(i64 %ptr) {
 ; CHECK-P8-LE-LABEL: ld_not_disjoint16_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 6
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_not_disjoint16_vector:
@@ -375,7 +386,8 @@ define dso_local <16 x i8> @ld_disjoint_unalign16_vector(i64 %ptr) {
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    rldicr r3, r3, 0, 51
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 6
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_unalign16_vector:
@@ -411,7 +423,8 @@ define dso_local <16 x i8> @ld_disjoint_align16_vector(i64 %ptr) {
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    rldicr r3, r3, 0, 51
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 24
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_align16_vector:
@@ -441,7 +454,8 @@ define dso_local <16 x i8> @ld_not_disjoint32_vector(i64 %ptr) {
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 34463
 ; CHECK-P8-LE-NEXT:    oris r3, r3, 1
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_not_disjoint32_vector:
@@ -478,7 +492,8 @@ define dso_local <16 x i8> @ld_disjoint_unalign32_vector(i64 %ptr) {
 ; CHECK-P8-LE-NEXT:    lis r4, 1
 ; CHECK-P8-LE-NEXT:    rldicr r3, r3, 0, 43
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 34463
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_unalign32_vector:
@@ -520,7 +535,8 @@ define dso_local <16 x i8> @ld_disjoint_align32_vector(i64 %ptr) {
 ; CHECK-P8-LE-NEXT:    lis r5, 15258
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
 ; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_align32_vector:
@@ -567,7 +583,8 @@ define dso_local <16 x i8> @ld_not_disjoint64_vector(i64 %ptr) {
 ; CHECK-P8-LE-NEXT:    oris r4, r4, 54437
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 4097
 ; CHECK-P8-LE-NEXT:    or r3, r3, r4
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_not_disjoint64_vector:
@@ -614,7 +631,8 @@ define dso_local <16 x i8> @ld_disjoint_unalign64_vector(i64 %ptr) {
 ; CHECK-P8-LE-NEXT:    rldic r4, r4, 35, 24
 ; CHECK-P8-LE-NEXT:    oris r4, r4, 54437
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 4097
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_unalign64_vector:
@@ -659,7 +677,8 @@ define dso_local <16 x i8> @ld_disjoint_align64_vector(i64 %ptr) {
 ; CHECK-P8-LE-NEXT:    rldicr r3, r3, 0, 23
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-LE-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-LE-NEXT:    lvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_align64_vector:
@@ -689,7 +708,8 @@ define dso_local <16 x i8> @ld_cst_unalign16_vector() {
 ; CHECK-P8-LE-LABEL: ld_cst_unalign16_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    li r3, 255
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_cst_unalign16_vector:
@@ -712,7 +732,8 @@ define dso_local <16 x i8> @ld_cst_align16_vector() {
 ; CHECK-P8-LE-LABEL: ld_cst_align16_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    li r3, 4080
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_cst_align16_vector:
@@ -744,7 +765,8 @@ define dso_local <16 x i8> @ld_cst_unalign32_vector() {
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r3, 1
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 34463
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_cst_unalign32_vector:
@@ -777,7 +799,8 @@ define dso_local <16 x i8> @ld_cst_align32_vector() {
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r3, 152
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 38428
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_cst_align32_vector:
@@ -816,7 +839,8 @@ define dso_local <16 x i8> @ld_cst_unalign64_vector() {
 ; CHECK-P8-LE-NEXT:    rldic r3, r3, 35, 24
 ; CHECK-P8-LE-NEXT:    oris r3, r3, 54437
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 4097
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_cst_unalign64_vector:
@@ -854,7 +878,8 @@ define dso_local <16 x i8> @ld_cst_align64_vector() {
 ; CHECK-P8-LE-NEXT:    lis r3, 3725
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 19025
 ; CHECK-P8-LE-NEXT:    rldic r3, r3, 12, 24
-; CHECK-P8-LE-NEXT:    lvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_cst_align64_vector:
@@ -878,7 +903,8 @@ define dso_local void @st_0_vector(i64 %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_0_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_0_vector:
@@ -906,8 +932,9 @@ define dso_local void @st_unalign16_vector(i8* nocapture %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_unalign16_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    addi r3, r3, 1
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_unalign16_vector:
@@ -937,8 +964,9 @@ define dso_local void @st_align16_vector(i8* nocapture %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_align16_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    addi r3, r3, 8
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_align16_vector:
@@ -969,9 +997,10 @@ define dso_local void @st_unalign32_vector(i8* nocapture %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_unalign32_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    lis r4, 1
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 34463
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_unalign32_vector:
@@ -1003,9 +1032,10 @@ define dso_local void @st_align32_vector(i8* nocapture %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_align32_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    lis r4, 1525
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 56600
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_align32_vector:
@@ -1043,10 +1073,11 @@ define dso_local void @st_unalign64_vector(i8* nocapture %ptr, <16 x i8> %str) {
 ; CHECK-P8-LE-LABEL: st_unalign64_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    li r4, 29
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    rldic r4, r4, 35, 24
 ; CHECK-P8-LE-NEXT:    oris r4, r4, 54437
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 4097
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_unalign64_vector:
@@ -1084,9 +1115,10 @@ define dso_local void @st_align64_vector(i8* nocapture %ptr, <16 x i8> %str) {
 ; CHECK-P8-LE-LABEL: st_align64_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, 3725
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-LE-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_align64_vector:
@@ -1112,7 +1144,8 @@ define dso_local void @st_reg_vector(i8* nocapture %ptr, i64 %off, <16 x i8> %st
 ;
 ; CHECK-P8-LE-LABEL: st_reg_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_reg_vector:
@@ -1136,8 +1169,9 @@ define dso_local void @st_or1_vector(i64 %ptr, i8 zeroext %off, <16 x i8> %str)
 ;
 ; CHECK-P8-LE-LABEL: st_or1_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    or r3, r4, r3
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_or1_vector:
@@ -1163,8 +1197,9 @@ define dso_local void @st_or2_vector(i64 %ptr, i8 zeroext %off, <16 x i8> %str)
 ;
 ; CHECK-P8-LE-LABEL: st_or2_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    rldicr r3, r3, 0, 51
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_or2_vector:
@@ -1191,8 +1226,9 @@ define dso_local void @st_not_disjoint16_vector(i64 %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_not_disjoint16_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 6
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_not_disjoint16_vector:
@@ -1224,9 +1260,10 @@ define dso_local void @st_disjoint_unalign16_vector(i64 %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_disjoint_unalign16_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    rldicr r3, r3, 0, 51
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 6
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_disjoint_unalign16_vector:
@@ -1260,9 +1297,10 @@ define dso_local void @st_disjoint_align16_vector(i64 %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_disjoint_align16_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    rldicr r3, r3, 0, 51
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 24
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_disjoint_align16_vector:
@@ -1290,9 +1328,10 @@ define dso_local void @st_not_disjoint32_vector(i64 %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_not_disjoint32_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 34463
 ; CHECK-P8-LE-NEXT:    oris r3, r3, 1
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_not_disjoint32_vector:
@@ -1326,10 +1365,11 @@ define dso_local void @st_disjoint_unalign32_vector(i64 %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_disjoint_unalign32_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    lis r4, 1
 ; CHECK-P8-LE-NEXT:    rldicr r3, r3, 0, 43
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 34463
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_disjoint_unalign32_vector:
@@ -1367,11 +1407,12 @@ define dso_local void @st_disjoint_align32_vector(i64 %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_disjoint_align32_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    lis r4, -15264
 ; CHECK-P8-LE-NEXT:    lis r5, 15258
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
 ; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_disjoint_align32_vector:
@@ -1414,11 +1455,12 @@ define dso_local void @st_not_disjoint64_vector(i64 %ptr, <16 x i8> %str) {
 ; CHECK-P8-LE-LABEL: st_not_disjoint64_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    li r4, 29
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    rldic r4, r4, 35, 24
 ; CHECK-P8-LE-NEXT:    oris r4, r4, 54437
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 4097
 ; CHECK-P8-LE-NEXT:    or r3, r3, r4
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_not_disjoint64_vector:
@@ -1461,11 +1503,12 @@ define dso_local void @st_disjoint_unalign64_vector(i64 %ptr, <16 x i8> %str) {
 ; CHECK-P8-LE-LABEL: st_disjoint_unalign64_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    li r4, 29
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    rldicr r3, r3, 0, 23
 ; CHECK-P8-LE-NEXT:    rldic r4, r4, 35, 24
 ; CHECK-P8-LE-NEXT:    oris r4, r4, 54437
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 4097
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_disjoint_unalign64_vector:
@@ -1507,10 +1550,11 @@ define dso_local void @st_disjoint_align64_vector(i64 %ptr, <16 x i8> %str) {
 ; CHECK-P8-LE-LABEL: st_disjoint_align64_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, 3725
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    rldicr r3, r3, 0, 23
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-LE-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-LE-NEXT:    stvx v2, r3, r4
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_disjoint_align64_vector:
@@ -1539,8 +1583,9 @@ define dso_local void @st_cst_unalign16_vector(<16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_cst_unalign16_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    li r3, 255
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_cst_unalign16_vector:
@@ -1562,8 +1607,9 @@ define dso_local void @st_cst_align16_vector(<16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_cst_align16_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    li r3, 4080
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_cst_align16_vector:
@@ -1593,9 +1639,10 @@ define dso_local void @st_cst_unalign32_vector(<16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_cst_unalign32_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    lis r3, 1
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 34463
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_cst_unalign32_vector:
@@ -1626,9 +1673,10 @@ define dso_local void @st_cst_align32_vector(<16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_cst_align32_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    lis r3, 152
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 38428
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_cst_align32_vector:
@@ -1664,10 +1712,11 @@ define dso_local void @st_cst_unalign64_vector(<16 x i8> %str) {
 ; CHECK-P8-LE-LABEL: st_cst_unalign64_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    li r3, 29
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    rldic r3, r3, 35, 24
 ; CHECK-P8-LE-NEXT:    oris r3, r3, 54437
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 4097
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_cst_unalign64_vector:
@@ -1703,9 +1752,10 @@ define dso_local void @st_cst_align64_vector(<16 x i8> %str) {
 ; CHECK-P8-LE-LABEL: st_cst_align64_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r3, 3725
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 19025
 ; CHECK-P8-LE-NEXT:    rldic r3, r3, 12, 24
-; CHECK-P8-LE-NEXT:    stvx v2, 0, r3
+; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_cst_align64_vector:

diff  --git a/llvm/test/CodeGen/PowerPC/vector-rotates.ll b/llvm/test/CodeGen/PowerPC/vector-rotates.ll
index b5064501702dd..2de8804ba8e24 100644
--- a/llvm/test/CodeGen/PowerPC/vector-rotates.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-rotates.ll
@@ -11,7 +11,8 @@ define <16 x i8> @rotl_v16i8(<16 x i8> %a) {
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
-; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd vs35, vs0
 ; CHECK-P8-NEXT:    vrlb v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -34,7 +35,8 @@ define <8 x i16> @rotl_v8i16(<8 x i16> %a) {
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
-; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd vs35, vs0
 ; CHECK-P8-NEXT:    vrlh v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -57,7 +59,8 @@ define <4 x i32> @rotl_v4i32_0(<4 x i32> %a) {
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI2_0 at toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI2_0 at toc@l
-; CHECK-P8-NEXT:    lvx v3, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd vs35, vs0
 ; CHECK-P8-NEXT:    vrlw v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vselect-constants.ll b/llvm/test/CodeGen/PowerPC/vselect-constants.ll
index df939d066d639..d2d331b0a078c 100644
--- a/llvm/test/CodeGen/PowerPC/vselect-constants.ll
+++ b/llvm/test/CodeGen/PowerPC/vselect-constants.ll
@@ -16,11 +16,13 @@ define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) {
 ; CHECK-NEXT:    addis 4, 2, .LCPI0_1 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI0_0 at toc@l
 ; CHECK-NEXT:    addi 4, 4, .LCPI0_1 at toc@l
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    lxvd2x 1, 0, 4
 ; CHECK-NEXT:    vsubuwm 3, 4, 3
-; CHECK-NEXT:    lvx 4, 0, 4
 ; CHECK-NEXT:    vslw 2, 2, 3
+; CHECK-NEXT:    xxswapd 36, 1
 ; CHECK-NEXT:    vsraw 2, 2, 3
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    xxsel 34, 36, 35, 34
 ; CHECK-NEXT:    blr
   %add = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
@@ -30,13 +32,15 @@ define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) {
 define <4 x i32> @cmp_sel_C1_or_C2_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: cmp_sel_C1_or_C2_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcmpequw 2, 2, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI1_0 at toc@ha
 ; CHECK-NEXT:    addis 4, 2, .LCPI1_1 at toc@ha
+; CHECK-NEXT:    vcmpequw 2, 2, 3
 ; CHECK-NEXT:    addi 3, 3, .LCPI1_0 at toc@l
 ; CHECK-NEXT:    addi 4, 4, .LCPI1_1 at toc@l
-; CHECK-NEXT:    lvx 3, 0, 3
-; CHECK-NEXT:    lvx 4, 0, 4
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    lxvd2x 1, 0, 4
+; CHECK-NEXT:    xxswapd 35, 0
+; CHECK-NEXT:    xxswapd 36, 1
 ; CHECK-NEXT:    xxsel 34, 36, 35, 34
 ; CHECK-NEXT:    blr
   %cond = icmp eq <4 x i32> %x, %y
@@ -47,12 +51,13 @@ define <4 x i32> @cmp_sel_C1_or_C2_vec(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) {
 ; CHECK-LABEL: sel_Cplus1_or_C_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vspltisw 3, 1
 ; CHECK-NEXT:    addis 3, 2, .LCPI2_0 at toc@ha
+; CHECK-NEXT:    vspltisw 3, 1
 ; CHECK-NEXT:    addi 3, 3, .LCPI2_0 at toc@l
+; CHECK-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-NEXT:    xxland 34, 34, 35
-; CHECK-NEXT:    lvx 3, 0, 3
-; CHECK-NEXT:    vadduwm 2, 2, 3
+; CHECK-NEXT:    xxswapd 36, 0
+; CHECK-NEXT:    vadduwm 2, 2, 4
 ; CHECK-NEXT:    blr
   %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
   ret <4 x i32> %add
@@ -61,10 +66,11 @@ define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) {
 define <4 x i32> @cmp_sel_Cplus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: cmp_sel_Cplus1_or_C_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcmpequw 2, 2, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI3_0 at toc@ha
+; CHECK-NEXT:    vcmpequw 2, 2, 3
 ; CHECK-NEXT:    addi 3, 3, .LCPI3_0 at toc@l
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vsubuwm 2, 3, 2
 ; CHECK-NEXT:    blr
   %cond = icmp eq <4 x i32> %x, %y
@@ -79,10 +85,11 @@ define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) {
 ; CHECK-NEXT:    vspltisw 4, 15
 ; CHECK-NEXT:    addis 3, 2, .LCPI4_0 at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI4_0 at toc@l
+; CHECK-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-NEXT:    vsubuwm 3, 4, 3
 ; CHECK-NEXT:    vslw 2, 2, 3
 ; CHECK-NEXT:    vsraw 2, 2, 3
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vadduwm 2, 2, 3
 ; CHECK-NEXT:    blr
   %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
@@ -92,10 +99,11 @@ define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) {
 define <4 x i32> @cmp_sel_Cminus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: cmp_sel_Cminus1_or_C_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcmpequw 2, 2, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI5_0 at toc@ha
+; CHECK-NEXT:    vcmpequw 2, 2, 3
 ; CHECK-NEXT:    addi 3, 3, .LCPI5_0 at toc@l
-; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vadduwm 2, 2, 3
 ; CHECK-NEXT:    blr
   %cond = icmp eq <4 x i32> %x, %y

diff  --git a/llvm/test/CodeGen/PowerPC/vsx-ldst.ll b/llvm/test/CodeGen/PowerPC/vsx-ldst.ll
index 0bbc633363a76..0f3fadc32507e 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-ldst.ll
@@ -14,10 +14,8 @@
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 \
 ; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
-; RUN: grep lxvd2x < %t | count 3
-; RUN: grep lvx < %t | count 3
-; RUN: grep stxvd2x < %t | count 3
-; RUN: grep stvx < %t | count 3
+; RUN: grep lxvd2x < %t | count 6
+; RUN: grep stxvd2x < %t | count 6
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O2 \
 ; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s > %t

diff  --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 603a1bb611b31..18e621de905eb 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -1038,7 +1038,8 @@ define <4 x float> @test32(<4 x float>* %a) {
 ;
 ; CHECK-LE-LABEL: test32:
 ; CHECK-LE:       # %bb.0:
-; CHECK-LE-NEXT:    lvx v2, 0, r3
+; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-NEXT:    blr
   %v = load <4 x float>, <4 x float>* %a, align 16
   ret <4 x float> %v
@@ -1065,7 +1066,8 @@ define void @test33(<4 x float>* %a, <4 x float> %b) {
 ;
 ; CHECK-LE-LABEL: test33:
 ; CHECK-LE:       # %bb.0:
-; CHECK-LE-NEXT:    stvx v2, 0, r3
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-NEXT:    blr
   store <4 x float> %b, <4 x float>* %a, align 16
   ret void
@@ -1159,7 +1161,8 @@ define <4 x i32> @test34(<4 x i32>* %a) {
 ;
 ; CHECK-LE-LABEL: test34:
 ; CHECK-LE:       # %bb.0:
-; CHECK-LE-NEXT:    lvx v2, 0, r3
+; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-NEXT:    blr
   %v = load <4 x i32>, <4 x i32>* %a, align 16
   ret <4 x i32> %v
@@ -1186,7 +1189,8 @@ define void @test35(<4 x i32>* %a, <4 x i32> %b) {
 ;
 ; CHECK-LE-LABEL: test35:
 ; CHECK-LE:       # %bb.0:
-; CHECK-LE-NEXT:    stvx v2, 0, r3
+; CHECK-LE-NEXT:    xxswapd vs0, v2
+; CHECK-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-NEXT:    blr
   store <4 x i32> %b, <4 x i32>* %a, align 16
   ret void
@@ -2282,9 +2286,10 @@ define <2 x double> @test69(<2 x i16> %a) {
 ; CHECK-LE:       # %bb.0:
 ; CHECK-LE-NEXT:    addis r3, r2, .LCPI63_0 at toc@ha
 ; CHECK-LE-NEXT:    addi r3, r3, .LCPI63_0 at toc@l
-; CHECK-LE-NEXT:    lvx v3, 0, r3
+; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-NEXT:    addis r3, r2, .LCPI63_1 at toc@ha
 ; CHECK-LE-NEXT:    addi r3, r3, .LCPI63_1 at toc@l
+; CHECK-LE-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-LE-NEXT:    xxswapd v3, vs0
@@ -2362,9 +2367,10 @@ define <2 x double> @test70(<2 x i8> %a) {
 ; CHECK-LE:       # %bb.0:
 ; CHECK-LE-NEXT:    addis r3, r2, .LCPI64_0 at toc@ha
 ; CHECK-LE-NEXT:    addi r3, r3, .LCPI64_0 at toc@l
-; CHECK-LE-NEXT:    lvx v3, 0, r3
+; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-NEXT:    addis r3, r2, .LCPI64_1 at toc@ha
 ; CHECK-LE-NEXT:    addi r3, r3, .LCPI64_1 at toc@l
+; CHECK-LE-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-LE-NEXT:    xxswapd v3, vs0
@@ -2419,11 +2425,12 @@ define <2 x i32> @test80(i32 %v) {
 ;
 ; CHECK-LE-LABEL: test80:
 ; CHECK-LE:       # %bb.0:
-; CHECK-LE-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-NEXT:    addis r4, r2, .LCPI65_0 at toc@ha
-; CHECK-LE-NEXT:    addi r3, r4, .LCPI65_0 at toc@l
-; CHECK-LE-NEXT:    xxspltw v2, vs0, 1
-; CHECK-LE-NEXT:    lvx v3, 0, r3
+; CHECK-LE-NEXT:    mtfprwz f1, r3
+; CHECK-LE-NEXT:    addi r4, r4, .LCPI65_0 at toc@l
+; CHECK-LE-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-LE-NEXT:    xxspltw v2, vs1, 1
+; CHECK-LE-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-NEXT:    vadduwm v2, v2, v3
 ; CHECK-LE-NEXT:    blr
   %b1 = insertelement <2 x i32> undef, i32 %v, i32 0

diff  --git a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
index ccefc5822c98d..929b65dc76d40 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
@@ -94,8 +94,7 @@ define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
 define double @teste0(<2 x double>* %p1) {
 ; CHECK-LABEL: teste0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lxvd2x vs1, 0, r3
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-NEXT:    lfd f1, 0(r3)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: teste0:
@@ -105,8 +104,7 @@ define double @teste0(<2 x double>* %p1) {
 ;
 ; CHECK-P9-VECTOR-LABEL: teste0:
 ; CHECK-P9-VECTOR:       # %bb.0:
-; CHECK-P9-VECTOR-NEXT:    lxvd2x vs1, 0, r3
-; CHECK-P9-VECTOR-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-P9-VECTOR-NEXT:    lfd f1, 0(r3)
 ; CHECK-P9-VECTOR-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: teste0:
@@ -123,9 +121,7 @@ define double @teste0(<2 x double>* %p1) {
 define double @teste1(<2 x double>* %p1) {
 ; CHECK-LABEL: teste1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-NEXT:    xxswapd vs1, vs0
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-NEXT:    lfd f1, 8(r3)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: teste1:
@@ -135,9 +131,7 @@ define double @teste1(<2 x double>* %p1) {
 ;
 ; CHECK-P9-VECTOR-LABEL: teste1:
 ; CHECK-P9-VECTOR:       # %bb.0:
-; CHECK-P9-VECTOR-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P9-VECTOR-NEXT:    xxswapd vs1, vs0
-; CHECK-P9-VECTOR-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-P9-VECTOR-NEXT:    lfd f1, 8(r3)
 ; CHECK-P9-VECTOR-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: teste1:

diff  --git a/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll b/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll
index 35d181ccb24b2..a9415f400a45e 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll
@@ -12,8 +12,7 @@
 define <2 x double> @test00(<2 x double>* %p1, <2 x double>* %p2) {
 ; CHECK-LABEL: test00:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lxvd2x 0, 0, 3
-; CHECK-NEXT:    xxspltd 34, 0, 0
+; CHECK-NEXT:    lxvdsx 34, 0, 3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test00:
@@ -106,8 +105,8 @@ define <2 x double> @test10(<2 x double>* %p1, <2 x double>* %p2) {
 define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
 ; CHECK-LABEL: test11:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lxvd2x 0, 0, 3
-; CHECK-NEXT:    xxspltd 34, 0, 1
+; CHECK-NEXT:    addi 3, 3, 8
+; CHECK-NEXT:    lxvdsx 34, 0, 3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test11:
@@ -212,8 +211,7 @@ define <2 x double> @test21(<2 x double>* %p1, <2 x double>* %p2) {
 define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) {
 ; CHECK-LABEL: test22:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lxvd2x 0, 0, 4
-; CHECK-NEXT:    xxspltd 34, 0, 0
+; CHECK-NEXT:    lxvdsx 34, 0, 4
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test22:
@@ -306,8 +304,8 @@ define <2 x double> @test32(<2 x double>* %p1, <2 x double>* %p2) {
 define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
 ; CHECK-LABEL: test33:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lxvd2x 0, 0, 4
-; CHECK-NEXT:    xxspltd 34, 0, 1
+; CHECK-NEXT:    addi 3, 4, 8
+; CHECK-NEXT:    lxvdsx 34, 0, 3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test33:


        


More information about the llvm-commits mailing list