[llvm] 766ca2c - [PowerPC] Add missed VSX shuffles instead of Altivec ones

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 14 08:12:02 PDT 2022


Author: Nemanja Ivanovic
Date: 2022-03-14T10:11:54-05:00
New Revision: 766ca2c59e21829b5c552ed4cf9b2dea1824b234

URL: https://github.com/llvm/llvm-project/commit/766ca2c59e21829b5c552ed4cf9b2dea1824b234
DIFF: https://github.com/llvm/llvm-project/commit/766ca2c59e21829b5c552ed4cf9b2dea1824b234.diff

LOG: [PowerPC] Add missed VSX shuffles instead of Altivec ones

VSX introduced some permute instructions that are direct
replacements for Altivec ones except they can target all
the VSX registers. We have added code generation for most
of these but somehow missed the low/hi word merges (XXMRG[LH]W).
This caused some additional spills on some large
computationally intensive code.

This patch simply adds the missed patterns.

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
    llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll
    llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
    llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
    llvm/test/CodeGen/PowerPC/fp-strict-round.ll
    llvm/test/CodeGen/PowerPC/load-and-splat.ll
    llvm/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll
    llvm/test/CodeGen/PowerPC/perfect-shuffle.ll
    llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
    llvm/test/CodeGen/PowerPC/pr25080.ll
    llvm/test/CodeGen/PowerPC/pr27078.ll
    llvm/test/CodeGen/PowerPC/pr47916.ll
    llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
    llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
    llvm/test/CodeGen/PowerPC/test-vector-insert.ll
    llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
    llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
    llvm/test/CodeGen/PowerPC/vec-trunc2.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
    llvm/test/CodeGen/PowerPC/vec_int_ext.ll
    llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll
    llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
    llvm/test/CodeGen/PowerPC/vsx.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index d33593365691f..750e4129e8e34 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2900,6 +2900,27 @@ def : Pat<(int_ppc_fmsub f64:$A, f64:$B, f64:$C), (XSMSUBMDP $A, $B, $C)>;
 def : Pat<(int_ppc_fnmadd f64:$A, f64:$B, f64:$C), (XSNMADDMDP $A, $B, $C)>;
 def : Pat<(int_ppc_fre f64:$A), (XSREDP $A)>;
 def : Pat<(int_ppc_frsqrte vsfrc:$XB), (XSRSQRTEDP $XB)>;
+
+// XXMRG[LH]W is a direct replacement for VMRG[LH]W respectively.
+// Prefer the VSX form for greater register range.
+def:Pat<(vmrglw_unary_shuffle v16i8:$vA, undef),
+        (COPY_TO_REGCLASS (XXMRGLW (COPY_TO_REGCLASS $vA, VSRC),
+                                   (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef),
+        (COPY_TO_REGCLASS (XXMRGHW (COPY_TO_REGCLASS $vA, VSRC),
+                                   (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def:Pat<(vmrglw_shuffle v16i8:$vA, v16i8:$vB),
+        (COPY_TO_REGCLASS (XXMRGLW (COPY_TO_REGCLASS $vA, VSRC),
+                                   (COPY_TO_REGCLASS $vB, VSRC)), VRRC)>;
+def:Pat<(vmrghw_shuffle v16i8:$vA, v16i8:$vB),
+        (COPY_TO_REGCLASS (XXMRGHW (COPY_TO_REGCLASS $vA, VSRC),
+                                   (COPY_TO_REGCLASS $vB, VSRC)), VRRC)>;
+def:Pat<(vmrglw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+        (COPY_TO_REGCLASS (XXMRGLW (COPY_TO_REGCLASS $vB, VSRC),
+                                   (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def:Pat<(vmrghw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+        (COPY_TO_REGCLASS (XXMRGHW (COPY_TO_REGCLASS $vB, VSRC),
+                                   (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
 } // HasVSX
 
 // Any big endian VSX subtarget.

diff  --git a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
index 14837771762d8..8586eb351ceff 100644
--- a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
@@ -1465,16 +1465,16 @@ entry:
 define <4 x float> @testSameVecEl1LE(<4 x float> %a) {
 ; CHECK-64-LABEL: testSameVecEl1LE:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    xxswapd 35, 34
-; CHECK-64-NEXT:    vmrghw 2, 2, 3
-; CHECK-64-NEXT:    vmrghw 2, 2, 3
+; CHECK-64-NEXT:    xxswapd 0, 34
+; CHECK-64-NEXT:    xxmrghw 1, 34, 0
+; CHECK-64-NEXT:    xxmrghw 34, 1, 0
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: testSameVecEl1LE:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    xxswapd 35, 34
-; CHECK-32-NEXT:    vmrghw 2, 2, 3
-; CHECK-32-NEXT:    vmrghw 2, 2, 3
+; CHECK-32-NEXT:    xxswapd 0, 34
+; CHECK-32-NEXT:    xxmrghw 1, 34, 0
+; CHECK-32-NEXT:    xxmrghw 34, 1, 0
 ; CHECK-32-NEXT:    blr
 entry:
   %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>

diff  --git a/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll b/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll
index 65e2b7ef52945..8c9e7da82af61 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll
@@ -29,11 +29,11 @@ define void @test_aix_splatimm(i32 %arg, i32 %arg1, i32 %arg2) {
 ; CHECK-AIX-NEXT:    lxvw4x 34, 0, 4
 ; CHECK-AIX-NEXT:    lxvw4x 35, 0, 3
 ; CHECK-AIX-NEXT:    addi 3, 1, -16
-; CHECK-AIX-NEXT:    vmrghh 3, 2, 3
-; CHECK-AIX-NEXT:    vsplth 4, 2, 0
-; CHECK-AIX-NEXT:    vmrghw 3, 3, 4
 ; CHECK-AIX-NEXT:    lxvw4x 36, 0, 3
+; CHECK-AIX-NEXT:    vmrghh 3, 2, 3
+; CHECK-AIX-NEXT:    vsplth 5, 2, 0
 ; CHECK-AIX-NEXT:    vmrghh 2, 4, 2
+; CHECK-AIX-NEXT:    xxmrghw 35, 35, 37
 ; CHECK-AIX-NEXT:    xxswapd 0, 35
 ; CHECK-AIX-NEXT:    xxsldwi 34, 0, 34, 2
 ; CHECK-AIX-NEXT:    vsplth 3, 2, 1
@@ -47,26 +47,26 @@ define void @test_aix_splatimm(i32 %arg, i32 %arg1, i32 %arg2) {
 ; CHECK-NEXT:  # %bb.1: # %bb3
 ; CHECK-NEXT:    srwi 4, 4, 16
 ; CHECK-NEXT:    srwi 5, 5, 16
-; CHECK-NEXT:    slwi 3, 3, 8
 ; CHECK-NEXT:    mullw 4, 5, 4
-; CHECK-NEXT:    li 5, 0
-; CHECK-NEXT:    neg 3, 3
-; CHECK-NEXT:    mtvsrd 34, 5
 ; CHECK-NEXT:    lwz 5, 0(3)
+; CHECK-NEXT:    slwi 3, 3, 8
+; CHECK-NEXT:    neg 3, 3
+; CHECK-NEXT:    srwi 5, 5, 1
 ; CHECK-NEXT:    mtvsrd 35, 3
-; CHECK-NEXT:    srwi 3, 5, 1
-; CHECK-NEXT:    vsplth 4, 2, 3
-; CHECK-NEXT:    mullw 3, 4, 3
+; CHECK-NEXT:    mullw 4, 4, 5
+; CHECK-NEXT:    li 5, 0
+; CHECK-NEXT:    mtvsrd 34, 5
 ; CHECK-NEXT:    vmrghh 3, 3, 2
-; CHECK-NEXT:    neg 3, 3
-; CHECK-NEXT:    mtvsrd 37, 3
+; CHECK-NEXT:    neg 3, 4
+; CHECK-NEXT:    mtvsrd 36, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; CHECK-NEXT:    vmrglw 3, 4, 3
 ; CHECK-NEXT:    addi 3, 3, .LCPI0_0@toc@l
-; CHECK-NEXT:    vmrghh 2, 2, 5
-; CHECK-NEXT:    lvx 4, 0, 3
+; CHECK-NEXT:    vmrghh 4, 2, 4
+; CHECK-NEXT:    vsplth 2, 2, 3
+; CHECK-NEXT:    xxmrglw 34, 34, 35
+; CHECK-NEXT:    lvx 3, 0, 3
 ; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    vperm 2, 2, 3, 4
+; CHECK-NEXT:    vperm 2, 4, 2, 3
 ; CHECK-NEXT:    vsplth 3, 2, 6
 ; CHECK-NEXT:    vsplth 2, 2, 3
 ; CHECK-NEXT:    stvx 3, 0, 3

diff  --git a/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
index 11325b55e05f8..92e888a996520 100644
--- a/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
@@ -59,9 +59,9 @@ define <2 x i64> @buildl(i64 %a) {
 ; CHECK-NEXT:    stw 3, -32(1)
 ; CHECK-NEXT:    addi 3, 1, -16
 ; CHECK-NEXT:    addi 4, 1, -32
-; CHECK-NEXT:    lxvw4x 34, 0, 3
-; CHECK-NEXT:    lxvw4x 35, 0, 4
-; CHECK-NEXT:    vmrghw 2, 3, 2
+; CHECK-NEXT:    lxvw4x 0, 0, 3
+; CHECK-NEXT:    lxvw4x 1, 0, 4
+; CHECK-NEXT:    xxmrghw 34, 1, 0
 ; CHECK-NEXT:    xxswapd 0, 34
 ; CHECK-NEXT:    xxsldwi 34, 0, 34, 2
 ; CHECK-NEXT:    blr
@@ -90,7 +90,7 @@ entry:
 define <2 x double> @buildd() {
 ; CHECK-LABEL: buildd:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lwz 3, L..C0(2)
+; CHECK-NEXT:    lwz 3, L..C0(2) # @d
 ; CHECK-NEXT:    lxvdsx 34, 0, 3
 ; CHECK-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 1c55f4e43d1a7..97dffac2022c3 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -274,17 +274,17 @@ entry:
 define dso_local <16 x i8> @testmrghw(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: testmrghw:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    vmrghw v2, v3, v2
+; CHECK-P8-NEXT:    xxmrghw v2, v3, v2
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testmrghw:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    vmrghw v2, v3, v2
+; CHECK-P9-NEXT:    xxmrghw v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P9-BE-LABEL: testmrghw:
 ; CHECK-P9-BE:       # %bb.0: # %entry
-; CHECK-P9-BE-NEXT:    vmrglw v2, v2, v3
+; CHECK-P9-BE-NEXT:    xxmrglw v2, v2, v3
 ; CHECK-P9-BE-NEXT:    blr
 ;
 ; CHECK-NOVSX-LABEL: testmrghw:
@@ -294,7 +294,7 @@ define dso_local <16 x i8> @testmrghw(<16 x i8> %a, <16 x i8> %b) local_unnamed_
 ;
 ; CHECK-P7-LABEL: testmrghw:
 ; CHECK-P7:       # %bb.0: # %entry
-; CHECK-P7-NEXT:    vmrghw v2, v3, v2
+; CHECK-P7-NEXT:    xxmrghw v2, v3, v2
 ; CHECK-P7-NEXT:    blr
 entry:
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15, i32 28, i32 29, i32 30, i32 31>
@@ -303,17 +303,17 @@ entry:
 define dso_local <16 x i8> @testmrghw2(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: testmrghw2:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    vmrghw v2, v2, v3
+; CHECK-P8-NEXT:    xxmrghw v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testmrghw2:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    vmrghw v2, v2, v3
+; CHECK-P9-NEXT:    xxmrghw v2, v2, v3
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P9-BE-LABEL: testmrghw2:
 ; CHECK-P9-BE:       # %bb.0: # %entry
-; CHECK-P9-BE-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-BE-NEXT:    xxmrglw v2, v3, v2
 ; CHECK-P9-BE-NEXT:    blr
 ;
 ; CHECK-NOVSX-LABEL: testmrghw2:
@@ -338,17 +338,17 @@ entry:
 define dso_local <16 x i8> @testmrglw(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: testmrglw:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    vmrglw v2, v3, v2
+; CHECK-P8-NEXT:    xxmrglw v2, v3, v2
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testmrglw:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    xxmrglw v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P9-BE-LABEL: testmrglw:
 ; CHECK-P9-BE:       # %bb.0: # %entry
-; CHECK-P9-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-P9-BE-NEXT:    xxmrghw v2, v2, v3
 ; CHECK-P9-BE-NEXT:    blr
 ;
 ; CHECK-NOVSX-LABEL: testmrglw:
@@ -358,7 +358,7 @@ define dso_local <16 x i8> @testmrglw(<16 x i8> %a, <16 x i8> %b) local_unnamed_
 ;
 ; CHECK-P7-LABEL: testmrglw:
 ; CHECK-P7:       # %bb.0: # %entry
-; CHECK-P7-NEXT:    vmrglw v2, v3, v2
+; CHECK-P7-NEXT:    xxmrglw v2, v3, v2
 ; CHECK-P7-NEXT:    blr
 entry:
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23>
@@ -367,17 +367,17 @@ entry:
 define dso_local <16 x i8> @testmrglw2(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: testmrglw2:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    vmrglw v2, v2, v3
+; CHECK-P8-NEXT:    xxmrglw v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testmrglw2:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    vmrglw v2, v2, v3
+; CHECK-P9-NEXT:    xxmrglw v2, v2, v3
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P9-BE-LABEL: testmrglw2:
 ; CHECK-P9-BE:       # %bb.0: # %entry
-; CHECK-P9-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-P9-BE-NEXT:    xxmrghw v2, v3, v2
 ; CHECK-P9-BE-NEXT:    blr
 ;
 ; CHECK-NOVSX-LABEL: testmrglw2:
@@ -552,8 +552,8 @@ define dso_local <16 x i8> @no_crash_bitcast(i32 %a) {
 ;
 ; CHECK-P9-BE-LABEL: no_crash_bitcast:
 ; CHECK-P9-BE:       # %bb.0: # %entry
-; CHECK-P9-BE-NEXT:    mtvsrwz v2, r3
-; CHECK-P9-BE-NEXT:    vmrghw v2, v2, v2
+; CHECK-P9-BE-NEXT:    mtfprwz f0, r3
+; CHECK-P9-BE-NEXT:    xxmrghw v2, vs0, vs0
 ; CHECK-P9-BE-NEXT:    blr
 ;
 ; CHECK-NOVSX-LABEL: no_crash_bitcast:

diff  --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
index a88c6201cf9af..0ca0cb9ae94d2 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
@@ -487,12 +487,12 @@ define <2 x float> @fptrunc_v2f32_v2f64(<2 x double> %vf1) {
 ;
 ; P9-LABEL: fptrunc_v2f32_v2f64:
 ; P9:       # %bb.0:
+; P9-NEXT:    xxswapd vs1, v2
 ; P9-NEXT:    xsrsp f0, v2
-; P9-NEXT:    xscvdpspn v3, f0
-; P9-NEXT:    xxswapd vs0, v2
-; P9-NEXT:    xsrsp f0, f0
-; P9-NEXT:    xscvdpspn v2, f0
-; P9-NEXT:    vmrghw v2, v3, v2
+; P9-NEXT:    xsrsp f1, f1
+; P9-NEXT:    xscvdpspn vs0, f0
+; P9-NEXT:    xscvdpspn vs1, f1
+; P9-NEXT:    xxmrghw v2, vs0, vs1
 ; P9-NEXT:    blr
   %res = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
                         <2 x double> %vf1,

diff  --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index a72472d53502b..88dcfe3b78fe1 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -212,9 +212,9 @@ define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a
 ; P9-AIX32-NEXT:    lwz r4, 28(r4)
 ; P9-AIX32-NEXT:    stw r4, -16(r1)
 ; P9-AIX32-NEXT:    stw r5, -32(r1)
-; P9-AIX32-NEXT:    lxv v2, -16(r1)
-; P9-AIX32-NEXT:    lxv v3, -32(r1)
-; P9-AIX32-NEXT:    vmrghw v2, v3, v2
+; P9-AIX32-NEXT:    lxv vs0, -16(r1)
+; P9-AIX32-NEXT:    lxv vs1, -32(r1)
+; P9-AIX32-NEXT:    xxmrghw v2, vs1, vs0
 ; P9-AIX32-NEXT:    xxswapd vs0, v2
 ; P9-AIX32-NEXT:    xxsldwi vs0, vs0, v2, 2
 ; P9-AIX32-NEXT:    stxv vs0, 0(r3)
@@ -228,9 +228,9 @@ define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a
 ; P8-AIX32-NEXT:    stw r5, -32(r1)
 ; P8-AIX32-NEXT:    addi r4, r1, -16
 ; P8-AIX32-NEXT:    addi r5, r1, -32
-; P8-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P8-AIX32-NEXT:    lxvw4x v3, 0, r5
-; P8-AIX32-NEXT:    vmrghw v2, v3, v2
+; P8-AIX32-NEXT:    lxvw4x vs0, 0, r4
+; P8-AIX32-NEXT:    lxvw4x vs1, 0, r5
+; P8-AIX32-NEXT:    xxmrghw v2, vs1, vs0
 ; P8-AIX32-NEXT:    xxswapd vs0, v2
 ; P8-AIX32-NEXT:    xxsldwi vs0, vs0, v2, 2
 ; P8-AIX32-NEXT:    stxvw4x vs0, 0, r3
@@ -244,9 +244,9 @@ define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a
 ; P7-AIX32-NEXT:    stw r6, -16(r1)
 ; P7-AIX32-NEXT:    stw r4, -32(r1)
 ; P7-AIX32-NEXT:    addi r4, r1, -32
-; P7-AIX32-NEXT:    lxvw4x v2, 0, r5
-; P7-AIX32-NEXT:    lxvw4x v3, 0, r4
-; P7-AIX32-NEXT:    vmrghw v2, v3, v2
+; P7-AIX32-NEXT:    lxvw4x vs0, 0, r5
+; P7-AIX32-NEXT:    lxvw4x vs1, 0, r4
+; P7-AIX32-NEXT:    xxmrghw v2, vs1, vs0
 ; P7-AIX32-NEXT:    xxswapd vs0, v2
 ; P7-AIX32-NEXT:    xxsldwi vs0, vs0, v2, 2
 ; P7-AIX32-NEXT:    stxvw4x vs0, 0, r3
@@ -288,10 +288,10 @@ define void @test5(<2 x i64>* %a, i32* %in) {
 ; P9-AIX32-NEXT:    lwz r4, 0(r4)
 ; P9-AIX32-NEXT:    srawi r5, r4, 31
 ; P9-AIX32-NEXT:    stw r4, -16(r1)
-; P9-AIX32-NEXT:    lxv v2, -16(r1)
+; P9-AIX32-NEXT:    lxv vs0, -16(r1)
 ; P9-AIX32-NEXT:    stw r5, -32(r1)
-; P9-AIX32-NEXT:    lxv v3, -32(r1)
-; P9-AIX32-NEXT:    vmrghw v2, v3, v2
+; P9-AIX32-NEXT:    lxv vs1, -32(r1)
+; P9-AIX32-NEXT:    xxmrghw v2, vs1, vs0
 ; P9-AIX32-NEXT:    xxswapd vs0, v2
 ; P9-AIX32-NEXT:    xxsldwi vs0, vs0, v2, 2
 ; P9-AIX32-NEXT:    stxv vs0, 0(r3)
@@ -305,9 +305,9 @@ define void @test5(<2 x i64>* %a, i32* %in) {
 ; P8-AIX32-NEXT:    addi r4, r1, -16
 ; P8-AIX32-NEXT:    stw r5, -32(r1)
 ; P8-AIX32-NEXT:    addi r5, r1, -32
-; P8-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P8-AIX32-NEXT:    lxvw4x v3, 0, r5
-; P8-AIX32-NEXT:    vmrghw v2, v3, v2
+; P8-AIX32-NEXT:    lxvw4x vs0, 0, r4
+; P8-AIX32-NEXT:    lxvw4x vs1, 0, r5
+; P8-AIX32-NEXT:    xxmrghw v2, vs1, vs0
 ; P8-AIX32-NEXT:    xxswapd vs0, v2
 ; P8-AIX32-NEXT:    xxsldwi vs0, vs0, v2, 2
 ; P8-AIX32-NEXT:    stxvw4x vs0, 0, r3
@@ -321,9 +321,9 @@ define void @test5(<2 x i64>* %a, i32* %in) {
 ; P7-AIX32-NEXT:    srawi r4, r4, 31
 ; P7-AIX32-NEXT:    stw r4, -32(r1)
 ; P7-AIX32-NEXT:    addi r4, r1, -32
-; P7-AIX32-NEXT:    lxvw4x v2, 0, r5
-; P7-AIX32-NEXT:    lxvw4x v3, 0, r4
-; P7-AIX32-NEXT:    vmrghw v2, v3, v2
+; P7-AIX32-NEXT:    lxvw4x vs0, 0, r5
+; P7-AIX32-NEXT:    lxvw4x vs1, 0, r4
+; P7-AIX32-NEXT:    xxmrghw v2, vs1, vs0
 ; P7-AIX32-NEXT:    xxswapd vs0, v2
 ; P7-AIX32-NEXT:    xxsldwi vs0, vs0, v2, 2
 ; P7-AIX32-NEXT:    stxvw4x vs0, 0, r3
@@ -365,10 +365,10 @@ define void @test6(<2 x i64>* %a, i32* %in) {
 ; P9-AIX32-NEXT:    lwz r4, 0(r4)
 ; P9-AIX32-NEXT:    li r5, 0
 ; P9-AIX32-NEXT:    stw r5, -32(r1)
-; P9-AIX32-NEXT:    lxv v2, -32(r1)
+; P9-AIX32-NEXT:    lxv vs0, -32(r1)
 ; P9-AIX32-NEXT:    stw r4, -16(r1)
-; P9-AIX32-NEXT:    lxv v3, -16(r1)
-; P9-AIX32-NEXT:    vmrghw v2, v2, v3
+; P9-AIX32-NEXT:    lxv vs1, -16(r1)
+; P9-AIX32-NEXT:    xxmrghw v2, vs0, vs1
 ; P9-AIX32-NEXT:    xxswapd vs0, v2
 ; P9-AIX32-NEXT:    xxsldwi vs0, vs0, v2, 2
 ; P9-AIX32-NEXT:    stxv vs0, 0(r3)
@@ -382,9 +382,9 @@ define void @test6(<2 x i64>* %a, i32* %in) {
 ; P8-AIX32-NEXT:    addi r5, r1, -16
 ; P8-AIX32-NEXT:    stw r4, -16(r1)
 ; P8-AIX32-NEXT:    addi r4, r1, -32
-; P8-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P8-AIX32-NEXT:    lxvw4x v3, 0, r5
-; P8-AIX32-NEXT:    vmrghw v2, v2, v3
+; P8-AIX32-NEXT:    lxvw4x vs0, 0, r4
+; P8-AIX32-NEXT:    lxvw4x vs1, 0, r5
+; P8-AIX32-NEXT:    xxmrghw v2, vs0, vs1
 ; P8-AIX32-NEXT:    xxswapd vs0, v2
 ; P8-AIX32-NEXT:    xxsldwi vs0, vs0, v2, 2
 ; P8-AIX32-NEXT:    stxvw4x vs0, 0, r3
@@ -398,9 +398,9 @@ define void @test6(<2 x i64>* %a, i32* %in) {
 ; P7-AIX32-NEXT:    addi r5, r1, -16
 ; P7-AIX32-NEXT:    stw r4, -16(r1)
 ; P7-AIX32-NEXT:    addi r4, r1, -32
-; P7-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P7-AIX32-NEXT:    lxvw4x v3, 0, r5
-; P7-AIX32-NEXT:    vmrghw v2, v2, v3
+; P7-AIX32-NEXT:    lxvw4x vs0, 0, r4
+; P7-AIX32-NEXT:    lxvw4x vs1, 0, r5
+; P7-AIX32-NEXT:    xxmrghw v2, vs0, vs1
 ; P7-AIX32-NEXT:    xxswapd vs0, v2
 ; P7-AIX32-NEXT:    xxsldwi vs0, vs0, v2, 2
 ; P7-AIX32-NEXT:    stxvw4x vs0, 0, r3
@@ -606,8 +606,8 @@ define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) {
 ; P9-AIX32:       # %bb.0: # %entry
 ; P9-AIX32-NEXT:    lwz r3, 4(r3)
 ; P9-AIX32-NEXT:    stw r3, -16(r1)
-; P9-AIX32-NEXT:    lxv v2, -16(r1)
-; P9-AIX32-NEXT:    vmrghw v2, v2, v2
+; P9-AIX32-NEXT:    lxv vs0, -16(r1)
+; P9-AIX32-NEXT:    xxmrghw v2, vs0, vs0
 ; P9-AIX32-NEXT:    xxspltw v2, v2, 1
 ; P9-AIX32-NEXT:    blr
 ;
@@ -616,8 +616,8 @@ define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) {
 ; P8-AIX32-NEXT:    lwz r3, 4(r3)
 ; P8-AIX32-NEXT:    addi r4, r1, -16
 ; P8-AIX32-NEXT:    stw r3, -16(r1)
-; P8-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P8-AIX32-NEXT:    vmrghw v2, v2, v2
+; P8-AIX32-NEXT:    lxvw4x vs0, 0, r4
+; P8-AIX32-NEXT:    xxmrghw v2, vs0, vs0
 ; P8-AIX32-NEXT:    xxspltw v2, v2, 1
 ; P8-AIX32-NEXT:    blr
 ;
@@ -626,8 +626,8 @@ define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) {
 ; P7-AIX32-NEXT:    lwz r3, 4(r3)
 ; P7-AIX32-NEXT:    addi r4, r1, -16
 ; P7-AIX32-NEXT:    stw r3, -16(r1)
-; P7-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P7-AIX32-NEXT:    vmrghw v2, v2, v2
+; P7-AIX32-NEXT:    lxvw4x vs0, 0, r4
+; P7-AIX32-NEXT:    xxmrghw v2, vs0, vs0
 ; P7-AIX32-NEXT:    xxspltw v2, v2, 1
 ; P7-AIX32-NEXT:    blr
   entry:
@@ -838,10 +838,10 @@ define <16 x i8> @unadjusted_lxvdsx(i64* %s, i64* %t) {
 ; P8-AIX32-NEXT:    lwz r3, 0(r3)
 ; P8-AIX32-NEXT:    stw r3, -16(r1)
 ; P8-AIX32-NEXT:    addi r3, r1, -32
-; P8-AIX32-NEXT:    lxvw4x v2, 0, r3
-; P8-AIX32-NEXT:    lxvw4x v3, 0, r4
-; P8-AIX32-NEXT:    vmrghw v2, v3, v2
-; P8-AIX32-NEXT:    xxsldwi vs0, v3, v2, 2
+; P8-AIX32-NEXT:    lxvw4x vs0, 0, r3
+; P8-AIX32-NEXT:    lxvw4x vs1, 0, r4
+; P8-AIX32-NEXT:    xxmrghw v2, vs1, vs0
+; P8-AIX32-NEXT:    xxsldwi vs0, vs1, v2, 2
 ; P8-AIX32-NEXT:    xxmrgld v2, vs0, vs0
 ; P8-AIX32-NEXT:    blr
 ;
@@ -853,10 +853,10 @@ define <16 x i8> @unadjusted_lxvdsx(i64* %s, i64* %t) {
 ; P7-AIX32-NEXT:    lwz r3, 0(r3)
 ; P7-AIX32-NEXT:    stw r3, -16(r1)
 ; P7-AIX32-NEXT:    addi r3, r1, -16
-; P7-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P7-AIX32-NEXT:    lxvw4x v3, 0, r3
-; P7-AIX32-NEXT:    vmrghw v2, v3, v2
-; P7-AIX32-NEXT:    xxsldwi vs0, v3, v2, 2
+; P7-AIX32-NEXT:    lxvw4x vs0, 0, r4
+; P7-AIX32-NEXT:    lxvw4x vs1, 0, r3
+; P7-AIX32-NEXT:    xxmrghw v2, vs1, vs0
+; P7-AIX32-NEXT:    xxsldwi vs0, vs1, v2, 2
 ; P7-AIX32-NEXT:    xxmrgld v2, vs0, vs0
 ; P7-AIX32-NEXT:    blr
   entry:

diff  --git a/llvm/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll b/llvm/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll
index de930af75b2d8..9838b9558a037 100644
--- a/llvm/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll
+++ b/llvm/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll
@@ -9,8 +9,8 @@ entry:
   ret <2 x i32> %strided.vec
 
 ; CHECK-LABEL: @test1
-; CHECK: xxswapd 35, 34
-; CHECK: vmrghw 2, 2, 3
+; CHECK: xxswapd 0, 34
+; CHECK: xxmrghw 34, 34, 0
 ; CHECK: blr
 }
 

diff  --git a/llvm/test/CodeGen/PowerPC/perfect-shuffle.ll b/llvm/test/CodeGen/PowerPC/perfect-shuffle.ll
index ee7d823d5e9aa..bf6da83ae7955 100644
--- a/llvm/test/CodeGen/PowerPC/perfect-shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/perfect-shuffle.ll
@@ -5,9 +5,9 @@
 define <4 x float> @shuffle1(<16 x i8> %v1, <16 x i8> %v2) {
 ; BE-LABEL: shuffle1:
 ; BE:       # %bb.0:
-; BE-NEXT:    vmrglw 4, 2, 3
-; BE-NEXT:    vmrghw 2, 2, 3
-; BE-NEXT:    vmrghw 2, 2, 4
+; BE-NEXT:    xxmrglw 0, 34, 35
+; BE-NEXT:    xxmrghw 1, 34, 35
+; BE-NEXT:    xxmrghw 34, 1, 0
 ; BE-NEXT:    blr
 ;
 ; LE-LABEL: shuffle1:
@@ -38,12 +38,12 @@ define <4 x float> @shuffle2(<16 x i8> %v1, <16 x i8> %v2) {
 define <4 x float> @shuffle3(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3, <16 x i8> %v4) {
 ; BE-LABEL: shuffle3:
 ; BE:       # %bb.0:
-; BE-NEXT:    vmrglw 0, 2, 3
-; BE-NEXT:    vmrghw 2, 2, 3
-; BE-NEXT:    vmrglw 3, 4, 5
-; BE-NEXT:    vmrghw 4, 4, 5
-; BE-NEXT:    vmrghw 2, 2, 0
-; BE-NEXT:    vmrghw 3, 4, 3
+; BE-NEXT:    xxmrglw 0, 34, 35
+; BE-NEXT:    xxmrghw 1, 34, 35
+; BE-NEXT:    xxmrghw 34, 1, 0
+; BE-NEXT:    xxmrglw 0, 36, 37
+; BE-NEXT:    xxmrghw 1, 36, 37
+; BE-NEXT:    xxmrghw 35, 1, 0
 ; BE-NEXT:    xvaddsp 34, 34, 35
 ; BE-NEXT:    blr
 ;
@@ -87,22 +87,22 @@ define <4 x float> @shuffle4(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3, <16 x
 define <4 x float> @shuffle5(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3, <16 x i8> %v4) {
 ; BE-LABEL: shuffle5:
 ; BE:       # %bb.0: # %entry
-; BE-NEXT:    vmrglw 0, 2, 3
-; BE-NEXT:    vmrghw 3, 2, 3
+; BE-NEXT:    xxmrglw 0, 34, 35
+; BE-NEXT:    xxmrghw 1, 34, 35
 ; BE-NEXT:    li 3, 8
 ; BE-NEXT:    vextublx 3, 3, 2
-; BE-NEXT:    vmrghw 3, 3, 0
+; BE-NEXT:    xxmrghw 0, 1, 0
 ; BE-NEXT:    andi. 3, 3, 255
-; BE-NEXT:    vmr 2, 3
+; BE-NEXT:    xxlor 1, 0, 0
 ; BE-NEXT:    beq 0, .LBB4_2
 ; BE-NEXT:  # %bb.1: # %exit
-; BE-NEXT:    xvaddsp 34, 35, 34
+; BE-NEXT:    xvaddsp 34, 0, 1
 ; BE-NEXT:    blr
 ; BE-NEXT:  .LBB4_2: # %second
-; BE-NEXT:    vmrglw 2, 4, 5
-; BE-NEXT:    vmrghw 4, 4, 5
-; BE-NEXT:    vmrghw 2, 4, 2
-; BE-NEXT:    xvaddsp 34, 35, 34
+; BE-NEXT:    xxmrglw 1, 36, 37
+; BE-NEXT:    xxmrghw 2, 36, 37
+; BE-NEXT:    xxmrghw 1, 2, 1
+; BE-NEXT:    xvaddsp 34, 0, 1
 ; BE-NEXT:    blr
 ;
 ; LE-LABEL: shuffle5:

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
index ec5e433b57cf7..94407cd4f8ed8 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -443,31 +443,31 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    mtvsrd v4, r9
 ; CHECK-PWR9-LE-NEXT:    xor r26, r26, r25
 ; CHECK-PWR9-LE-NEXT:    vmrghb v4, v5, v4
-; CHECK-PWR9-LE-NEXT:    mtvsrd v5, r30
 ; CHECK-PWR9-LE-NEXT:    sub r26, r26, r25
-; CHECK-PWR9-LE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-LE-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT:    mtvsrd v0, r26
+; CHECK-PWR9-LE-NEXT:    mtvsrd v5, r26
 ; CHECK-PWR9-LE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-LE-NEXT:    vmrglh v3, v4, v3
-; CHECK-PWR9-LE-NEXT:    mtvsrd v4, r12
-; CHECK-PWR9-LE-NEXT:    vmrglw v2, v3, v2
-; CHECK-PWR9-LE-NEXT:    mtvsrd v3, r11
+; CHECK-PWR9-LE-NEXT:    mtvsrd v4, r30
+; CHECK-PWR9-LE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-PWR9-LE-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-PWR9-LE-NEXT:    mtvsrd v2, r11
+; CHECK-PWR9-LE-NEXT:    mtvsrd v3, r12
+; CHECK-PWR9-LE-NEXT:    vmrghb v2, v3, v2
+; CHECK-PWR9-LE-NEXT:    mtvsrd v3, r0
 ; CHECK-PWR9-LE-NEXT:    vmrghb v3, v4, v3
-; CHECK-PWR9-LE-NEXT:    mtvsrd v4, r0
-; CHECK-PWR9-LE-NEXT:    vmrghb v4, v5, v4
-; CHECK-PWR9-LE-NEXT:    mtvsrd v5, r28
+; CHECK-PWR9-LE-NEXT:    mtvsrd v4, r28
 ; CHECK-PWR9-LE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT:    vmrglh v3, v4, v3
-; CHECK-PWR9-LE-NEXT:    mtvsrd v4, r29
+; CHECK-PWR9-LE-NEXT:    vmrglh v2, v3, v2
+; CHECK-PWR9-LE-NEXT:    mtvsrd v3, r29
 ; CHECK-PWR9-LE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT:    vmrghb v4, v5, v4
-; CHECK-PWR9-LE-NEXT:    mtvsrd v5, r27
+; CHECK-PWR9-LE-NEXT:    vmrghb v3, v4, v3
+; CHECK-PWR9-LE-NEXT:    mtvsrd v4, r27
 ; CHECK-PWR9-LE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT:    vmrghb v5, v0, v5
-; CHECK-PWR9-LE-NEXT:    vmrglh v4, v5, v4
-; CHECK-PWR9-LE-NEXT:    vmrglw v3, v4, v3
-; CHECK-PWR9-LE-NEXT:    xxmrgld v2, v3, v2
+; CHECK-PWR9-LE-NEXT:    vmrghb v4, v5, v4
+; CHECK-PWR9-LE-NEXT:    vmrglh v3, v4, v3
+; CHECK-PWR9-LE-NEXT:    xxmrglw vs1, v3, v2
+; CHECK-PWR9-LE-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-PWR9-LE-NEXT:    blr
 ;
 ; CHECK-PWR9-BE-LABEL: sub_absv_8_ext:
@@ -508,7 +508,6 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    sub r5, r5, r8
 ; CHECK-PWR9-BE-NEXT:    vextublx r7, r6, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r6, r6, v3
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v1, r3
 ; CHECK-PWR9-BE-NEXT:    clrlwi r7, r7, 24
 ; CHECK-PWR9-BE-NEXT:    clrlwi r6, r6, 24
 ; CHECK-PWR9-BE-NEXT:    sub r6, r7, r6
@@ -645,23 +644,24 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    vperm v3, v5, v3, v4
 ; CHECK-PWR9-BE-NEXT:    mtvsrwz v5, r12
 ; CHECK-PWR9-BE-NEXT:    vperm v5, v0, v5, v4
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v0, r7
+; CHECK-PWR9-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-PWR9-BE-NEXT:    vmrghh v3, v5, v3
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v5, r9
-; CHECK-PWR9-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v3, r10
+; CHECK-PWR9-BE-NEXT:    mtvsrwz v5, r7
+; CHECK-PWR9-BE-NEXT:    xxmrghw vs0, v3, v2
+; CHECK-PWR9-BE-NEXT:    mtvsrwz v2, r10
+; CHECK-PWR9-BE-NEXT:    mtvsrwz v3, r9
+; CHECK-PWR9-BE-NEXT:    vperm v2, v3, v2, v4
+; CHECK-PWR9-BE-NEXT:    mtvsrwz v3, r8
 ; CHECK-PWR9-BE-NEXT:    vperm v3, v5, v3, v4
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v5, r8
-; CHECK-PWR9-BE-NEXT:    vperm v5, v0, v5, v4
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v0, r5
-; CHECK-PWR9-BE-NEXT:    vmrghh v3, v5, v3
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v5, r6
-; CHECK-PWR9-BE-NEXT:    vperm v5, v0, v5, v4
-; CHECK-PWR9-BE-NEXT:    mtvsrwz v0, r4
-; CHECK-PWR9-BE-NEXT:    vperm v4, v1, v0, v4
-; CHECK-PWR9-BE-NEXT:    vmrghh v4, v4, v5
-; CHECK-PWR9-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-PWR9-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-PWR9-BE-NEXT:    mtvsrwz v5, r5
+; CHECK-PWR9-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-PWR9-BE-NEXT:    mtvsrwz v3, r6
+; CHECK-PWR9-BE-NEXT:    vperm v3, v5, v3, v4
+; CHECK-PWR9-BE-NEXT:    mtvsrwz v5, r4
+; CHECK-PWR9-BE-NEXT:    vperm v4, v0, v5, v4
+; CHECK-PWR9-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-PWR9-BE-NEXT:    xxmrghw vs1, v3, v2
+; CHECK-PWR9-BE-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-PWR9-BE-NEXT:    blr
 ;
 ; CHECK-PWR8-LABEL: sub_absv_8_ext:
@@ -868,9 +868,9 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR8-NEXT:    vmrglh v3, v3, v0
 ; CHECK-PWR8-NEXT:    vmrglh v4, v6, v5
 ; CHECK-PWR8-NEXT:    vmrglh v5, v1, v8
-; CHECK-PWR8-NEXT:    vmrglw v2, v3, v2
-; CHECK-PWR8-NEXT:    vmrglw v3, v5, v4
-; CHECK-PWR8-NEXT:    xxmrgld v2, v2, v3
+; CHECK-PWR8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-PWR8-NEXT:    xxmrglw vs1, v5, v4
+; CHECK-PWR8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-PWR8-NEXT:    blr
 ;
 ; CHECK-PWR7-LABEL: sub_absv_8_ext:
@@ -1061,9 +1061,9 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR7-NEXT:    vmrghh v4, v6, v0
 ; CHECK-PWR7-NEXT:    vmrghh v3, v5, v3
 ; CHECK-PWR7-NEXT:    vmrghh v5, v7, v1
-; CHECK-PWR7-NEXT:    vmrghw v2, v4, v2
-; CHECK-PWR7-NEXT:    vmrghw v3, v5, v3
-; CHECK-PWR7-NEXT:    xxmrghd v2, v3, v2
+; CHECK-PWR7-NEXT:    xxmrghw vs0, v4, v2
+; CHECK-PWR7-NEXT:    xxmrghw vs1, v5, v3
+; CHECK-PWR7-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-PWR7-NEXT:    addi r1, r1, 416
 ; CHECK-PWR7-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/pr25080.ll b/llvm/test/CodeGen/PowerPC/pr25080.ll
index aa2c7ba5d462a..a44c3d5e184d4 100644
--- a/llvm/test/CodeGen/PowerPC/pr25080.ll
+++ b/llvm/test/CodeGen/PowerPC/pr25080.ll
@@ -41,11 +41,11 @@ define <8 x i16> @pr25080(<8 x i32> %a) {
 ; LE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
 ; LE-NEXT:    vmrghh 3, 3, 6
 ; LE-NEXT:    vmrghh 5, 0, 5
-; LE-NEXT:    vmrglw 2, 4, 2
+; LE-NEXT:    xxmrglw 0, 36, 34
 ; LE-NEXT:    vspltish 4, 15
-; LE-NEXT:    vmrglw 3, 5, 3
-; LE-NEXT:    xxmrgld 34, 35, 34
+; LE-NEXT:    xxmrglw 1, 37, 35
 ; LE-NEXT:    lvx 3, 0, 3
+; LE-NEXT:    xxmrgld 34, 1, 0
 ; LE-NEXT:    xxlor 34, 34, 35
 ; LE-NEXT:    vslh 2, 2, 4
 ; LE-NEXT:    vsrah 2, 2, 4
@@ -91,12 +91,12 @@ define <8 x i16> @pr25080(<8 x i32> %a) {
 ; BE-NEXT:    vperm 4, 6, 4, 1
 ; BE-NEXT:    addi 3, 3, .LCPI0_2@toc@l
 ; BE-NEXT:    vperm 5, 5, 7, 1
-; BE-NEXT:    lxvw4x 0, 0, 3
 ; BE-NEXT:    vperm 3, 0, 3, 1
-; BE-NEXT:    vmrghw 2, 4, 2
-; BE-NEXT:    vmrghw 3, 3, 5
-; BE-NEXT:    xxmrghd 34, 35, 34
+; BE-NEXT:    xxmrghw 0, 36, 34
+; BE-NEXT:    xxmrghw 1, 35, 37
 ; BE-NEXT:    vspltish 3, 15
+; BE-NEXT:    xxmrghd 34, 1, 0
+; BE-NEXT:    lxvw4x 0, 0, 3
 ; BE-NEXT:    xxlor 34, 34, 0
 ; BE-NEXT:    vslh 2, 2, 3
 ; BE-NEXT:    vsrah 2, 2, 3

diff  --git a/llvm/test/CodeGen/PowerPC/pr27078.ll b/llvm/test/CodeGen/PowerPC/pr27078.ll
index 3041f57f1d13d..1f89895661ff2 100644
--- a/llvm/test/CodeGen/PowerPC/pr27078.ll
+++ b/llvm/test/CodeGen/PowerPC/pr27078.ll
@@ -13,13 +13,13 @@ define <4 x float> @bar(float* %p, float* %q) {
 ; CHECK-NEXT:    lxvw4x 4, 4, 5
 ; CHECK-NEXT:    xvsubsp 0, 1, 0
 ; CHECK-NEXT:    lxvw4x 1, 3, 5
-; CHECK-NEXT:    xvsubsp 34, 3, 2
-; CHECK-NEXT:    xvsubsp 35, 4, 1
-; CHECK-NEXT:    xxsldwi 36, 0, 0, 1
-; CHECK-NEXT:    vmrglw 2, 4, 2
-; CHECK-NEXT:    vmrghw 3, 3, 3
-; CHECK-NEXT:    xxsldwi 0, 36, 34, 3
-; CHECK-NEXT:    xxsldwi 0, 35, 0, 3
+; CHECK-NEXT:    xvsubsp 2, 3, 2
+; CHECK-NEXT:    xvsubsp 1, 4, 1
+; CHECK-NEXT:    xxsldwi 0, 0, 0, 1
+; CHECK-NEXT:    xxmrglw 34, 0, 2
+; CHECK-NEXT:    xxsldwi 0, 0, 34, 3
+; CHECK-NEXT:    xxmrghw 34, 1, 1
+; CHECK-NEXT:    xxsldwi 0, 34, 0, 3
 ; CHECK-NEXT:    xxsldwi 34, 0, 0, 1
 ; CHECK-NEXT:    blr
   %1 = bitcast float* %p to <12 x float>*

diff  --git a/llvm/test/CodeGen/PowerPC/pr47916.ll b/llvm/test/CodeGen/PowerPC/pr47916.ll
index 80e1d36d99c80..af5c45df84300 100644
--- a/llvm/test/CodeGen/PowerPC/pr47916.ll
+++ b/llvm/test/CodeGen/PowerPC/pr47916.ll
@@ -8,9 +8,9 @@ define dso_local void @_Z1jjPiPj() local_unnamed_addr #0 {
 ; CHECK-NEXT:    std r3, -16(r1)
 ; CHECK-NEXT:    addi r3, r1, -16
 ; CHECK-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-NEXT:    xxswapd v2, vs0
-; CHECK-NEXT:    vmrglw v2, v2, v2
-; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    xxmrglw vs0, vs0, vs0
+; CHECK-NEXT:    xxswapd vs0, vs0
 ; CHECK-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
index e6427880c5e8a..4e9a374c97523 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
@@ -35,10 +35,10 @@ define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec)  {
 ;
 ; P8BE-LABEL: s2v_test1:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    lxsiwzx v3, 0, r3
-; P8BE-NEXT:    vmrghw v4, v2, v3
-; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
-; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
+; P8BE-NEXT:    lfiwzx f0, 0, r3
+; P8BE-NEXT:    xxsldwi vs1, v2, vs0, 1
+; P8BE-NEXT:    xxmrghw v2, v2, vs0
+; P8BE-NEXT:    xxsldwi v2, v2, vs1, 3
 ; P8BE-NEXT:    blr
 entry:
   %0 = load i32, i32* %int32, align 4
@@ -75,10 +75,10 @@ define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec)  {
 ; P8BE-LABEL: s2v_test2:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    addi r3, r3, 4
-; P8BE-NEXT:    lxsiwzx v3, 0, r3
-; P8BE-NEXT:    vmrghw v4, v2, v3
-; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
-; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
+; P8BE-NEXT:    lfiwzx f0, 0, r3
+; P8BE-NEXT:    xxsldwi vs1, v2, vs0, 1
+; P8BE-NEXT:    xxmrghw v2, v2, vs0
+; P8BE-NEXT:    xxsldwi v2, v2, vs1, 3
 ; P8BE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
@@ -118,10 +118,10 @@ define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32
 ; P8BE-LABEL: s2v_test3:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    sldi r4, r7, 2
-; P8BE-NEXT:    lxsiwzx v3, r3, r4
-; P8BE-NEXT:    vmrghw v4, v2, v3
-; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
-; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
+; P8BE-NEXT:    lfiwzx f0, r3, r4
+; P8BE-NEXT:    xxsldwi vs1, v2, vs0, 1
+; P8BE-NEXT:    xxmrghw v2, v2, vs0
+; P8BE-NEXT:    xxsldwi v2, v2, vs1, 3
 ; P8BE-NEXT:    blr
 entry:
   %idxprom = sext i32 %Idx to i64
@@ -160,10 +160,10 @@ define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec)  {
 ; P8BE-LABEL: s2v_test4:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    addi r3, r3, 4
-; P8BE-NEXT:    lxsiwzx v3, 0, r3
-; P8BE-NEXT:    vmrghw v4, v2, v3
-; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
-; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
+; P8BE-NEXT:    lfiwzx f0, 0, r3
+; P8BE-NEXT:    xxsldwi vs1, v2, vs0, 1
+; P8BE-NEXT:    xxmrghw v2, v2, vs0
+; P8BE-NEXT:    xxsldwi v2, v2, vs1, 3
 ; P8BE-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
@@ -199,10 +199,10 @@ define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1)  {
 ;
 ; P8BE-LABEL: s2v_test5:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    lxsiwzx v3, 0, r5
-; P8BE-NEXT:    vmrghw v4, v2, v3
-; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
-; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
+; P8BE-NEXT:    lfiwzx f0, 0, r5
+; P8BE-NEXT:    xxsldwi vs1, v2, vs0, 1
+; P8BE-NEXT:    xxmrghw v2, v2, vs0
+; P8BE-NEXT:    xxsldwi v2, v2, vs1, 3
 ; P8BE-NEXT:    blr
 entry:
   %0 = load i32, i32* %ptr1, align 4
@@ -237,10 +237,10 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec
 ;
 ; P8BE-LABEL: s2v_test_f1:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    lxsiwzx v3, 0, r3
-; P8BE-NEXT:    vmrghw v4, v2, v3
-; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
-; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
+; P8BE-NEXT:    lfiwzx f0, 0, r3
+; P8BE-NEXT:    xxsldwi vs1, v2, vs0, 1
+; P8BE-NEXT:    xxmrghw v2, v2, vs0
+; P8BE-NEXT:    xxsldwi v2, v2, vs1, 3
 ; P8BE-NEXT:    blr
 entry:
   %0 = load float, float* %f64, align 4
@@ -253,9 +253,9 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec
 ; P9LE-LABEL: s2v_test_f2:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    addi r3, r3, 4
-; P9LE-NEXT:    vmrglw v2, v2, v2
-; P9LE-NEXT:    lxsiwzx v3, 0, r3
-; P9LE-NEXT:    vmrghw v2, v2, v3
+; P9LE-NEXT:    xxmrglw vs1, v2, v2
+; P9LE-NEXT:    lfiwzx f0, 0, r3
+; P9LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: s2v_test_f2:
@@ -267,10 +267,10 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec
 ;
 ; P8LE-LABEL: s2v_test_f2:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    vmrglw v2, v2, v2
 ; P8LE-NEXT:    addi r3, r3, 4
-; P8LE-NEXT:    lxsiwzx v3, 0, r3
-; P8LE-NEXT:    vmrghw v2, v2, v3
+; P8LE-NEXT:    xxmrglw vs1, v2, v2
+; P8LE-NEXT:    lfiwzx f0, 0, r3
+; P8LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test_f2:
@@ -291,9 +291,9 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec
 ; P9LE-LABEL: s2v_test_f3:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    sldi r4, r7, 2
-; P9LE-NEXT:    vmrglw v2, v2, v2
-; P9LE-NEXT:    lxsiwzx v3, r3, r4
-; P9LE-NEXT:    vmrghw v2, v2, v3
+; P9LE-NEXT:    xxmrglw vs1, v2, v2
+; P9LE-NEXT:    lfiwzx f0, r3, r4
+; P9LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: s2v_test_f3:
@@ -305,10 +305,10 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec
 ;
 ; P8LE-LABEL: s2v_test_f3:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    vmrglw v2, v2, v2
 ; P8LE-NEXT:    sldi r4, r7, 2
-; P8LE-NEXT:    lxsiwzx v3, r3, r4
-; P8LE-NEXT:    vmrghw v2, v2, v3
+; P8LE-NEXT:    xxmrglw vs1, v2, v2
+; P8LE-NEXT:    lfiwzx f0, r3, r4
+; P8LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test_f3:
@@ -330,9 +330,9 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec
 ; P9LE-LABEL: s2v_test_f4:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    addi r3, r3, 4
-; P9LE-NEXT:    vmrglw v2, v2, v2
-; P9LE-NEXT:    lxsiwzx v3, 0, r3
-; P9LE-NEXT:    vmrghw v2, v2, v3
+; P9LE-NEXT:    xxmrglw vs1, v2, v2
+; P9LE-NEXT:    lfiwzx f0, 0, r3
+; P9LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: s2v_test_f4:
@@ -344,10 +344,10 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec
 ;
 ; P8LE-LABEL: s2v_test_f4:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    vmrglw v2, v2, v2
 ; P8LE-NEXT:    addi r3, r3, 4
-; P8LE-NEXT:    lxsiwzx v3, 0, r3
-; P8LE-NEXT:    vmrghw v2, v2, v3
+; P8LE-NEXT:    xxmrglw vs1, v2, v2
+; P8LE-NEXT:    lfiwzx f0, 0, r3
+; P8LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test_f4:
@@ -367,9 +367,9 @@ entry:
 define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1)  {
 ; P9LE-LABEL: s2v_test_f5:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    lxsiwzx v3, 0, r5
-; P9LE-NEXT:    vmrglw v2, v2, v2
-; P9LE-NEXT:    vmrghw v2, v2, v3
+; P9LE-NEXT:    lfiwzx f0, 0, r5
+; P9LE-NEXT:    xxmrglw vs1, v2, v2
+; P9LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: s2v_test_f5:
@@ -380,9 +380,9 @@ define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr
 ;
 ; P8LE-LABEL: s2v_test_f5:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    vmrglw v2, v2, v2
-; P8LE-NEXT:    lxsiwzx v3, 0, r5
-; P8LE-NEXT:    vmrghw v2, v2, v3
+; P8LE-NEXT:    lfiwzx f0, 0, r5
+; P8LE-NEXT:    xxmrglw vs1, v2, v2
+; P8LE-NEXT:    xxmrghw v2, vs1, vs0
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test_f5:

diff  --git a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
index 2c1483f9bedc8..a3719331c589b 100644
--- a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
@@ -63,7 +63,7 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P9LE-NEXT:    sub r3, r3, r4
 ; P9LE-NEXT:    mtvsrd v2, r3
 ; P9LE-NEXT:    vmrghh v2, v2, v4
-; P9LE-NEXT:    vmrglw v2, v2, v3
+; P9LE-NEXT:    xxmrglw v2, v2, v3
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: fold_srem_vec_1:
@@ -123,7 +123,7 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    mtvsrwz v2, r3
 ; P9BE-NEXT:    vperm v2, v2, v4, v5
-; P9BE-NEXT:    vmrghw v2, v3, v2
+; P9BE-NEXT:    xxmrghw v2, v3, v2
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fold_srem_vec_1:
@@ -178,7 +178,7 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P8LE-NEXT:    mtvsrd v5, r4
 ; P8LE-NEXT:    vmrghh v2, v3, v2
 ; P8LE-NEXT:    vmrghh v3, v5, v4
-; P8LE-NEXT:    vmrglw v2, v2, v3
+; P8LE-NEXT:    xxmrglw v2, v2, v3
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fold_srem_vec_1:
@@ -235,7 +235,7 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P8BE-NEXT:    mtvsrwz v0, r4
 ; P8BE-NEXT:    vperm v2, v4, v2, v3
 ; P8BE-NEXT:    vperm v3, v0, v5, v3
-; P8BE-NEXT:    vmrghw v2, v3, v2
+; P8BE-NEXT:    xxmrghw v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
   ret <4 x i16> %1
@@ -292,7 +292,7 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P9LE-NEXT:    sub r3, r3, r4
 ; P9LE-NEXT:    mtvsrd v2, r3
 ; P9LE-NEXT:    vmrghh v2, v2, v4
-; P9LE-NEXT:    vmrglw v2, v2, v3
+; P9LE-NEXT:    xxmrglw v2, v2, v3
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: fold_srem_vec_2:
@@ -348,7 +348,7 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    mtvsrwz v2, r3
 ; P9BE-NEXT:    vperm v2, v2, v4, v5
-; P9BE-NEXT:    vmrghw v2, v2, v3
+; P9BE-NEXT:    xxmrghw v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fold_srem_vec_2:
@@ -399,7 +399,7 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P8LE-NEXT:    vmrghh v2, v3, v2
 ; P8LE-NEXT:    mtvsrd v5, r3
 ; P8LE-NEXT:    vmrghh v3, v5, v4
-; P8LE-NEXT:    vmrglw v2, v3, v2
+; P8LE-NEXT:    xxmrglw v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fold_srem_vec_2:
@@ -452,7 +452,7 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P8BE-NEXT:    vperm v2, v4, v2, v3
 ; P8BE-NEXT:    mtvsrwz v0, r3
 ; P8BE-NEXT:    vperm v3, v0, v5, v3
-; P8BE-NEXT:    vmrghw v2, v3, v2
+; P8BE-NEXT:    xxmrghw v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
   ret <4 x i16> %1
@@ -513,12 +513,12 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P9LE-NEXT:    mtvsrd v2, r3
 ; P9LE-NEXT:    vmrghh v2, v2, v4
 ; P9LE-NEXT:    mtvsrd v4, r6
-; P9LE-NEXT:    vmrglw v2, v2, v3
+; P9LE-NEXT:    xxmrglw v2, v2, v3
 ; P9LE-NEXT:    mtvsrd v3, r5
 ; P9LE-NEXT:    vmrghh v3, v4, v3
 ; P9LE-NEXT:    mtvsrd v4, r7
 ; P9LE-NEXT:    vmrghh v4, v5, v4
-; P9LE-NEXT:    vmrglw v3, v4, v3
+; P9LE-NEXT:    xxmrglw v3, v4, v3
 ; P9LE-NEXT:    vadduhm v2, v2, v3
 ; P9LE-NEXT:    blr
 ;
@@ -577,12 +577,12 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P9BE-NEXT:    mtvsrwz v2, r3
 ; P9BE-NEXT:    vperm v2, v2, v4, v5
 ; P9BE-NEXT:    mtvsrwz v4, r6
-; P9BE-NEXT:    vmrghw v2, v2, v3
+; P9BE-NEXT:    xxmrghw v2, v2, v3
 ; P9BE-NEXT:    mtvsrwz v3, r4
 ; P9BE-NEXT:    vperm v3, v4, v3, v5
 ; P9BE-NEXT:    mtvsrwz v4, r7
 ; P9BE-NEXT:    vperm v4, v0, v4, v5
-; P9BE-NEXT:    vmrghw v3, v4, v3
+; P9BE-NEXT:    xxmrghw v3, v4, v3
 ; P9BE-NEXT:    vadduhm v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
@@ -640,8 +640,8 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P8LE-NEXT:    mtvsrd v5, r3
 ; P8LE-NEXT:    vmrghh v0, v1, v0
 ; P8LE-NEXT:    vmrghh v4, v5, v4
-; P8LE-NEXT:    vmrglw v3, v0, v3
-; P8LE-NEXT:    vmrglw v2, v4, v2
+; P8LE-NEXT:    xxmrglw v3, v0, v3
+; P8LE-NEXT:    xxmrglw v2, v4, v2
 ; P8LE-NEXT:    vadduhm v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
@@ -703,8 +703,8 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P8BE-NEXT:    mtvsrwz v0, r4
 ; P8BE-NEXT:    vperm v1, v6, v1, v2
 ; P8BE-NEXT:    vperm v2, v0, v5, v2
-; P8BE-NEXT:    vmrghw v4, v1, v4
-; P8BE-NEXT:    vmrghw v2, v2, v3
+; P8BE-NEXT:    xxmrghw v4, v1, v4
+; P8BE-NEXT:    xxmrghw v2, v2, v3
 ; P8BE-NEXT:    vadduhm v2, v4, v2
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
@@ -756,7 +756,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P9LE-NEXT:    sub r3, r3, r4
 ; P9LE-NEXT:    mtvsrd v2, r3
 ; P9LE-NEXT:    vmrghh v2, v4, v2
-; P9LE-NEXT:    vmrglw v2, v2, v3
+; P9LE-NEXT:    xxmrglw v2, v2, v3
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: dont_fold_srem_power_of_two:
@@ -803,7 +803,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    mtvsrwz v2, r3
 ; P9BE-NEXT:    vperm v2, v2, v4, v5
-; P9BE-NEXT:    vmrghw v2, v3, v2
+; P9BE-NEXT:    xxmrghw v2, v3, v2
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: dont_fold_srem_power_of_two:
@@ -845,7 +845,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P8LE-NEXT:    mtvsrd v4, r3
 ; P8LE-NEXT:    mtvsrd v5, r4
 ; P8LE-NEXT:    vmrghh v3, v4, v5
-; P8LE-NEXT:    vmrglw v2, v3, v2
+; P8LE-NEXT:    xxmrglw v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_srem_power_of_two:
@@ -889,7 +889,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P8BE-NEXT:    mtvsrwz v5, r3
 ; P8BE-NEXT:    mtvsrwz v0, r4
 ; P8BE-NEXT:    vperm v3, v0, v5, v3
-; P8BE-NEXT:    vmrghw v2, v2, v3
+; P8BE-NEXT:    xxmrghw v2, v2, v3
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
   ret <4 x i16> %1
@@ -941,7 +941,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
 ; P9LE-NEXT:    sub r3, r3, r4
 ; P9LE-NEXT:    mtvsrd v2, r3
 ; P9LE-NEXT:    vmrghh v2, v2, v4
-; P9LE-NEXT:    vmrglw v2, v2, v3
+; P9LE-NEXT:    xxmrglw v2, v2, v3
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: dont_fold_srem_one:
@@ -991,7 +991,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    mtvsrwz v4, r3
 ; P9BE-NEXT:    vperm v2, v4, v2, v5
-; P9BE-NEXT:    vmrghw v2, v2, v3
+; P9BE-NEXT:    xxmrghw v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: dont_fold_srem_one:
@@ -1037,7 +1037,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
 ; P8LE-NEXT:    mtvsrd v5, r3
 ; P8LE-NEXT:    vmrghh v3, v3, v4
 ; P8LE-NEXT:    vmrghh v2, v5, v2
-; P8LE-NEXT:    vmrglw v2, v3, v2
+; P8LE-NEXT:    xxmrglw v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_srem_one:
@@ -1085,7 +1085,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
 ; P8BE-NEXT:    mtvsrwz v0, r3
 ; P8BE-NEXT:    vperm v4, v5, v4, v3
 ; P8BE-NEXT:    vperm v2, v2, v0, v3
-; P8BE-NEXT:    vmrghw v2, v2, v4
+; P8BE-NEXT:    xxmrghw v2, v2, v4
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
   ret <4 x i16> %1
@@ -1132,7 +1132,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
 ; P9LE-NEXT:    li r3, 0
 ; P9LE-NEXT:    mtvsrd v4, r3
 ; P9LE-NEXT:    vmrghh v2, v2, v4
-; P9LE-NEXT:    vmrglw v2, v3, v2
+; P9LE-NEXT:    xxmrglw v2, v3, v2
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: dont_fold_urem_i16_smax:
@@ -1177,7 +1177,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    mtvsrwz v4, r3
 ; P9BE-NEXT:    vperm v2, v4, v2, v5
-; P9BE-NEXT:    vmrghw v2, v2, v3
+; P9BE-NEXT:    xxmrghw v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: dont_fold_urem_i16_smax:
@@ -1218,7 +1218,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
 ; P8LE-NEXT:    mtvsrd v5, r3
 ; P8LE-NEXT:    vmrghh v3, v3, v4
 ; P8LE-NEXT:    vmrghh v2, v5, v2
-; P8LE-NEXT:    vmrglw v2, v3, v2
+; P8LE-NEXT:    xxmrglw v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_urem_i16_smax:
@@ -1261,7 +1261,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
 ; P8BE-NEXT:    mtvsrwz v0, r3
 ; P8BE-NEXT:    vperm v4, v5, v4, v3
 ; P8BE-NEXT:    vperm v2, v2, v0, v3
-; P8BE-NEXT:    vmrghw v2, v2, v4
+; P8BE-NEXT:    xxmrghw v2, v2, v4
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
   ret <4 x i16> %1

diff  --git a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
index 4c4d7be624cb8..138d96fd53e12 100644
--- a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
+++ b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -66,8 +66,8 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
 ;
 ; CHECK-BE-P8-LABEL: test:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    xscvdpsxws v3, f1
-; CHECK-BE-P8-NEXT:    vmrghw v3, v2, v3
+; CHECK-BE-P8-NEXT:    xscvdpsxws f0, f1
+; CHECK-BE-P8-NEXT:    xxmrghw v3, v2, vs0
 ; CHECK-BE-P8-NEXT:    xxsldwi vs0, v3, v2, 3
 ; CHECK-BE-P8-NEXT:    xxsldwi v2, vs0, vs0, 1
 ; CHECK-BE-P8-NEXT:    blr
@@ -129,8 +129,8 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
 ;
 ; CHECK-BE-P8-LABEL: test2:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    xscvdpsxws v3, f1
-; CHECK-BE-P8-NEXT:    vmrghw v3, v2, v3
+; CHECK-BE-P8-NEXT:    xscvdpsxws f0, f1
+; CHECK-BE-P8-NEXT:    xxmrghw v3, v2, vs0
 ; CHECK-BE-P8-NEXT:    xxsldwi vs0, v3, v2, 3
 ; CHECK-BE-P8-NEXT:    xxsldwi v2, vs0, vs0, 1
 ; CHECK-BE-P8-NEXT:    blr
@@ -192,8 +192,8 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
 ;
 ; CHECK-BE-P8-LABEL: test3:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    xscvdpuxws v3, f1
-; CHECK-BE-P8-NEXT:    vmrghw v3, v2, v3
+; CHECK-BE-P8-NEXT:    xscvdpuxws f0, f1
+; CHECK-BE-P8-NEXT:    xxmrghw v3, v2, vs0
 ; CHECK-BE-P8-NEXT:    xxsldwi vs0, v3, v2, 3
 ; CHECK-BE-P8-NEXT:    xxsldwi v2, vs0, vs0, 1
 ; CHECK-BE-P8-NEXT:    blr
@@ -255,8 +255,8 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
 ;
 ; CHECK-BE-P8-LABEL: test4:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    xscvdpuxws v3, f1
-; CHECK-BE-P8-NEXT:    vmrghw v3, v2, v3
+; CHECK-BE-P8-NEXT:    xscvdpuxws f0, f1
+; CHECK-BE-P8-NEXT:    xxmrghw v3, v2, vs0
 ; CHECK-BE-P8-NEXT:    xxsldwi vs0, v3, v2, 3
 ; CHECK-BE-P8-NEXT:    xxsldwi v2, vs0, vs0, 1
 ; CHECK-BE-P8-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
index ef73fa686b3c6..a079a657d2c8b 100644
--- a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
@@ -161,17 +161,17 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
 ;
 ; PPC64LE-LABEL: test_urem_vec:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    mtvsrwz 34, 3
-; PPC64LE-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
-; PPC64LE-NEXT:    mtvsrwz 35, 4
-; PPC64LE-NEXT:    addi 3, 3, .LCPI4_0@toc@l
+; PPC64LE-NEXT:    addis 6, 2, .LCPI4_0@toc@ha
+; PPC64LE-NEXT:    mtfprwz 0, 3
+; PPC64LE-NEXT:    mtfprwz 1, 4
+; PPC64LE-NEXT:    addi 3, 6, .LCPI4_0@toc@l
 ; PPC64LE-NEXT:    addis 4, 2, .LCPI4_2@toc@ha
+; PPC64LE-NEXT:    lvx 2, 0, 3
 ; PPC64LE-NEXT:    mtvsrwz 36, 5
-; PPC64LE-NEXT:    vmrghw 2, 3, 2
-; PPC64LE-NEXT:    lvx 3, 0, 3
 ; PPC64LE-NEXT:    addis 3, 2, .LCPI4_1@toc@ha
+; PPC64LE-NEXT:    xxmrghw 35, 1, 0
 ; PPC64LE-NEXT:    addi 3, 3, .LCPI4_1@toc@l
-; PPC64LE-NEXT:    vperm 2, 4, 2, 3
+; PPC64LE-NEXT:    vperm 2, 4, 3, 2
 ; PPC64LE-NEXT:    vspltisw 3, -11
 ; PPC64LE-NEXT:    lvx 4, 0, 3
 ; PPC64LE-NEXT:    addi 3, 4, .LCPI4_2@toc@l

diff  --git a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
index d7217372f8bac..87e6ae20ff2bc 100644
--- a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
@@ -57,7 +57,7 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
 ; P9LE-NEXT:    sub r3, r3, r4
 ; P9LE-NEXT:    mtvsrd v2, r3
 ; P9LE-NEXT:    vmrghh v2, v4, v2
-; P9LE-NEXT:    vmrglw v2, v3, v2
+; P9LE-NEXT:    xxmrglw v2, v3, v2
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: fold_urem_vec_1:
@@ -111,7 +111,7 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    mtvsrwz v2, r3
 ; P9BE-NEXT:    vperm v2, v2, v4, v5
-; P9BE-NEXT:    vmrghw v2, v2, v3
+; P9BE-NEXT:    xxmrghw v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fold_urem_vec_1:
@@ -160,7 +160,7 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
 ; P8LE-NEXT:    mtvsrd v5, r4
 ; P8LE-NEXT:    vmrghh v2, v3, v2
 ; P8LE-NEXT:    vmrghh v3, v5, v4
-; P8LE-NEXT:    vmrglw v2, v2, v3
+; P8LE-NEXT:    xxmrglw v2, v2, v3
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fold_urem_vec_1:
@@ -211,7 +211,7 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
 ; P8BE-NEXT:    mtvsrwz v0, r4
 ; P8BE-NEXT:    vperm v2, v4, v2, v3
 ; P8BE-NEXT:    vperm v3, v5, v0, v3
-; P8BE-NEXT:    vmrghw v2, v3, v2
+; P8BE-NEXT:    xxmrghw v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
   ret <4 x i16> %1
@@ -268,7 +268,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P9LE-NEXT:    sub r3, r3, r4
 ; P9LE-NEXT:    mtvsrd v2, r3
 ; P9LE-NEXT:    vmrghh v2, v2, v4
-; P9LE-NEXT:    vmrglw v2, v2, v3
+; P9LE-NEXT:    xxmrglw v2, v2, v3
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: fold_urem_vec_2:
@@ -324,7 +324,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P9BE-NEXT:    sub r3, r3, r4
 ; P9BE-NEXT:    mtvsrwz v2, r3
 ; P9BE-NEXT:    vperm v2, v2, v4, v5
-; P9BE-NEXT:    vmrghw v2, v2, v3
+; P9BE-NEXT:    xxmrghw v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fold_urem_vec_2:
@@ -375,7 +375,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P8LE-NEXT:    mtvsrd v5, r3
 ; P8LE-NEXT:    vmrghh v2, v3, v2
 ; P8LE-NEXT:    vmrghh v3, v5, v4
-; P8LE-NEXT:    vmrglw v2, v3, v2
+; P8LE-NEXT:    xxmrglw v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fold_urem_vec_2:
@@ -428,7 +428,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P8BE-NEXT:    mtvsrwz v0, r3
 ; P8BE-NEXT:    vperm v2, v4, v2, v3
 ; P8BE-NEXT:    vperm v3, v0, v5, v3
-; P8BE-NEXT:    vmrghw v2, v3, v2
+; P8BE-NEXT:    xxmrghw v2, v3, v2
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
   ret <4 x i16> %1
@@ -489,12 +489,12 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P9LE-NEXT:    mtvsrd v2, r3
 ; P9LE-NEXT:    vmrghh v2, v2, v4
 ; P9LE-NEXT:    mtvsrd v4, r6
-; P9LE-NEXT:    vmrglw v2, v2, v3
+; P9LE-NEXT:    xxmrglw v2, v2, v3
 ; P9LE-NEXT:    mtvsrd v3, r5
 ; P9LE-NEXT:    vmrghh v3, v4, v3
 ; P9LE-NEXT:    mtvsrd v4, r7
 ; P9LE-NEXT:    vmrghh v4, v5, v4
-; P9LE-NEXT:    vmrglw v3, v4, v3
+; P9LE-NEXT:    xxmrglw v3, v4, v3
 ; P9LE-NEXT:    vadduhm v2, v2, v3
 ; P9LE-NEXT:    blr
 ;
@@ -553,12 +553,12 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P9BE-NEXT:    mtvsrwz v2, r3
 ; P9BE-NEXT:    vperm v2, v2, v4, v5
 ; P9BE-NEXT:    mtvsrwz v4, r6
-; P9BE-NEXT:    vmrghw v2, v2, v3
+; P9BE-NEXT:    xxmrghw v2, v2, v3
 ; P9BE-NEXT:    mtvsrwz v3, r4
 ; P9BE-NEXT:    vperm v3, v4, v3, v5
 ; P9BE-NEXT:    mtvsrwz v4, r7
 ; P9BE-NEXT:    vperm v4, v0, v4, v5
-; P9BE-NEXT:    vmrghw v3, v4, v3
+; P9BE-NEXT:    xxmrghw v3, v4, v3
 ; P9BE-NEXT:    vadduhm v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
@@ -618,8 +618,8 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P8LE-NEXT:    mtvsrd v5, r3
 ; P8LE-NEXT:    vmrghh v0, v1, v0
 ; P8LE-NEXT:    vmrghh v4, v5, v4
-; P8LE-NEXT:    vmrglw v3, v0, v3
-; P8LE-NEXT:    vmrglw v2, v4, v2
+; P8LE-NEXT:    xxmrglw v3, v0, v3
+; P8LE-NEXT:    xxmrglw v2, v4, v2
 ; P8LE-NEXT:    vadduhm v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
@@ -679,8 +679,8 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P8BE-NEXT:    mtvsrwz v0, r4
 ; P8BE-NEXT:    vperm v1, v6, v1, v2
 ; P8BE-NEXT:    vperm v2, v0, v5, v2
-; P8BE-NEXT:    vmrghw v4, v1, v4
-; P8BE-NEXT:    vmrghw v2, v2, v3
+; P8BE-NEXT:    xxmrghw v4, v1, v4
+; P8BE-NEXT:    xxmrghw v2, v2, v3
 ; P8BE-NEXT:    vadduhm v2, v4, v2
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
@@ -720,7 +720,7 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
 ; P9LE-NEXT:    clrlwi r3, r3, 29
 ; P9LE-NEXT:    mtvsrd v2, r3
 ; P9LE-NEXT:    vmrghh v2, v4, v2
-; P9LE-NEXT:    vmrglw v2, v2, v3
+; P9LE-NEXT:    xxmrglw v2, v2, v3
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: dont_fold_urem_power_of_two:
@@ -755,7 +755,7 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
 ; P9BE-NEXT:    clrlwi r3, r3, 29
 ; P9BE-NEXT:    mtvsrwz v2, r3
 ; P9BE-NEXT:    vperm v2, v2, v4, v5
-; P9BE-NEXT:    vmrghw v2, v3, v2
+; P9BE-NEXT:    xxmrghw v2, v3, v2
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: dont_fold_urem_power_of_two:
@@ -785,7 +785,7 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
 ; P8LE-NEXT:    sub r3, r5, r3
 ; P8LE-NEXT:    mtvsrd v4, r3
 ; P8LE-NEXT:    vmrghh v3, v4, v5
-; P8LE-NEXT:    vmrglw v2, v3, v2
+; P8LE-NEXT:    xxmrglw v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_urem_power_of_two:
@@ -817,7 +817,7 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
 ; P8BE-NEXT:    vperm v2, v4, v2, v3
 ; P8BE-NEXT:    mtvsrwz v5, r3
 ; P8BE-NEXT:    vperm v3, v0, v5, v3
-; P8BE-NEXT:    vmrghw v2, v2, v3
+; P8BE-NEXT:    xxmrghw v2, v2, v3
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
   ret <4 x i16> %1
@@ -862,7 +862,7 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
 ; P9LE-NEXT:    li r3, 0
 ; P9LE-NEXT:    mtvsrd v4, r3
 ; P9LE-NEXT:    vmrghh v2, v2, v4
-; P9LE-NEXT:    vmrglw v2, v3, v2
+; P9LE-NEXT:    xxmrglw v2, v3, v2
 ; P9LE-NEXT:    blr
 ;
 ; P9BE-LABEL: dont_fold_urem_one:
@@ -905,7 +905,7 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    mtvsrwz v4, r3
 ; P9BE-NEXT:    vperm v2, v4, v2, v5
-; P9BE-NEXT:    vmrghw v2, v2, v3
+; P9BE-NEXT:    xxmrghw v2, v2, v3
 ; P9BE-NEXT:    blr
 ;
 ; P8LE-LABEL: dont_fold_urem_one:
@@ -944,7 +944,7 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
 ; P8LE-NEXT:    mtvsrd v5, r3
 ; P8LE-NEXT:    vmrghh v2, v3, v2
 ; P8LE-NEXT:    vmrghh v3, v5, v4
-; P8LE-NEXT:    vmrglw v2, v3, v2
+; P8LE-NEXT:    xxmrglw v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_urem_one:
@@ -985,7 +985,7 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
 ; P8BE-NEXT:    mtvsrwz v0, r3
 ; P8BE-NEXT:    vperm v4, v5, v4, v3
 ; P8BE-NEXT:    vperm v2, v2, v0, v3
-; P8BE-NEXT:    vmrghw v2, v2, v4
+; P8BE-NEXT:    xxmrghw v2, v2, v4
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
   ret <4 x i16> %1

diff  --git a/llvm/test/CodeGen/PowerPC/vec-trunc2.ll b/llvm/test/CodeGen/PowerPC/vec-trunc2.ll
index 9824d9e99fde0..7f49208b9a292 100644
--- a/llvm/test/CodeGen/PowerPC/vec-trunc2.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-trunc2.ll
@@ -36,31 +36,31 @@ define dso_local <8 x i8> @test8x32(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
 ; CHECK-BE-NEXT:    stw r4, -48(r1)
 ; CHECK-BE-NEXT:    stw r3, -64(r1)
 ; CHECK-BE-NEXT:    addi r3, r1, -80
-; CHECK-BE-NEXT:    lxvw4x v2, 0, r3
+; CHECK-BE-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -96
-; CHECK-BE-NEXT:    lxvw4x v3, 0, r3
+; CHECK-BE-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -112
-; CHECK-BE-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-NEXT:    lxvw4x vs2, 0, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -128
-; CHECK-BE-NEXT:    lxvw4x v5, 0, r3
+; CHECK-BE-NEXT:    lxvw4x vs3, 0, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -16
-; CHECK-BE-NEXT:    lxvw4x v0, 0, r3
+; CHECK-BE-NEXT:    lxvw4x vs4, 0, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -32
-; CHECK-BE-NEXT:    lxvw4x v1, 0, r3
+; CHECK-BE-NEXT:    lxvw4x vs5, 0, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -48
-; CHECK-BE-NEXT:    lxvw4x v6, 0, r3
+; CHECK-BE-NEXT:    lxvw4x vs6, 0, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -64
-; CHECK-BE-NEXT:    lxvw4x v7, 0, r3
+; CHECK-BE-NEXT:    lxvw4x vs7, 0, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    vmrghw v3, v5, v4
-; CHECK-BE-NEXT:    vmrghw v4, v1, v0
+; CHECK-BE-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghw vs1, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghw vs2, vs5, vs4
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
-; CHECK-BE-NEXT:    lxvw4x v8, 0, r3
-; CHECK-BE-NEXT:    vmrghw v5, v7, v6
-; CHECK-BE-NEXT:    xxmrghd v3, v5, v4
-; CHECK-BE-NEXT:    vperm v2, v3, v2, v8
+; CHECK-BE-NEXT:    xxmrghd v3, vs1, vs0
+; CHECK-BE-NEXT:    lxvw4x v2, 0, r3
+; CHECK-BE-NEXT:    xxmrghw vs3, vs7, vs6
+; CHECK-BE-NEXT:    xxmrghd v4, vs3, vs2
+; CHECK-BE-NEXT:    vperm v2, v4, v3, v2
 ; CHECK-BE-NEXT:    blr
 %v10 = insertelement <8 x i32> undef, i32 %i1, i32 0
 %v11 = insertelement <8 x i32> %v10, i32 %i2, i32 1
@@ -127,9 +127,9 @@ define dso_local <8 x i16> @test8x24(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5
 ; CHECK-NEXT:    mtvsrd v5, r10
 ; CHECK-NEXT:    vmrghh v0, v1, v0
 ; CHECK-NEXT:    vmrghh v3, v5, v3
-; CHECK-NEXT:    vmrglw v2, v4, v2
-; CHECK-NEXT:    vmrglw v3, v3, v0
-; CHECK-NEXT:    xxmrgld v2, v3, v2
+; CHECK-NEXT:    xxmrglw vs0, v4, v2
+; CHECK-NEXT:    xxmrglw vs1, v3, v0
+; CHECK-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8x24:
@@ -161,10 +161,10 @@ define dso_local <8 x i16> @test8x24(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5
 ; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    vmrghh v3, v5, v4
 ; CHECK-BE-NEXT:    vmrghh v4, v1, v0
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    xxmrghw vs0, v3, v2
 ; CHECK-BE-NEXT:    vmrghh v5, v7, v6
-; CHECK-BE-NEXT:    vmrghw v3, v5, v4
-; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-NEXT:    xxmrghw vs1, v5, v4
+; CHECK-BE-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-BE-NEXT:    blr
 %i11 = trunc i32 %i1 to i24
 %i21 = trunc i32 %i2 to i24

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
index 71a9484000a5c..2fec5286d1142 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
@@ -96,8 +96,8 @@ define i64 @test4elt(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-P8-NEXT:    mtvsrd v5, r3
 ; CHECK-P8-NEXT:    vmrghh v3, v4, v3
 ; CHECK-P8-NEXT:    vmrghh v2, v2, v5
-; CHECK-P8-NEXT:    vmrglw v2, v2, v3
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -124,8 +124,8 @@ define i64 @test4elt(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-P9-NEXT:    mffprwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd v2, r3
 ; CHECK-P9-NEXT:    vmrghh v2, v4, v2
-; CHECK-P9-NEXT:    vmrglw v2, v2, v3
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
@@ -154,8 +154,8 @@ define i64 @test4elt(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    vperm v2, v5, v2, v3
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v4
+; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = fptoui <4 x float> %a to <4 x i16>
@@ -209,11 +209,11 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #
 ; CHECK-P8-NEXT:    mtvsrd v5, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    vmrghh v5, v0, v5
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
 ; CHECK-P8-NEXT:    mtvsrd v1, r3
-; CHECK-P8-NEXT:    vmrglw v2, v3, v2
 ; CHECK-P8-NEXT:    vmrghh v4, v4, v1
-; CHECK-P8-NEXT:    vmrglw v3, v4, v5
-; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    xxmrglw vs1, v4, v5
+; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
@@ -232,39 +232,39 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs1
 ; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
 ; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs0, vs0, 3
 ; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
 ; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xxswapd vs1, vs0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xscvspdpn f1, vs0
+; CHECK-P9-NEXT:    xxmrglw vs1, v3, v2
+; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs0
+; CHECK-P9-NEXT:    mtvsrd v2, r3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    xscvspdpn f2, vs0
 ; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrghh v3, v4, v3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    vmrghh v4, v4, v5
-; CHECK-P9-NEXT:    vmrglw v3, v4, v3
-; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
@@ -286,39 +286,39 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
 ; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs0, vs0, 3
 ; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
 ; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxmrghw vs1, v4, v3
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs0
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    xscvspdpn f2, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x float>, <8 x float>* %0, align 32
@@ -413,223 +413,223 @@ define void @test16elt(<16 x i16>* noalias nocapture sret(<16 x i16>) %agg.resul
 ; CHECK-P8-NEXT:    vmrghh v0, v0, v7
 ; CHECK-P8-NEXT:    mtvsrd v7, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
 ; CHECK-P8-NEXT:    vmrghh v4, v8, v4
 ; CHECK-P8-NEXT:    mtvsrd v8, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
 ; CHECK-P8-NEXT:    vmrghh v1, v1, v9
+; CHECK-P8-NEXT:    xxmrglw vs1, v0, v5
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
 ; CHECK-P8-NEXT:    vmrghh v7, v8, v7
 ; CHECK-P8-NEXT:    vmrghh v6, v6, v9
-; CHECK-P8-NEXT:    vmrglw v2, v2, v3
-; CHECK-P8-NEXT:    vmrglw v3, v0, v5
-; CHECK-P8-NEXT:    vmrglw v4, v1, v4
-; CHECK-P8-NEXT:    vmrglw v5, v6, v7
-; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-P8-NEXT:    xxmrglw vs2, v1, v4
 ; CHECK-P8-NEXT:    stvx v2, 0, r3
-; CHECK-P8-NEXT:    xxmrgld v3, v5, v4
+; CHECK-P8-NEXT:    xxmrglw vs3, v6, v7
+; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
 ; CHECK-P8-NEXT:    stvx v3, r3, r5
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs2, 0(r4)
-; CHECK-P9-NEXT:    lxv vs1, 16(r4)
-; CHECK-P9-NEXT:    lxv vs0, 32(r4)
-; CHECK-P9-NEXT:    xxsldwi vs3, vs2, vs2, 3
-; CHECK-P9-NEXT:    xxswapd vs4, vs2
-; CHECK-P9-NEXT:    xscvspdpn f5, vs2
-; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 1
-; CHECK-P9-NEXT:    xxsldwi vs6, vs1, vs1, 3
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs3, vs1
+; CHECK-P9-NEXT:    xscvspdpn f4, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    xxswapd vs3, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r5, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs0
 ; CHECK-P9-NEXT:    mtvsrd v2, r5
-; CHECK-P9-NEXT:    mffprwz r5, f4
-; CHECK-P9-NEXT:    xscvdpsxws f4, f5
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f4
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    mtvsrd v3, r5
 ; CHECK-P9-NEXT:    vmrghh v2, v3, v2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    mffprwz r5, f4
-; CHECK-P9-NEXT:    xscvspdpn f4, vs6
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs5
 ; CHECK-P9-NEXT:    mtvsrd v3, r5
-; CHECK-P9-NEXT:    mffprwz r5, f2
-; CHECK-P9-NEXT:    xscvspdpn f2, vs1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    mffprwz r5, f1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    mtvsrd v4, r5
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mffprwz r5, f4
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mffprwz r5, f1
 ; CHECK-P9-NEXT:    mtvsrd v4, r5
 ; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    xxsldwi vs3, vs0, vs0, 3
 ; CHECK-P9-NEXT:    mtvsrd v5, r5
 ; CHECK-P9-NEXT:    mffprwz r5, f2
-; CHECK-P9-NEXT:    xscvspdpn f2, vs3
-; CHECK-P9-NEXT:    vmrghh v4, v5, v4
-; CHECK-P9-NEXT:    mtvsrd v5, r5
-; CHECK-P9-NEXT:    mffprwz r5, f1
-; CHECK-P9-NEXT:    xxswapd vs1, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
 ; CHECK-P9-NEXT:    mtvsrd v0, r5
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    vmrghh v5, v5, v0
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrglw v3, v5, v4
-; CHECK-P9-NEXT:    mffprwz r5, f2
-; CHECK-P9-NEXT:    xscvspdpn f2, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    mffprwz r5, f0
+; CHECK-P9-NEXT:    lxv vs0, 32(r4)
+; CHECK-P9-NEXT:    vmrghh v5, v0, v5
 ; CHECK-P9-NEXT:    mtvsrd v0, r5
+; CHECK-P9-NEXT:    vmrghh v4, v4, v0
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs3, vs0
+; CHECK-P9-NEXT:    xscvspdpn f4, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mffprwz r5, f1
 ; CHECK-P9-NEXT:    lxv vs1, 48(r4)
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mtvsrd v1, r5
-; CHECK-P9-NEXT:    vmrghh v0, v1, v0
-; CHECK-P9-NEXT:    mffprwz r4, f2
-; CHECK-P9-NEXT:    xxmrgld vs2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v4, r4
+; CHECK-P9-NEXT:    mffprwz r4, f4
+; CHECK-P9-NEXT:    mtvsrd v2, r5
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    xxmrglw vs3, v4, v5
+; CHECK-P9-NEXT:    mtvsrd v3, r5
+; CHECK-P9-NEXT:    xxmrgld vs2, vs3, vs2
+; CHECK-P9-NEXT:    xxsldwi vs3, vs1, vs1, 3
+; CHECK-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v3, r4
 ; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs1, vs1, 3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    mtvsrd v4, r4
 ; CHECK-P9-NEXT:    stxv vs2, 0(r3)
+; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs1
 ; CHECK-P9-NEXT:    mtvsrd v2, r4
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    vmrghh v2, v4, v2
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrglw v2, v2, v0
-; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    xxswapd vs0, vs1
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
 ; CHECK-P9-NEXT:    mtvsrd v3, r4
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    xscvspdpn f0, vs1
-; CHECK-P9-NEXT:    mtvsrd v4, r4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghh v3, v4, v3
-; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs1, vs1, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    mtvsrd v3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f1
 ; CHECK-P9-NEXT:    mtvsrd v4, r4
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r4
-; CHECK-P9-NEXT:    vmrghh v4, v4, v5
-; CHECK-P9-NEXT:    vmrglw v3, v4, v3
-; CHECK-P9-NEXT:    xxmrgld vs0, v3, v2
+; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    xxmrglw vs1, v3, v2
+; CHECK-P9-NEXT:    xxmrgld vs0, vs1, vs0
 ; CHECK-P9-NEXT:    stxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 16(r4)
 ; CHECK-BE-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
-; CHECK-BE-NEXT:    lxv vs1, 0(r4)
-; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs0, 0(r4)
 ; CHECK-BE-NEXT:    addi r5, r5, .LCPI3_0@toc@l
 ; CHECK-BE-NEXT:    lxv v2, 0(r5)
-; CHECK-BE-NEXT:    xxsldwi vs3, vs2, vs2, 3
-; CHECK-BE-NEXT:    xxswapd vs4, vs2
-; CHECK-BE-NEXT:    xscvspdpn f5, vs2
-; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
-; CHECK-BE-NEXT:    xxsldwi vs6, vs1, vs1, 3
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    xscvspdpn f4, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs0
 ; CHECK-BE-NEXT:    mtvsrwz v3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f4
-; CHECK-BE-NEXT:    xscvdpsxws f4, f5
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f4
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    mtvsrwz v4, r5
 ; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r5, f4
-; CHECK-BE-NEXT:    xscvspdpn f4, vs6
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs5
 ; CHECK-BE-NEXT:    mtvsrwz v4, r5
-; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    xscvspdpn f2, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r5, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    mtvsrwz v5, r5
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r5, f1
 ; CHECK-BE-NEXT:    mtvsrwz v5, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xxsldwi vs3, vs0, vs0, 3
 ; CHECK-BE-NEXT:    mtvsrwz v0, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    xscvspdpn f2, vs3
-; CHECK-BE-NEXT:    vperm v5, v0, v5, v2
-; CHECK-BE-NEXT:    mtvsrwz v0, r5
-; CHECK-BE-NEXT:    mffprwz r5, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
 ; CHECK-BE-NEXT:    mtvsrwz v1, r5
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    vperm v0, v0, v1, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghw v4, v0, v5
-; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    xscvspdpn f2, vs0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    mffprwz r5, f0
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    vperm v0, v1, v0, v2
 ; CHECK-BE-NEXT:    mtvsrwz v1, r5
+; CHECK-BE-NEXT:    vperm v5, v5, v1, v2
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs3, vs0
+; CHECK-BE-NEXT:    xscvspdpn f4, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r5, f1
 ; CHECK-BE-NEXT:    lxv vs1, 32(r4)
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrwz v6, r5
-; CHECK-BE-NEXT:    vperm v1, v6, v1, v2
-; CHECK-BE-NEXT:    mffprwz r4, f2
-; CHECK-BE-NEXT:    xxmrghd vs2, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
+; CHECK-BE-NEXT:    mffprwz r4, f4
+; CHECK-BE-NEXT:    mtvsrwz v3, r5
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    xxmrghw vs3, v5, v0
+; CHECK-BE-NEXT:    mtvsrwz v4, r5
+; CHECK-BE-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs1, vs1, 3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
 ; CHECK-BE-NEXT:    stxv vs2, 0(r3)
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xxmrghw vs0, v4, v3
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
 ; CHECK-BE-NEXT:    mtvsrwz v3, r4
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    vperm v3, v5, v3, v2
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vmrghw v3, v3, v1
-; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    xxswapd vs0, vs1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
 ; CHECK-BE-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    xscvspdpn f0, vs1
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
-; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs1, vs1, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    mffprwz r4, f1
 ; CHECK-BE-NEXT:    mtvsrwz v5, r4
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r4
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghd vs0, v2, v3
+; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
+; CHECK-BE-NEXT:    xxmrghw vs1, v2, v3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -726,8 +726,8 @@ define i64 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-P8-NEXT:    mtvsrd v5, r3
 ; CHECK-P8-NEXT:    vmrghh v3, v4, v3
 ; CHECK-P8-NEXT:    vmrghh v2, v2, v5
-; CHECK-P8-NEXT:    vmrglw v2, v2, v3
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -754,8 +754,8 @@ define i64 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-P9-NEXT:    mffprwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd v2, r3
 ; CHECK-P9-NEXT:    vmrghh v2, v4, v2
-; CHECK-P9-NEXT:    vmrglw v2, v2, v3
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
@@ -784,8 +784,8 @@ define i64 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    mtvsrwz v2, r3
 ; CHECK-BE-NEXT:    vperm v2, v5, v2, v3
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v4
+; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = fptosi <4 x float> %a to <4 x i16>
@@ -839,11 +839,11 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed
 ; CHECK-P8-NEXT:    mtvsrd v5, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    vmrghh v5, v0, v5
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
 ; CHECK-P8-NEXT:    mtvsrd v1, r3
-; CHECK-P8-NEXT:    vmrglw v2, v3, v2
 ; CHECK-P8-NEXT:    vmrghh v4, v4, v1
-; CHECK-P8-NEXT:    vmrglw v3, v4, v5
-; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    xxmrglw vs1, v4, v5
+; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
@@ -862,39 +862,39 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs1
 ; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
 ; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs0, vs0, 3
 ; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
 ; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xxswapd vs1, vs0
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    xscvspdpn f1, vs0
+; CHECK-P9-NEXT:    xxmrglw vs1, v3, v2
+; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs0
+; CHECK-P9-NEXT:    mtvsrd v2, r3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    xscvspdpn f2, vs0
 ; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrghh v3, v4, v3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mffprwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    vmrghh v4, v4, v5
-; CHECK-P9-NEXT:    vmrglw v3, v4, v3
-; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
@@ -916,39 +916,39 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
 ; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs0, vs0, 3
 ; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
 ; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxmrghw vs1, v4, v3
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs0
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    xscvspdpn f2, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mffprwz r3, f2
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x float>, <8 x float>* %0, align 32
@@ -1043,223 +1043,223 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret(<16 x i16>) %ag
 ; CHECK-P8-NEXT:    vmrghh v0, v0, v7
 ; CHECK-P8-NEXT:    mtvsrd v7, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
 ; CHECK-P8-NEXT:    vmrghh v4, v8, v4
 ; CHECK-P8-NEXT:    mtvsrd v8, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
 ; CHECK-P8-NEXT:    vmrghh v1, v1, v9
+; CHECK-P8-NEXT:    xxmrglw vs1, v0, v5
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
 ; CHECK-P8-NEXT:    vmrghh v7, v8, v7
 ; CHECK-P8-NEXT:    vmrghh v6, v6, v9
-; CHECK-P8-NEXT:    vmrglw v2, v2, v3
-; CHECK-P8-NEXT:    vmrglw v3, v0, v5
-; CHECK-P8-NEXT:    vmrglw v4, v1, v4
-; CHECK-P8-NEXT:    vmrglw v5, v6, v7
-; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-P8-NEXT:    xxmrglw vs2, v1, v4
 ; CHECK-P8-NEXT:    stvx v2, 0, r3
-; CHECK-P8-NEXT:    xxmrgld v3, v5, v4
+; CHECK-P8-NEXT:    xxmrglw vs3, v6, v7
+; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
 ; CHECK-P8-NEXT:    stvx v3, r3, r5
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs2, 0(r4)
-; CHECK-P9-NEXT:    lxv vs1, 16(r4)
-; CHECK-P9-NEXT:    lxv vs0, 32(r4)
-; CHECK-P9-NEXT:    xxsldwi vs3, vs2, vs2, 3
-; CHECK-P9-NEXT:    xxswapd vs4, vs2
-; CHECK-P9-NEXT:    xscvspdpn f5, vs2
-; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 1
-; CHECK-P9-NEXT:    xxsldwi vs6, vs1, vs1, 3
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    xxswapd vs3, vs1
+; CHECK-P9-NEXT:    xscvspdpn f4, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    xxswapd vs3, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r5, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs0
 ; CHECK-P9-NEXT:    mtvsrd v2, r5
-; CHECK-P9-NEXT:    mffprwz r5, f4
-; CHECK-P9-NEXT:    xscvdpsxws f4, f5
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f4
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    mtvsrd v3, r5
 ; CHECK-P9-NEXT:    vmrghh v2, v3, v2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    mffprwz r5, f4
-; CHECK-P9-NEXT:    xscvspdpn f4, vs6
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs5
 ; CHECK-P9-NEXT:    mtvsrd v3, r5
-; CHECK-P9-NEXT:    mffprwz r5, f2
-; CHECK-P9-NEXT:    xscvspdpn f2, vs1
-; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    mffprwz r5, f1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    mtvsrd v4, r5
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mffprwz r5, f4
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mffprwz r5, f1
 ; CHECK-P9-NEXT:    mtvsrd v4, r5
 ; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    xxsldwi vs3, vs0, vs0, 3
 ; CHECK-P9-NEXT:    mtvsrd v5, r5
 ; CHECK-P9-NEXT:    mffprwz r5, f2
-; CHECK-P9-NEXT:    xscvspdpn f2, vs3
-; CHECK-P9-NEXT:    vmrghh v4, v5, v4
-; CHECK-P9-NEXT:    mtvsrd v5, r5
-; CHECK-P9-NEXT:    mffprwz r5, f1
-; CHECK-P9-NEXT:    xxswapd vs1, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
 ; CHECK-P9-NEXT:    mtvsrd v0, r5
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    vmrghh v5, v5, v0
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrglw v3, v5, v4
-; CHECK-P9-NEXT:    mffprwz r5, f2
-; CHECK-P9-NEXT:    xscvspdpn f2, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    mffprwz r5, f0
+; CHECK-P9-NEXT:    lxv vs0, 32(r4)
+; CHECK-P9-NEXT:    vmrghh v5, v0, v5
 ; CHECK-P9-NEXT:    mtvsrd v0, r5
+; CHECK-P9-NEXT:    vmrghh v4, v4, v0
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs3, vs0
+; CHECK-P9-NEXT:    xscvspdpn f4, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mffprwz r5, f1
 ; CHECK-P9-NEXT:    lxv vs1, 48(r4)
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mtvsrd v1, r5
-; CHECK-P9-NEXT:    vmrghh v0, v1, v0
-; CHECK-P9-NEXT:    mffprwz r4, f2
-; CHECK-P9-NEXT:    xxmrgld vs2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v4, r4
+; CHECK-P9-NEXT:    mffprwz r4, f4
+; CHECK-P9-NEXT:    mtvsrd v2, r5
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    xxmrglw vs3, v4, v5
+; CHECK-P9-NEXT:    mtvsrd v3, r5
+; CHECK-P9-NEXT:    xxmrgld vs2, vs3, vs2
+; CHECK-P9-NEXT:    xxsldwi vs3, vs1, vs1, 3
+; CHECK-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v3, r4
 ; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs1, vs1, 3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    mtvsrd v4, r4
 ; CHECK-P9-NEXT:    stxv vs2, 0(r3)
+; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs1
 ; CHECK-P9-NEXT:    mtvsrd v2, r4
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    vmrghh v2, v4, v2
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrglw v2, v2, v0
-; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    xxswapd vs0, vs1
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
 ; CHECK-P9-NEXT:    mtvsrd v3, r4
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    xscvspdpn f0, vs1
-; CHECK-P9-NEXT:    mtvsrd v4, r4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghh v3, v4, v3
-; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs1, vs1, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    mtvsrd v3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f1
 ; CHECK-P9-NEXT:    mtvsrd v4, r4
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r4
-; CHECK-P9-NEXT:    vmrghh v4, v4, v5
-; CHECK-P9-NEXT:    vmrglw v3, v4, v3
-; CHECK-P9-NEXT:    xxmrgld vs0, v3, v2
+; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    xxmrglw vs1, v3, v2
+; CHECK-P9-NEXT:    xxmrgld vs0, vs1, vs0
 ; CHECK-P9-NEXT:    stxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 16(r4)
 ; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_0@toc@ha
-; CHECK-BE-NEXT:    lxv vs1, 0(r4)
-; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs0, 0(r4)
 ; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_0@toc@l
 ; CHECK-BE-NEXT:    lxv v2, 0(r5)
-; CHECK-BE-NEXT:    xxsldwi vs3, vs2, vs2, 3
-; CHECK-BE-NEXT:    xxswapd vs4, vs2
-; CHECK-BE-NEXT:    xscvspdpn f5, vs2
-; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
-; CHECK-BE-NEXT:    xxsldwi vs6, vs1, vs1, 3
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    xscvspdpn f4, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r5, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs0
 ; CHECK-BE-NEXT:    mtvsrwz v3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f4
-; CHECK-BE-NEXT:    xscvdpsxws f4, f5
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f4
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    mtvsrwz v4, r5
 ; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mffprwz r5, f4
-; CHECK-BE-NEXT:    xscvspdpn f4, vs6
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs5
 ; CHECK-BE-NEXT:    mtvsrwz v4, r5
-; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    xscvspdpn f2, vs1
-; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mffprwz r5, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    mtvsrwz v5, r5
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mffprwz r5, f1
 ; CHECK-BE-NEXT:    mtvsrwz v5, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    xxsldwi vs3, vs0, vs0, 3
 ; CHECK-BE-NEXT:    mtvsrwz v0, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    xscvspdpn f2, vs3
-; CHECK-BE-NEXT:    vperm v5, v0, v5, v2
-; CHECK-BE-NEXT:    mtvsrwz v0, r5
-; CHECK-BE-NEXT:    mffprwz r5, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
 ; CHECK-BE-NEXT:    mtvsrwz v1, r5
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    vperm v0, v0, v1, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghw v4, v0, v5
-; CHECK-BE-NEXT:    mffprwz r5, f2
-; CHECK-BE-NEXT:    xscvspdpn f2, vs0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    mffprwz r5, f0
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    vperm v0, v1, v0, v2
 ; CHECK-BE-NEXT:    mtvsrwz v1, r5
+; CHECK-BE-NEXT:    vperm v5, v5, v1, v2
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs3, vs0
+; CHECK-BE-NEXT:    xscvspdpn f4, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r5, f1
 ; CHECK-BE-NEXT:    lxv vs1, 32(r4)
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mtvsrwz v6, r5
-; CHECK-BE-NEXT:    vperm v1, v6, v1, v2
-; CHECK-BE-NEXT:    mffprwz r4, f2
-; CHECK-BE-NEXT:    xxmrghd vs2, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
+; CHECK-BE-NEXT:    mffprwz r4, f4
+; CHECK-BE-NEXT:    mtvsrwz v3, r5
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    xxmrghw vs3, v5, v0
+; CHECK-BE-NEXT:    mtvsrwz v4, r5
+; CHECK-BE-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs1, vs1, 3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
 ; CHECK-BE-NEXT:    stxv vs2, 0(r3)
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xxmrghw vs0, v4, v3
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
 ; CHECK-BE-NEXT:    mtvsrwz v3, r4
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    vperm v3, v5, v3, v2
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vmrghw v3, v3, v1
-; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    xxswapd vs0, vs1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
 ; CHECK-BE-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    xscvspdpn f0, vs1
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
-; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    xxsldwi vs0, vs1, vs1, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    mffprwz r4, f1
 ; CHECK-BE-NEXT:    mtvsrwz v5, r4
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r4
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghd vs0, v2, v3
+; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
+; CHECK-BE-NEXT:    xxmrghw vs1, v2, v3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
index ef6f011169254..5160ad2759918 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
@@ -222,8 +222,8 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-NEXT:    vmrglh v2, v3, v2
 ; CHECK-P8-NEXT:    vmrghb v4, v4, v1
 ; CHECK-P8-NEXT:    vmrglh v3, v4, v5
-; CHECK-P8-NEXT:    vmrglw v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -275,8 +275,8 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P9-NEXT:    mtvsrd v5, r3
 ; CHECK-P9-NEXT:    vmrghb v4, v4, v5
 ; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
@@ -330,8 +330,8 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
 ; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
-; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x float>, <8 x float>* %0, align 32
@@ -438,9 +438,9 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr
 ; CHECK-P8-NEXT:    vmrglh v2, v5, v2
 ; CHECK-P8-NEXT:    vmrglh v4, v1, v0
 ; CHECK-P8-NEXT:    vmrglh v5, v6, v7
-; CHECK-P8-NEXT:    vmrglw v2, v2, v3
-; CHECK-P8-NEXT:    vmrglw v3, v5, v4
-; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
+; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -483,63 +483,63 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr
 ; CHECK-P9-NEXT:    xscvspdpn f3, vs2
 ; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 1
 ; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    vmrghb v3, v4, v3
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    xxsldwi vs3, vs1, vs1, 3
 ; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
 ; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    vmrghb v4, v4, v5
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xxswapd vs2, vs1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xscvspdpn f2, vs1
+; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs1
+; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v2, r3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs1
 ; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghb v3, v4, v3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    vmrghb v2, v3, v2
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f1
 ; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
+; CHECK-P9-NEXT:    vmrghb v3, v3, v4
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
 ; CHECK-P9-NEXT:    mffprwz r3, f1
 ; CHECK-P9-NEXT:    xxswapd vs1, vs0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    mffprwz r3, f1
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs0
 ; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrghb v4, v5, v4
+; CHECK-P9-NEXT:    vmrghb v3, v4, v3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v0, r3
-; CHECK-P9-NEXT:    vmrghb v5, v5, v0
-; CHECK-P9-NEXT:    vmrglh v4, v5, v4
-; CHECK-P9-NEXT:    vmrglw v3, v4, v3
-; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    vmrghb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
@@ -585,63 +585,63 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs2
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
 ; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
 ; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
 ; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vperm v5, v0, v5, v2
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v1, r3
-; CHECK-BE-NEXT:    vperm v2, v0, v1, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v5
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs2
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x float>, <16 x float>* %0, align 64
@@ -862,8 +862,8 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr
 ; CHECK-P8-NEXT:    vmrglh v2, v3, v2
 ; CHECK-P8-NEXT:    vmrghb v4, v4, v1
 ; CHECK-P8-NEXT:    vmrglh v3, v4, v5
-; CHECK-P8-NEXT:    vmrglw v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -915,8 +915,8 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr
 ; CHECK-P9-NEXT:    mtvsrd v5, r3
 ; CHECK-P9-NEXT:    vmrghb v4, v4, v5
 ; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
@@ -970,8 +970,8 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr
 ; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
 ; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
-; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x float>, <8 x float>* %0, align 32
@@ -1078,9 +1078,9 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam
 ; CHECK-P8-NEXT:    vmrglh v2, v5, v2
 ; CHECK-P8-NEXT:    vmrglh v4, v1, v0
 ; CHECK-P8-NEXT:    vmrglh v5, v6, v7
-; CHECK-P8-NEXT:    vmrglw v2, v2, v3
-; CHECK-P8-NEXT:    vmrglw v3, v5, v4
-; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
+; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
@@ -1123,63 +1123,63 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam
 ; CHECK-P9-NEXT:    xscvspdpn f3, vs2
 ; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 1
 ; CHECK-P9-NEXT:    mtvsrd v4, r3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    vmrghb v3, v4, v3
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    xxsldwi vs3, vs1, vs1, 3
 ; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
 ; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    vmrghb v4, v4, v5
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xxswapd vs2, vs1
-; CHECK-P9-NEXT:    mtvsrd v3, r3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xscvspdpn f2, vs1
+; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs1
+; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v2, r3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs1
 ; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghb v3, v4, v3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    vmrghb v2, v3, v2
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f1
 ; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
+; CHECK-P9-NEXT:    vmrghb v3, v3, v4
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
 ; CHECK-P9-NEXT:    mffprwz r3, f1
 ; CHECK-P9-NEXT:    xxswapd vs1, vs0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    mffprwz r3, f1
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs0
 ; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrghb v4, v5, v4
+; CHECK-P9-NEXT:    vmrghb v3, v4, v3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v0, r3
-; CHECK-P9-NEXT:    vmrghb v5, v5, v0
-; CHECK-P9-NEXT:    vmrglh v4, v5, v4
-; CHECK-P9-NEXT:    vmrglw v3, v4, v3
-; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    vmrghb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
@@ -1225,63 +1225,63 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam
 ; CHECK-BE-NEXT:    xscvspdpn f3, vs2
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
 ; CHECK-BE-NEXT:    mtvsrwz v5, r3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
 ; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
 ; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs0
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vperm v5, v0, v5, v2
+; CHECK-BE-NEXT:    vperm v4, v5, v4, v2
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v1, r3
-; CHECK-BE-NEXT:    vperm v2, v0, v1, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v5
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs2
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x float>, <16 x float>* %0, align 64

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
index 93ba5b84dc7f8..d0f75b84f07a3 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
@@ -82,8 +82,8 @@ define i64 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
 ; CHECK-P8-NEXT:    vmrghh v2, v4, v2
 ; CHECK-P8-NEXT:    vmrghh v3, v5, v3
-; CHECK-P8-NEXT:    vmrglw v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -107,8 +107,8 @@ define i64 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P9-NEXT:    mffprwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
@@ -134,8 +134,8 @@ define i64 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
-; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <4 x double>, <4 x double>* %0, align 32
@@ -186,9 +186,9 @@ define <8 x i16> @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr
 ; CHECK-P8-NEXT:    mtvsrd v1, r4
 ; CHECK-P8-NEXT:    vmrghh v4, v0, v4
 ; CHECK-P8-NEXT:    vmrghh v5, v1, v5
-; CHECK-P8-NEXT:    vmrglw v2, v3, v2
-; CHECK-P8-NEXT:    vmrglw v3, v5, v4
-; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
+; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
@@ -209,29 +209,29 @@ define <8 x i16> @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xscvdpsxws f2, f1
-; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v2, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    vmrghh v4, v4, v5
-; CHECK-P9-NEXT:    vmrglw v3, v4, v3
-; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
@@ -255,29 +255,29 @@ define <8 x i16> @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs2
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x double>, <8 x double>* %0, align 64
@@ -363,188 +363,188 @@ define void @test16elt(<16 x i16>* noalias nocapture sret(<16 x i16>) %agg.resul
 ; CHECK-P8-NEXT:    vmrghh v0, v8, v0
 ; CHECK-P8-NEXT:    mtvsrd v8, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
 ; CHECK-P8-NEXT:    vmrghh v1, v9, v1
+; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
 ; CHECK-P8-NEXT:    vmrghh v6, v8, v6
 ; CHECK-P8-NEXT:    vmrghh v7, v9, v7
-; CHECK-P8-NEXT:    vmrglw v2, v3, v2
-; CHECK-P8-NEXT:    vmrglw v3, v5, v4
-; CHECK-P8-NEXT:    vmrglw v4, v1, v0
-; CHECK-P8-NEXT:    vmrglw v5, v7, v6
-; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-P8-NEXT:    xxmrglw vs2, v1, v0
 ; CHECK-P8-NEXT:    stvx v2, 0, r3
-; CHECK-P8-NEXT:    xxmrgld v3, v5, v4
+; CHECK-P8-NEXT:    xxmrglw vs3, v7, v6
+; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
 ; CHECK-P8-NEXT:    stvx v3, r3, r5
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs3, 0(r4)
-; CHECK-P9-NEXT:    lxv vs2, 16(r4)
-; CHECK-P9-NEXT:    lxv vs1, 32(r4)
-; CHECK-P9-NEXT:    lxv vs0, 48(r4)
-; CHECK-P9-NEXT:    xscvdpsxws f4, f3
-; CHECK-P9-NEXT:    xscvdpsxws f5, f2
-; CHECK-P9-NEXT:    xscvdpsxws f6, f1
-; CHECK-P9-NEXT:    xxswapd vs3, vs3
-; CHECK-P9-NEXT:    xscvdpsxws f7, f0
+; CHECK-P9-NEXT:    lxv vs2, 0(r4)
+; CHECK-P9-NEXT:    lxv vs1, 16(r4)
+; CHECK-P9-NEXT:    lxv vs0, 32(r4)
+; CHECK-P9-NEXT:    xscvdpsxws f3, f2
+; CHECK-P9-NEXT:    xscvdpsxws f4, f1
 ; CHECK-P9-NEXT:    xxswapd vs2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f5, f0
 ; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r5, f4
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    lxv vs3, 64(r4)
 ; CHECK-P9-NEXT:    mtvsrd v2, r5
-; CHECK-P9-NEXT:    mffprwz r5, f5
+; CHECK-P9-NEXT:    mffprwz r5, f4
+; CHECK-P9-NEXT:    lxv vs4, 48(r4)
 ; CHECK-P9-NEXT:    mtvsrd v3, r5
-; CHECK-P9-NEXT:    mffprwz r5, f6
+; CHECK-P9-NEXT:    mffprwz r5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f7, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs3
 ; CHECK-P9-NEXT:    mtvsrd v4, r5
-; CHECK-P9-NEXT:    mffprwz r5, f7
-; CHECK-P9-NEXT:    mtvsrd v5, r5
-; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    lxv vs3, 64(r4)
-; CHECK-P9-NEXT:    mtvsrd v0, r5
 ; CHECK-P9-NEXT:    mffprwz r5, f2
 ; CHECK-P9-NEXT:    lxv vs2, 80(r4)
-; CHECK-P9-NEXT:    vmrghh v2, v2, v0
-; CHECK-P9-NEXT:    mtvsrd v0, r5
+; CHECK-P9-NEXT:    xscvdpsxws f5, f4
+; CHECK-P9-NEXT:    xxswapd vs4, vs4
+; CHECK-P9-NEXT:    mtvsrd v5, r5
 ; CHECK-P9-NEXT:    mffprwz r5, f1
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    lxv vs1, 96(r4)
-; CHECK-P9-NEXT:    xscvdpsxws f4, f3
-; CHECK-P9-NEXT:    xxswapd vs3, vs3
-; CHECK-P9-NEXT:    vmrghh v3, v3, v0
-; CHECK-P9-NEXT:    mtvsrd v0, r5
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    vmrghh v2, v2, v5
+; CHECK-P9-NEXT:    mtvsrd v5, r5
 ; CHECK-P9-NEXT:    mffprwz r5, f0
 ; CHECK-P9-NEXT:    lxv vs0, 112(r4)
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    vmrghh v4, v4, v0
-; CHECK-P9-NEXT:    mtvsrd v0, r5
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    vmrghh v5, v5, v0
+; CHECK-P9-NEXT:    vmrghh v3, v3, v5
+; CHECK-P9-NEXT:    mtvsrd v5, r5
+; CHECK-P9-NEXT:    mffprwz r4, f5
+; CHECK-P9-NEXT:    vmrghh v4, v4, v5
+; CHECK-P9-NEXT:    xxmrglw vs6, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v2, r4
 ; CHECK-P9-NEXT:    mffprwz r4, f4
-; CHECK-P9-NEXT:    vmrglw v4, v5, v4
+; CHECK-P9-NEXT:    mtvsrd v3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f7
+; CHECK-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-P9-NEXT:    mtvsrd v3, r4
 ; CHECK-P9-NEXT:    mffprwz r4, f3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f2
 ; CHECK-P9-NEXT:    xxswapd vs2, vs2
-; CHECK-P9-NEXT:    xxmrgld vs4, v4, v2
-; CHECK-P9-NEXT:    mtvsrd v2, r4
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xxmrglw vs4, v2, v4
+; CHECK-P9-NEXT:    mtvsrd v2, r4
 ; CHECK-P9-NEXT:    vmrghh v2, v3, v2
-; CHECK-P9-NEXT:    stxv vs4, 0(r3)
+; CHECK-P9-NEXT:    xxmrgld vs4, vs4, vs6
 ; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    mtvsrd v3, r4
+; CHECK-P9-NEXT:    stxv vs4, 0(r3)
 ; CHECK-P9-NEXT:    mffprwz r4, f2
-; CHECK-P9-NEXT:    xscvdpsxws f2, f1
-; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    mtvsrd v4, r4
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mtvsrd v4, r4
 ; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r4, f2
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v2, r4
 ; CHECK-P9-NEXT:    mffprwz r4, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v4, r4
+; CHECK-P9-NEXT:    mtvsrd v3, r4
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-P9-NEXT:    mffprwz r4, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r4
+; CHECK-P9-NEXT:    mtvsrd v3, r4
 ; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r4
-; CHECK-P9-NEXT:    vmrghh v4, v4, v5
-; CHECK-P9-NEXT:    vmrglw v3, v4, v3
-; CHECK-P9-NEXT:    xxmrgld vs0, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v4, r4
+; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    xxmrgld vs0, vs0, vs2
 ; CHECK-P9-NEXT:    stxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs3, 48(r4)
-; CHECK-BE-NEXT:    lxv vs2, 32(r4)
-; CHECK-BE-NEXT:    lxv vs1, 16(r4)
-; CHECK-BE-NEXT:    lxv vs0, 0(r4)
+; CHECK-BE-NEXT:    lxv vs2, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
 ; CHECK-BE-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
 ; CHECK-BE-NEXT:    addi r5, r5, .LCPI3_0@toc@l
 ; CHECK-BE-NEXT:    lxv v2, 0(r5)
-; CHECK-BE-NEXT:    xscvdpsxws f4, f3
-; CHECK-BE-NEXT:    xscvdpsxws f5, f2
-; CHECK-BE-NEXT:    xscvdpsxws f6, f1
-; CHECK-BE-NEXT:    xxswapd vs3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f7, f0
+; CHECK-BE-NEXT:    xscvdpsxws f3, f2
+; CHECK-BE-NEXT:    xscvdpsxws f4, f1
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f5, f0
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    lxv vs3, 112(r4)
 ; CHECK-BE-NEXT:    mtvsrwz v3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f5
+; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    lxv vs4, 0(r4)
 ; CHECK-BE-NEXT:    mtvsrwz v4, r5
-; CHECK-BE-NEXT:    mffprwz r5, f6
+; CHECK-BE-NEXT:    mffprwz r5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f7, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    mtvsrwz v5, r5
-; CHECK-BE-NEXT:    mffprwz r5, f7
-; CHECK-BE-NEXT:    mtvsrwz v0, r5
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    lxv vs3, 112(r4)
-; CHECK-BE-NEXT:    mtvsrwz v1, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f2
 ; CHECK-BE-NEXT:    lxv vs2, 96(r4)
-; CHECK-BE-NEXT:    vperm v3, v3, v1, v2
-; CHECK-BE-NEXT:    mtvsrwz v1, r5
+; CHECK-BE-NEXT:    xscvdpsxws f5, f4
+; CHECK-BE-NEXT:    xxswapd vs4, vs4
+; CHECK-BE-NEXT:    mtvsrwz v0, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f1
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    lxv vs1, 80(r4)
-; CHECK-BE-NEXT:    xscvdpsxws f4, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs3
-; CHECK-BE-NEXT:    vperm v4, v4, v1, v2
-; CHECK-BE-NEXT:    mtvsrwz v1, r5
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    vperm v3, v3, v0, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f0
 ; CHECK-BE-NEXT:    lxv vs0, 64(r4)
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vperm v5, v5, v1, v2
-; CHECK-BE-NEXT:    mtvsrwz v1, r5
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    vperm v0, v0, v1, v2
+; CHECK-BE-NEXT:    vperm v4, v4, v0, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r5
+; CHECK-BE-NEXT:    mffprwz r4, f5
+; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
+; CHECK-BE-NEXT:    xxmrghw vs6, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v3, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f4
-; CHECK-BE-NEXT:    vmrghw v5, v0, v5
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    mffprwz r4, f7
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mtvsrwz v4, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    xxmrghd vs4, v5, v3
-; CHECK-BE-NEXT:    mtvsrwz v3, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xxmrghw vs4, v3, v5
+; CHECK-BE-NEXT:    mtvsrwz v3, r4
 ; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
-; CHECK-BE-NEXT:    stxv vs4, 0(r3)
+; CHECK-BE-NEXT:    xxmrghd vs4, vs4, vs6
 ; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    stxv vs4, 0(r3)
 ; CHECK-BE-NEXT:    mffprwz r4, f2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
 ; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
-; CHECK-BE-NEXT:    mffprwz r4, f2
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v3, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r4
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghd vs0, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
+; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -627,8 +627,8 @@ define i64 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
 ; CHECK-P8-NEXT:    vmrghh v2, v4, v2
 ; CHECK-P8-NEXT:    vmrghh v3, v5, v3
-; CHECK-P8-NEXT:    vmrglw v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -652,8 +652,8 @@ define i64 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr
 ; CHECK-P9-NEXT:    mffprwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
@@ -679,8 +679,8 @@ define i64 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr
 ; CHECK-BE-NEXT:    mffprwz r3, f0
 ; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
-; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <4 x double>, <4 x double>* %0, align 32
@@ -731,9 +731,9 @@ define <8 x i16> @test8elt_signed(<8 x double>* nocapture readonly) local_unname
 ; CHECK-P8-NEXT:    mtvsrd v1, r4
 ; CHECK-P8-NEXT:    vmrghh v4, v0, v4
 ; CHECK-P8-NEXT:    vmrghh v5, v1, v5
-; CHECK-P8-NEXT:    vmrglw v2, v3, v2
-; CHECK-P8-NEXT:    vmrglw v3, v5, v4
-; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
+; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
@@ -754,29 +754,29 @@ define <8 x i16> @test8elt_signed(<8 x double>* nocapture readonly) local_unname
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    xscvdpsxws f2, f1
-; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    mffprwz r3, f3
+; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v2, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r3
-; CHECK-P9-NEXT:    vmrghh v4, v4, v5
-; CHECK-P9-NEXT:    vmrglw v3, v4, v3
-; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
@@ -800,29 +800,29 @@ define <8 x i16> @test8elt_signed(<8 x double>* nocapture readonly) local_unname
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
-; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    mffprwz r3, f3
+; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs2
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x double>, <8 x double>* %0, align 64
@@ -908,188 +908,188 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret(<16 x i16>) %ag
 ; CHECK-P8-NEXT:    vmrghh v0, v8, v0
 ; CHECK-P8-NEXT:    mtvsrd v8, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
 ; CHECK-P8-NEXT:    vmrghh v1, v9, v1
+; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
 ; CHECK-P8-NEXT:    vmrghh v6, v8, v6
 ; CHECK-P8-NEXT:    vmrghh v7, v9, v7
-; CHECK-P8-NEXT:    vmrglw v2, v3, v2
-; CHECK-P8-NEXT:    vmrglw v3, v5, v4
-; CHECK-P8-NEXT:    vmrglw v4, v1, v0
-; CHECK-P8-NEXT:    vmrglw v5, v7, v6
-; CHECK-P8-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-P8-NEXT:    xxmrglw vs2, v1, v0
 ; CHECK-P8-NEXT:    stvx v2, 0, r3
-; CHECK-P8-NEXT:    xxmrgld v3, v5, v4
+; CHECK-P8-NEXT:    xxmrglw vs3, v7, v6
+; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
 ; CHECK-P8-NEXT:    stvx v3, r3, r5
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs3, 0(r4)
-; CHECK-P9-NEXT:    lxv vs2, 16(r4)
-; CHECK-P9-NEXT:    lxv vs1, 32(r4)
-; CHECK-P9-NEXT:    lxv vs0, 48(r4)
-; CHECK-P9-NEXT:    xscvdpsxws f4, f3
-; CHECK-P9-NEXT:    xscvdpsxws f5, f2
-; CHECK-P9-NEXT:    xscvdpsxws f6, f1
-; CHECK-P9-NEXT:    xxswapd vs3, vs3
-; CHECK-P9-NEXT:    xscvdpsxws f7, f0
+; CHECK-P9-NEXT:    lxv vs2, 0(r4)
+; CHECK-P9-NEXT:    lxv vs1, 16(r4)
+; CHECK-P9-NEXT:    lxv vs0, 32(r4)
+; CHECK-P9-NEXT:    xscvdpsxws f3, f2
+; CHECK-P9-NEXT:    xscvdpsxws f4, f1
 ; CHECK-P9-NEXT:    xxswapd vs2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f5, f0
 ; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r5, f4
+; CHECK-P9-NEXT:    mffprwz r5, f3
+; CHECK-P9-NEXT:    lxv vs3, 64(r4)
 ; CHECK-P9-NEXT:    mtvsrd v2, r5
-; CHECK-P9-NEXT:    mffprwz r5, f5
+; CHECK-P9-NEXT:    mffprwz r5, f4
+; CHECK-P9-NEXT:    lxv vs4, 48(r4)
 ; CHECK-P9-NEXT:    mtvsrd v3, r5
-; CHECK-P9-NEXT:    mffprwz r5, f6
+; CHECK-P9-NEXT:    mffprwz r5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f7, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs3
 ; CHECK-P9-NEXT:    mtvsrd v4, r5
-; CHECK-P9-NEXT:    mffprwz r5, f7
-; CHECK-P9-NEXT:    mtvsrd v5, r5
-; CHECK-P9-NEXT:    mffprwz r5, f3
-; CHECK-P9-NEXT:    lxv vs3, 64(r4)
-; CHECK-P9-NEXT:    mtvsrd v0, r5
 ; CHECK-P9-NEXT:    mffprwz r5, f2
 ; CHECK-P9-NEXT:    lxv vs2, 80(r4)
-; CHECK-P9-NEXT:    vmrghh v2, v2, v0
-; CHECK-P9-NEXT:    mtvsrd v0, r5
+; CHECK-P9-NEXT:    xscvdpsxws f5, f4
+; CHECK-P9-NEXT:    xxswapd vs4, vs4
+; CHECK-P9-NEXT:    mtvsrd v5, r5
 ; CHECK-P9-NEXT:    mffprwz r5, f1
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    lxv vs1, 96(r4)
-; CHECK-P9-NEXT:    xscvdpsxws f4, f3
-; CHECK-P9-NEXT:    xxswapd vs3, vs3
-; CHECK-P9-NEXT:    vmrghh v3, v3, v0
-; CHECK-P9-NEXT:    mtvsrd v0, r5
+; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    vmrghh v2, v2, v5
+; CHECK-P9-NEXT:    mtvsrd v5, r5
 ; CHECK-P9-NEXT:    mffprwz r5, f0
 ; CHECK-P9-NEXT:    lxv vs0, 112(r4)
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    vmrghh v4, v4, v0
-; CHECK-P9-NEXT:    mtvsrd v0, r5
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    vmrghh v5, v5, v0
+; CHECK-P9-NEXT:    vmrghh v3, v3, v5
+; CHECK-P9-NEXT:    mtvsrd v5, r5
+; CHECK-P9-NEXT:    mffprwz r4, f5
+; CHECK-P9-NEXT:    vmrghh v4, v4, v5
+; CHECK-P9-NEXT:    xxmrglw vs6, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v2, r4
 ; CHECK-P9-NEXT:    mffprwz r4, f4
-; CHECK-P9-NEXT:    vmrglw v4, v5, v4
+; CHECK-P9-NEXT:    mtvsrd v3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f7
+; CHECK-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-P9-NEXT:    mtvsrd v3, r4
 ; CHECK-P9-NEXT:    mffprwz r4, f3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f2
 ; CHECK-P9-NEXT:    xxswapd vs2, vs2
-; CHECK-P9-NEXT:    xxmrgld vs4, v4, v2
-; CHECK-P9-NEXT:    mtvsrd v2, r4
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xxmrglw vs4, v2, v4
+; CHECK-P9-NEXT:    mtvsrd v2, r4
 ; CHECK-P9-NEXT:    vmrghh v2, v3, v2
-; CHECK-P9-NEXT:    stxv vs4, 0(r3)
+; CHECK-P9-NEXT:    xxmrgld vs4, vs4, vs6
 ; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    mtvsrd v3, r4
+; CHECK-P9-NEXT:    stxv vs4, 0(r3)
 ; CHECK-P9-NEXT:    mffprwz r4, f2
-; CHECK-P9-NEXT:    xscvdpsxws f2, f1
-; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    mtvsrd v4, r4
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mtvsrd v4, r4
 ; CHECK-P9-NEXT:    vmrghh v3, v3, v4
-; CHECK-P9-NEXT:    mffprwz r4, f2
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r4
+; CHECK-P9-NEXT:    mffprwz r4, f3
+; CHECK-P9-NEXT:    xxmrglw vs2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v2, r4
 ; CHECK-P9-NEXT:    mffprwz r4, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v4, r4
+; CHECK-P9-NEXT:    mtvsrd v3, r4
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-P9-NEXT:    mffprwz r4, f1
-; CHECK-P9-NEXT:    mtvsrd v4, r4
+; CHECK-P9-NEXT:    mtvsrd v3, r4
 ; CHECK-P9-NEXT:    mffprwz r4, f0
-; CHECK-P9-NEXT:    mtvsrd v5, r4
-; CHECK-P9-NEXT:    vmrghh v4, v4, v5
-; CHECK-P9-NEXT:    vmrglw v3, v4, v3
-; CHECK-P9-NEXT:    xxmrgld vs0, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v4, r4
+; CHECK-P9-NEXT:    vmrghh v3, v3, v4
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    xxmrgld vs0, vs0, vs2
 ; CHECK-P9-NEXT:    stxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs3, 48(r4)
-; CHECK-BE-NEXT:    lxv vs2, 32(r4)
-; CHECK-BE-NEXT:    lxv vs1, 16(r4)
-; CHECK-BE-NEXT:    lxv vs0, 0(r4)
+; CHECK-BE-NEXT:    lxv vs2, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
 ; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_0@toc@ha
 ; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_0@toc@l
 ; CHECK-BE-NEXT:    lxv v2, 0(r5)
-; CHECK-BE-NEXT:    xscvdpsxws f4, f3
-; CHECK-BE-NEXT:    xscvdpsxws f5, f2
-; CHECK-BE-NEXT:    xscvdpsxws f6, f1
-; CHECK-BE-NEXT:    xxswapd vs3, vs3
-; CHECK-BE-NEXT:    xscvdpsxws f7, f0
+; CHECK-BE-NEXT:    xscvdpsxws f3, f2
+; CHECK-BE-NEXT:    xscvdpsxws f4, f1
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f5, f0
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    mffprwz r5, f3
+; CHECK-BE-NEXT:    lxv vs3, 112(r4)
 ; CHECK-BE-NEXT:    mtvsrwz v3, r5
-; CHECK-BE-NEXT:    mffprwz r5, f5
+; CHECK-BE-NEXT:    mffprwz r5, f4
+; CHECK-BE-NEXT:    lxv vs4, 0(r4)
 ; CHECK-BE-NEXT:    mtvsrwz v4, r5
-; CHECK-BE-NEXT:    mffprwz r5, f6
+; CHECK-BE-NEXT:    mffprwz r5, f5
+; CHECK-BE-NEXT:    xscvdpsxws f7, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    mtvsrwz v5, r5
-; CHECK-BE-NEXT:    mffprwz r5, f7
-; CHECK-BE-NEXT:    mtvsrwz v0, r5
-; CHECK-BE-NEXT:    mffprwz r5, f3
-; CHECK-BE-NEXT:    lxv vs3, 112(r4)
-; CHECK-BE-NEXT:    mtvsrwz v1, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f2
 ; CHECK-BE-NEXT:    lxv vs2, 96(r4)
-; CHECK-BE-NEXT:    vperm v3, v3, v1, v2
-; CHECK-BE-NEXT:    mtvsrwz v1, r5
+; CHECK-BE-NEXT:    xscvdpsxws f5, f4
+; CHECK-BE-NEXT:    xxswapd vs4, vs4
+; CHECK-BE-NEXT:    mtvsrwz v0, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f1
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    lxv vs1, 80(r4)
-; CHECK-BE-NEXT:    xscvdpsxws f4, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs3
-; CHECK-BE-NEXT:    vperm v4, v4, v1, v2
-; CHECK-BE-NEXT:    mtvsrwz v1, r5
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    vperm v3, v3, v0, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r5
 ; CHECK-BE-NEXT:    mffprwz r5, f0
 ; CHECK-BE-NEXT:    lxv vs0, 64(r4)
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    vperm v5, v5, v1, v2
-; CHECK-BE-NEXT:    mtvsrwz v1, r5
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    vperm v0, v0, v1, v2
+; CHECK-BE-NEXT:    vperm v4, v4, v0, v2
+; CHECK-BE-NEXT:    mtvsrwz v0, r5
+; CHECK-BE-NEXT:    mffprwz r4, f5
+; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
+; CHECK-BE-NEXT:    xxmrghw vs6, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v3, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f4
-; CHECK-BE-NEXT:    vmrghw v5, v0, v5
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    mffprwz r4, f7
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mtvsrwz v4, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    xxmrghd vs4, v5, v3
-; CHECK-BE-NEXT:    mtvsrwz v3, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xxmrghw vs4, v3, v5
+; CHECK-BE-NEXT:    mtvsrwz v3, r4
 ; CHECK-BE-NEXT:    vperm v3, v4, v3, v2
-; CHECK-BE-NEXT:    stxv vs4, 0(r3)
+; CHECK-BE-NEXT:    xxmrghd vs4, vs4, vs6
 ; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    stxv vs4, 0(r3)
 ; CHECK-BE-NEXT:    mffprwz r4, f2
-; CHECK-BE-NEXT:    xscvdpsxws f2, f1
-; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
 ; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
-; CHECK-BE-NEXT:    mffprwz r4, f2
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-NEXT:    mffprwz r4, f3
+; CHECK-BE-NEXT:    xxmrghw vs2, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v3, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r4, f1
-; CHECK-BE-NEXT:    mtvsrwz v5, r4
+; CHECK-BE-NEXT:    mtvsrwz v4, r4
 ; CHECK-BE-NEXT:    mffprwz r4, f0
-; CHECK-BE-NEXT:    mtvsrwz v0, r4
-; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghd vs0, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v5, r4
+; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
index 242e9966827a2..959f3ce289989 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -13,20 +13,20 @@ define i64 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    xscvdpuxws v2, v2
-; CHECK-P8-NEXT:    xscvdpuxws v3, f0
-; CHECK-P8-NEXT:    vmrghw v2, v2, v3
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpuxws f1, v2
+; CHECK-P8-NEXT:    xscvdpuxws f0, f0
+; CHECK-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    xscvdpuxws v3, v2
-; CHECK-P9-NEXT:    xscvdpuxws v2, f0
-; CHECK-P9-NEXT:    vmrghw v2, v3, v2
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xxswapd vs1, v2
+; CHECK-P9-NEXT:    xscvdpuxws f0, v2
+; CHECK-P9-NEXT:    xscvdpuxws f1, f1
+; CHECK-P9-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
@@ -293,20 +293,20 @@ define i64 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    xscvdpsxws v2, v2
-; CHECK-P8-NEXT:    xscvdpsxws v3, f0
-; CHECK-P8-NEXT:    vmrghw v2, v2, v3
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpsxws f1, v2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    xscvdpsxws v3, v2
-; CHECK-P9-NEXT:    xscvdpsxws v2, f0
-; CHECK-P9-NEXT:    vmrghw v2, v3, v2
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xxswapd vs1, v2
+; CHECK-P9-NEXT:    xscvdpsxws f0, v2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
index c43e93f7917e3..e15dbf6d2b61e 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
@@ -197,8 +197,8 @@ define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-NEXT:    vmrghb v5, v1, v5
 ; CHECK-P8-NEXT:    vmrglh v2, v3, v2
 ; CHECK-P8-NEXT:    vmrglh v3, v5, v4
-; CHECK-P8-NEXT:    vmrglw v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -242,8 +242,8 @@ define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P9-NEXT:    mtvsrd v5, r3
 ; CHECK-P9-NEXT:    vmrghb v4, v4, v5
 ; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
@@ -289,8 +289,8 @@ define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
 ; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
-; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x double>, <8 x double>* %0, align 64
@@ -385,9 +385,9 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add
 ; CHECK-P8-NEXT:    vmrglh v5, v1, v0
 ; CHECK-P8-NEXT:    vmrglh v2, v2, v6
 ; CHECK-P8-NEXT:    vmrglh v3, v7, v3
-; CHECK-P8-NEXT:    vmrglw v4, v5, v4
-; CHECK-P8-NEXT:    vmrglw v2, v3, v2
-; CHECK-P8-NEXT:    xxmrgld v2, v2, v4
+; CHECK-P8-NEXT:    xxmrglw vs0, v5, v4
+; CHECK-P8-NEXT:    xxmrglw vs1, v3, v2
+; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -429,48 +429,48 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
 ; CHECK-P9-NEXT:    vmrghb v3, v3, v4
 ; CHECK-P9-NEXT:    mffprwz r3, f5
+; CHECK-P9-NEXT:    xscvdpsxws f5, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs3
 ; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f4
-; CHECK-P9-NEXT:    xscvdpsxws f4, f3
-; CHECK-P9-NEXT:    xxswapd vs3, vs3
-; CHECK-P9-NEXT:    mtvsrd v5, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mtvsrd v5, r3
 ; CHECK-P9-NEXT:    vmrghb v4, v4, v5
+; CHECK-P9-NEXT:    mffprwz r3, f5
 ; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    mffprwz r3, f4
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    xxmrglw vs4, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v2, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f2
 ; CHECK-P9-NEXT:    xxswapd vs2, vs2
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
+; CHECK-P9-NEXT:    vmrghb v2, v2, v3
 ; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f1
 ; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
+; CHECK-P9-NEXT:    vmrghb v3, v3, v4
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
+; CHECK-P9-NEXT:    vmrghb v3, v3, v4
 ; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v0, r3
-; CHECK-P9-NEXT:    vmrghb v5, v5, v0
-; CHECK-P9-NEXT:    vmrglh v4, v5, v4
-; CHECK-P9-NEXT:    vmrglw v3, v4, v3
-; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    vmrghb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs4
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
@@ -515,48 +515,48 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    xscvdpsxws f5, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    xscvdpsxws f4, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs3
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
+; CHECK-BE-NEXT:    mffprwz r3, f5
 ; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xxmrghw vs4, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v1, r3
-; CHECK-BE-NEXT:    vperm v2, v0, v1, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v5
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs4
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x double>, <16 x double>* %0, align 128
@@ -752,8 +752,8 @@ define i64 @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr
 ; CHECK-P8-NEXT:    vmrghb v5, v1, v5
 ; CHECK-P8-NEXT:    vmrglh v2, v3, v2
 ; CHECK-P8-NEXT:    vmrglh v3, v5, v4
-; CHECK-P8-NEXT:    vmrglw v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -797,8 +797,8 @@ define i64 @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr
 ; CHECK-P9-NEXT:    mtvsrd v5, r3
 ; CHECK-P9-NEXT:    vmrghb v4, v4, v5
 ; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
@@ -844,8 +844,8 @@ define i64 @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr
 ; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
 ; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
-; CHECK-BE-NEXT:    mfvsrd r3, v2
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    mffprd r3, f0
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x double>, <8 x double>* %0, align 64
@@ -940,9 +940,9 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unna
 ; CHECK-P8-NEXT:    vmrglh v5, v1, v0
 ; CHECK-P8-NEXT:    vmrglh v2, v2, v6
 ; CHECK-P8-NEXT:    vmrglh v3, v7, v3
-; CHECK-P8-NEXT:    vmrglw v4, v5, v4
-; CHECK-P8-NEXT:    vmrglw v2, v3, v2
-; CHECK-P8-NEXT:    xxmrgld v2, v2, v4
+; CHECK-P8-NEXT:    xxmrglw vs0, v5, v4
+; CHECK-P8-NEXT:    xxmrglw vs1, v3, v2
+; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
@@ -984,48 +984,48 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unna
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
 ; CHECK-P9-NEXT:    vmrghb v3, v3, v4
 ; CHECK-P9-NEXT:    mffprwz r3, f5
+; CHECK-P9-NEXT:    xscvdpsxws f5, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs3
 ; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f4
-; CHECK-P9-NEXT:    xscvdpsxws f4, f3
-; CHECK-P9-NEXT:    xxswapd vs3, vs3
-; CHECK-P9-NEXT:    mtvsrd v5, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mtvsrd v5, r3
 ; CHECK-P9-NEXT:    vmrghb v4, v4, v5
+; CHECK-P9-NEXT:    mffprwz r3, f5
 ; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    mffprwz r3, f4
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mtvsrd v3, r3
+; CHECK-P9-NEXT:    xxmrglw vs4, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v2, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f2
 ; CHECK-P9-NEXT:    xxswapd vs2, vs2
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    vmrghb v3, v3, v4
+; CHECK-P9-NEXT:    vmrghb v2, v2, v3
 ; CHECK-P9-NEXT:    mffprwz r3, f3
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f1
 ; CHECK-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
+; CHECK-P9-NEXT:    vmrghb v3, v3, v4
 ; CHECK-P9-NEXT:    mffprwz r3, f2
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    mtvsrd v4, r3
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v3, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrghb v4, v4, v5
+; CHECK-P9-NEXT:    vmrghb v3, v3, v4
 ; CHECK-P9-NEXT:    mffprwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    mtvsrd v4, r3
 ; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd v0, r3
-; CHECK-P9-NEXT:    vmrghb v5, v5, v0
-; CHECK-P9-NEXT:    vmrglh v4, v5, v4
-; CHECK-P9-NEXT:    vmrglw v3, v4, v3
-; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
+; CHECK-P9-NEXT:    mtvsrd v5, r3
+; CHECK-P9-NEXT:    vmrghb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs4
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
@@ -1070,48 +1070,48 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unna
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f5
+; CHECK-BE-NEXT:    xscvdpsxws f5, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    xscvdpsxws f4, f3
-; CHECK-BE-NEXT:    xxswapd vs3, vs3
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
 ; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
+; CHECK-BE-NEXT:    mffprwz r3, f5
 ; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    mffprwz r3, f4
-; CHECK-BE-NEXT:    vmrghw v3, v4, v3
-; CHECK-BE-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-NEXT:    xxmrghw vs4, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f2
 ; CHECK-BE-NEXT:    xxswapd vs2, vs2
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
+; CHECK-BE-NEXT:    vperm v3, v3, v4, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f3
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f1
 ; CHECK-BE-NEXT:    xxswapd vs1, vs1
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f2
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    mtvsrwz v5, r3
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, vs0
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    vperm v5, v5, v0, v2
+; CHECK-BE-NEXT:    vperm v4, v4, v5, v2
 ; CHECK-BE-NEXT:    mffprwz r3, f1
-; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    mtvsrwz v5, r3
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    mtvsrwz v1, r3
-; CHECK-BE-NEXT:    vperm v2, v0, v1, v2
-; CHECK-BE-NEXT:    vmrghh v2, v2, v5
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    xxmrghd v2, v2, v3
+; CHECK-BE-NEXT:    mtvsrwz v0, r3
+; CHECK-BE-NEXT:    vperm v2, v5, v0, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v4
+; CHECK-BE-NEXT:    xxmrghw vs0, v2, v3
+; CHECK-BE-NEXT:    xxmrghd v2, vs0, vs4
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x double>, <16 x double>* %0, align 128

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
index 023a7ba8a06e3..a6a8d36a400d5 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
@@ -22,10 +22,10 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    mtfprwz f1, r3
 ; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    xscvuxdsp f1, f1
-; CHECK-P8-NEXT:    xscvdpspn v2, f0
-; CHECK-P8-NEXT:    xscvdpspn v3, f1
-; CHECK-P8-NEXT:    vmrghw v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpspn vs0, f0
+; CHECK-P8-NEXT:    xscvdpspn vs1, f1
+; CHECK-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -35,11 +35,11 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    vextractuh v3, v2, 14
 ; CHECK-P9-NEXT:    vextractuh v2, v2, 12
 ; CHECK-P9-NEXT:    xscvuxdsp f0, v3
-; CHECK-P9-NEXT:    xscvdpspn v3, f0
-; CHECK-P9-NEXT:    xscvuxdsp f0, v2
-; CHECK-P9-NEXT:    xscvdpspn v2, f0
-; CHECK-P9-NEXT:    vmrghw v2, v2, v3
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xscvuxdsp f1, v2
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs1, f1
+; CHECK-P9-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
@@ -229,10 +229,10 @@ define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    mtfprwa f1, r3
 ; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    xscvsxdsp f1, f1
-; CHECK-P8-NEXT:    xscvdpspn v2, f0
-; CHECK-P8-NEXT:    xscvdpspn v3, f1
-; CHECK-P8-NEXT:    vmrghw v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpspn vs0, f0
+; CHECK-P8-NEXT:    xscvdpspn vs1, f1
+; CHECK-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -244,11 +244,11 @@ define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    vextsh2d v3, v3
 ; CHECK-P9-NEXT:    vextsh2d v2, v2
 ; CHECK-P9-NEXT:    xscvsxdsp f0, v3
-; CHECK-P9-NEXT:    xscvdpspn v3, f0
-; CHECK-P9-NEXT:    xscvsxdsp f0, v2
-; CHECK-P9-NEXT:    xscvdpspn v2, f0
-; CHECK-P9-NEXT:    vmrghw v2, v2, v3
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xscvsxdsp f1, v2
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs1, f1
+; CHECK-P9-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
index 8a704f6b735e0..c68a0dc7b613f 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
@@ -15,22 +15,22 @@ define i64 @test2elt(<2 x i64> %a) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    xscvuxdsp f1, v2
 ; CHECK-P8-NEXT:    xscvuxdsp f0, f0
-; CHECK-P8-NEXT:    xscvdpspn v3, f1
-; CHECK-P8-NEXT:    xscvdpspn v2, f0
-; CHECK-P8-NEXT:    vmrghw v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpspn vs1, f1
+; CHECK-P8-NEXT:    xscvdpspn vs0, f0
+; CHECK-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
+; CHECK-P9-NEXT:    xscvuxdsp f1, v2
 ; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvdpspn v3, f0
-; CHECK-P9-NEXT:    xscvuxdsp f0, v2
-; CHECK-P9-NEXT:    xscvdpspn v2, f0
-; CHECK-P9-NEXT:    vmrghw v2, v2, v3
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xscvdpspn vs1, f1
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
@@ -301,22 +301,22 @@ define i64 @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    xscvsxdsp f1, v2
 ; CHECK-P8-NEXT:    xscvsxdsp f0, f0
-; CHECK-P8-NEXT:    xscvdpspn v3, f1
-; CHECK-P8-NEXT:    xscvdpspn v2, f0
-; CHECK-P8-NEXT:    vmrghw v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpspn vs1, f1
+; CHECK-P8-NEXT:    xscvdpspn vs0, f0
+; CHECK-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
+; CHECK-P9-NEXT:    xscvsxdsp f1, v2
 ; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvdpspn v3, f0
-; CHECK-P9-NEXT:    xscvsxdsp f0, v2
-; CHECK-P9-NEXT:    xscvdpspn v2, f0
-; CHECK-P9-NEXT:    vmrghw v2, v2, v3
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xscvdpspn vs1, f1
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
index d26f1f2d29d8a..81faab9defbca 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
@@ -22,10 +22,10 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    mtfprwz f1, r3
 ; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    xscvuxdsp f1, f1
-; CHECK-P8-NEXT:    xscvdpspn v2, f0
-; CHECK-P8-NEXT:    xscvdpspn v3, f1
-; CHECK-P8-NEXT:    vmrghw v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpspn vs0, f0
+; CHECK-P8-NEXT:    xscvdpspn vs1, f1
+; CHECK-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -35,11 +35,11 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    vextractub v3, v2, 15
 ; CHECK-P9-NEXT:    vextractub v2, v2, 14
 ; CHECK-P9-NEXT:    xscvuxdsp f0, v3
-; CHECK-P9-NEXT:    xscvdpspn v3, f0
-; CHECK-P9-NEXT:    xscvuxdsp f0, v2
-; CHECK-P9-NEXT:    xscvdpspn v2, f0
-; CHECK-P9-NEXT:    vmrghw v2, v2, v3
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xscvuxdsp f1, v2
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs1, f1
+; CHECK-P9-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
@@ -271,10 +271,10 @@ define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    mtfprwa f1, r3
 ; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    xscvsxdsp f1, f1
-; CHECK-P8-NEXT:    xscvdpspn v2, f0
-; CHECK-P8-NEXT:    xscvdpspn v3, f1
-; CHECK-P8-NEXT:    vmrghw v2, v3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xscvdpspn vs0, f0
+; CHECK-P8-NEXT:    xscvdpspn vs1, f1
+; CHECK-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -286,11 +286,11 @@ define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    vextsh2d v3, v3
 ; CHECK-P9-NEXT:    vextsh2d v2, v2
 ; CHECK-P9-NEXT:    xscvsxdsp f0, v3
-; CHECK-P9-NEXT:    xscvdpspn v3, f0
-; CHECK-P9-NEXT:    xscvsxdsp f0, v2
-; CHECK-P9-NEXT:    xscvdpspn v2, f0
-; CHECK-P9-NEXT:    vmrghw v2, v2, v3
-; CHECK-P9-NEXT:    mfvsrld r3, v2
+; CHECK-P9-NEXT:    xscvsxdsp f1, v2
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs1, f1
+; CHECK-P9-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-P9-NEXT:    mfvsrld r3, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
index 939b18e5384b0..e66851358d1e3 100644
--- a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
@@ -190,10 +190,10 @@ define double @test10(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-BE-LABEL: test10:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
-; CHECK-BE-NEXT:    vmrghw 3, 3, 3
-; CHECK-BE-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
-; CHECK-BE-NEXT:    vmrglw 2, 3, 2
-; CHECK-BE-NEXT:    xsadddp 1, 34, 0
+; CHECK-BE-NEXT:    xxmrghw 0, 35, 35
+; CHECK-BE-NEXT:    lfs 1, .LCPI9_0@toc@l(3)
+; CHECK-BE-NEXT:    xxmrglw 0, 0, 34
+; CHECK-BE-NEXT:    xsadddp 1, 0, 1
 ; CHECK-BE-NEXT:    blr
 entry:
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 2, i32 3, i32 7>

diff  --git a/llvm/test/CodeGen/PowerPC/vec_int_ext.ll b/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
index 06716aaabc486..8e5e5d95fbec4 100644
--- a/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
@@ -315,18 +315,18 @@ define <8 x i16> @testInvalidExtend(<16 x i8> %a) {
 ; CHECK-LE-NEXT:    mtvsrd 36, 6
 ; CHECK-LE-NEXT:    extsb 9, 9
 ; CHECK-LE-NEXT:    extsb 10, 10
-; CHECK-LE-NEXT:    mtvsrd 37, 10
 ; CHECK-LE-NEXT:    vmrghh 2, 3, 2
 ; CHECK-LE-NEXT:    mtvsrd 35, 5
 ; CHECK-LE-NEXT:    vmrghh 3, 4, 3
-; CHECK-LE-NEXT:    mtvsrd 36, 8
-; CHECK-LE-NEXT:    vmrglw 2, 3, 2
-; CHECK-LE-NEXT:    mtvsrd 35, 7
+; CHECK-LE-NEXT:    mtvsrd 36, 10
+; CHECK-LE-NEXT:    xxmrglw 0, 35, 34
+; CHECK-LE-NEXT:    mtvsrd 34, 7
+; CHECK-LE-NEXT:    mtvsrd 35, 8
+; CHECK-LE-NEXT:    vmrghh 2, 3, 2
+; CHECK-LE-NEXT:    mtvsrd 35, 9
 ; CHECK-LE-NEXT:    vmrghh 3, 4, 3
-; CHECK-LE-NEXT:    mtvsrd 36, 9
-; CHECK-LE-NEXT:    vmrghh 4, 5, 4
-; CHECK-LE-NEXT:    vmrglw 3, 4, 3
-; CHECK-LE-NEXT:    xxmrgld 34, 35, 34
+; CHECK-LE-NEXT:    xxmrglw 1, 35, 34
+; CHECK-LE-NEXT:    xxmrgld 34, 1, 0
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testInvalidExtend:
@@ -359,20 +359,20 @@ define <8 x i16> @testInvalidExtend(<16 x i8> %a) {
 ; CHECK-BE-NEXT:    mtvsrwz 34, 10
 ; CHECK-BE-NEXT:    mtvsrwz 37, 7
 ; CHECK-BE-NEXT:    extsb 6, 6
-; CHECK-BE-NEXT:    mtvsrwz 32, 3
 ; CHECK-BE-NEXT:    addi 9, 9, .LCPI11_0@toc@l
 ; CHECK-BE-NEXT:    lxv 36, 0(9)
 ; CHECK-BE-NEXT:    vperm 2, 3, 2, 4
 ; CHECK-BE-NEXT:    mtvsrwz 35, 8
 ; CHECK-BE-NEXT:    vperm 3, 5, 3, 4
-; CHECK-BE-NEXT:    mtvsrwz 37, 5
-; CHECK-BE-NEXT:    vmrghw 2, 3, 2
-; CHECK-BE-NEXT:    mtvsrwz 35, 6
+; CHECK-BE-NEXT:    mtvsrwz 37, 3
+; CHECK-BE-NEXT:    xxmrghw 0, 35, 34
+; CHECK-BE-NEXT:    mtvsrwz 34, 6
+; CHECK-BE-NEXT:    mtvsrwz 35, 5
+; CHECK-BE-NEXT:    vperm 2, 3, 2, 4
+; CHECK-BE-NEXT:    mtvsrwz 35, 4
 ; CHECK-BE-NEXT:    vperm 3, 5, 3, 4
-; CHECK-BE-NEXT:    mtvsrwz 37, 4
-; CHECK-BE-NEXT:    vperm 4, 0, 5, 4
-; CHECK-BE-NEXT:    vmrghw 3, 4, 3
-; CHECK-BE-NEXT:    xxmrghd 34, 35, 34
+; CHECK-BE-NEXT:    xxmrghw 1, 35, 34
+; CHECK-BE-NEXT:    xxmrghd 34, 1, 0
 ; CHECK-BE-NEXT:    blr
 entry:
 

diff  --git a/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll b/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll
index 134f0449470a8..56ed28ab56cc2 100644
--- a/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll
@@ -14,11 +14,11 @@ define void @VPKUDUM_unary(<2 x i64>* %A) {
 ;
 ; CHECK-PWR7-LABEL: VPKUDUM_unary:
 ; CHECK-PWR7:       # %bb.0: # %entry
-; CHECK-PWR7-NEXT:    lxvw4x 34, 0, 3
-; CHECK-PWR7-NEXT:    vmrglw 3, 2, 2
-; CHECK-PWR7-NEXT:    vmrghw 2, 2, 2
-; CHECK-PWR7-NEXT:    vmrglw 2, 2, 3
-; CHECK-PWR7-NEXT:    stxvw4x 34, 0, 3
+; CHECK-PWR7-NEXT:    lxvw4x 0, 0, 3
+; CHECK-PWR7-NEXT:    xxmrglw 1, 0, 0
+; CHECK-PWR7-NEXT:    xxmrghw 0, 0, 0
+; CHECK-PWR7-NEXT:    xxmrglw 0, 0, 1
+; CHECK-PWR7-NEXT:    stxvw4x 0, 0, 3
 ; CHECK-PWR7-NEXT:    blr
 entry:
         %tmp = load <2 x i64>, <2 x i64>* %A
@@ -45,12 +45,12 @@ define void @VPKUDUM(<2 x i64>* %A, <2 x i64>* %B) {
 ;
 ; CHECK-PWR7-LABEL: VPKUDUM:
 ; CHECK-PWR7:       # %bb.0: # %entry
-; CHECK-PWR7-NEXT:    lxvw4x 34, 0, 3
-; CHECK-PWR7-NEXT:    lxvw4x 35, 0, 4
-; CHECK-PWR7-NEXT:    vmrglw 4, 2, 3
-; CHECK-PWR7-NEXT:    vmrghw 2, 2, 3
-; CHECK-PWR7-NEXT:    vmrglw 2, 2, 4
-; CHECK-PWR7-NEXT:    stxvw4x 34, 0, 3
+; CHECK-PWR7-NEXT:    lxvw4x 0, 0, 3
+; CHECK-PWR7-NEXT:    lxvw4x 1, 0, 4
+; CHECK-PWR7-NEXT:    xxmrglw 2, 0, 1
+; CHECK-PWR7-NEXT:    xxmrghw 0, 0, 1
+; CHECK-PWR7-NEXT:    xxmrglw 0, 0, 2
+; CHECK-PWR7-NEXT:    stxvw4x 0, 0, 3
 ; CHECK-PWR7-NEXT:    blr
 entry:
         %tmp = load <2 x i64>, <2 x i64>* %A

diff  --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index ce195b734cca5..96eebccf13b1c 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -56,16 +56,16 @@ define <3 x float> @constrained_vector_fdiv_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    xscvspdpn 3, 3
+; PC64LE-NEXT:    xscvspdpn 4, 4
+; PC64LE-NEXT:    xscvspdpn 5, 5
 ; PC64LE-NEXT:    xsdivsp 1, 2, 1
-; PC64LE-NEXT:    xscvspdpn 2, 4
 ; PC64LE-NEXT:    xsdivsp 0, 3, 0
-; PC64LE-NEXT:    xscvspdpn 3, 5
-; PC64LE-NEXT:    xsdivsp 2, 3, 2
-; PC64LE-NEXT:    xscvdpspn 34, 1
-; PC64LE-NEXT:    xscvdpspn 35, 0
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xscvdpspn 35, 2
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xsdivsp 2, 5, 4
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fdiv_v3f32:
@@ -88,10 +88,10 @@ define <3 x float> @constrained_vector_fdiv_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xxsldwi 2, 35, 35, 3
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xsdivsp 2, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 1
-; PC64LE9-NEXT:    xscvdpspn 34, 2
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    xscvdpspn 35, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxmrghw 34, 1, 2
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    blr
 entry:
@@ -307,20 +307,20 @@ define <3 x float> @constrained_vector_frem_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    bl fmodf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 34, 30
+; PC64LE-NEXT:    xscvdpspn 0, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 80(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI7_0@toc@l
+; PC64LE-NEXT:    xscvdpspn 34, 31
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xscvdpspn 35, 31
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -357,17 +357,17 @@ define <3 x float> @constrained_vector_frem_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xscvspdpn 2, 0
 ; PC64LE9-NEXT:    bl fmodf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 34, 1
-; PC64LE9-NEXT:    xscvdpspn 35, 30
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
+; PC64LE9-NEXT:    xscvdpspn 35, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI7_0@toc@l
+; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 36, 0(3)
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 31
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -638,29 +638,29 @@ entry:
 define <3 x float> @constrained_vector_fmul_v3f32(<3 x float> %x, <3 x float> %y) #0 {
 ; PC64LE-LABEL: constrained_vector_fmul_v3f32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxsldwi 0, 35, 35, 3
-; PC64LE-NEXT:    xxsldwi 1, 34, 34, 3
+; PC64LE-NEXT:    xxswapd 0, 35
+; PC64LE-NEXT:    xxsldwi 1, 35, 35, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
-; PC64LE-NEXT:    xxswapd 2, 35
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 3
 ; PC64LE-NEXT:    xxswapd 3, 34
 ; PC64LE-NEXT:    addi 3, 3, .LCPI12_0@toc@l
 ; PC64LE-NEXT:    xxsldwi 4, 35, 35, 1
+; PC64LE-NEXT:    xxsldwi 5, 34, 34, 1
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    xscvspdpn 3, 3
 ; PC64LE-NEXT:    xscvspdpn 4, 4
-; PC64LE-NEXT:    xsmulsp 0, 1, 0
-; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
-; PC64LE-NEXT:    xsmulsp 2, 3, 2
-; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 2
-; PC64LE-NEXT:    xsmulsp 0, 1, 4
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xscvdpspn 35, 0
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xscvspdpn 5, 5
+; PC64LE-NEXT:    xsmulsp 1, 2, 1
+; PC64LE-NEXT:    xsmulsp 0, 3, 0
+; PC64LE-NEXT:    xsmulsp 2, 5, 4
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fmul_v3f32:
@@ -681,12 +681,12 @@ define <3 x float> @constrained_vector_fmul_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xsmulsp 1, 2, 1
 ; PC64LE9-NEXT:    xxsldwi 2, 35, 35, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 0
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xsmulsp 2, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 1
-; PC64LE9-NEXT:    xscvdpspn 34, 2
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxmrghw 34, 1, 2
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    blr
 entry:
@@ -799,29 +799,29 @@ entry:
 define <3 x float> @constrained_vector_fadd_v3f32(<3 x float> %x, <3 x float> %y) #0 {
 ; PC64LE-LABEL: constrained_vector_fadd_v3f32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxsldwi 0, 35, 35, 3
-; PC64LE-NEXT:    xxsldwi 1, 34, 34, 3
+; PC64LE-NEXT:    xxswapd 0, 35
+; PC64LE-NEXT:    xxsldwi 1, 35, 35, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI17_0@toc@ha
-; PC64LE-NEXT:    xxswapd 2, 35
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 3
 ; PC64LE-NEXT:    xxswapd 3, 34
 ; PC64LE-NEXT:    addi 3, 3, .LCPI17_0@toc@l
 ; PC64LE-NEXT:    xxsldwi 4, 35, 35, 1
+; PC64LE-NEXT:    xxsldwi 5, 34, 34, 1
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    xscvspdpn 3, 3
 ; PC64LE-NEXT:    xscvspdpn 4, 4
-; PC64LE-NEXT:    xsaddsp 0, 1, 0
-; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
-; PC64LE-NEXT:    xsaddsp 2, 3, 2
-; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 2
-; PC64LE-NEXT:    xsaddsp 0, 1, 4
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xscvdpspn 35, 0
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xscvspdpn 5, 5
+; PC64LE-NEXT:    xsaddsp 1, 2, 1
+; PC64LE-NEXT:    xsaddsp 0, 3, 0
+; PC64LE-NEXT:    xsaddsp 2, 5, 4
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fadd_v3f32:
@@ -842,12 +842,12 @@ define <3 x float> @constrained_vector_fadd_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xsaddsp 1, 2, 1
 ; PC64LE9-NEXT:    xxsldwi 2, 35, 35, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 0
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xsaddsp 2, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 1
-; PC64LE9-NEXT:    xscvdpspn 34, 2
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxmrghw 34, 1, 2
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    blr
 entry:
@@ -960,29 +960,29 @@ entry:
 define <3 x float> @constrained_vector_fsub_v3f32(<3 x float> %x, <3 x float> %y) #0 {
 ; PC64LE-LABEL: constrained_vector_fsub_v3f32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxsldwi 0, 35, 35, 3
-; PC64LE-NEXT:    xxsldwi 1, 34, 34, 3
+; PC64LE-NEXT:    xxswapd 0, 35
+; PC64LE-NEXT:    xxsldwi 1, 35, 35, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI22_0@toc@ha
-; PC64LE-NEXT:    xxswapd 2, 35
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 3
 ; PC64LE-NEXT:    xxswapd 3, 34
 ; PC64LE-NEXT:    addi 3, 3, .LCPI22_0@toc@l
 ; PC64LE-NEXT:    xxsldwi 4, 35, 35, 1
+; PC64LE-NEXT:    xxsldwi 5, 34, 34, 1
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    xscvspdpn 3, 3
 ; PC64LE-NEXT:    xscvspdpn 4, 4
-; PC64LE-NEXT:    xssubsp 0, 1, 0
-; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
-; PC64LE-NEXT:    xssubsp 2, 3, 2
-; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 2
-; PC64LE-NEXT:    xssubsp 0, 1, 4
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xscvdpspn 35, 0
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xscvspdpn 5, 5
+; PC64LE-NEXT:    xssubsp 1, 2, 1
+; PC64LE-NEXT:    xssubsp 0, 3, 0
+; PC64LE-NEXT:    xssubsp 2, 5, 4
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fsub_v3f32:
@@ -1003,12 +1003,12 @@ define <3 x float> @constrained_vector_fsub_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xssubsp 1, 2, 1
 ; PC64LE9-NEXT:    xxsldwi 2, 35, 35, 3
+; PC64LE9-NEXT:    xscvdpspn 35, 0
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xssubsp 2, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 1
-; PC64LE9-NEXT:    xscvdpspn 34, 2
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxmrghw 34, 1, 2
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    blr
 entry:
@@ -1131,11 +1131,11 @@ define <3 x float> @constrained_vector_sqrt_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xssqrtsp 0, 0
 ; PC64LE-NEXT:    xssqrtsp 1, 1
 ; PC64LE-NEXT:    xssqrtsp 2, 2
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 1
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xscvdpspn 35, 2
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sqrt_v3f32:
@@ -1148,14 +1148,14 @@ define <3 x float> @constrained_vector_sqrt_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xssqrtsp 1, 1
 ; PC64LE9-NEXT:    xssqrtsp 2, 2
 ; PC64LE9-NEXT:    xssqrtsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 34, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xscvdpspn 36, 0
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xxmrghw 34, 1, 2
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -1362,20 +1362,20 @@ define <3 x float> @constrained_vector_pow_v3f32(<3 x float> %x, <3 x float> %y)
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    bl powf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 34, 30
+; PC64LE-NEXT:    xscvdpspn 0, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI32_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 80(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI32_0@toc@l
+; PC64LE-NEXT:    xscvdpspn 34, 31
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xscvdpspn 35, 31
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -1412,17 +1412,17 @@ define <3 x float> @constrained_vector_pow_v3f32(<3 x float> %x, <3 x float> %y)
 ; PC64LE9-NEXT:    xscvspdpn 2, 0
 ; PC64LE9-NEXT:    bl powf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 34, 1
-; PC64LE9-NEXT:    xscvdpspn 35, 30
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI32_0@toc@ha
+; PC64LE9-NEXT:    xscvdpspn 35, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI32_0@toc@l
+; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 36, 0(3)
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 31
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -1793,19 +1793,19 @@ define <3 x float> @constrained_vector_powi_v3f32(<3 x float> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl __powisf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 34, 30
+; PC64LE-NEXT:    xscvdpspn 0, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI37_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 80(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    ld 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI37_0@toc@l
+; PC64LE-NEXT:    xscvdpspn 34, 31
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xscvdpspn 35, 31
 ; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -1839,17 +1839,17 @@ define <3 x float> @constrained_vector_powi_v3f32(<3 x float> %x, i32 %y) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl __powisf2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 34, 1
-; PC64LE9-NEXT:    xscvdpspn 35, 30
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI37_0@toc@ha
+; PC64LE9-NEXT:    xscvdpspn 35, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    ld 30, 48(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI37_0@toc@l
+; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 36, 0(3)
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 31
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -2179,18 +2179,18 @@ define <3 x float> @constrained_vector_sin_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl sinf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 34, 30
+; PC64LE-NEXT:    xscvdpspn 0, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI42_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI42_0@toc@l
+; PC64LE-NEXT:    xscvdpspn 34, 31
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xscvdpspn 35, 31
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -2219,16 +2219,16 @@ define <3 x float> @constrained_vector_sin_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl sinf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 34, 1
-; PC64LE9-NEXT:    xscvdpspn 35, 30
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI42_0@toc@ha
 ; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI42_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -2529,18 +2529,18 @@ define <3 x float> @constrained_vector_cos_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl cosf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 34, 30
+; PC64LE-NEXT:    xscvdpspn 0, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI47_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI47_0@toc@l
+; PC64LE-NEXT:    xscvdpspn 34, 31
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xscvdpspn 35, 31
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -2569,16 +2569,16 @@ define <3 x float> @constrained_vector_cos_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl cosf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 34, 1
-; PC64LE9-NEXT:    xscvdpspn 35, 30
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI47_0@toc@ha
 ; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI47_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -2879,18 +2879,18 @@ define <3 x float> @constrained_vector_exp_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl expf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 34, 30
+; PC64LE-NEXT:    xscvdpspn 0, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI52_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI52_0@toc@l
+; PC64LE-NEXT:    xscvdpspn 34, 31
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xscvdpspn 35, 31
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -2919,16 +2919,16 @@ define <3 x float> @constrained_vector_exp_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl expf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 34, 1
-; PC64LE9-NEXT:    xscvdpspn 35, 30
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI52_0@toc@ha
 ; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI52_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -3229,18 +3229,18 @@ define <3 x float> @constrained_vector_exp2_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl exp2f
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 34, 30
+; PC64LE-NEXT:    xscvdpspn 0, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI57_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI57_0@toc@l
+; PC64LE-NEXT:    xscvdpspn 34, 31
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xscvdpspn 35, 31
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -3269,16 +3269,16 @@ define <3 x float> @constrained_vector_exp2_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl exp2f
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 34, 1
-; PC64LE9-NEXT:    xscvdpspn 35, 30
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI57_0@toc@ha
 ; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI57_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -3579,18 +3579,18 @@ define <3 x float> @constrained_vector_log_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl logf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 34, 30
+; PC64LE-NEXT:    xscvdpspn 0, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI62_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI62_0@toc@l
+; PC64LE-NEXT:    xscvdpspn 34, 31
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xscvdpspn 35, 31
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -3619,16 +3619,16 @@ define <3 x float> @constrained_vector_log_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl logf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 34, 1
-; PC64LE9-NEXT:    xscvdpspn 35, 30
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI62_0@toc@ha
 ; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI62_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -3929,18 +3929,18 @@ define <3 x float> @constrained_vector_log10_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl log10f
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 34, 30
+; PC64LE-NEXT:    xscvdpspn 0, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI67_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI67_0@toc@l
+; PC64LE-NEXT:    xscvdpspn 34, 31
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xscvdpspn 35, 31
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -3969,16 +3969,16 @@ define <3 x float> @constrained_vector_log10_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl log10f
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 34, 1
-; PC64LE9-NEXT:    xscvdpspn 35, 30
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI67_0@toc@ha
 ; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI67_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -4279,18 +4279,18 @@ define <3 x float> @constrained_vector_log2_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl log2f
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 34, 30
+; PC64LE-NEXT:    xscvdpspn 0, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI72_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI72_0@toc@l
+; PC64LE-NEXT:    xscvdpspn 34, 31
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xscvdpspn 35, 31
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -4319,16 +4319,16 @@ define <3 x float> @constrained_vector_log2_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl log2f
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 34, 1
-; PC64LE9-NEXT:    xscvdpspn 35, 30
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI72_0@toc@ha
 ; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI72_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -4560,12 +4560,12 @@ define <3 x float> @constrained_vector_rint_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    xsrdpic 0, 0
 ; PC64LE-NEXT:    xsrdpic 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 1
-; PC64LE-NEXT:    xsrdpic 0, 2
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xscvdpspn 35, 0
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xsrdpic 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_rint_v3f32:
@@ -4578,14 +4578,14 @@ define <3 x float> @constrained_vector_rint_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xsrdpic 1, 1
 ; PC64LE9-NEXT:    xsrdpic 2, 2
 ; PC64LE9-NEXT:    xsrdpic 0, 0
-; PC64LE9-NEXT:    xscvdpspn 34, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xscvdpspn 36, 0
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xxmrghw 34, 1, 2
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr
  entry:
@@ -4767,18 +4767,18 @@ define <3 x float> @constrained_vector_nearbyint_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl nearbyintf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 34, 30
+; PC64LE-NEXT:    xscvdpspn 0, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI82_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI82_0@toc@l
+; PC64LE-NEXT:    xscvdpspn 34, 31
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xscvdpspn 35, 31
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -4807,16 +4807,16 @@ define <3 x float> @constrained_vector_nearbyint_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl nearbyintf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 34, 1
-; PC64LE9-NEXT:    xscvdpspn 35, 30
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI82_0@toc@ha
 ; PC64LE9-NEXT:    xscvdpspn 36, 31
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI82_0@toc@l
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -5081,20 +5081,20 @@ define <3 x float> @constrained_vector_maxnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    bl fmaxf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 34, 30
+; PC64LE-NEXT:    xscvdpspn 0, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI87_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 80(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI87_0@toc@l
+; PC64LE-NEXT:    xscvdpspn 34, 31
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xscvdpspn 35, 31
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -5131,17 +5131,17 @@ define <3 x float> @constrained_vector_maxnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE9-NEXT:    xscvspdpn 2, 0
 ; PC64LE9-NEXT:    bl fmaxf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 34, 1
-; PC64LE9-NEXT:    xscvdpspn 35, 30
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI87_0@toc@ha
+; PC64LE9-NEXT:    xscvdpspn 35, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI87_0@toc@l
+; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 36, 0(3)
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 31
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -5322,20 +5322,20 @@ define <3 x float> @constrained_vector_minnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    bl fminf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 34, 30
+; PC64LE-NEXT:    xscvdpspn 0, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI92_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 80(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 35, 1
+; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI92_0@toc@l
+; PC64LE-NEXT:    xscvdpspn 34, 31
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    vmrghw 2, 2, 3
-; PC64LE-NEXT:    xscvdpspn 35, 31
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xxmrghw 35, 0, 1
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -5372,17 +5372,17 @@ define <3 x float> @constrained_vector_minnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE9-NEXT:    xscvspdpn 2, 0
 ; PC64LE9-NEXT:    bl fminf
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 34, 1
-; PC64LE9-NEXT:    xscvdpspn 35, 30
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 30
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI92_0@toc@ha
+; PC64LE9-NEXT:    xscvdpspn 35, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI92_0@toc@l
+; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 36, 0(3)
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 31
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    vperm 2, 3, 2, 4
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -5510,24 +5510,24 @@ define <2 x i32> @constrained_vector_fptosi_v2i32_v2f32(<2 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvdpsxws 1, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
 ; PC64LE-NEXT:    mffprwz 4, 1
-; PC64LE-NEXT:    mtvsrwz 34, 3
-; PC64LE-NEXT:    mtvsrwz 35, 4
-; PC64LE-NEXT:    vmrghw 2, 3, 2
+; PC64LE-NEXT:    mtfprwz 0, 3
+; PC64LE-NEXT:    mtfprwz 1, 4
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptosi_v2i32_v2f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 3
+; PC64LE9-NEXT:    xxswapd 1, 34
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvdpsxws 0, 0
+; PC64LE9-NEXT:    xscvdpsxws 1, 1
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    xxswapd 0, 34
-; PC64LE9-NEXT:    mtvsrwz 35, 3
-; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    xscvdpsxws 0, 0
-; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrwz 34, 3
-; PC64LE9-NEXT:    vmrghw 2, 2, 3
+; PC64LE9-NEXT:    mtfprwz 0, 3
+; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    mtfprwz 1, 3
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(
@@ -5544,6 +5544,7 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
 ; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI97_0@toc@l
+; PC64LE-NEXT:    lvx 2, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
@@ -5552,34 +5553,33 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvdpsxws 2, 2
 ; PC64LE-NEXT:    mffprwz 4, 0
 ; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtvsrwz 34, 4
-; PC64LE-NEXT:    mtvsrwz 35, 5
+; PC64LE-NEXT:    mtfprwz 0, 4
+; PC64LE-NEXT:    mtfprwz 1, 5
 ; PC64LE-NEXT:    mffprwz 4, 2
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE-NEXT:    mtvsrwz 36, 4
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    vperm 2, 4, 3, 2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 3
+; PC64LE9-NEXT:    xxswapd 1, 34
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvdpsxws 0, 0
+; PC64LE9-NEXT:    xscvdpsxws 1, 1
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    xxswapd 0, 34
-; PC64LE9-NEXT:    mtvsrwz 35, 3
-; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    xscvdpsxws 0, 0
-; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
-; PC64LE9-NEXT:    mtvsrwz 36, 3
+; PC64LE9-NEXT:    mtfprwz 0, 3
+; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    mtfprwz 1, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
-; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI97_0@toc@l
-; PC64LE9-NEXT:    vmrghw 3, 4, 3
-; PC64LE9-NEXT:    xscvdpsxws 0, 0
 ; PC64LE9-NEXT:    lxv 36, 0(3)
+; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    xscvdpsxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
 ; PC64LE9-NEXT:    mtvsrwz 34, 3
 ; PC64LE9-NEXT:    vperm 2, 2, 3, 4
@@ -5782,22 +5782,22 @@ define <2 x i32> @constrained_vector_fptosi_v2i32_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    xscvdpsxws 1, 34
 ; PC64LE-NEXT:    xscvdpsxws 0, 0
 ; PC64LE-NEXT:    mffprwz 3, 1
-; PC64LE-NEXT:    mtvsrwz 34, 3
 ; PC64LE-NEXT:    mffprwz 4, 0
-; PC64LE-NEXT:    mtvsrwz 35, 4
-; PC64LE-NEXT:    vmrghw 2, 2, 3
+; PC64LE-NEXT:    mtfprwz 0, 3
+; PC64LE-NEXT:    mtfprwz 1, 4
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptosi_v2i32_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    xxswapd 1, 34
 ; PC64LE9-NEXT:    xscvdpsxws 0, 34
+; PC64LE9-NEXT:    xscvdpsxws 1, 1
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    xxswapd 0, 34
-; PC64LE9-NEXT:    mtvsrwz 35, 3
-; PC64LE9-NEXT:    xscvdpsxws 0, 0
-; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrwz 34, 3
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
+; PC64LE9-NEXT:    mtfprwz 0, 3
+; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    mtfprwz 1, 3
+; PC64LE9-NEXT:    xxmrghw 34, 0, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(
@@ -5814,29 +5814,29 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    xscvdpsxws 1, 2
 ; PC64LE-NEXT:    addi 3, 3, .LCPI105_0@toc@l
 ; PC64LE-NEXT:    xscvdpsxws 2, 3
+; PC64LE-NEXT:    lvx 2, 0, 3
 ; PC64LE-NEXT:    mffprwz 4, 0
 ; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtvsrwz 34, 4
-; PC64LE-NEXT:    mtvsrwz 35, 5
+; PC64LE-NEXT:    mtfprwz 0, 4
+; PC64LE-NEXT:    mtfprwz 1, 5
 ; PC64LE-NEXT:    mffprwz 4, 2
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE-NEXT:    mtvsrwz 36, 4
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    vperm 2, 4, 3, 2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    xscvdpsxws 0, 1
+; PC64LE9-NEXT:    xscvdpsxws 1, 2
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    xscvdpsxws 0, 2
-; PC64LE9-NEXT:    mtvsrwz 34, 3
-; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    xscvdpsxws 0, 3
-; PC64LE9-NEXT:    mtvsrwz 35, 3
+; PC64LE9-NEXT:    mtfprwz 0, 3
+; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    mtfprwz 1, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
+; PC64LE9-NEXT:    xscvdpsxws 0, 3
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI105_0@toc@l
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    mffprwz 3, 0
 ; PC64LE9-NEXT:    mtvsrwz 36, 3
@@ -6004,24 +6004,24 @@ define <2 x i32> @constrained_vector_fptoui_v2i32_v2f32(<2 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvdpuxws 1, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
 ; PC64LE-NEXT:    mffprwz 4, 1
-; PC64LE-NEXT:    mtvsrwz 34, 3
-; PC64LE-NEXT:    mtvsrwz 35, 4
-; PC64LE-NEXT:    vmrghw 2, 3, 2
+; PC64LE-NEXT:    mtfprwz 0, 3
+; PC64LE-NEXT:    mtfprwz 1, 4
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptoui_v2i32_v2f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 3
+; PC64LE9-NEXT:    xxswapd 1, 34
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvdpuxws 0, 0
+; PC64LE9-NEXT:    xscvdpuxws 1, 1
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    xxswapd 0, 34
-; PC64LE9-NEXT:    mtvsrwz 35, 3
-; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    xscvdpuxws 0, 0
-; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrwz 34, 3
-; PC64LE9-NEXT:    vmrghw 2, 2, 3
+; PC64LE9-NEXT:    mtfprwz 0, 3
+; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    mtfprwz 1, 3
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(
@@ -6038,6 +6038,7 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
 ; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI113_0@toc@l
+; PC64LE-NEXT:    lvx 2, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
 ; PC64LE-NEXT:    xscvspdpn 2, 2
@@ -6046,34 +6047,33 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvdpuxws 2, 2
 ; PC64LE-NEXT:    mffprwz 4, 0
 ; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtvsrwz 34, 4
-; PC64LE-NEXT:    mtvsrwz 35, 5
+; PC64LE-NEXT:    mtfprwz 0, 4
+; PC64LE-NEXT:    mtfprwz 1, 5
 ; PC64LE-NEXT:    mffprwz 4, 2
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE-NEXT:    mtvsrwz 36, 4
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    vperm 2, 4, 3, 2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 3
+; PC64LE9-NEXT:    xxswapd 1, 34
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvdpuxws 0, 0
+; PC64LE9-NEXT:    xscvdpuxws 1, 1
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    xxswapd 0, 34
-; PC64LE9-NEXT:    mtvsrwz 35, 3
-; PC64LE9-NEXT:    xscvspdpn 0, 0
-; PC64LE9-NEXT:    xscvdpuxws 0, 0
-; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
-; PC64LE9-NEXT:    mtvsrwz 36, 3
+; PC64LE9-NEXT:    mtfprwz 0, 3
+; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    mtfprwz 1, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
+; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI113_0@toc@l
-; PC64LE9-NEXT:    vmrghw 3, 4, 3
-; PC64LE9-NEXT:    xscvdpuxws 0, 0
 ; PC64LE9-NEXT:    lxv 36, 0(3)
+; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    xscvdpuxws 0, 0
 ; PC64LE9-NEXT:    mffprwz 3, 0
 ; PC64LE9-NEXT:    mtvsrwz 34, 3
 ; PC64LE9-NEXT:    vperm 2, 2, 3, 4
@@ -6275,22 +6275,22 @@ define <2 x i32> @constrained_vector_fptoui_v2i32_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    xscvdpuxws 1, 34
 ; PC64LE-NEXT:    xscvdpuxws 0, 0
 ; PC64LE-NEXT:    mffprwz 3, 1
-; PC64LE-NEXT:    mtvsrwz 34, 3
 ; PC64LE-NEXT:    mffprwz 4, 0
-; PC64LE-NEXT:    mtvsrwz 35, 4
-; PC64LE-NEXT:    vmrghw 2, 2, 3
+; PC64LE-NEXT:    mtfprwz 0, 3
+; PC64LE-NEXT:    mtfprwz 1, 4
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptoui_v2i32_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    xxswapd 1, 34
 ; PC64LE9-NEXT:    xscvdpuxws 0, 34
+; PC64LE9-NEXT:    xscvdpuxws 1, 1
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    xxswapd 0, 34
-; PC64LE9-NEXT:    mtvsrwz 35, 3
-; PC64LE9-NEXT:    xscvdpuxws 0, 0
-; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    mtvsrwz 34, 3
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
+; PC64LE9-NEXT:    mtfprwz 0, 3
+; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    mtfprwz 1, 3
+; PC64LE9-NEXT:    xxmrghw 34, 0, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(
@@ -6307,29 +6307,29 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    xscvdpuxws 1, 2
 ; PC64LE-NEXT:    addi 3, 3, .LCPI121_0@toc@l
 ; PC64LE-NEXT:    xscvdpuxws 2, 3
+; PC64LE-NEXT:    lvx 2, 0, 3
 ; PC64LE-NEXT:    mffprwz 4, 0
 ; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtvsrwz 34, 4
-; PC64LE-NEXT:    mtvsrwz 35, 5
+; PC64LE-NEXT:    mtfprwz 0, 4
+; PC64LE-NEXT:    mtfprwz 1, 5
 ; PC64LE-NEXT:    mffprwz 4, 2
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE-NEXT:    mtvsrwz 36, 4
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    vperm 2, 4, 3, 2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    xscvdpuxws 0, 1
+; PC64LE9-NEXT:    xscvdpuxws 1, 2
 ; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    xscvdpuxws 0, 2
-; PC64LE9-NEXT:    mtvsrwz 34, 3
-; PC64LE9-NEXT:    mffprwz 3, 0
-; PC64LE9-NEXT:    xscvdpuxws 0, 3
-; PC64LE9-NEXT:    mtvsrwz 35, 3
+; PC64LE9-NEXT:    mtfprwz 0, 3
+; PC64LE9-NEXT:    mffprwz 3, 1
+; PC64LE9-NEXT:    mtfprwz 1, 3
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI121_0@toc@ha
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
+; PC64LE9-NEXT:    xscvdpuxws 0, 3
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI121_0@toc@l
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
 ; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    mffprwz 3, 0
 ; PC64LE9-NEXT:    mtvsrwz 36, 3
@@ -6491,19 +6491,19 @@ define <2 x float> @constrained_vector_fptrunc_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    xsrsp 1, 34
 ; PC64LE-NEXT:    xsrsp 0, 0
-; PC64LE-NEXT:    xscvdpspn 34, 1
-; PC64LE-NEXT:    xscvdpspn 35, 0
-; PC64LE-NEXT:    vmrghw 2, 2, 3
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptrunc_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    xxswapd 1, 34
 ; PC64LE9-NEXT:    xsrsp 0, 34
-; PC64LE9-NEXT:    xscvdpspn 35, 0
-; PC64LE9-NEXT:    xxswapd 0, 34
-; PC64LE9-NEXT:    xsrsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 34, 0
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
+; PC64LE9-NEXT:    xsrsp 1, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xxmrghw 34, 0, 1
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
@@ -6521,26 +6521,26 @@ define <3 x float> @constrained_vector_fptrunc_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    xsrsp 1, 2
 ; PC64LE-NEXT:    addi 3, 3, .LCPI129_0@toc@l
 ; PC64LE-NEXT:    xsrsp 2, 3
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 1
-; PC64LE-NEXT:    xscvdpspn 36, 2
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptrunc_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    xsrsp 0, 1
+; PC64LE9-NEXT:    xsrsp 1, 2
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI129_0@toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI129_0@toc@l
-; PC64LE9-NEXT:    xscvdpspn 34, 0
-; PC64LE9-NEXT:    xsrsp 0, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 0
+; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    xsrsp 0, 3
 ; PC64LE9-NEXT:    xscvdpspn 36, 0
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -6726,12 +6726,12 @@ define <3 x float> @constrained_vector_ceil_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    xsrdpip 0, 0
 ; PC64LE-NEXT:    xsrdpip 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 1
-; PC64LE-NEXT:    xsrdpip 0, 2
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xscvdpspn 35, 0
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xsrdpip 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_ceil_v3f32:
@@ -6744,14 +6744,14 @@ define <3 x float> @constrained_vector_ceil_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xsrdpip 1, 1
 ; PC64LE9-NEXT:    xsrdpip 2, 2
 ; PC64LE9-NEXT:    xsrdpip 0, 0
-; PC64LE9-NEXT:    xscvdpspn 34, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xscvdpspn 36, 0
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xxmrghw 34, 1, 2
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -6841,12 +6841,12 @@ define <3 x float> @constrained_vector_floor_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    xsrdpim 0, 0
 ; PC64LE-NEXT:    xsrdpim 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 1
-; PC64LE-NEXT:    xsrdpim 0, 2
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xscvdpspn 35, 0
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xsrdpim 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_floor_v3f32:
@@ -6859,14 +6859,14 @@ define <3 x float> @constrained_vector_floor_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xsrdpim 1, 1
 ; PC64LE9-NEXT:    xsrdpim 2, 2
 ; PC64LE9-NEXT:    xsrdpim 0, 0
-; PC64LE9-NEXT:    xscvdpspn 34, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xscvdpspn 36, 0
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xxmrghw 34, 1, 2
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -6955,12 +6955,12 @@ define <3 x float> @constrained_vector_round_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    xsrdpi 0, 0
 ; PC64LE-NEXT:    xsrdpi 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 1
-; PC64LE-NEXT:    xsrdpi 0, 2
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xscvdpspn 35, 0
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xsrdpi 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_round_v3f32:
@@ -6973,14 +6973,14 @@ define <3 x float> @constrained_vector_round_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xsrdpi 1, 1
 ; PC64LE9-NEXT:    xsrdpi 2, 2
 ; PC64LE9-NEXT:    xsrdpi 0, 0
-; PC64LE9-NEXT:    xscvdpspn 34, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xscvdpspn 36, 0
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xxmrghw 34, 1, 2
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -7070,12 +7070,12 @@ define <3 x float> @constrained_vector_trunc_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    xsrdpiz 0, 0
 ; PC64LE-NEXT:    xsrdpiz 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 1
-; PC64LE-NEXT:    xsrdpiz 0, 2
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xscvdpspn 35, 0
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xsrdpiz 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_trunc_v3f32:
@@ -7088,14 +7088,14 @@ define <3 x float> @constrained_vector_trunc_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-NEXT:    xscvspdpn 1, 1
 ; PC64LE9-NEXT:    xscvspdpn 2, 2
 ; PC64LE9-NEXT:    xscvspdpn 0, 0
+; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xsrdpiz 1, 1
 ; PC64LE9-NEXT:    xsrdpiz 2, 2
 ; PC64LE9-NEXT:    xsrdpiz 0, 0
-; PC64LE9-NEXT:    xscvdpspn 34, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xscvdpspn 1, 1
 ; PC64LE9-NEXT:    xscvdpspn 36, 0
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xxmrghw 34, 1, 2
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr
 entry:
@@ -7294,9 +7294,9 @@ define <2 x float> @constrained_vector_sitofp_v2f32_v2i32(<2 x i32> %x) #0 {
 ; PC64LE-NEXT:    mtfprwa 1, 4
 ; PC64LE-NEXT:    xscvsxdsp 0, 0
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 1
-; PC64LE-NEXT:    vmrghw 2, 3, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sitofp_v2f32_v2i32:
@@ -7305,13 +7305,13 @@ define <2 x float> @constrained_vector_sitofp_v2f32_v2i32(<2 x i32> %x) #0 {
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
 ; PC64LE9-NEXT:    mtfprwa 0, 3
 ; PC64LE9-NEXT:    li 3, 4
-; PC64LE9-NEXT:    xscvsxdsp 0, 0
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 0
-; PC64LE9-NEXT:    mtfprwa 0, 3
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 34, 0
-; PC64LE9-NEXT:    vmrghw 2, 2, 3
+; PC64LE9-NEXT:    mtfprwa 1, 3
+; PC64LE9-NEXT:    xscvsxdsp 1, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <2 x float>
@@ -7349,9 +7349,9 @@ define <2 x float> @constrained_vector_sitofp_v2f32_v2i64(<2 x i64> %x) #0 {
 ; PC64LE-NEXT:    mtfprd 1, 4
 ; PC64LE-NEXT:    xscvsxdsp 0, 0
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 1
-; PC64LE-NEXT:    vmrghw 2, 2, 3
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sitofp_v2f32_v2i64:
@@ -7359,12 +7359,12 @@ define <2 x float> @constrained_vector_sitofp_v2f32_v2i64(<2 x i64> %x) #0 {
 ; PC64LE9-NEXT:    mfvsrld 3, 34
 ; PC64LE9-NEXT:    mtfprd 0, 3
 ; PC64LE9-NEXT:    mfvsrd 3, 34
+; PC64LE9-NEXT:    mtfprd 1, 3
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 35, 0
-; PC64LE9-NEXT:    mtfprd 0, 3
-; PC64LE9-NEXT:    xscvsxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 34, 0
-; PC64LE9-NEXT:    vmrghw 2, 2, 3
+; PC64LE9-NEXT:    xscvsxdsp 1, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <2 x float>
@@ -7417,23 +7417,23 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
-; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mffprwz 4, 1
-; PC64LE-NEXT:    mtfprwa 0, 3
-; PC64LE-NEXT:    mtfprwa 1, 4
-; PC64LE-NEXT:    xscvsxdsp 0, 0
-; PC64LE-NEXT:    xscvsxdsp 1, 1
-; PC64LE-NEXT:    mfvsrwz 3, 34
-; PC64LE-NEXT:    mtfprwa 2, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI161_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI161_0@toc@l
-; PC64LE-NEXT:    xscvdpspn 34, 0
 ; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xscvdpspn 35, 1
-; PC64LE-NEXT:    xscvsxdsp 0, 2
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xscvdpspn 35, 0
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    mffprwz 4, 0
+; PC64LE-NEXT:    mffprwz 5, 1
+; PC64LE-NEXT:    mtfprwa 0, 4
+; PC64LE-NEXT:    mtfprwa 1, 5
+; PC64LE-NEXT:    mfvsrwz 4, 34
+; PC64LE-NEXT:    xscvsxdsp 0, 0
+; PC64LE-NEXT:    xscvsxdsp 1, 1
+; PC64LE-NEXT:    mtfprwa 2, 4
+; PC64LE-NEXT:    xscvsxdsp 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i32:
@@ -7442,17 +7442,17 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
 ; PC64LE9-NEXT:    mtfprwa 0, 3
 ; PC64LE9-NEXT:    li 3, 4
-; PC64LE9-NEXT:    xscvsxdsp 0, 0
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 0
-; PC64LE9-NEXT:    mtfprwa 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI161_0@toc@ha
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
+; PC64LE9-NEXT:    mtfprwa 1, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI161_0@toc@ha
+; PC64LE9-NEXT:    xscvsxdsp 1, 1
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI161_0@toc@l
-; PC64LE9-NEXT:    xscvdpspn 36, 0
-; PC64LE9-NEXT:    vmrghw 3, 4, 3
+; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    lxv 36, 0(3)
 ; PC64LE9-NEXT:    mfvsrwz 3, 34
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE9-NEXT:    mtfprwa 0, 3
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 34, 0
@@ -7498,35 +7498,35 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    addis 3, 2, .LCPI163_0@toc@ha
+; PC64LE-NEXT:    addis 6, 2, .LCPI163_0@toc@ha
 ; PC64LE-NEXT:    mtfprd 1, 4
-; PC64LE-NEXT:    addi 3, 3, .LCPI163_0@toc@l
+; PC64LE-NEXT:    addi 3, 6, .LCPI163_0@toc@l
 ; PC64LE-NEXT:    xscvsxdsp 0, 0
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
 ; PC64LE-NEXT:    mtfprd 2, 5
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 1
-; PC64LE-NEXT:    xscvsxdsp 0, 2
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xscvdpspn 35, 0
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xscvsxdsp 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mtfprd 0, 3
+; PC64LE9-NEXT:    mtfprd 1, 4
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI163_0@toc@ha
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
+; PC64LE9-NEXT:    xscvsxdsp 1, 1
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI163_0@toc@l
-; PC64LE9-NEXT:    xscvdpspn 34, 0
-; PC64LE9-NEXT:    mtfprd 0, 4
-; PC64LE9-NEXT:    xscvsxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 35, 0
+; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    mtfprd 0, 5
 ; PC64LE9-NEXT:    xscvsxdsp 0, 0
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xscvdpspn 36, 0
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr
@@ -7857,9 +7857,9 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i32(<2 x i32> %x) #0 {
 ; PC64LE-NEXT:    mtfprwz 1, 4
 ; PC64LE-NEXT:    xscvuxdsp 0, 0
 ; PC64LE-NEXT:    xscvuxdsp 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 1
-; PC64LE-NEXT:    vmrghw 2, 3, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_uitofp_v2f32_v2i32:
@@ -7868,13 +7868,13 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i32(<2 x i32> %x) #0 {
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
 ; PC64LE9-NEXT:    mtfprwz 0, 3
 ; PC64LE9-NEXT:    li 3, 4
-; PC64LE9-NEXT:    xscvuxdsp 0, 0
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 0
-; PC64LE9-NEXT:    mtfprwz 0, 3
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 34, 0
-; PC64LE9-NEXT:    vmrghw 2, 2, 3
+; PC64LE9-NEXT:    mtfprwz 1, 3
+; PC64LE9-NEXT:    xscvuxdsp 1, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <2 x float>
@@ -7912,9 +7912,9 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
 ; PC64LE-NEXT:    mtfprd 1, 4
 ; PC64LE-NEXT:    xscvuxdsp 0, 0
 ; PC64LE-NEXT:    xscvuxdsp 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 1
-; PC64LE-NEXT:    vmrghw 2, 2, 3
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_uitofp_v2f32_v2i64:
@@ -7922,12 +7922,12 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
 ; PC64LE9-NEXT:    mfvsrld 3, 34
 ; PC64LE9-NEXT:    mtfprd 0, 3
 ; PC64LE9-NEXT:    mfvsrd 3, 34
+; PC64LE9-NEXT:    mtfprd 1, 3
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 35, 0
-; PC64LE9-NEXT:    mtfprd 0, 3
-; PC64LE9-NEXT:    xscvuxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 34, 0
-; PC64LE9-NEXT:    vmrghw 2, 2, 3
+; PC64LE9-NEXT:    xscvuxdsp 1, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    blr
 entry:
   %result = call <2 x float>
@@ -7980,23 +7980,23 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
-; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mffprwz 4, 1
-; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mtfprwz 1, 4
-; PC64LE-NEXT:    xscvuxdsp 0, 0
-; PC64LE-NEXT:    xscvuxdsp 1, 1
-; PC64LE-NEXT:    mfvsrwz 3, 34
-; PC64LE-NEXT:    mtfprwz 2, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI179_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI179_0@toc@l
-; PC64LE-NEXT:    xscvdpspn 34, 0
 ; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xscvdpspn 35, 1
-; PC64LE-NEXT:    xscvuxdsp 0, 2
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xscvdpspn 35, 0
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    mffprwz 4, 0
+; PC64LE-NEXT:    mffprwz 5, 1
+; PC64LE-NEXT:    mtfprwz 0, 4
+; PC64LE-NEXT:    mtfprwz 1, 5
+; PC64LE-NEXT:    mfvsrwz 4, 34
+; PC64LE-NEXT:    xscvuxdsp 0, 0
+; PC64LE-NEXT:    xscvuxdsp 1, 1
+; PC64LE-NEXT:    mtfprwz 2, 4
+; PC64LE-NEXT:    xscvuxdsp 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_uitofp_v3f32_v3i32:
@@ -8005,17 +8005,17 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
 ; PC64LE9-NEXT:    mtfprwz 0, 3
 ; PC64LE9-NEXT:    li 3, 4
-; PC64LE9-NEXT:    xscvuxdsp 0, 0
 ; PC64LE9-NEXT:    vextuwrx 3, 3, 2
-; PC64LE9-NEXT:    xscvdpspn 35, 0
-; PC64LE9-NEXT:    mtfprwz 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI179_0@toc@ha
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
+; PC64LE9-NEXT:    mtfprwz 1, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI179_0@toc@ha
+; PC64LE9-NEXT:    xscvuxdsp 1, 1
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI179_0@toc@l
-; PC64LE9-NEXT:    xscvdpspn 36, 0
-; PC64LE9-NEXT:    vmrghw 3, 4, 3
+; PC64LE9-NEXT:    xscvdpspn 0, 0
 ; PC64LE9-NEXT:    lxv 36, 0(3)
 ; PC64LE9-NEXT:    mfvsrwz 3, 34
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xxmrghw 35, 1, 0
 ; PC64LE9-NEXT:    mtfprwz 0, 3
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
 ; PC64LE9-NEXT:    xscvdpspn 34, 0
@@ -8061,35 +8061,35 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_uitofp_v3f32_v3i64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    addis 3, 2, .LCPI181_0@toc@ha
+; PC64LE-NEXT:    addis 6, 2, .LCPI181_0@toc@ha
 ; PC64LE-NEXT:    mtfprd 1, 4
-; PC64LE-NEXT:    addi 3, 3, .LCPI181_0@toc@l
+; PC64LE-NEXT:    addi 3, 6, .LCPI181_0@toc@l
 ; PC64LE-NEXT:    xscvuxdsp 0, 0
 ; PC64LE-NEXT:    lvx 4, 0, 3
 ; PC64LE-NEXT:    xscvuxdsp 1, 1
 ; PC64LE-NEXT:    mtfprd 2, 5
-; PC64LE-NEXT:    xscvdpspn 34, 0
-; PC64LE-NEXT:    xscvdpspn 35, 1
-; PC64LE-NEXT:    xscvuxdsp 0, 2
-; PC64LE-NEXT:    vmrghw 2, 3, 2
-; PC64LE-NEXT:    xscvdpspn 35, 0
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    xscvuxdsp 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 34, 2
+; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_uitofp_v3f32_v3i64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mtfprd 0, 3
+; PC64LE9-NEXT:    mtfprd 1, 4
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI181_0@toc@ha
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
+; PC64LE9-NEXT:    xscvuxdsp 1, 1
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI181_0@toc@l
-; PC64LE9-NEXT:    xscvdpspn 34, 0
-; PC64LE9-NEXT:    mtfprd 0, 4
-; PC64LE9-NEXT:    xscvuxdsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 35, 0
+; PC64LE9-NEXT:    lxv 35, 0(3)
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE9-NEXT:    mtfprd 0, 5
 ; PC64LE9-NEXT:    xscvuxdsp 0, 0
-; PC64LE9-NEXT:    vmrghw 2, 3, 2
-; PC64LE9-NEXT:    lxv 35, 0(3)
 ; PC64LE9-NEXT:    xscvdpspn 36, 0
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE9-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 10dbd4d363a62..603a1bb611b31 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -1316,9 +1316,9 @@ define <2 x float> @test44(<2 x i64> %a) {
 ; CHECK-NEXT:    lfd f0, -32(r1)
 ; CHECK-NEXT:    fcfidus f0, f0
 ; CHECK-NEXT:    stfs f0, -64(r1)
-; CHECK-NEXT:    lxvw4x v2, 0, r3
-; CHECK-NEXT:    lxvw4x v3, 0, r4
-; CHECK-NEXT:    vmrghw v2, v3, v2
+; CHECK-NEXT:    lxvw4x vs0, 0, r3
+; CHECK-NEXT:    lxvw4x vs1, 0, r4
+; CHECK-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-REG-LABEL: test44:
@@ -1337,9 +1337,9 @@ define <2 x float> @test44(<2 x i64> %a) {
 ; CHECK-REG-NEXT:    lfd f0, -32(r1)
 ; CHECK-REG-NEXT:    fcfidus f0, f0
 ; CHECK-REG-NEXT:    stfs f0, -64(r1)
-; CHECK-REG-NEXT:    lxvw4x v2, 0, r3
-; CHECK-REG-NEXT:    lxvw4x v3, 0, r4
-; CHECK-REG-NEXT:    vmrghw v2, v3, v2
+; CHECK-REG-NEXT:    lxvw4x vs0, 0, r3
+; CHECK-REG-NEXT:    lxvw4x vs1, 0, r4
+; CHECK-REG-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-REG-NEXT:    blr
 ;
 ; CHECK-FISL-LABEL: test44:
@@ -1357,10 +1357,10 @@ define <2 x float> @test44(<2 x i64> %a) {
 ; CHECK-FISL-NEXT:    fcfidus f0, f0
 ; CHECK-FISL-NEXT:    stfs f0, -64(r1)
 ; CHECK-FISL-NEXT:    addi r3, r1, -48
-; CHECK-FISL-NEXT:    lxvw4x v3, 0, r3
+; CHECK-FISL-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-FISL-NEXT:    addi r3, r1, -64
-; CHECK-FISL-NEXT:    lxvw4x v2, 0, r3
-; CHECK-FISL-NEXT:    vmrghw v2, v2, v3
+; CHECK-FISL-NEXT:    lxvw4x vs0, 0, r3
+; CHECK-FISL-NEXT:    xxmrghw v2, vs0, vs1
 ; CHECK-FISL-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test44:
@@ -1368,9 +1368,9 @@ define <2 x float> @test44(<2 x i64> %a) {
 ; CHECK-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-LE-NEXT:    xscvuxdsp f1, v2
 ; CHECK-LE-NEXT:    xscvuxdsp f0, f0
-; CHECK-LE-NEXT:    xscvdpspn v3, f1
-; CHECK-LE-NEXT:    xscvdpspn v2, f0
-; CHECK-LE-NEXT:    vmrghw v2, v3, v2
+; CHECK-LE-NEXT:    xscvdpspn vs1, f1
+; CHECK-LE-NEXT:    xscvdpspn vs0, f0
+; CHECK-LE-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-LE-NEXT:    blr
   %v = uitofp <2 x i64> %a to <2 x float>
   ret <2 x float> %v
@@ -1395,9 +1395,9 @@ define <2 x float> @test45(<2 x i64> %a) {
 ; CHECK-NEXT:    lfd f0, -32(r1)
 ; CHECK-NEXT:    fcfids f0, f0
 ; CHECK-NEXT:    stfs f0, -64(r1)
-; CHECK-NEXT:    lxvw4x v2, 0, r3
-; CHECK-NEXT:    lxvw4x v3, 0, r4
-; CHECK-NEXT:    vmrghw v2, v3, v2
+; CHECK-NEXT:    lxvw4x vs0, 0, r3
+; CHECK-NEXT:    lxvw4x vs1, 0, r4
+; CHECK-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-REG-LABEL: test45:
@@ -1416,9 +1416,9 @@ define <2 x float> @test45(<2 x i64> %a) {
 ; CHECK-REG-NEXT:    lfd f0, -32(r1)
 ; CHECK-REG-NEXT:    fcfids f0, f0
 ; CHECK-REG-NEXT:    stfs f0, -64(r1)
-; CHECK-REG-NEXT:    lxvw4x v2, 0, r3
-; CHECK-REG-NEXT:    lxvw4x v3, 0, r4
-; CHECK-REG-NEXT:    vmrghw v2, v3, v2
+; CHECK-REG-NEXT:    lxvw4x vs0, 0, r3
+; CHECK-REG-NEXT:    lxvw4x vs1, 0, r4
+; CHECK-REG-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-REG-NEXT:    blr
 ;
 ; CHECK-FISL-LABEL: test45:
@@ -1436,10 +1436,10 @@ define <2 x float> @test45(<2 x i64> %a) {
 ; CHECK-FISL-NEXT:    fcfids f0, f0
 ; CHECK-FISL-NEXT:    stfs f0, -64(r1)
 ; CHECK-FISL-NEXT:    addi r3, r1, -48
-; CHECK-FISL-NEXT:    lxvw4x v3, 0, r3
+; CHECK-FISL-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-FISL-NEXT:    addi r3, r1, -64
-; CHECK-FISL-NEXT:    lxvw4x v2, 0, r3
-; CHECK-FISL-NEXT:    vmrghw v2, v2, v3
+; CHECK-FISL-NEXT:    lxvw4x vs0, 0, r3
+; CHECK-FISL-NEXT:    xxmrghw v2, vs0, vs1
 ; CHECK-FISL-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test45:
@@ -1447,9 +1447,9 @@ define <2 x float> @test45(<2 x i64> %a) {
 ; CHECK-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-LE-NEXT:    xscvsxdsp f1, v2
 ; CHECK-LE-NEXT:    xscvsxdsp f0, f0
-; CHECK-LE-NEXT:    xscvdpspn v3, f1
-; CHECK-LE-NEXT:    xscvdpspn v2, f0
-; CHECK-LE-NEXT:    vmrghw v2, v3, v2
+; CHECK-LE-NEXT:    xscvdpspn vs1, f1
+; CHECK-LE-NEXT:    xscvdpspn vs0, f0
+; CHECK-LE-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-LE-NEXT:    blr
   %v = sitofp <2 x i64> %a to <2 x float>
   ret <2 x float> %v


        


More information about the llvm-commits mailing list