[llvm] r350285 - [Power9] Enable the Out-of-Order scheduling model for P9 hw

QingShan Zhang via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 2 21:04:18 PST 2019


Author: qshanz
Date: Wed Jan  2 21:04:18 2019
New Revision: 350285

URL: http://llvm.org/viewvc/llvm-project?rev=350285&view=rev
Log:
[Power9] Enable the Out-of-Order scheduling model for P9 hw

When the MI scheduler is used for P9, the hardware is modeled as out of order.
However, inside the MI scheduler algorithm, we still use the in-order scheduling model
because MicroOpBufferSize isn't set. The MI scheduler takes this to mean that the hw
cannot buffer ops, so a pending instruction can only be scheduled once all the
available instructions have been issued. That is not true for our P9 hw.

This patch enables the Out-of-Order scheduling model. The buffer size of 44 is
taken from the P9 hw spec, and perf testing indicates that this value does not hurt cpu2017.

With this patch, 3 SPEC benchmarks improve by more than 3% and 1 degrades by more than 3%. The details are as follows:

x264_r: +6.95%
cactuBSSN_r: +6.94%
lbm_r: +4.11%
xz_r: -3.85%

The GEOMEAN across all the C/C++ benchmarks in spec2017 improves by about 0.18%.

Reviewer: Nemanjai
Differential Revision: https://reviews.llvm.org/D55810

Modified:
    llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td
    llvm/trunk/test/CodeGen/PowerPC/PR33671.ll
    llvm/trunk/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
    llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll
    llvm/trunk/test/CodeGen/PowerPC/builtins-ppc-p9-f128.ll
    llvm/trunk/test/CodeGen/PowerPC/f128-aggregates.ll
    llvm/trunk/test/CodeGen/PowerPC/f128-arith.ll
    llvm/trunk/test/CodeGen/PowerPC/f128-conv.ll
    llvm/trunk/test/CodeGen/PowerPC/f128-passByValue.ll
    llvm/trunk/test/CodeGen/PowerPC/f128-truncateNconv.ll
    llvm/trunk/test/CodeGen/PowerPC/mi-scheduling-lhs.ll
    llvm/trunk/test/CodeGen/PowerPC/mulld.ll
    llvm/trunk/test/CodeGen/PowerPC/pre-inc-disable.ll
    llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll
    llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_4.ll
    llvm/trunk/test/CodeGen/PowerPC/store_fptoi.ll
    llvm/trunk/test/CodeGen/PowerPC/stwu-sched.ll
    llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll
    llvm/trunk/test/CodeGen/PowerPC/vec-itofp.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll
    llvm/trunk/test/CodeGen/PowerPC/vsx-p9.ll
    llvm/trunk/test/CodeGen/PowerPC/vsx-spill.ll
    llvm/trunk/test/CodeGen/PowerPC/vsx_insert_extract_le.ll

Modified: llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td Wed Jan  2 21:04:18 2019
@@ -33,6 +33,12 @@ def P9Model : SchedMachineModel {
   // A dispatch group is 6 instructions.
   let LoopMicroOpBufferSize = 60;
 
+  // As iops are dispatched to a slice, they are held in an independent slice
+  // issue queue until all register sources and other dependencies have been
+  // resolved and they can be issued. Each of four execution slices has an
+  // 11-entry iop issue queue.
+  let MicroOpBufferSize = 44;
+
   let CompleteModel = 1;
 
   // Do not support QPX (Quad Processing eXtension) or SPE (Signal Procesing

Modified: llvm/trunk/test/CodeGen/PowerPC/PR33671.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/PR33671.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/PR33671.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/PR33671.ll Wed Jan  2 21:04:18 2019
@@ -27,6 +27,6 @@ entry:
 ; CHECK-LABEL: test2
 ; CHECK: addi 3, 3, 8
 ; CHECK: lxvx [[LD:[0-9]+]], 0, 3
-; CHECK: addi 3, 4, 4
-; CHECK: stxvx [[LD]], 0, 3
+; CHECK: addi [[REG:[0-9]+]], 4, 4
+; CHECK: stxvx [[LD]], 0, [[REG]] 
 }

Modified: llvm/trunk/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll Wed Jan  2 21:04:18 2019
@@ -33,12 +33,12 @@ define void @testExpandPostRAPseudo(i32*
 ; CHECK-P9:    xxpermdi vs0, f0, f0, 2
 ; CHECK-P9:    xxspltw vs0, vs0, 3
 ; CHECK-P9:    stxvx vs0, 0, r4
-; CHECK-P9:    lis r4, 1024
 ; CHECK-P9:    lfiwax f0, 0, r3
 ; CHECK-P9:    addis r3, r2, .LC1 at toc@ha
 ; CHECK-P9:    ld r3, .LC1 at toc@l(r3)
 ; CHECK-P9:    xscvsxdsp f0, f0
 ; CHECK-P9:    ld r3, 0(r3)
+; CHECK-P9:    lis r4, 1024
 ; CHECK-P9:    stfsx f0, r3, r4
 ; CHECK-P9:    blr
 entry:

Modified: llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll Wed Jan  2 21:04:18 2019
@@ -1244,15 +1244,15 @@ entry:
 ; P9LE-LABEL: fromRegsConvftoi
 ; P8BE-LABEL: fromRegsConvftoi
 ; P8LE-LABEL: fromRegsConvftoi
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
+; P9BE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
+; P9BE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
 ; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
+; P9LE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
+; P9LE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
 ; P9LE: vmrgew v2, [[REG4]], [[REG3]]
 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
@@ -1516,15 +1516,15 @@ entry:
 ; P9LE-LABEL: fromRegsConvdtoi
 ; P8BE-LABEL: fromRegsConvdtoi
 ; P8LE-LABEL: fromRegsConvdtoi
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
+; P9BE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
+; P9BE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
 ; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
+; P9LE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
+; P9LE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
 ; P9LE: vmrgew v2, [[REG4]], [[REG3]]
 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
@@ -1642,8 +1642,8 @@ entry:
 ; P9LE: lfd
 ; P9LE: lfd
 ; P9LE: xxmrghd
-; P9LE: xxmrghd
 ; P9LE: xvcvdpsxws
+; P9LE: xxmrghd
 ; P9LE: xvcvdpsxws
 ; P9LE: vmrgew v2
 ; P8BE: lfdx
@@ -1711,8 +1711,8 @@ entry:
 ; P9LE: lfd
 ; P9LE: lfd
 ; P9LE: xxmrghd
-; P9LE: xxmrghd
 ; P9LE: xvcvdpsxws
+; P9LE: xxmrghd
 ; P9LE: xvcvdpsxws
 ; P9LE: vmrgew v2
 ; P8BE: lfdux
@@ -1780,8 +1780,8 @@ entry:
 ; P9LE: lfd
 ; P9LE: lfd
 ; P9LE: xxmrghd
-; P9LE: xxmrghd
 ; P9LE: xvcvdpsxws
+; P9LE: xxmrghd
 ; P9LE: xvcvdpsxws
 ; P9LE: vmrgew v2
 ; P8BE: lfdux
@@ -2376,15 +2376,15 @@ entry:
 ; P9LE-LABEL: fromRegsConvftoui
 ; P8BE-LABEL: fromRegsConvftoui
 ; P8LE-LABEL: fromRegsConvftoui
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
+; P9BE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
+; P9BE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
 ; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
+; P9LE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
+; P9LE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
 ; P9LE: vmrgew v2, [[REG4]], [[REG3]]
 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
@@ -2648,15 +2648,15 @@ entry:
 ; P9LE-LABEL: fromRegsConvdtoui
 ; P8BE-LABEL: fromRegsConvdtoui
 ; P8LE-LABEL: fromRegsConvdtoui
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
-; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
+; P9BE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
+; P9BE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
 ; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
-; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
+; P9LE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
+; P9LE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
 ; P9LE: vmrgew v2, [[REG4]], [[REG3]]
 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
@@ -2774,8 +2774,8 @@ entry:
 ; P9LE: lfd
 ; P9LE: lfd
 ; P9LE: xxmrghd
-; P9LE: xxmrghd
 ; P9LE: xvcvdpuxws
+; P9LE: xxmrghd
 ; P9LE: xvcvdpuxws
 ; P9LE: vmrgew v2
 ; P8BE: lfdx
@@ -2843,8 +2843,8 @@ entry:
 ; P9LE: lfd
 ; P9LE: lfd
 ; P9LE: xxmrghd
-; P9LE: xxmrghd
 ; P9LE: xvcvdpuxws
+; P9LE: xxmrghd
 ; P9LE: xvcvdpuxws
 ; P9LE: vmrgew v2
 ; P8BE: lfdux
@@ -2912,8 +2912,8 @@ entry:
 ; P9LE: lfd
 ; P9LE: lfd
 ; P9LE: xxmrghd
-; P9LE: xxmrghd
 ; P9LE: xvcvdpuxws
+; P9LE: xxmrghd
 ; P9LE: xvcvdpuxws
 ; P9LE: vmrgew v2
 ; P8BE: lfdux

Modified: llvm/trunk/test/CodeGen/PowerPC/builtins-ppc-p9-f128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/builtins-ppc-p9-f128.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/builtins-ppc-p9-f128.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/builtins-ppc-p9-f128.ll Wed Jan  2 21:04:18 2019
@@ -112,8 +112,8 @@ entry:
   %2 = call fp128 @llvm.ppc.scalar.insert.exp.qp(fp128 %0, i64 %1)
   ret fp128 %2
 ; CHECK-LABEL: insert_exp_qp
-; CHECK: mtvsrd [[FPREG:f[0-9]+]], r3
-; CHECK: lxvx [[VECREG:v[0-9]+]]
+; CHECK-DAG: mtvsrd [[FPREG:f[0-9]+]], r3
+; CHECK-DAG: lxvx [[VECREG:v[0-9]+]]
 ; CHECK: xsiexpqp v2, [[VECREG]], [[FPREG]]
 ; CHECK: blr
 }

Modified: llvm/trunk/test/CodeGen/PowerPC/f128-aggregates.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/f128-aggregates.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/f128-aggregates.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/f128-aggregates.ll Wed Jan  2 21:04:18 2019
@@ -82,7 +82,6 @@ define fp128 @testStruct_03(%struct.With
                             align 16 %a) {
 ; CHECK-LABEL: testStruct_03:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lxv v2, 128(r1)
 ; CHECK-NEXT:    std r10, 88(r1)
 ; CHECK-NEXT:    std r9, 80(r1)
 ; CHECK-NEXT:    std r8, 72(r1)
@@ -91,11 +90,11 @@ define fp128 @testStruct_03(%struct.With
 ; CHECK-NEXT:    std r5, 48(r1)
 ; CHECK-NEXT:    std r4, 40(r1)
 ; CHECK-NEXT:    std r3, 32(r1)
+; CHECK-NEXT:    lxv v2, 128(r1)
 ; CHECK-NEXT:    blr
 
 ; CHECK-BE-LABEL: testStruct_03:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 144(r1)
 ; CHECK-BE-NEXT:    std r10, 104(r1)
 ; CHECK-BE-NEXT:    std r9, 96(r1)
 ; CHECK-BE-NEXT:    std r8, 88(r1)
@@ -104,6 +103,7 @@ define fp128 @testStruct_03(%struct.With
 ; CHECK-BE-NEXT:    std r5, 64(r1)
 ; CHECK-BE-NEXT:    std r4, 56(r1)
 ; CHECK-BE-NEXT:    std r3, 48(r1)
+; CHECK-BE-NEXT:    lxv v2, 144(r1)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a7 = getelementptr inbounds %struct.With9fp128params,
@@ -228,12 +228,12 @@ entry:
 define fp128 @testMixedAggregate_03([4 x i128] %sa.coerce) {
 ; CHECK-LABEL: testMixedAggregate_03:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-DAG:     mtvsrwa v2, r3
-; CHECK-DAG:     mtvsrdd v3, r6, r5
-; CHECK:         mtvsrd v4, r10
+; CHECK:         mtvsrwa v2, r3
 ; CHECK:         xscvsdqp v2, v2
-; CHECK-DAG:     xscvsdqp v[[REG:[0-9]+]], v4
-; CHECK-DAG:     xsaddqp v2, v3, v2
+; CHECK:         mtvsrdd v3, r6, r5
+; CHECK:         xsaddqp v2, v3, v2
+; CHECK:         mtvsrd v[[REG1:[0-9]+]], r10
+; CHECK:         xscvsdqp v[[REG:[0-9]+]], v[[REG1]]
 ; CHECK:         xsaddqp v2, v2, v[[REG]]
 ; CHECK-NEXT:    blr
 entry:
@@ -260,11 +260,11 @@ define fp128 @testNestedAggregate(%struc
 ; CHECK-NEXT:    std r7, 64(r1)
 ; CHECK-NEXT:    std r10, 88(r1)
 ; CHECK-NEXT:    std r9, 80(r1)
-; CHECK-NEXT:    lxv v2, 64(r1)
 ; CHECK-NEXT:    std r6, 56(r1)
 ; CHECK-NEXT:    std r5, 48(r1)
 ; CHECK-NEXT:    std r4, 40(r1)
 ; CHECK-NEXT:    std r3, 32(r1)
+; CHECK-NEXT:    lxv v2, 64(r1)
 ; CHECK-NEXT:    blr
 
 ; CHECK-BE-LABEL: testNestedAggregate:
@@ -273,11 +273,11 @@ define fp128 @testNestedAggregate(%struc
 ; CHECK-BE-NEXT:    std r7, 80(r1)
 ; CHECK-BE-NEXT:    std r10, 104(r1)
 ; CHECK-BE-NEXT:    std r9, 96(r1)
-; CHECK-BE-NEXT:    lxv v2, 80(r1)
 ; CHECK-BE-NEXT:    std r6, 72(r1)
 ; CHECK-BE-NEXT:    std r5, 64(r1)
 ; CHECK-BE-NEXT:    std r4, 56(r1)
 ; CHECK-BE-NEXT:    std r3, 48(r1)
+; CHECK-BE-NEXT:    lxv v2, 80(r1)
 ; CHECK-BE-NEXT:    blr
 entry:
   %c = getelementptr inbounds %struct.MixedC, %struct.MixedC* %a, i64 0, i32 1, i32 1
@@ -337,25 +337,25 @@ entry:
 define fp128 @sum_float128(i32 signext %count, ...) {
 ; CHECK-LABEL: sum_float128:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r11, r2, .LCPI17_0 at toc@ha
-; CHECK-NEXT:    cmpwi cr0, r3, 1
 ; CHECK-NEXT:    std r10, 88(r1)
 ; CHECK-NEXT:    std r9, 80(r1)
 ; CHECK-NEXT:    std r8, 72(r1)
 ; CHECK-NEXT:    std r7, 64(r1)
 ; CHECK-NEXT:    std r6, 56(r1)
-; CHECK-NEXT:    std r5, 48(r1)
+; CHECK-NEXT:    cmpwi cr0, r3, 1
 ; CHECK-NEXT:    std r4, 40(r1)
-; CHECK-NEXT:    addi r11, r11, .LCPI17_0 at toc@l
-; CHECK-NEXT:    lxvx v2, 0, r11
+; CHECK-NEXT:    addis [[REG:r[0-9]+]], r2, .LCPI17_0 at toc@ha
+; CHECK-NEXT:    addi [[REG1:r[0-9]+]], [[REG]], .LCPI17_0 at toc@l
+; CHECK-NEXT:    lxvx v2, 0, [[REG1]]
+; CHECK-NEXT:    std r5, 48(r1)
 ; CHECK-NEXT:    bltlr cr0
 ; CHECK-NEXT:  # %bb.1: # %if.end
 ; CHECK-NEXT:    addi r3, r1, 40
 ; CHECK-NEXT:    lxvx v3, 0, r3
 ; CHECK-NEXT:    xsaddqp v2, v3, v2
+; CHECK-NEXT:    addi [[REG2:r[0-9]+]], r1, 72
+; CHECK-NEXT:    std [[REG2]], -8(r1)
 ; CHECK-NEXT:    lxv v3, 16(r3)
-; CHECK-NEXT:    addi r3, r1, 72
-; CHECK-NEXT:    std r3, -8(r1)
 ; CHECK-NEXT:    xsaddqp v2, v2, v3
 ; CHECK-NEXT:    blr
 entry:

Modified: llvm/trunk/test/CodeGen/PowerPC/f128-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/f128-arith.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/f128-arith.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/f128-arith.ll Wed Jan  2 21:04:18 2019
@@ -283,7 +283,7 @@ define void @qp_powi(fp128* nocapture re
                      fp128* nocapture %res) {
 ; CHECK-LABEL: qp_powi:
 ; CHECK:         lxv v2, 0(r3)
-; CHECK:         lwz r3, 0(r4)
+; CHECK:         lwz r5, 0(r4)
 ; CHECK:         bl __powikf2
 ; CHECK:         blr
 entry:

Modified: llvm/trunk/test/CodeGen/PowerPC/f128-conv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/f128-conv.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/f128-conv.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/f128-conv.ll Wed Jan  2 21:04:18 2019
@@ -444,10 +444,10 @@ define void @qpConv2dp_03(double* nocapt
 ; CHECK-LABEL: qpConv2dp_03:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis r5, r2, .LC7 at toc@ha
-; CHECK-NEXT:    sldi r4, r4, 3
 ; CHECK-NEXT:    ld r5, .LC7 at toc@l(r5)
 ; CHECK-NEXT:    lxvx v2, 0, r5
 ; CHECK-NEXT:    xscvqpdp v2, v2
+; CHECK-NEXT:    sldi r4, r4, 3
 ; CHECK-NEXT:    stxsdx v2, r3, r4
 ; CHECK-NEXT:    blr
 entry:
@@ -517,11 +517,11 @@ define void @qpConv2sp_03(float* nocaptu
 ; CHECK-LABEL: qpConv2sp_03:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis r5, r2, .LC7 at toc@ha
-; CHECK-NEXT:    sldi r4, r4, 2
 ; CHECK-NEXT:    ld r5, .LC7 at toc@l(r5)
 ; CHECK-NEXT:    lxv v2, 48(r5)
 ; CHECK-NEXT:    xscvqpdpo v2, v2
 ; CHECK-NEXT:    xsrsp f0, v2
+; CHECK-NEXT:    sldi r4, r4, 2
 ; CHECK-NEXT:    stfsx f0, r3, r4
 ; CHECK-NEXT:    blr
 entry:
@@ -609,8 +609,8 @@ define void @dpConv2qp_03(fp128* nocaptu
 ; CHECK-LABEL: dpConv2qp_03:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xscpsgndp v2, f1, f1
-; CHECK-NEXT:    sldi r4, r4, 4
-; CHECK-NEXT:    xscvdpqp v2, v2
+; CHECK-DAG:     sldi r4, r4, 4
+; CHECK-DAG:     xscvdpqp v2, v2
 ; CHECK-NEXT:    stxvx v2, r3, r4
 ; CHECK-NEXT:    blr
 entry:
@@ -689,8 +689,8 @@ define void @spConv2qp_03(fp128* nocaptu
 ; CHECK-LABEL: spConv2qp_03:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xscpsgndp v2, f1, f1
-; CHECK-NEXT:    sldi r4, r4, 4
-; CHECK-NEXT:    xscvdpqp v2, v2
+; CHECK-DAG:     sldi r4, r4, 4
+; CHECK-DAG:     xscvdpqp v2, v2
 ; CHECK-NEXT:    stxvx v2, r3, r4
 ; CHECK-NEXT:    blr
 entry:

Modified: llvm/trunk/test/CodeGen/PowerPC/f128-passByValue.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/f128-passByValue.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/f128-passByValue.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/f128-passByValue.ll Wed Jan  2 21:04:18 2019
@@ -63,8 +63,8 @@ define fp128 @fp128Array(fp128* nocaptur
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sldi r4, r4, 4
 ; CHECK-NEXT:    lxv v2, 0(r3)
-; CHECK-NEXT:    add r4, r3, r4
-; CHECK-NEXT:    lxv v3, -16(r4)
+; CHECK-NEXT:    add [[REG:r[0-9]+]], r3, r4
+; CHECK-NEXT:    lxv v3, -16([[REG]])
 ; CHECK-NEXT:    xsaddqp v2, v2, v3
 ; CHECK-NEXT:    blr
                          i32 signext %loopcnt, fp128* nocapture readnone %sum) {
@@ -85,7 +85,6 @@ define fp128 @maxVecParam(fp128 %p1, fp1
 ; CHECK-LABEL: maxVecParam:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xsaddqp v2, v2, v3
-; CHECK-NEXT:    lxv v[[REG0:[0-9]+]], 224(r1)
 ; CHECK-NEXT:    xsaddqp v2, v2, v4
 ; CHECK-NEXT:    xsaddqp v2, v2, v5
 ; CHECK-NEXT:    xsaddqp v2, v2, v6
@@ -96,6 +95,7 @@ define fp128 @maxVecParam(fp128 %p1, fp1
 ; CHECK-NEXT:    xsaddqp v2, v2, v11
 ; CHECK-NEXT:    xsaddqp v2, v2, v12
 ; CHECK-NEXT:    xsaddqp v2, v2, v13
+; CHECK-NEXT:    lxv v[[REG0:[0-9]+]], 224(r1)
 ; CHECK-NEXT:    xssubqp v2, v2, v[[REG0]]
 ; CHECK-NEXT:    blr
                           fp128 %p6, fp128 %p7, fp128 %p8, fp128 %p9, fp128 %p10,
@@ -121,9 +121,9 @@ entry:
 define fp128 @mixParam_01(fp128 %a, i32 signext %i, fp128 %b) {
 ; CHECK-LABEL: mixParam_01:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mtvsrwa v4, r5
-; CHECK-NEXT:    xsaddqp v2, v2, v3
-; CHECK-NEXT:    xscvsdqp v[[REG0:[0-9]+]], v4
+; CHECK-DAG:     mtvsrwa [[REG1:v[0-9]+]], r5
+; CHECK-DAG:     xsaddqp v2, v2, v3
+; CHECK-NEXT:    xscvsdqp v[[REG0:[0-9]+]], [[REG1]]
 ; CHECK-NEXT:    xsaddqp v2, v2, v[[REG0]]
 ; CHECK-NEXT:    blr
 entry:
@@ -136,8 +136,8 @@ entry:
 define fastcc fp128 @mixParam_01f(fp128 %a, i32 signext %i, fp128 %b) {
 ; CHECK-LABEL: mixParam_01f:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mtvsrwa v[[REG0:[0-9]+]], r3
-; CHECK-NEXT:    xsaddqp v2, v2, v3
+; CHECK-DAG:     mtvsrwa v[[REG0:[0-9]+]], r3
+; CHECK-DAG:     xsaddqp v2, v2, v3
 ; CHECK-NEXT:    xscvsdqp v[[REG1:[0-9]+]], v[[REG0]]
 ; CHECK-NEXT:    xsaddqp v2, v2, v[[REG1]]
 ; CHECK-NEXT:    blr
@@ -152,17 +152,17 @@ entry:
 define fp128 @mixParam_02(fp128 %p1, double %p2, i64* nocapture %p3,
 ; CHECK-LABEL: mixParam_02:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-DAG:     lwz r3, 96(r1)
+; CHECK:         lwz r3, 96(r1)
 ; CHECK:         add r4, r7, r9
-; CHECK-NEXT:    xscpsgndp v[[REG0:[0-9]+]], f1, f1
-; CHECK-DAG:     add r4, r4, r10
+; CHECK:         add r4, r4, r10
+; CHECK:         add r3, r4, r3
+; CHECK:         clrldi r3, r3, 32
+; CHECK:         std r3, 0(r6)
+; CHECK:         lxv v[[REG1:[0-9]+]], 0(r8)
+; CHECK:         xscpsgndp v[[REG0:[0-9]+]], f1, f1
 ; CHECK:         xscvdpqp v[[REG0]], v[[REG0]]
-; CHECK-NEXT:    add r3, r4, r3
-; CHECK-NEXT:    clrldi r3, r3, 32
-; CHECK-NEXT:    std r3, 0(r6)
-; CHECK-NEXT:    lxv v[[REG1:[0-9]+]], 0(r8)
-; CHECK-NEXT:    xsaddqp v2, v[[REG1]], v2
-; CHECK-NEXT:    xsaddqp v2, v2, v3
+; CHECK:         xsaddqp v2, v[[REG1]], v2
+; CHECK:         xsaddqp v2, v2, v3
 ; CHECK-NEXT:    blr
                           i16 signext %p4, fp128* nocapture readonly %p5,
                           i32 signext %p6, i8 zeroext %p7, i32 zeroext %p8) {
@@ -186,13 +186,13 @@ define fastcc fp128 @mixParam_02f(fp128
 ; CHECK-LABEL: mixParam_02f:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    add r4, r4, r6
-; CHECK-NEXT:    xscpsgndp v[[REG0:[0-9]+]], f1, f1
 ; CHECK-NEXT:    add r4, r4, r7
-; CHECK-NEXT:    xscvdpqp v[[REG0]], v[[REG0]]
 ; CHECK-NEXT:    add r4, r4, r8
 ; CHECK-NEXT:    clrldi r4, r4, 32
-; CHECK-NEXT:    std r4, 0(r3)
-; CHECK-NEXT:    lxv v[[REG1:[0-9]+]], 0(r5)
+; CHECK-DAG:     std r4, 0(r3)
+; CHECK-DAG:     lxv v[[REG1:[0-9]+]], 0(r5)
+; CHECK-NEXT:    xscpsgndp v[[REG0:[0-9]+]], f1, f1
+; CHECK-NEXT:    xscvdpqp v[[REG0]], v[[REG0]]
 ; CHECK-NEXT:    xsaddqp v2, v[[REG1]], v2
 ; CHECK-NEXT:    xsaddqp v2, v2, v[[REG0]] 
 ; CHECK-NEXT:    blr
@@ -219,11 +219,11 @@ define void @mixParam_03(fp128 %f1, doub
 ; CHECK-LABEL: mixParam_03:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-DAG:     ld r3, 104(r1)
-; CHECK-DAG:     mtvsrwa v[[REG2:[0-9]+]], r10
 ; CHECK-DAG:     stxv v2, 0(r9)
-; CHECK-DAG:     xscvsdqp v[[REG1:[0-9]+]], v[[REG2]]
 ; CHECK:         stxvx v3, 0, r3
-; CHECK-NEXT:    lxv v2, 0(r9)
+; CHECK:         mtvsrwa v[[REG2:[0-9]+]], r10
+; CHECK-DAG:     xscvsdqp v[[REG1:[0-9]+]], v[[REG2]]
+; CHECK-DAG:     lxv v2, 0(r9)
 ; CHECK-NEXT:    xsaddqp v2, v2, v[[REG1]]
 ; CHECK-NEXT:    xscvqpdp v2, v2
 ; CHECK-NEXT:    stxsd v2, 0(r5)
@@ -245,10 +245,10 @@ entry:
 define fastcc void @mixParam_03f(fp128 %f1, double* nocapture %d1, <4 x i32> %vec1,
 ; CHECK-LABEL: mixParam_03f:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mtvsrwa v[[REG0:[0-9]+]], r5
-; CHECK-NEXT:    stxv v[[REG1:[0-9]+]], 0(r4)
-; CHECK-NEXT:    stxv v[[REG2:[0-9]+]], 0(r7)
-; CHECK-NEXT:    lxv v[[REG1]], 0(r4)
+; CHECK-DAG:     mtvsrwa v[[REG0:[0-9]+]], r5
+; CHECK-DAG:     stxv v[[REG1:[0-9]+]], 0(r4)
+; CHECK-DAG:     stxv v[[REG2:[0-9]+]], 0(r7)
+; CHECK-DAG:     lxv v[[REG1]], 0(r4)
 ; CHECK-NEXT:    xscvsdqp v[[REG3:[0-9]+]], v[[REG0]]
 ; CHECK-NEXT:    xsaddqp v[[REG4:[0-9]+]], v[[REG1]], v[[REG3]]
 ; CHECK-NEXT:    xscvqpdp v2, v[[REG4]]

Modified: llvm/trunk/test/CodeGen/PowerPC/f128-truncateNconv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/f128-truncateNconv.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/f128-truncateNconv.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/f128-truncateNconv.ll Wed Jan  2 21:04:18 2019
@@ -53,10 +53,10 @@ entry:
   ret i64 %conv
 
 ; CHECK-LABEL: qpConv2sdw_03
+; CHECK: lxv v[[REG:[0-9]+]], 0(r3)
 ; CHECK: addis r[[REG0:[0-9]+]], r2, .LC0 at toc@ha
-; CHECK-DAG: ld r[[REG0]], .LC0 at toc@l(r[[REG0]])
-; CHECK-DAG: lxv v[[REG1:[0-9]+]], 16(r[[REG0]])
-; CHECK-DAG: lxv v[[REG:[0-9]+]], 0(r3)
+; CHECK: ld r[[REG0]], .LC0 at toc@l(r[[REG0]])
+; CHECK: lxv v[[REG1:[0-9]+]], 16(r[[REG0]])
 ; CHECK: xsaddqp v[[REG]], v[[REG]], v[[REG1]]
 ; CHECK-NEXT: xscvqpsdz v[[CONV:[0-9]+]], v[[REG]]
 ; CHECK-NEXT: mfvsrd r3, v[[CONV]]
@@ -97,7 +97,7 @@ entry:
 
 ; CHECK-LABEL: qpConv2sdw_testXForm
 ; CHECK: xscvqpsdz v[[CONV:[0-9]+]],
-; CHECK-NEXT: stxsdx v[[CONV]], r3, r4
+; CHECK: stxsdx v[[CONV]], r3, r4
 ; CHECK-NEXT: blr
 }
 
@@ -146,10 +146,10 @@ entry:
   ret i64 %conv
 
 ; CHECK-LABEL: qpConv2udw_03
+; CHECK: lxv v[[REG:[0-9]+]], 0(r3)
 ; CHECK: addis r[[REG0:[0-9]+]], r2, .LC0 at toc@ha
 ; CHECK-DAG: ld r[[REG0]], .LC0 at toc@l(r[[REG0]])
 ; CHECK-DAG: lxv v[[REG1:[0-9]+]], 16(r[[REG0]])
-; CHECK-DAG: lxv v[[REG:[0-9]+]], 0(r3)
 ; CHECK: xsaddqp v[[REG]], v[[REG]], v[[REG1]]
 ; CHECK-NEXT: xscvqpudz v[[CONV:[0-9]+]], v[[REG]]
 ; CHECK-NEXT: mfvsrd r3, v[[CONV]]
@@ -190,7 +190,7 @@ entry:
 
 ; CHECK-LABEL: qpConv2udw_testXForm
 ; CHECK: xscvqpudz v[[CONV:[0-9]+]],
-; CHECK-NEXT: stxsdx v[[CONV]], r3, r4
+; CHECK: stxsdx v[[CONV]], r3, r4
 ; CHECK-NEXT: blr
 }
 
@@ -240,10 +240,10 @@ entry:
   ret i32 %conv
 
 ; CHECK-LABEL: qpConv2sw_03
+; CHECK: lxv v[[REG:[0-9]+]], 0(r3)
 ; CHECK: addis r[[REG0:[0-9]+]], r2, .LC0 at toc@ha
 ; CHECK-DAG: ld r[[REG0]], .LC0 at toc@l(r[[REG0]])
 ; CHECK-DAG: lxv v[[REG1:[0-9]+]], 16(r[[REG0]])
-; CHECK-DAG: lxv v[[REG:[0-9]+]], 0(r3)
 ; CHECK-NEXT: xsaddqp v[[ADD:[0-9]+]], v[[REG]], v[[REG1]]
 ; CHECK-NEXT: xscvqpswz v[[CONV:[0-9]+]], v[[ADD]]
 ; CHECK-NEXT: mfvsrwz r[[REG2:[0-9]+]], v[[CONV]]
@@ -316,10 +316,10 @@ entry:
   ret i32 %conv
 
 ; CHECK-LABEL: qpConv2uw_03
+; CHECK: lxv v[[REG:[0-9]+]], 0(r3)
 ; CHECK: addis r[[REG0:[0-9]+]], r2, .LC0 at toc@ha
 ; CHECK-DAG: ld r[[REG0]], .LC0 at toc@l(r[[REG0]])
 ; CHECK-DAG: lxv v[[REG1:[0-9]+]], 16(r[[REG0]])
-; CHECK-DAG: lxv v[[REG:[0-9]+]], 0(r3)
 ; CHECK-NEXT: xsaddqp v[[ADD:[0-9]+]], v[[REG]], v[[REG1]]
 ; CHECK-NEXT: xscvqpuwz v[[CONV:[0-9]+]], v[[ADD]]
 ; CHECK-NEXT: mfvsrwz r3, v[[CONV]]
@@ -386,10 +386,10 @@ entry:
 define signext i16 @qpConv2shw_03(fp128* nocapture readonly %a) {
 ; CHECK-LABEL: qpConv2shw_03:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    lxv v2, 0(r3)
-; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
-; CHECK-NEXT:    lxv v3, 16(r4)
+; CHECK-NEXT:    addis [[REG:r[0-9]+]], r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld [[REG1:r[0-9]+]], .LC0 at toc@l([[REG]])
+; CHECK-NEXT:    lxv v3, 16([[REG1]])
 ; CHECK-NEXT:    xsaddqp v2, v2, v3
 ; CHECK-NEXT:    xscvqpswz v2, v2
 ; CHECK-NEXT:    mfvsrwz r3, v2
@@ -463,10 +463,10 @@ entry:
 define zeroext i16 @qpConv2uhw_03(fp128* nocapture readonly %a) {
 ; CHECK-LABEL: qpConv2uhw_03:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    lxv v2, 0(r3)
-; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
-; CHECK-NEXT:    lxv v3, 16(r4)
+; CHECK-NEXT:    addis [[REG:r[0-9]+]], r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld [[REG1:r[0-9]+]], .LC0 at toc@l([[REG]])
+; CHECK-NEXT:    lxv v3, 16([[REG1]])
 ; CHECK-NEXT:    xsaddqp v2, v2, v3
 ; CHECK-NEXT:    xscvqpswz v2, v2
 ; CHECK-NEXT:    mfvsrwz r3, v2
@@ -540,10 +540,10 @@ entry:
 define signext i8 @qpConv2sb_03(fp128* nocapture readonly %a) {
 ; CHECK-LABEL: qpConv2sb_03:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    lxv v2, 0(r3)
-; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
-; CHECK-NEXT:    lxv v3, 16(r4)
+; CHECK-NEXT:    addis [[REG:r[0-9]+]], r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld [[REG1:r[0-9]+]], .LC0 at toc@l([[REG]])
+; CHECK-NEXT:    lxv v3, 16([[REG1]])
 ; CHECK-NEXT:    xsaddqp v2, v2, v3
 ; CHECK-NEXT:    xscvqpswz v2, v2
 ; CHECK-NEXT:    mfvsrwz r3, v2
@@ -617,10 +617,10 @@ entry:
 define zeroext i8 @qpConv2ub_03(fp128* nocapture readonly %a) {
 ; CHECK-LABEL: qpConv2ub_03:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    lxv v2, 0(r3)
-; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
-; CHECK-NEXT:    lxv v3, 16(r4)
+; CHECK-NEXT:    addis [[REG:r[0-9]+]], r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld [[REG1:r[0-9]+]], .LC0 at toc@l([[REG]])
+; CHECK-NEXT:    lxv v3, 16([[REG1]])
 ; CHECK-NEXT:    xsaddqp v2, v2, v3
 ; CHECK-NEXT:    xscvqpswz v2, v2
 ; CHECK-NEXT:    mfvsrwz r3, v2

Modified: llvm/trunk/test/CodeGen/PowerPC/mi-scheduling-lhs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/mi-scheduling-lhs.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/mi-scheduling-lhs.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/mi-scheduling-lhs.ll Wed Jan  2 21:04:18 2019
@@ -40,8 +40,8 @@ for.end:
 ; CHECK: li [[REG4:[0-9]+]], 5
 ; CHECK: [[LAB:[a-z0-9A-Z_.]+]]:
 ; CHECK: ld [[REG2:[0-9]+]], a at toc@l([[REG1]])
-; CHECK: ld [[REG3:[0-9]+]], 0([[REG2]])
 ; CHECK: stw [[REG4]], 8([[REG2]])
+; CHECK: ld [[REG3:[0-9]+]], 0([[REG2]])
 ; CHECK: stw [[REG4]], 8([[REG3]]) 
 ; CHECK: std [[REG3]], 0([[REG3]])
 ; CHECK: bdnz [[LAB]]

Modified: llvm/trunk/test/CodeGen/PowerPC/mulld.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/mulld.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/mulld.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/mulld.ll Wed Jan  2 21:04:18 2019
@@ -11,8 +11,8 @@ define void @bn_mul_comba8(i64* nocaptur
 ; CHECK-LABEL: bn_mul_comba8:
 ; CHECK:    mulhdu
 ; CHECK-NEXT:    mulld
-; CHECK-NEXT:    mulhdu
-; CHECK-NEXT:    mulld
+; CHECK:         mulhdu
+; CHECK:         mulld
 ; CHECK-NEXT:    mulhdu
 
 

Modified: llvm/trunk/test/CodeGen/PowerPC/pre-inc-disable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/pre-inc-disable.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/pre-inc-disable.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/pre-inc-disable.ll Wed Jan  2 21:04:18 2019
@@ -10,78 +10,78 @@
 define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 signext %i_stride_pix1, i8* nocapture readonly %pix2) {
 ; CHECK-LABEL: test_pre_inc_disable_1:
 ; CHECK:   # %bb.0: # %entry
-; CHECK:    addis r6, r2
-; CHECK:    addis r7, r2,
 ; CHECK:    lfd f0, 0(r5)
-; CHECK:    xxlxor v4, v4, v4
-; CHECK:    addi r5, r6,
-; CHECK:    addi r6, r7,
+; CHECK:    addis r5, r2
+; CHECK:    addi r5, r5,
 ; CHECK:    lxvx v2, 0, r5
-; CHECK:    lxvx v3, 0, r6
+; CHECK:    addis r5, r2,
+; CHECK:    addi r5, r5,
+; CHECK:    lxvx v4, 0, r5
 ; CHECK:    xxpermdi v5, f0, f0, 2
-; CHECK-DAG: vperm v[[VR1:[0-9]+]], v4, v5, v2
-; CHECK-DAG: vperm v[[VR2:[0-9]+]], v5, v4, v3
+; CHECK:    xxlxor v3, v3, v3
+; CHECK-DAG: vperm v[[VR1:[0-9]+]], v5, v3, v4
+; CHECK-DAG: vperm v[[VR2:[0-9]+]], v3, v5, v2
 ; CHECK-DAG: xvnegsp v[[VR3:[0-9]+]], v[[VR1]]
 ; CHECK-DAG: xvnegsp v[[VR4:[0-9]+]], v[[VR2]]
 
 ; CHECK:  .LBB0_1: # %for.cond1.preheader
 ; CHECK:    lfd f0, 0(r3)
 ; CHECK:    xxpermdi v1, f0, f0, 2
-; CHECK:    vperm v6, v1, v4, v3
-; CHECK:    vperm v1, v4, v1, v2
+; CHECK:    vperm v6, v3, v1, v2
+; CHECK:    vperm v1, v1, v3, v4
 ; CHECK-DAG:    xvnegsp v6, v6
 ; CHECK-DAG:    xvnegsp v1, v1
 ; CHECK-DAG: vabsduw v1, v1, v[[VR3]]
 ; CHECK-DAG: vabsduw v6, v6, v[[VR4]]
-; CHECK:    vadduwm v1, v6, v1
+; CHECK:    vadduwm v1, v1, v6
 ; CHECK:    xxswapd v6, v1
 ; CHECK:    vadduwm v1, v1, v6
 ; CHECK:    xxspltw v6, v1, 2
 ; CHECK:    vadduwm v1, v1, v6
-; CHECK:    vextuwrx r7, r6, v1
+; CHECK:    vextuwrx r7, r5, v1
 ; CHECK:    ldux r8, r3, r4
 ; CHECK:    add r3, r3, r4
-; CHECK:    add r5, r7, r5
+; CHECK:    add r6, r7, r6
 ; CHECK:    mtvsrd f0, r8
 ; CHECK:    xxswapd v1, vs0
-; CHECK:    vperm v6, v1, v4, v3
-; CHECK:    vperm v1, v4, v1, v2
+; CHECK:    vperm v6, v3, v1, v2
+; CHECK:    vperm v1, v1, v3, v4
 ; CHECK-DAG: xvnegsp v6, v6
 ; CHECK-DAG: xvnegsp v1, v1
 ; CHECK-DAG: vabsduw v1, v1, v[[VR3]]
 ; CHECK-DAG: vabsduw v6, v6, v[[VR4]]
-; CHECK:    vadduwm v1, v6, v1
+; CHECK:    vadduwm v1, v1, v6
 ; CHECK:    xxswapd v6, v1
 ; CHECK:    vadduwm v1, v1, v6
 ; CHECK:    xxspltw v6, v1, 2
 ; CHECK:    vadduwm v1, v1, v6
-; CHECK:    vextuwrx r8, r6, v1
-; CHECK:    add r5, r8, r5
+; CHECK:    vextuwrx r7, r5, v1
+; CHECK:    add r6, r7, r6
 ; CHECK:    bdnz .LBB0_1
-; CHECK:    extsw r3, r5
+; CHECK:    extsw r3, r6
 ; CHECK:    blr
 
 ; P9BE-LABEL: test_pre_inc_disable_1:
-; P9BE:    addis r6, r2,
-; P9BE:    addis r7, r2,
 ; P9BE:    lfd f0, 0(r5)
-; P9BE:    xxlxor v4, v4, v4
-; P9BE:    addi r5, r6,
-; P9BE:    addi r6, r7,
+; P9BE:    addis r5, r2,
+; P9BE:    addi r5, r5,
 ; P9BE:    lxvx v2, 0, r5
-; P9BE:    lxvx v3, 0, r6
+; P9BE:    addis r5, r2,
+; P9BE:    addi r5, r5,
+; P9BE:    lxvx v4, 0, r5
 ; P9BE:    xxlor v5, vs0, vs0
-; P9BE:    li r6, 0
-; P9BE-DAG: vperm v[[VR1:[0-9]+]], v4, v5, v2
-; P9BE-DAG: vperm v[[VR2:[0-9]+]], v4, v5, v3
+; P9BE:    xxlxor v3, v3, v3
+; P9BE-DAG: li r5, 0
+; P9BE-DAG: vperm v[[VR1:[0-9]+]], v3, v5, v2
+; P9BE-DAG: vperm v[[VR2:[0-9]+]], v3, v5, v4
 ; P9BE-DAG: xvnegsp v[[VR3:[0-9]+]], v[[VR1]]
 ; P9BE-DAG: xvnegsp v[[VR4:[0-9]+]], v[[VR2]]
 
 ; P9BE:  .LBB0_1: # %for.cond1.preheader
 ; P9BE:    lfd f0, 0(r3)
 ; P9BE:    xxlor v1, vs0, vs0
-; P9BE:    vperm v6, v4, v1, v3
-; P9BE:    vperm v1, v4, v1, v2
+; P9BE:    vperm v6, v3, v1, v4
+; P9BE:    vperm v1, v3, v1, v2
 ; P9BE-DAG: xvnegsp v6, v6
 ; P9BE-DAG: xvnegsp v1, v1
 ; P9BE-DAG: vabsduw v1, v1, v[[VR3]]
@@ -91,26 +91,26 @@ define signext i32 @test_pre_inc_disable
 ; P9BE:    vadduwm v1, v1, v6
 ; P9BE:    xxspltw v6, v1, 1
 ; P9BE:    vadduwm v1, v1, v6
-; P9BE:    vextuwlx r[[GR1:[0-9]+]], r6, v1
+; P9BE:    vextuwlx r[[GR1:[0-9]+]], r5, v1
+; P9BE:    add r6, r[[GR1]], r6
 ; P9BE:    ldux r[[GR2:[0-9]+]], r3, r4
 ; P9BE:    add r3, r3, r4
-; P9BE:    add r5, r[[GR1]], r5
 ; P9BE:    mtvsrd v1, r[[GR2]]
-; P9BE:    vperm v6, v4, v1, v3
-; P9BE:    vperm v1, v4, v1, v2
+; P9BE:    vperm v6, v3, v1, v2
+; P9BE:    vperm v1, v3, v1, v4
 ; P9BE-DAG: xvnegsp v6, v6
 ; P9BE-DAG: xvnegsp v1, v1
-; P9BE-DAG: vabsduw v1, v1, v[[VR3]]
-; P9BE-DAG: vabsduw v6, v6, v[[VR4]]
-; P9BE:    vadduwm v1, v6, v1
+; P9BE-DAG: vabsduw v1, v1, v[[VR4]]
+; P9BE-DAG: vabsduw v6, v6, v[[VR3]]
+; P9BE:    vadduwm v1, v1, v6
 ; P9BE:    xxswapd v6, v1
 ; P9BE:    vadduwm v1, v1, v6
 ; P9BE:    xxspltw v6, v1, 1
 ; P9BE:    vadduwm v1, v1, v6
-; P9BE:    vextuwlx r8, r6, v1
-; P9BE:    add r5, r8, r5
+; P9BE:    vextuwlx r7, r5, v1
+; P9BE:    add r6, r7, r6
 ; P9BE:    bdnz .LBB0_1
-; P9BE:    extsw r3, r5
+; P9BE:    extsw r3, r6
 ; P9BE:    blr
 entry:
   %idx.ext = sext i32 %i_stride_pix1 to i64
@@ -166,24 +166,24 @@ for.cond.cleanup:
 ; Function Attrs: norecurse nounwind readonly
 define signext i32 @test_pre_inc_disable_2(i8* nocapture readonly %pix1, i8* nocapture readonly %pix2) {
 ; CHECK-LABEL: test_pre_inc_disable_2:
-; CHECK:    addis r5, r2,
-; CHECK:    addis r6, r2,
 ; CHECK:    lfd f0, 0(r3)
-; CHECK:    lfd f1, 0(r4)
-; CHECK:    xxlxor v0, v0, v0
-; CHECK:    addi r3, r5, .LCPI1_0@toc@l
-; CHECK:    addi r4, r6, .LCPI1_1@toc@l
-; CHECK:    lxvx v2, 0, r3
-; CHECK:    lxvx v3, 0, r4
-; CHECK:    xxpermdi v4, f0, f0, 2
-; CHECK:    xxpermdi v5, f1, f1, 2
-; CHECK:    vperm v1, v4, v0, v2
-; CHECK:    vperm v4, v0, v4, v3
-; CHECK:    vperm v2, v5, v0, v2
-; CHECK:    vperm v3, v0, v5, v3
-; CHECK:    vabsduw v3, v4, v3
-; CHECK:    vabsduw v2, v1, v2
-; CHECK:    vadduwm v2, v2, v3
+; CHECK:    addis r3, r2,
+; CHECK:    addi r3, r3, .LCPI1_0@toc@l
+; CHECK:    lxvx v4, 0, r3
+; CHECK:    addis r3, r2,
+; CHECK:    xxpermdi v2, f0, f0, 2
+; CHECK:    lfd f0, 0(r4)
+; CHECK:    addi r3, r3, .LCPI1_1@toc@l
+; CHECK:    xxlxor v3, v3, v3
+; CHECK:    lxvx v0, 0, r3
+; CHECK:    xxpermdi v1, f0, f0, 2
+; CHECK:    vperm v5, v2, v3, v4
+; CHECK:    vperm v2, v3, v2, v0
+; CHECK:    vperm v0, v3, v1, v0
+; CHECK:    vperm v3, v1, v3, v4
+; CHECK:    vabsduw v2, v2, v0
+; CHECK:    vabsduw v3, v5, v3
+; CHECK:    vadduwm v2, v3, v2
 ; CHECK:    xxswapd v3, v2
 ; CHECK:    vadduwm v2, v2, v3
 ; CHECK:    xxspltw v3, v2, 2
@@ -193,24 +193,24 @@ define signext i32 @test_pre_inc_disable
 ; CHECK:    blr
 
 ; P9BE-LABEL: test_pre_inc_disable_2:
-; P9BE:    addis r5, r2,
-; P9BE:    addis r6, r2,
 ; P9BE:    lfd f0, 0(r3)
-; P9BE:    lfd f1, 0(r4)
-; P9BE:    xxlxor v5, v5, v5
-; P9BE:    addi r3, r5,
-; P9BE:    addi r4, r6,
-; P9BE:    lxvx v2, 0, r3
-; P9BE:    lxvx v3, 0, r4
-; P9BE:    xxlor v4, vs0, vs0
-; P9BE:    xxlor v0, vs1, vs1
-; P9BE:    vperm v1, v5, v4, v2
-; P9BE:    vperm v4, v5, v4, v3
-; P9BE:    vperm v2, v5, v0, v2
-; P9BE:    vperm v3, v5, v0, v3
-; P9BE:    vabsduw v3, v4, v3
-; P9BE:    vabsduw v2, v1, v2
-; P9BE:    vadduwm v2, v2, v3
+; P9BE:    addis r3, r2,
+; P9BE:    addi r3, r3,
+; P9BE:    lxvx v4, 0, r3
+; P9BE:    addis r3, r2,
+; P9BE:    addi r3, r3,
+; P9BE:    xxlor v2, vs0, vs0
+; P9BE:    lfd f0, 0(r4)
+; P9BE:    lxvx v0, 0, r3
+; P9BE:    xxlxor v3, v3, v3
+; P9BE:    xxlor v1, vs0, vs0
+; P9BE:    vperm v5, v3, v2, v4
+; P9BE:    vperm v2, v3, v2, v0
+; P9BE:    vperm v0, v3, v1, v0
+; P9BE:    vperm v3, v3, v1, v4
+; P9BE:    vabsduw v2, v2, v0
+; P9BE:    vabsduw v3, v5, v3
+; P9BE:    vadduwm v2, v3, v2
 ; P9BE:    xxswapd v3, v2
 ; P9BE:    vadduwm v2, v2, v3
 ; P9BE:    xxspltw v3, v2, 1

Modified: llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll Wed Jan  2 21:04:18 2019
@@ -65,12 +65,12 @@ define <1 x float>* @test_liwzx2(<1 x fl
 ; P9LE:       # %bb.0:
 ; P9LE-NEXT:    lfiwzx f0, 0, r3
 ; P9LE-NEXT:    lfiwzx f1, 0, r4
-; P9LE-NEXT:    mr r3, r5
 ; P9LE-NEXT:    xxpermdi vs0, f0, f0, 2
 ; P9LE-NEXT:    xxpermdi vs1, f1, f1, 2
 ; P9LE-NEXT:    xvsubsp vs0, vs0, vs1
 ; P9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
 ; P9LE-NEXT:    xscvspdpn f0, vs0
+; P9LE-NEXT:    mr r3, r5
 ; P9LE-NEXT:    stfs f0, 0(r5)
 ; P9LE-NEXT:    blr
 
@@ -78,11 +78,11 @@ define <1 x float>* @test_liwzx2(<1 x fl
 ; P9BE:       # %bb.0:
 ; P9BE-NEXT:    lfiwzx f0, 0, r3
 ; P9BE-NEXT:    lfiwzx f1, 0, r4
-; P9BE-NEXT:    mr r3, r5
 ; P9BE-NEXT:    xxsldwi vs0, f0, f0, 1
 ; P9BE-NEXT:    xxsldwi vs1, f1, f1, 1
 ; P9BE-NEXT:    xvsubsp vs0, vs0, vs1
 ; P9BE-NEXT:    xscvspdpn f0, vs0
+; P9BE-NEXT:    mr r3, r5
 ; P9BE-NEXT:    stfs f0, 0(r5)
 ; P9BE-NEXT:    blr
 

Modified: llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_4.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_4.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_4.ll Wed Jan  2 21:04:18 2019
@@ -172,8 +172,8 @@ define <2 x float> @s2v_test_f2(float* n
 ; P9LE-LABEL: s2v_test_f2:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    addi r3, r3, 4
-; P9LE-NEXT:    xxspltw v2, v2, 2
-; P9LE-NEXT:    lfiwzx f0, 0, r3
+; P9LE-DAG:     xxspltw v2, v2, 2
+; P9LE-DAG:     lfiwzx f0, 0, r3
 ; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
 ; P9LE-NEXT:    vmrglw v2, v2, v3
 ; P9LE-NEXT:    blr
@@ -181,8 +181,8 @@ define <2 x float> @s2v_test_f2(float* n
 ; P9BE-LABEL: s2v_test_f2:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE:       addi r3, r3, 4
-; P9BE:       xxspltw v2, v2, 1
-; P9BE:       lfiwzx f0, 0, r3
+; P9BE-DAG:   xxspltw v2, v2, 1
+; P9BE-DAG:   lfiwzx f0, 0, r3
 ; P9BE-NEXT:  xxsldwi v3, f0, f0, 1
 ; P9BE:       vmrghw v2, v3, v2
 ; P9BE-NEXT:  blr
@@ -216,18 +216,18 @@ define <2 x float> @s2v_test_f3(float* n
 ; P9LE-LABEL: s2v_test_f3:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    sldi r4, r7, 2
-; P9LE-NEXT:    xxspltw v2, v2, 2
 ; P9LE-NEXT:    lfiwzx f0, r3, r4
-; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-DAG:     xxspltw v2, v2, 2
+; P9LE-DAG:     xxpermdi v3, f0, f0, 2
 ; P9LE-NEXT:    vmrglw v2, v2, v3
 ; P9LE-NEXT:    blr
 
 ; P9BE-LABEL: s2v_test_f3:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE:         sldi r4, r7, 2
-; P9BE:         xxspltw v2, v2, 1
 ; P9BE:         lfiwzx f0, r3, r4
-; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
+; P9BE-DAG:     xxspltw v2, v2, 1
+; P9BE-DAG:     xxsldwi v3, f0, f0, 1
 ; P9BE:         vmrghw v2, v3, v2
 ; P9BE-NEXT:    blr
 
@@ -261,18 +261,18 @@ define <2 x float> @s2v_test_f4(float* n
 ; P9LE-LABEL: s2v_test_f4:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    addi r3, r3, 4
-; P9LE-NEXT:    xxspltw v2, v2, 2
 ; P9LE-NEXT:    lfiwzx f0, 0, r3
-; P9LE-NEXT:    xxpermdi v3, f0, f0, 2
+; P9LE-DAG:    xxspltw v2, v2, 2
+; P9LE-DAG:    xxpermdi v3, f0, f0, 2
 ; P9LE-NEXT:    vmrglw v2, v2, v3
 ; P9LE-NEXT:    blr
 
 ; P9BE-LABEL: s2v_test_f4:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE:         addi r3, r3, 4
-; P9BE:         xxspltw v2, v2, 1
 ; P9BE:         lfiwzx f0, 0, r3
-; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
+; P9BE-DAG:     xxspltw v2, v2, 1
+; P9BE-DAG:     xxsldwi v3, f0, f0, 1
 ; P9BE:         vmrghw v2, v3, v2
 ; P9BE-NEXT:    blr
 

Modified: llvm/trunk/test/CodeGen/PowerPC/store_fptoi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/store_fptoi.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/store_fptoi.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/store_fptoi.ll Wed Jan  2 21:04:18 2019
@@ -296,8 +296,8 @@ entry:
 
 ; CHECK-LABEL: spConv2sdw_x
 ; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 3
-; CHECK-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]]
+; CHECK-DAG:  sldi [[REG:[0-9]+]], 5, 3
+; CHECK-DAG:  xscvdpsxds [[CONV:[0-9]+]], [[LD]]
 ; CHECK-NEXT: stxsdx [[CONV]], 4, [[REG]]
 ; CHECK-NEXT: blr
 
@@ -322,8 +322,8 @@ entry:
 
 ; CHECK-LABEL: spConv2sw_x
 ; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 2
-; CHECK-NEXT: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
+; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 2
+; CHECK-DAG: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
 ; CHECK-NEXT: stfiwx [[CONV]], 4, [[REG]]
 ; CHECK-NEXT: blr
 
@@ -348,8 +348,8 @@ entry:
 
 ; CHECK-LABEL: spConv2shw_x
 ; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK: sldi [[REG:[0-9]+]], 5, 1
-; CHECK: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
+; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 1
+; CHECK-DAG: xscvdpsxws [[CONV:[0-9]+]], [[LD]]
 ; CHECK-NEXT: stxsihx [[CONV]], 4, [[REG]]
 ; CHECK-NEXT: blr
 
@@ -680,8 +680,8 @@ entry:
 
 ; CHECK-LABEL: spConv2udw_x
 ; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 3
-; CHECK-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]]
+; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 3
+; CHECK-DAG: xscvdpuxds [[CONV:[0-9]+]], [[LD]]
 ; CHECK-NEXT: stxsdx [[CONV]], 4, [[REG]]
 ; CHECK-NEXT: blr
 
@@ -706,8 +706,8 @@ entry:
 
 ; CHECK-LABEL: spConv2uw_x
 ; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK-NEXT: sldi [[REG:[0-9]+]], 5, 2
-; CHECK-NEXT: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
+; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 2
+; CHECK-DAG: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
 ; CHECK-NEXT: stfiwx [[CONV]], 4, [[REG]]
 ; CHECK-NEXT: blr
 
@@ -732,8 +732,8 @@ entry:
 
 ; CHECK-LABEL: spConv2uhw_x
 ; CHECK: lfs [[LD:[0-9]+]], 0(3)
-; CHECK: sldi [[REG:[0-9]+]], 5, 1
-; CHECK: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
+; CHECK-DAG: sldi [[REG:[0-9]+]], 5, 1
+; CHECK-DAG: xscvdpuxws [[CONV:[0-9]+]], [[LD]]
 ; CHECK-NEXT: stxsihx [[CONV]], 4, [[REG]]
 ; CHECK-NEXT: blr
 

Modified: llvm/trunk/test/CodeGen/PowerPC/stwu-sched.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/stwu-sched.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/stwu-sched.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/stwu-sched.ll Wed Jan  2 21:04:18 2019
@@ -11,8 +11,8 @@
 ; Function Attrs: norecurse nounwind writeonly
 define void @initCombList(%0* nocapture, i32 signext) local_unnamed_addr #0 {
 ; CHECK-LABEL: initCombList:
-; CHECK:   addi 3, 3, -8
-; CHECK-NEXT: stwu 5, 64(4)
+; CHECK: addi 4, 4, -8
+; CHECK: stwu 5, 64(3)
 
 ; CHECK-ITIN-LABEL: initCombList:
 ; CHECK-ITIN: stwu 5, 64(4)

Modified: llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll Wed Jan  2 21:04:18 2019
@@ -9,7 +9,7 @@
 
 ; RUN: llc -relocation-model=pic -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
 ; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
-; RUN:   -mattr=-power9-vector < %s | FileCheck %s
+; RUN:   -mattr=-power9-vector < %s | FileCheck %s --check-prefix=CHECK-P9-NOVECTOR
 
 ; These tests verify that VSX swap optimization works when loading a scalar
 ; into a vector register.
@@ -31,18 +31,29 @@ define void @bar0() {
 ; CHECK:     stxvd2x vs0, 0, r3
 ; CHECK:     blr
 ;
+; CHECK-P9-NOVECTOR-LABEL: bar0:
+; CHECK-P9-NOVECTOR:   # %bb.0: # %entry
+; CHECK-P9-NOVECTOR:     addis r3, r2, .LC0@toc@ha
+; CHECK-P9-NOVECTOR:     ld r3, .LC0@toc@l(r3)
+; CHECK-P9-NOVECTOR:     addis r3, r2, .LC1@toc@ha
+; CHECK-P9-NOVECTOR:     addis r3, r2, .LC2@toc@ha
+; CHECK-P9-NOVECTOR:     ld r3, .LC2@toc@l(r3)
+; CHECK-P9-NOVECTOR:     xxpermdi vs0, vs1, vs0, 1
+; CHECK-P9-NOVECTOR:     stxvd2x vs0, 0, r3
+; CHECK-P9-NOVECTOR:     blr
+;
 ; CHECK-P9-LABEL: bar0:
 ; CHECK-P9:   # %bb.0: # %entry
 ; CHECK-P9:     addis r3, r2, .LC0@toc@ha
-; CHECK-P9:     addis r4, r2, .LC1@toc@ha
 ; CHECK-P9:     ld r3, .LC0@toc@l(r3)
-; CHECK-P9:     ld r4, .LC1@toc@l(r4)
-; CHECK-P9:     lfd f0, 0(r3)
-; CHECK-P9:     lxvx vs1, 0, r4
+; CHECK-P9:     lxvx vs0, 0, r3
+; CHECK-P9:     addis r3, r2, .LC1@toc@ha
+; CHECK-P9:     ld r3, .LC1@toc@l(r3)
+; CHECK-P9:     lfd f1, 0(r3)
 ; CHECK-P9:     addis r3, r2, .LC2@toc@ha
 ; CHECK-P9:     ld r3, .LC2@toc@l(r3)
-; CHECK-P9:     xxpermdi vs0, f0, f0, 2
-; CHECK-P9:     xxpermdi vs0, vs1, vs0, 1
+; CHECK-P9:     xxpermdi vs1, f1, f1, 2
+; CHECK-P9:     xxpermdi vs0, vs0, vs1, 1
 ; CHECK-P9:     stxvx vs0, 0, r3
 ; CHECK-P9:     blr
 entry:
@@ -65,18 +76,29 @@ define void @bar1() {
 ; CHECK:     stxvd2x vs0, 0, r3
 ; CHECK:     blr
 ;
+; CHECK-P9-NOVECTOR-LABEL: bar1:
+; CHECK-P9-NOVECTOR:   # %bb.0: # %entry
+; CHECK-P9-NOVECTOR:     addis r3, r2, .LC0@toc@ha
+; CHECK-P9-NOVECTOR:     ld r3, .LC0@toc@l(r3)
+; CHECK-P9-NOVECTOR:     addis r3, r2, .LC1@toc@ha
+; CHECK-P9-NOVECTOR:     addis r3, r2, .LC2@toc@ha
+; CHECK-P9-NOVECTOR:     ld r3, .LC2@toc@l(r3)
+; CHECK-P9-NOVECTOR:     xxmrghd vs0, vs0, vs1
+; CHECK-P9-NOVECTOR:     stxvd2x vs0, 0, r3
+; CHECK-P9-NOVECTOR:     blr
+;
 ; CHECK-P9-LABEL: bar1:
 ; CHECK-P9:   # %bb.0: # %entry
 ; CHECK-P9:     addis r3, r2, .LC0@toc@ha
-; CHECK-P9:     addis r4, r2, .LC1@toc@ha
 ; CHECK-P9:     ld r3, .LC0@toc@l(r3)
-; CHECK-P9:     ld r4, .LC1@toc@l(r4)
-; CHECK-P9:     lfd f0, 0(r3)
-; CHECK-P9:     lxvx vs1, 0, r4
+; CHECK-P9:     lxvx vs0, 0, r3
+; CHECK-P9:     addis r3, r2, .LC1@toc@ha
+; CHECK-P9:     ld r3, .LC1@toc@l(r3)
+; CHECK-P9:     lfd f1, 0(r3)
 ; CHECK-P9:     addis r3, r2, .LC2@toc@ha
 ; CHECK-P9:     ld r3, .LC2@toc@l(r3)
-; CHECK-P9:     xxpermdi vs0, f0, f0, 2
-; CHECK-P9:     xxmrgld vs0, vs0, vs1
+; CHECK-P9:     xxpermdi vs1, f1, f1, 2
+; CHECK-P9:     xxmrgld vs0, vs1, vs0
 ; CHECK-P9:     stxvx vs0, 0, r3
 ; CHECK-P9:     blr
 entry:

Modified: llvm/trunk/test/CodeGen/PowerPC/vec-itofp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec-itofp.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec-itofp.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec-itofp.ll Wed Jan  2 21:04:18 2019
@@ -16,12 +16,12 @@ entry:
   ret void
 ; CHECK-P9-LABEL: @test8
 ; CHECK-P9: vperm
-; CHECK-P9: vperm
-; CHECK-P9: vperm
-; CHECK-P9: vperm
 ; CHECK-P9: xvcvuxddp
+; CHECK-P9: vperm
 ; CHECK-P9: xvcvuxddp
+; CHECK-P9: vperm
 ; CHECK-P9: xvcvuxddp
+; CHECK-P9: vperm
 ; CHECK-P9: xvcvuxddp
 ; CHECK-P8-LABEL: @test8
 ; CHECK-P8: vperm
@@ -42,8 +42,8 @@ entry:
   ret void
 ; CHECK-P9-LABEL: @test4
 ; CHECK-P9: vperm
-; CHECK-P9: vperm
 ; CHECK-P9: xvcvuxddp
+; CHECK-P9: vperm
 ; CHECK-P9: xvcvuxddp
 ; CHECK-P8-LABEL: @test4
 ; CHECK-P8: vperm
@@ -113,16 +113,16 @@ entry:
   ret void
 ; CHECK-P9-LABEL: @stest8
 ; CHECK-P9: vperm
-; CHECK-P9: vperm
-; CHECK-P9: vperm
-; CHECK-P9: vperm
-; CHECK-P9: vextsh2d
-; CHECK-P9: vextsh2d
-; CHECK-P9: vextsh2d
 ; CHECK-P9: vextsh2d
 ; CHECK-P9: xvcvsxddp
+; CHECK-P9: vperm
+; CHECK-P9: vextsh2d
 ; CHECK-P9: xvcvsxddp
+; CHECK-P9: vperm
+; CHECK-P9: vextsh2d
 ; CHECK-P9: xvcvsxddp
+; CHECK-P9: vperm
+; CHECK-P9: vextsh2d
 ; CHECK-P9: xvcvsxddp
 }
 
@@ -134,10 +134,10 @@ entry:
   ret void
 ; CHECK-P9-LABEL: @stest4
 ; CHECK-P9: vperm
-; CHECK-P9: vperm
-; CHECK-P9: vextsh2d
 ; CHECK-P9: vextsh2d
 ; CHECK-P9: xvcvsxddp
+; CHECK-P9: vperm
+; CHECK-P9: vextsh2d
 ; CHECK-P9: xvcvsxddp
 }
 

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll Wed Jan  2 21:04:18 2019
@@ -36,35 +36,35 @@ define i32 @test2elt(i64 %a.coerce) loca
 ; CHECK-P9-NEXT:    xxswapd v2, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xxsldwi vs1, v2, v2, 3
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mfvsrwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v2, vs1
+; CHECK-P9-NEXT:    xxswapd v3, vs0
 ; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    li r3, 0
 ; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrd f0, r3
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
 ; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mfvsrwz r4, f1
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r4
 ; CHECK-BE-NEXT:    vmrghh v2, v2, v3
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
@@ -111,62 +111,62 @@ define i64 @test4elt(<4 x float> %a) loc
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P9-NEXT:    xxswapd vs1, v2
-; CHECK-P9-NEXT:    xxsldwi vs2, v2, v2, 1
-; CHECK-P9-NEXT:    xscvspdpn f3, v2
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mfvsrwz r5, f3
 ; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mfvsrwz r4, f1
-; CHECK-P9-NEXT:    mfvsrwz r6, f2
-; CHECK-P9-NEXT:    mtvsrd f2, r5
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    xxswapd vs0, v2
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, v2
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
 ; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    vmrglh v3, v4, v5
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    vmrglh v2, v4, v2
+; CHECK-P9-NEXT:    vmrglw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-BE-NEXT:    xxswapd vs1, v2
-; CHECK-BE-NEXT:    xxsldwi vs2, v2, v2, 1
-; CHECK-BE-NEXT:    xscvspdpn f3, v2
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mfvsrwz r5, f3
-; CHECK-BE-NEXT:    sldi r5, r5, 48
 ; CHECK-BE-NEXT:    mfvsrwz r3, f0
-; CHECK-BE-NEXT:    mfvsrwz r4, f1
-; CHECK-BE-NEXT:    mfvsrwz r6, f2
-; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    xscvspdpn f0, v2
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    sldi r6, r6, 48
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    vmrghh v3, v4, v5
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v2, v4, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -238,119 +238,119 @@ define <8 x i16> @test8elt(<8 x float>*
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxswapd vs3, vs1
-; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 1
-; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxswapd vs6, vs0
-; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 1
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvspdpn f4, vs4
-; CHECK-P9-NEXT:    xscvspdpn f5, vs5
-; CHECK-P9-NEXT:    xscvspdpn f6, vs6
-; CHECK-P9-NEXT:    xscvspdpn f7, vs7
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    xscvdpsxws f5, f5
-; CHECK-P9-NEXT:    xscvdpsxws f6, f6
-; CHECK-P9-NEXT:    xscvdpsxws f7, f7
-; CHECK-P9-NEXT:    mfvsrwz r5, f1
-; CHECK-P9-NEXT:    mfvsrwz r9, f0
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    mfvsrwz r3, f2
-; CHECK-P9-NEXT:    mfvsrwz r4, f3
-; CHECK-P9-NEXT:    mfvsrwz r6, f4
-; CHECK-P9-NEXT:    mfvsrwz r7, f5
-; CHECK-P9-NEXT:    mfvsrwz r8, f6
-; CHECK-P9-NEXT:    mfvsrwz r10, f7
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    mtvsrd f6, r9
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    mtvsrd f4, r7
-; CHECK-P9-NEXT:    mtvsrd f5, r8
-; CHECK-P9-NEXT:    mtvsrd f7, r10
-; CHECK-P9-NEXT:    xxswapd v4, vs2
-; CHECK-P9-NEXT:    xxswapd v6, vs6
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
-; CHECK-P9-NEXT:    xxswapd v0, vs4
-; CHECK-P9-NEXT:    xxswapd v1, vs5
-; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs2
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    xscvspdpn f2, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    vmrglh v3, v4, v5
-; CHECK-P9-NEXT:    vmrglh v4, v1, v0
-; CHECK-P9-NEXT:    vmrglh v5, v6, v7
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    vmrglh v3, v3, v4
 ; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd vs1, vs0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    vmrglh v4, v4, v5
+; CHECK-P9-NEXT:    vmrglw v3, v4, v3
 ; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    xxsldwi vs4, vs1, vs1, 1
-; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxswapd vs6, vs0
-; CHECK-BE-NEXT:    xxsldwi vs7, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvspdpn f6, vs6
-; CHECK-BE-NEXT:    xscvspdpn f7, vs7
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    mfvsrwz r5, f1
-; CHECK-BE-NEXT:    mfvsrwz r9, f0
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    sldi r9, r9, 48
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    mfvsrwz r3, f2
-; CHECK-BE-NEXT:    mfvsrwz r4, f3
-; CHECK-BE-NEXT:    mfvsrwz r6, f4
-; CHECK-BE-NEXT:    mfvsrwz r7, f5
-; CHECK-BE-NEXT:    mfvsrwz r8, f6
-; CHECK-BE-NEXT:    mfvsrwz r10, f7
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mtvsrd v6, r9
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
 ; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    sldi r6, r6, 48
-; CHECK-BE-NEXT:    sldi r7, r7, 48
-; CHECK-BE-NEXT:    sldi r8, r8, 48
-; CHECK-BE-NEXT:    sldi r10, r10, 48
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    mtvsrd v0, r7
-; CHECK-BE-NEXT:    mtvsrd v1, r8
-; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    vmrghh v3, v4, v5
-; CHECK-BE-NEXT:    vmrghh v4, v1, v0
-; CHECK-BE-NEXT:    vmrghh v5, v6, v7
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    sldi r3, r3, 48
 ; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vmrghh v4, v4, v5
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
 ; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -481,259 +481,235 @@ define void @test16elt(<16 x i16>* noali
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs2, 16(r4)
-; CHECK-P9-NEXT:    lxv vs3, 0(r4)
-; CHECK-P9-NEXT:    lxv vs0, 48(r4)
-; CHECK-P9-NEXT:    lxv vs1, 32(r4)
-; CHECK-P9-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    xxsldwi vs4, vs3, vs3, 3
-; CHECK-P9-NEXT:    xxswapd vs5, vs3
-; CHECK-P9-NEXT:    xxsldwi vs6, vs3, vs3, 1
-; CHECK-P9-NEXT:    xxsldwi vs7, vs2, vs2, 3
-; CHECK-P9-NEXT:    xxswapd vs8, vs2
-; CHECK-P9-NEXT:    xxsldwi vs9, vs2, vs2, 1
-; CHECK-P9-NEXT:    xxsldwi vs10, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxswapd vs11, vs1
-; CHECK-P9-NEXT:    xxsldwi vs12, vs1, vs1, 1
-; CHECK-P9-NEXT:    xxsldwi vs13, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 1
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    lxv vs3, 16(r4)
+; CHECK-P9-NEXT:    xscvspdpn f5, vs1
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    xscvspdpn f8, vs3
+; CHECK-P9-NEXT:    xxswapd vs4, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
 ; CHECK-P9-NEXT:    xscvspdpn f4, vs4
-; CHECK-P9-NEXT:    xscvspdpn f5, vs5
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f8, f8
+; CHECK-P9-NEXT:    xxsldwi vs6, vs3, vs3, 3
+; CHECK-P9-NEXT:    xxswapd vs7, vs3
 ; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xxsldwi vs3, vs3, vs3, 1
 ; CHECK-P9-NEXT:    xscvspdpn f7, vs7
-; CHECK-P9-NEXT:    xscvspdpn f8, vs8
-; CHECK-P9-NEXT:    xscvspdpn f9, vs9
-; CHECK-P9-NEXT:    xscvspdpn f10, vs10
-; CHECK-P9-NEXT:    xscvspdpn f11, vs11
-; CHECK-P9-NEXT:    xscvspdpn f12, vs12
-; CHECK-P9-NEXT:    xscvspdpn f13, vs13
-; CHECK-P9-NEXT:    xscvspdpn v2, v2
-; CHECK-P9-NEXT:    xscvspdpn v3, v3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    xscvdpsxws f5, f5
 ; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    mfvsrwz r5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f7, f7
-; CHECK-P9-NEXT:    xscvdpsxws f8, f8
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mtvsrd f5, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f8
+; CHECK-P9-NEXT:    mtvsrd f8, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f2
+; CHECK-P9-NEXT:    lxv vs0, 32(r4)
+; CHECK-P9-NEXT:    xxsldwi vs9, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs10, vs0
+; CHECK-P9-NEXT:    xscvspdpn f9, vs9
+; CHECK-P9-NEXT:    xscvspdpn f10, vs10
 ; CHECK-P9-NEXT:    xscvdpsxws f9, f9
 ; CHECK-P9-NEXT:    xscvdpsxws f10, f10
-; CHECK-P9-NEXT:    xscvdpsxws f11, f11
-; CHECK-P9-NEXT:    xscvdpsxws f12, f12
-; CHECK-P9-NEXT:    xscvdpsxws f13, f13
-; CHECK-P9-NEXT:    xscvdpsxws v2, v2
-; CHECK-P9-NEXT:    xscvdpsxws v3, v3
-; CHECK-P9-NEXT:    mfvsrwz r4, f3
-; CHECK-P9-NEXT:    mfvsrwz r5, f2
-; CHECK-P9-NEXT:    mfvsrwz r12, f1
-; CHECK-P9-NEXT:    mfvsrwz r0, f0
-; CHECK-P9-NEXT:    mfvsrwz r6, f4
-; CHECK-P9-NEXT:    mfvsrwz r7, f5
-; CHECK-P9-NEXT:    mfvsrwz r8, f6
-; CHECK-P9-NEXT:    mfvsrwz r9, f7
-; CHECK-P9-NEXT:    mfvsrwz r10, f8
-; CHECK-P9-NEXT:    mfvsrwz r11, f9
-; CHECK-P9-NEXT:    mfvsrwz r30, f10
-; CHECK-P9-NEXT:    mfvsrwz r29, f11
-; CHECK-P9-NEXT:    mfvsrwz r28, f12
-; CHECK-P9-NEXT:    mfvsrwz r27, f13
-; CHECK-P9-NEXT:    mfvsrwz r26, v2
-; CHECK-P9-NEXT:    mfvsrwz r25, v3
-; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f4
+; CHECK-P9-NEXT:    mtvsrd f4, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f1
 ; CHECK-P9-NEXT:    mtvsrd f1, r5
-; CHECK-P9-NEXT:    mtvsrd f8, r12
-; CHECK-P9-NEXT:    mtvsrd f9, r0
-; CHECK-P9-NEXT:    mtvsrd f2, r6
-; CHECK-P9-NEXT:    mtvsrd f3, r7
-; CHECK-P9-NEXT:    mtvsrd f4, r8
-; CHECK-P9-NEXT:    mtvsrd f5, r9
-; CHECK-P9-NEXT:    mtvsrd f6, r10
-; CHECK-P9-NEXT:    mtvsrd f7, r11
-; CHECK-P9-NEXT:    mtvsrd f10, r30
-; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd f11, r29
-; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd f12, r28
-; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd f13, r27
-; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd v2, r26
-; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd v3, r25
-; CHECK-P9-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xxswapd v5, vs2
+; CHECK-P9-NEXT:    mfvsrwz r5, f6
+; CHECK-P9-NEXT:    xxswapd v2, vs2
+; CHECK-P9-NEXT:    xxswapd v3, vs4
+; CHECK-P9-NEXT:    xscvspdpn f2, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mtvsrd f6, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f7
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    lxv vs1, 48(r4)
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    xxswapd v3, vs5
+; CHECK-P9-NEXT:    mtvsrd f7, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f3
+; CHECK-P9-NEXT:    vmrglh v3, v3, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs6
+; CHECK-P9-NEXT:    xxswapd v5, vs7
+; CHECK-P9-NEXT:    mtvsrd f3, r5
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    xxswapd v0, vs3
-; CHECK-P9-NEXT:    xxswapd v1, vs4
-; CHECK-P9-NEXT:    xxswapd v6, vs5
-; CHECK-P9-NEXT:    xxswapd v7, vs6
-; CHECK-P9-NEXT:    xxswapd v8, vs1
-; CHECK-P9-NEXT:    xxswapd v9, vs7
-; CHECK-P9-NEXT:    xxswapd v10, vs10
-; CHECK-P9-NEXT:    xxswapd v11, vs11
-; CHECK-P9-NEXT:    xxswapd v12, vs8
-; CHECK-P9-NEXT:    xxswapd v13, vs12
-; CHECK-P9-NEXT:    xxswapd v14, vs13
-; CHECK-P9-NEXT:    xxswapd v2, v2
-; CHECK-P9-NEXT:    xxswapd v15, vs9
-; CHECK-P9-NEXT:    xxswapd v3, v3
-; CHECK-P9-NEXT:    vmrglh v5, v0, v5
-; CHECK-P9-NEXT:    vmrglh v4, v4, v1
-; CHECK-P9-NEXT:    vmrglh v0, v7, v6
-; CHECK-P9-NEXT:    vmrglh v1, v8, v9
-; CHECK-P9-NEXT:    vmrglh v6, v11, v10
-; CHECK-P9-NEXT:    vmrglh v7, v12, v13
-; CHECK-P9-NEXT:    vmrglh v2, v2, v14
-; CHECK-P9-NEXT:    vmrglh v3, v15, v3
-; CHECK-P9-NEXT:    vmrglw v4, v4, v5
-; CHECK-P9-NEXT:    vmrglw v5, v1, v0
-; CHECK-P9-NEXT:    vmrglw v0, v7, v6
+; CHECK-P9-NEXT:    vmrglh v4, v5, v4
+; CHECK-P9-NEXT:    xxswapd v5, vs8
+; CHECK-P9-NEXT:    vmrglh v5, v5, v0
+; CHECK-P9-NEXT:    mfvsrwz r4, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r4
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
 ; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    xxmrgld vs0, v5, v4
-; CHECK-P9-NEXT:    xxmrgld vs1, v2, v0
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxmrgld vs2, v3, v2
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs1, vs1, 3
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    xxswapd vs0, vs1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    vmrglh v2, v4, v2
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs1, vs1, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r5, f9
+; CHECK-P9-NEXT:    mtvsrd f9, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f10
+; CHECK-P9-NEXT:    mtvsrd f10, r5
+; CHECK-P9-NEXT:    xxswapd v0, vs9
+; CHECK-P9-NEXT:    xxswapd v1, vs10
+; CHECK-P9-NEXT:    vmrglh v0, v1, v0
+; CHECK-P9-NEXT:    vmrglw v2, v2, v0
+; CHECK-P9-NEXT:    stxv vs2, 0(r3)
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    vmrglh v4, v4, v5
+; CHECK-P9-NEXT:    vmrglw v3, v4, v3
+; CHECK-P9-NEXT:    xxmrgld vs0, v3, v2
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs2, 0(r4)
-; CHECK-BE-NEXT:    lxv vs3, 16(r4)
-; CHECK-BE-NEXT:    lxv vs0, 32(r4)
-; CHECK-BE-NEXT:    lxv vs1, 48(r4)
-; CHECK-BE-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
-; CHECK-BE-NEXT:    xxswapd vs5, vs3
-; CHECK-BE-NEXT:    xxsldwi vs6, vs3, vs3, 1
-; CHECK-BE-NEXT:    xxsldwi vs7, vs2, vs2, 3
-; CHECK-BE-NEXT:    xxswapd vs8, vs2
-; CHECK-BE-NEXT:    xxsldwi vs9, vs2, vs2, 1
-; CHECK-BE-NEXT:    xxsldwi vs10, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs11, vs1
-; CHECK-BE-NEXT:    xxsldwi vs12, vs1, vs1, 1
-; CHECK-BE-NEXT:    xxsldwi vs13, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxswapd v2, vs0
-; CHECK-BE-NEXT:    xxsldwi v3, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    lxv vs1, 16(r4)
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvspdpn f6, vs6
-; CHECK-BE-NEXT:    xscvspdpn f7, vs7
-; CHECK-BE-NEXT:    xscvspdpn f8, vs8
-; CHECK-BE-NEXT:    xscvspdpn f9, vs9
-; CHECK-BE-NEXT:    xscvspdpn f10, vs10
-; CHECK-BE-NEXT:    xscvspdpn f11, vs11
-; CHECK-BE-NEXT:    xscvspdpn f12, vs12
-; CHECK-BE-NEXT:    xscvspdpn f13, vs13
-; CHECK-BE-NEXT:    xscvspdpn v2, v2
-; CHECK-BE-NEXT:    xscvspdpn v3, v3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mfvsrwz r5, f2
+; CHECK-BE-NEXT:    xscvspdpn f4, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v2, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f4
+; CHECK-BE-NEXT:    lxv vs0, 0(r4)
+; CHECK-BE-NEXT:    xxsldwi vs2, vs0, vs0, 3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r5
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrwz r5, f3
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrwz r5, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    sldi r5, r5, 48
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v5, r5
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    mfvsrwz r5, f1
+; CHECK-BE-NEXT:    lxv vs1, 48(r4)
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v5, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f0
+; CHECK-BE-NEXT:    lxv vs0, 32(r4)
+; CHECK-BE-NEXT:    xscvspdpn f5, vs1
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    xscvdpsxws f8, f8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f9
-; CHECK-BE-NEXT:    xscvdpsxws f10, f10
-; CHECK-BE-NEXT:    xscvdpsxws f11, f11
-; CHECK-BE-NEXT:    xscvdpsxws f12, f12
-; CHECK-BE-NEXT:    xscvdpsxws f13, f13
-; CHECK-BE-NEXT:    xscvdpsxws v2, v2
-; CHECK-BE-NEXT:    xscvdpsxws v3, v3
-; CHECK-BE-NEXT:    mfvsrwz r4, f3
-; CHECK-BE-NEXT:    mfvsrwz r5, f2
-; CHECK-BE-NEXT:    mfvsrwz r12, f1
-; CHECK-BE-NEXT:    mfvsrwz r0, f0
-; CHECK-BE-NEXT:    mfvsrwz r6, f4
-; CHECK-BE-NEXT:    mfvsrwz r7, f5
-; CHECK-BE-NEXT:    mfvsrwz r8, f6
-; CHECK-BE-NEXT:    mfvsrwz r9, f7
-; CHECK-BE-NEXT:    mfvsrwz r10, f8
-; CHECK-BE-NEXT:    mfvsrwz r11, f9
-; CHECK-BE-NEXT:    mfvsrwz r30, f10
-; CHECK-BE-NEXT:    mfvsrwz r29, f11
-; CHECK-BE-NEXT:    mfvsrwz r28, f12
-; CHECK-BE-NEXT:    mfvsrwz r27, f13
-; CHECK-BE-NEXT:    mfvsrwz r26, v2
-; CHECK-BE-NEXT:    mfvsrwz r25, v3
-; CHECK-BE-NEXT:    sldi r4, r4, 48
 ; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    sldi r12, r12, 48
-; CHECK-BE-NEXT:    sldi r0, r0, 48
-; CHECK-BE-NEXT:    sldi r6, r6, 48
-; CHECK-BE-NEXT:    sldi r7, r7, 48
-; CHECK-BE-NEXT:    sldi r8, r8, 48
-; CHECK-BE-NEXT:    sldi r9, r9, 48
-; CHECK-BE-NEXT:    sldi r10, r10, 48
-; CHECK-BE-NEXT:    sldi r11, r11, 48
-; CHECK-BE-NEXT:    sldi r30, r30, 48
-; CHECK-BE-NEXT:    sldi r29, r29, 48
-; CHECK-BE-NEXT:    sldi r28, r28, 48
-; CHECK-BE-NEXT:    sldi r27, r27, 48
-; CHECK-BE-NEXT:    sldi r26, r26, 48
-; CHECK-BE-NEXT:    sldi r25, r25, 48
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    mtvsrd v0, r5
+; CHECK-BE-NEXT:    vmrghh v5, v5, v0
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mfvsrwz r4, f5
+; CHECK-BE-NEXT:    xxmrghd vs4, v3, v2
+; CHECK-BE-NEXT:    sldi r4, r4, 48
 ; CHECK-BE-NEXT:    mtvsrd v2, r4
-; CHECK-BE-NEXT:    mtvsrd v3, r5
-; CHECK-BE-NEXT:    mtvsrd v10, r12
-; CHECK-BE-NEXT:    mtvsrd v14, r0
-; CHECK-BE-NEXT:    mtvsrd v4, r6
-; CHECK-BE-NEXT:    mtvsrd v5, r7
-; CHECK-BE-NEXT:    mtvsrd v0, r8
-; CHECK-BE-NEXT:    mtvsrd v1, r9
-; CHECK-BE-NEXT:    mtvsrd v6, r10
-; CHECK-BE-NEXT:    mtvsrd v7, r11
-; CHECK-BE-NEXT:    mtvsrd v8, r30
-; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v9, r29
-; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v11, r28
-; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v12, r27
-; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v13, r26
-; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v15, r25
-; CHECK-BE-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    vmrghh v2, v2, v0
-; CHECK-BE-NEXT:    vmrghh v5, v6, v1
-; CHECK-BE-NEXT:    vmrghh v3, v3, v7
-; CHECK-BE-NEXT:    vmrghh v0, v9, v8
-; CHECK-BE-NEXT:    vmrghh v1, v10, v11
-; CHECK-BE-NEXT:    vmrghh v6, v13, v12
-; CHECK-BE-NEXT:    vmrghh v7, v14, v15
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    vmrghw v3, v3, v5
-; CHECK-BE-NEXT:    vmrghw v4, v1, v0
-; CHECK-BE-NEXT:    vmrghw v5, v7, v6
-; CHECK-BE-NEXT:    xxmrghd vs0, v3, v2
-; CHECK-BE-NEXT:    xxmrghd vs1, v5, v4
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    blr
+; CHECK-BE-NEXT:    mfvsrwz r4, f2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    stxv vs4, 0(r3)
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f3
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vmrghh v2, v2, v4
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f0
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v5, r4
+; CHECK-BE-NEXT:    vmrghh v4, v4, v5
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
+; CHECK-BE-NEXT:    xxmrghd vs0, v3, v2
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x float>, <16 x float>* %0, align 64
   %1 = fptoui <16 x float> %a to <16 x i16>
@@ -768,35 +744,35 @@ define i32 @test2elt_signed(i64 %a.coerc
 ; CHECK-P9-NEXT:    xxswapd v2, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xxsldwi vs1, v2, v2, 3
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mfvsrwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v2, vs1
+; CHECK-P9-NEXT:    xxswapd v3, vs0
 ; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    li r3, 0
 ; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrd f0, r3
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
 ; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mfvsrwz r4, f1
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r4
 ; CHECK-BE-NEXT:    vmrghh v2, v2, v3
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
@@ -843,62 +819,62 @@ define i64 @test4elt_signed(<4 x float>
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P9-NEXT:    xxswapd vs1, v2
-; CHECK-P9-NEXT:    xxsldwi vs2, v2, v2, 1
-; CHECK-P9-NEXT:    xscvspdpn f3, v2
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mfvsrwz r5, f3
 ; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mfvsrwz r4, f1
-; CHECK-P9-NEXT:    mfvsrwz r6, f2
-; CHECK-P9-NEXT:    mtvsrd f2, r5
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    xxswapd vs0, v2
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, v2
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
 ; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    vmrglh v3, v4, v5
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    vmrglh v2, v4, v2
+; CHECK-P9-NEXT:    vmrglw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-BE-NEXT:    xxswapd vs1, v2
-; CHECK-BE-NEXT:    xxsldwi vs2, v2, v2, 1
-; CHECK-BE-NEXT:    xscvspdpn f3, v2
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mfvsrwz r5, f3
-; CHECK-BE-NEXT:    sldi r5, r5, 48
 ; CHECK-BE-NEXT:    mfvsrwz r3, f0
-; CHECK-BE-NEXT:    mfvsrwz r4, f1
-; CHECK-BE-NEXT:    mfvsrwz r6, f2
-; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    xscvspdpn f0, v2
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    sldi r6, r6, 48
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    vmrghh v3, v4, v5
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghh v2, v4, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -970,119 +946,119 @@ define <8 x i16> @test8elt_signed(<8 x f
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxswapd vs3, vs1
-; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 1
-; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxswapd vs6, vs0
-; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 1
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvspdpn f4, vs4
-; CHECK-P9-NEXT:    xscvspdpn f5, vs5
-; CHECK-P9-NEXT:    xscvspdpn f6, vs6
-; CHECK-P9-NEXT:    xscvspdpn f7, vs7
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    xscvdpsxws f5, f5
-; CHECK-P9-NEXT:    xscvdpsxws f6, f6
-; CHECK-P9-NEXT:    xscvdpsxws f7, f7
-; CHECK-P9-NEXT:    mfvsrwz r5, f1
-; CHECK-P9-NEXT:    mfvsrwz r9, f0
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    mfvsrwz r3, f2
-; CHECK-P9-NEXT:    mfvsrwz r4, f3
-; CHECK-P9-NEXT:    mfvsrwz r6, f4
-; CHECK-P9-NEXT:    mfvsrwz r7, f5
-; CHECK-P9-NEXT:    mfvsrwz r8, f6
-; CHECK-P9-NEXT:    mfvsrwz r10, f7
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    mtvsrd f6, r9
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    mtvsrd f4, r7
-; CHECK-P9-NEXT:    mtvsrd f5, r8
-; CHECK-P9-NEXT:    mtvsrd f7, r10
-; CHECK-P9-NEXT:    xxswapd v4, vs2
-; CHECK-P9-NEXT:    xxswapd v6, vs6
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
-; CHECK-P9-NEXT:    xxswapd v0, vs4
-; CHECK-P9-NEXT:    xxswapd v1, vs5
-; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs2
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    xscvspdpn f2, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    vmrglh v3, v4, v5
-; CHECK-P9-NEXT:    vmrglh v4, v1, v0
-; CHECK-P9-NEXT:    vmrglh v5, v6, v7
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    vmrglh v3, v3, v4
 ; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd vs1, vs0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    vmrglh v4, v4, v5
+; CHECK-P9-NEXT:    vmrglw v3, v4, v3
 ; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    xxsldwi vs4, vs1, vs1, 1
-; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxswapd vs6, vs0
-; CHECK-BE-NEXT:    xxsldwi vs7, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvspdpn f6, vs6
-; CHECK-BE-NEXT:    xscvspdpn f7, vs7
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    mfvsrwz r5, f1
-; CHECK-BE-NEXT:    mfvsrwz r9, f0
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    sldi r9, r9, 48
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    mfvsrwz r3, f2
-; CHECK-BE-NEXT:    mfvsrwz r4, f3
-; CHECK-BE-NEXT:    mfvsrwz r6, f4
-; CHECK-BE-NEXT:    mfvsrwz r7, f5
-; CHECK-BE-NEXT:    mfvsrwz r8, f6
-; CHECK-BE-NEXT:    mfvsrwz r10, f7
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mtvsrd v6, r9
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
 ; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    sldi r6, r6, 48
-; CHECK-BE-NEXT:    sldi r7, r7, 48
-; CHECK-BE-NEXT:    sldi r8, r8, 48
-; CHECK-BE-NEXT:    sldi r10, r10, 48
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    mtvsrd v0, r7
-; CHECK-BE-NEXT:    mtvsrd v1, r8
-; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    vmrghh v3, v4, v5
-; CHECK-BE-NEXT:    vmrghh v4, v1, v0
-; CHECK-BE-NEXT:    vmrghh v5, v6, v7
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    sldi r3, r3, 48
 ; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vmrghh v4, v4, v5
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
 ; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -1213,258 +1189,234 @@ define void @test16elt_signed(<16 x i16>
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs2, 16(r4)
-; CHECK-P9-NEXT:    lxv vs3, 0(r4)
-; CHECK-P9-NEXT:    lxv vs0, 48(r4)
-; CHECK-P9-NEXT:    lxv vs1, 32(r4)
-; CHECK-P9-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    xxsldwi vs4, vs3, vs3, 3
-; CHECK-P9-NEXT:    xxswapd vs5, vs3
-; CHECK-P9-NEXT:    xxsldwi vs6, vs3, vs3, 1
-; CHECK-P9-NEXT:    xxsldwi vs7, vs2, vs2, 3
-; CHECK-P9-NEXT:    xxswapd vs8, vs2
-; CHECK-P9-NEXT:    xxsldwi vs9, vs2, vs2, 1
-; CHECK-P9-NEXT:    xxsldwi vs10, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxswapd vs11, vs1
-; CHECK-P9-NEXT:    xxsldwi vs12, vs1, vs1, 1
-; CHECK-P9-NEXT:    xxsldwi vs13, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 1
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    lxv vs3, 16(r4)
+; CHECK-P9-NEXT:    xscvspdpn f5, vs1
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    xscvspdpn f8, vs3
+; CHECK-P9-NEXT:    xxswapd vs4, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
 ; CHECK-P9-NEXT:    xscvspdpn f4, vs4
-; CHECK-P9-NEXT:    xscvspdpn f5, vs5
+; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f8, f8
+; CHECK-P9-NEXT:    xxsldwi vs6, vs3, vs3, 3
+; CHECK-P9-NEXT:    xxswapd vs7, vs3
 ; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xxsldwi vs3, vs3, vs3, 1
 ; CHECK-P9-NEXT:    xscvspdpn f7, vs7
-; CHECK-P9-NEXT:    xscvspdpn f8, vs8
-; CHECK-P9-NEXT:    xscvspdpn f9, vs9
-; CHECK-P9-NEXT:    xscvspdpn f10, vs10
-; CHECK-P9-NEXT:    xscvspdpn f11, vs11
-; CHECK-P9-NEXT:    xscvspdpn f12, vs12
-; CHECK-P9-NEXT:    xscvspdpn f13, vs13
-; CHECK-P9-NEXT:    xscvspdpn v2, v2
-; CHECK-P9-NEXT:    xscvspdpn v3, v3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    xscvdpsxws f5, f5
 ; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    mfvsrwz r5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f7, f7
-; CHECK-P9-NEXT:    xscvdpsxws f8, f8
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mtvsrd f5, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f8
+; CHECK-P9-NEXT:    mtvsrd f8, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f2
+; CHECK-P9-NEXT:    lxv vs0, 32(r4)
+; CHECK-P9-NEXT:    xxsldwi vs9, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs10, vs0
+; CHECK-P9-NEXT:    xscvspdpn f9, vs9
+; CHECK-P9-NEXT:    xscvspdpn f10, vs10
 ; CHECK-P9-NEXT:    xscvdpsxws f9, f9
 ; CHECK-P9-NEXT:    xscvdpsxws f10, f10
-; CHECK-P9-NEXT:    xscvdpsxws f11, f11
-; CHECK-P9-NEXT:    xscvdpsxws f12, f12
-; CHECK-P9-NEXT:    xscvdpsxws f13, f13
-; CHECK-P9-NEXT:    xscvdpsxws v2, v2
-; CHECK-P9-NEXT:    xscvdpsxws v3, v3
-; CHECK-P9-NEXT:    mfvsrwz r4, f3
-; CHECK-P9-NEXT:    mfvsrwz r5, f2
-; CHECK-P9-NEXT:    mfvsrwz r12, f1
-; CHECK-P9-NEXT:    mfvsrwz r0, f0
-; CHECK-P9-NEXT:    mfvsrwz r6, f4
-; CHECK-P9-NEXT:    mfvsrwz r7, f5
-; CHECK-P9-NEXT:    mfvsrwz r8, f6
-; CHECK-P9-NEXT:    mfvsrwz r9, f7
-; CHECK-P9-NEXT:    mfvsrwz r10, f8
-; CHECK-P9-NEXT:    mfvsrwz r11, f9
-; CHECK-P9-NEXT:    mfvsrwz r30, f10
-; CHECK-P9-NEXT:    mfvsrwz r29, f11
-; CHECK-P9-NEXT:    mfvsrwz r28, f12
-; CHECK-P9-NEXT:    mfvsrwz r27, f13
-; CHECK-P9-NEXT:    mfvsrwz r26, v2
-; CHECK-P9-NEXT:    mfvsrwz r25, v3
-; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f4
+; CHECK-P9-NEXT:    mtvsrd f4, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f1
 ; CHECK-P9-NEXT:    mtvsrd f1, r5
-; CHECK-P9-NEXT:    mtvsrd f8, r12
-; CHECK-P9-NEXT:    mtvsrd f9, r0
-; CHECK-P9-NEXT:    mtvsrd f2, r6
-; CHECK-P9-NEXT:    mtvsrd f3, r7
-; CHECK-P9-NEXT:    mtvsrd f4, r8
-; CHECK-P9-NEXT:    mtvsrd f5, r9
-; CHECK-P9-NEXT:    mtvsrd f6, r10
-; CHECK-P9-NEXT:    mtvsrd f7, r11
-; CHECK-P9-NEXT:    mtvsrd f10, r30
-; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd f11, r29
-; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd f12, r28
-; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd f13, r27
-; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd v2, r26
-; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd v3, r25
-; CHECK-P9-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xxswapd v5, vs2
+; CHECK-P9-NEXT:    mfvsrwz r5, f6
+; CHECK-P9-NEXT:    xxswapd v2, vs2
+; CHECK-P9-NEXT:    xxswapd v3, vs4
+; CHECK-P9-NEXT:    xscvspdpn f2, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mtvsrd f6, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f7
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    lxv vs1, 48(r4)
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    xxswapd v3, vs5
+; CHECK-P9-NEXT:    mtvsrd f7, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f3
+; CHECK-P9-NEXT:    vmrglh v3, v3, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs6
+; CHECK-P9-NEXT:    xxswapd v5, vs7
+; CHECK-P9-NEXT:    mtvsrd f3, r5
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    xxswapd v0, vs3
-; CHECK-P9-NEXT:    xxswapd v1, vs4
-; CHECK-P9-NEXT:    xxswapd v6, vs5
-; CHECK-P9-NEXT:    xxswapd v7, vs6
-; CHECK-P9-NEXT:    xxswapd v8, vs1
-; CHECK-P9-NEXT:    xxswapd v9, vs7
-; CHECK-P9-NEXT:    xxswapd v10, vs10
-; CHECK-P9-NEXT:    xxswapd v11, vs11
-; CHECK-P9-NEXT:    xxswapd v12, vs8
-; CHECK-P9-NEXT:    xxswapd v13, vs12
-; CHECK-P9-NEXT:    xxswapd v14, vs13
-; CHECK-P9-NEXT:    xxswapd v2, v2
-; CHECK-P9-NEXT:    xxswapd v15, vs9
-; CHECK-P9-NEXT:    xxswapd v3, v3
-; CHECK-P9-NEXT:    vmrglh v5, v0, v5
-; CHECK-P9-NEXT:    vmrglh v4, v4, v1
-; CHECK-P9-NEXT:    vmrglh v0, v7, v6
-; CHECK-P9-NEXT:    vmrglh v1, v8, v9
-; CHECK-P9-NEXT:    vmrglh v6, v11, v10
-; CHECK-P9-NEXT:    vmrglh v7, v12, v13
-; CHECK-P9-NEXT:    vmrglh v2, v2, v14
-; CHECK-P9-NEXT:    vmrglh v3, v15, v3
-; CHECK-P9-NEXT:    vmrglw v4, v4, v5
-; CHECK-P9-NEXT:    vmrglw v5, v1, v0
-; CHECK-P9-NEXT:    vmrglw v0, v7, v6
+; CHECK-P9-NEXT:    vmrglh v4, v5, v4
+; CHECK-P9-NEXT:    xxswapd v5, vs8
+; CHECK-P9-NEXT:    vmrglh v5, v5, v0
+; CHECK-P9-NEXT:    mfvsrwz r4, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r4
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
 ; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    xxmrgld vs0, v5, v4
-; CHECK-P9-NEXT:    xxmrgld vs1, v2, v0
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxmrgld vs2, v3, v2
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs1, vs1, 3
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    xxswapd vs0, vs1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    vmrglh v2, v4, v2
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs1, vs1, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r5, f9
+; CHECK-P9-NEXT:    mtvsrd f9, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f10
+; CHECK-P9-NEXT:    mtvsrd f10, r5
+; CHECK-P9-NEXT:    xxswapd v0, vs9
+; CHECK-P9-NEXT:    xxswapd v1, vs10
+; CHECK-P9-NEXT:    vmrglh v0, v1, v0
+; CHECK-P9-NEXT:    vmrglw v2, v2, v0
+; CHECK-P9-NEXT:    stxv vs2, 0(r3)
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    vmrglh v4, v4, v5
+; CHECK-P9-NEXT:    vmrglw v3, v4, v3
+; CHECK-P9-NEXT:    xxmrgld vs0, v3, v2
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs2, 0(r4)
-; CHECK-BE-NEXT:    lxv vs3, 16(r4)
-; CHECK-BE-NEXT:    lxv vs0, 32(r4)
-; CHECK-BE-NEXT:    lxv vs1, 48(r4)
-; CHECK-BE-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
-; CHECK-BE-NEXT:    xxswapd vs5, vs3
-; CHECK-BE-NEXT:    xxsldwi vs6, vs3, vs3, 1
-; CHECK-BE-NEXT:    xxsldwi vs7, vs2, vs2, 3
-; CHECK-BE-NEXT:    xxswapd vs8, vs2
-; CHECK-BE-NEXT:    xxsldwi vs9, vs2, vs2, 1
-; CHECK-BE-NEXT:    xxsldwi vs10, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs11, vs1
-; CHECK-BE-NEXT:    xxsldwi vs12, vs1, vs1, 1
-; CHECK-BE-NEXT:    xxsldwi vs13, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxswapd v2, vs0
-; CHECK-BE-NEXT:    xxsldwi v3, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    lxv vs1, 16(r4)
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvspdpn f6, vs6
-; CHECK-BE-NEXT:    xscvspdpn f7, vs7
-; CHECK-BE-NEXT:    xscvspdpn f8, vs8
-; CHECK-BE-NEXT:    xscvspdpn f9, vs9
-; CHECK-BE-NEXT:    xscvspdpn f10, vs10
-; CHECK-BE-NEXT:    xscvspdpn f11, vs11
-; CHECK-BE-NEXT:    xscvspdpn f12, vs12
-; CHECK-BE-NEXT:    xscvspdpn f13, vs13
-; CHECK-BE-NEXT:    xscvspdpn v2, v2
-; CHECK-BE-NEXT:    xscvspdpn v3, v3
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mfvsrwz r5, f2
+; CHECK-BE-NEXT:    xscvspdpn f4, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v2, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f4
+; CHECK-BE-NEXT:    lxv vs0, 0(r4)
+; CHECK-BE-NEXT:    xxsldwi vs2, vs0, vs0, 3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r5
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrwz r5, f3
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f2
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrwz r5, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v5, r5
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    mfvsrwz r5, f1
+; CHECK-BE-NEXT:    lxv vs1, 48(r4)
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v5, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f0
+; CHECK-BE-NEXT:    lxv vs0, 32(r4)
+; CHECK-BE-NEXT:    xscvspdpn f5, vs1
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    xscvdpsxws f8, f8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f9
-; CHECK-BE-NEXT:    xscvdpsxws f10, f10
-; CHECK-BE-NEXT:    xscvdpsxws f11, f11
-; CHECK-BE-NEXT:    xscvdpsxws f12, f12
-; CHECK-BE-NEXT:    xscvdpsxws f13, f13
-; CHECK-BE-NEXT:    xscvdpsxws v2, v2
-; CHECK-BE-NEXT:    xscvdpsxws v3, v3
-; CHECK-BE-NEXT:    mfvsrwz r4, f3
-; CHECK-BE-NEXT:    mfvsrwz r5, f2
-; CHECK-BE-NEXT:    mfvsrwz r12, f1
-; CHECK-BE-NEXT:    mfvsrwz r0, f0
-; CHECK-BE-NEXT:    mfvsrwz r6, f4
-; CHECK-BE-NEXT:    mfvsrwz r7, f5
-; CHECK-BE-NEXT:    mfvsrwz r8, f6
-; CHECK-BE-NEXT:    mfvsrwz r9, f7
-; CHECK-BE-NEXT:    mfvsrwz r10, f8
-; CHECK-BE-NEXT:    mfvsrwz r11, f9
-; CHECK-BE-NEXT:    mfvsrwz r30, f10
-; CHECK-BE-NEXT:    mfvsrwz r29, f11
-; CHECK-BE-NEXT:    mfvsrwz r28, f12
-; CHECK-BE-NEXT:    mfvsrwz r27, f13
-; CHECK-BE-NEXT:    mfvsrwz r26, v2
-; CHECK-BE-NEXT:    mfvsrwz r25, v3
-; CHECK-BE-NEXT:    sldi r4, r4, 48
 ; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    sldi r12, r12, 48
-; CHECK-BE-NEXT:    sldi r0, r0, 48
-; CHECK-BE-NEXT:    sldi r6, r6, 48
-; CHECK-BE-NEXT:    sldi r7, r7, 48
-; CHECK-BE-NEXT:    sldi r8, r8, 48
-; CHECK-BE-NEXT:    sldi r9, r9, 48
-; CHECK-BE-NEXT:    sldi r10, r10, 48
-; CHECK-BE-NEXT:    sldi r11, r11, 48
-; CHECK-BE-NEXT:    sldi r30, r30, 48
-; CHECK-BE-NEXT:    sldi r29, r29, 48
-; CHECK-BE-NEXT:    sldi r28, r28, 48
-; CHECK-BE-NEXT:    sldi r27, r27, 48
-; CHECK-BE-NEXT:    sldi r26, r26, 48
-; CHECK-BE-NEXT:    sldi r25, r25, 48
+; CHECK-BE-NEXT:    xxswapd vs3, vs1
+; CHECK-BE-NEXT:    mtvsrd v0, r5
+; CHECK-BE-NEXT:    vmrghh v5, v5, v0
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mfvsrwz r4, f5
+; CHECK-BE-NEXT:    xxmrghd vs4, v3, v2
+; CHECK-BE-NEXT:    sldi r4, r4, 48
 ; CHECK-BE-NEXT:    mtvsrd v2, r4
-; CHECK-BE-NEXT:    mtvsrd v3, r5
-; CHECK-BE-NEXT:    mtvsrd v10, r12
-; CHECK-BE-NEXT:    mtvsrd v14, r0
-; CHECK-BE-NEXT:    mtvsrd v4, r6
-; CHECK-BE-NEXT:    mtvsrd v5, r7
-; CHECK-BE-NEXT:    mtvsrd v0, r8
-; CHECK-BE-NEXT:    mtvsrd v1, r9
-; CHECK-BE-NEXT:    mtvsrd v6, r10
-; CHECK-BE-NEXT:    mtvsrd v7, r11
-; CHECK-BE-NEXT:    mtvsrd v8, r30
-; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v9, r29
-; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v11, r28
-; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v12, r27
-; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v13, r26
-; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v15, r25
-; CHECK-BE-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    vmrghh v4, v5, v4
-; CHECK-BE-NEXT:    vmrghh v2, v2, v0
-; CHECK-BE-NEXT:    vmrghh v5, v6, v1
-; CHECK-BE-NEXT:    vmrghh v3, v3, v7
-; CHECK-BE-NEXT:    vmrghh v0, v9, v8
-; CHECK-BE-NEXT:    vmrghh v1, v10, v11
-; CHECK-BE-NEXT:    vmrghh v6, v13, v12
-; CHECK-BE-NEXT:    vmrghh v7, v14, v15
-; CHECK-BE-NEXT:    vmrghw v2, v2, v4
-; CHECK-BE-NEXT:    vmrghw v3, v3, v5
-; CHECK-BE-NEXT:    vmrghw v4, v1, v0
-; CHECK-BE-NEXT:    vmrghw v5, v7, v6
+; CHECK-BE-NEXT:    mfvsrwz r4, f2
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    stxv vs4, 0(r3)
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f3
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vmrghh v2, v2, v4
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f0
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v5, r4
+; CHECK-BE-NEXT:    vmrghh v4, v4, v5
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
 ; CHECK-BE-NEXT:    xxmrghd vs0, v3, v2
-; CHECK-BE-NEXT:    xxmrghd vs1, v5, v4
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x float>, <16 x float>* %0, align 64

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll Wed Jan  2 21:04:18 2019
@@ -35,10 +35,10 @@ define <2 x i64> @test2elt(i64 %a.coerce
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrd f0, r3
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
 ; CHECK-BE-NEXT:    xvcvdpuxds v2, vs0
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -72,34 +72,34 @@ define void @test4elt(<4 x i64>* noalias
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
 ; CHECK-P9-NEXT:    xxswapd vs1, v2
-; CHECK-P9-NEXT:    xxsldwi vs2, v2, v2, 1
-; CHECK-P9-NEXT:    xscvspdpn f3, v2
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xxsldwi vs2, v2, v2, 1
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xscvspdpn f1, v2
+; CHECK-P9-NEXT:    xxmrghd vs1, vs1, vs2
 ; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
 ; CHECK-P9-NEXT:    xvcvdpuxds vs1, vs1
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 1
+; CHECK-BE-NEXT:    xscvspdpn f0, v2
 ; CHECK-BE-NEXT:    xxswapd vs2, v2
-; CHECK-BE-NEXT:    xscvspdpn f3, v2
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xxmrghd vs0, vs3, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs2, vs1
 ; CHECK-BE-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs2, vs1
 ; CHECK-BE-NEXT:    xvcvdpuxds vs1, vs1
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = fptoui <4 x float> %a to <4 x i64>
@@ -149,66 +149,66 @@ define void @test8elt(<8 x i64>* noalias
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r4)
-; CHECK-P9-NEXT:    lxv vs1, 0(r4)
-; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxswapd vs3, vs1
-; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 1
-; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxswapd vs6, vs0
-; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 1
+; CHECK-P9-NEXT:    lxv vs0, 0(r4)
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs2, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f3, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs2, vs1
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    xxmrghd vs0, vs3, vs0
+; CHECK-P9-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT:    xxsldwi vs3, vs2, vs2, 3
+; CHECK-P9-NEXT:    xxswapd vs4, vs2
 ; CHECK-P9-NEXT:    xscvspdpn f3, vs3
 ; CHECK-P9-NEXT:    xscvspdpn f4, vs4
-; CHECK-P9-NEXT:    xscvspdpn f5, vs5
-; CHECK-P9-NEXT:    xscvspdpn f6, vs6
-; CHECK-P9-NEXT:    xscvspdpn f7, vs7
-; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs1, vs1, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs6, vs5
-; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs7
-; CHECK-P9-NEXT:    xvcvdpuxds vs2, vs2
-; CHECK-P9-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
+; CHECK-P9-NEXT:    xxmrghd vs3, vs4, vs3
+; CHECK-P9-NEXT:    xscvspdpn f4, vs2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xvcvdpuxds vs3, vs3
-; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
-; CHECK-P9-NEXT:    stxv vs0, 48(r3)
+; CHECK-P9-NEXT:    xxmrghd vs2, vs4, vs2
+; CHECK-P9-NEXT:    xvcvdpuxds vs2, vs2
 ; CHECK-P9-NEXT:    stxv vs3, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs2, 0(r3)
+; CHECK-P9-NEXT:    stxv vs2, 48(r3)
+; CHECK-P9-NEXT:    stxv vs1, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 16(r4)
 ; CHECK-BE-NEXT:    lxv vs1, 0(r4)
-; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    xxsldwi vs4, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs3
 ; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs4, vs1
-; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 1
-; CHECK-BE-NEXT:    xxsldwi vs6, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxswapd vs7, vs0
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xxmrghd vs1, vs1, vs3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs0
+; CHECK-BE-NEXT:    xxmrghd vs3, vs3, vs4
+; CHECK-BE-NEXT:    xxsldwi vs4, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
 ; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvspdpn f6, vs6
-; CHECK-BE-NEXT:    xscvspdpn f7, vs7
-; CHECK-BE-NEXT:    xxmrghd vs1, vs1, vs2
-; CHECK-BE-NEXT:    xxmrghd vs2, vs4, vs3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs5
-; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-BE-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs4
 ; CHECK-BE-NEXT:    xvcvdpuxds vs2, vs2
-; CHECK-BE-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT:    xvcvdpuxds vs1, vs1
 ; CHECK-BE-NEXT:    xvcvdpuxds vs3, vs3
-; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs0, 32(r3)
-; CHECK-BE-NEXT:    stxv vs2, 16(r3)
-; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    stxv vs0, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x float>, <8 x float>* %0, align 32
@@ -295,130 +295,122 @@ define void @test16elt(<16 x i64>* noali
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r4)
-; CHECK-P9-NEXT:    lxv vs1, 0(r4)
-; CHECK-P9-NEXT:    lxv vs2, 48(r4)
-; CHECK-P9-NEXT:    lxv vs3, 32(r4)
-; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxswapd vs5, vs1
-; CHECK-P9-NEXT:    xxsldwi vs6, vs1, vs1, 1
-; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxswapd vs8, vs0
-; CHECK-P9-NEXT:    xxsldwi vs9, vs0, vs0, 1
-; CHECK-P9-NEXT:    xxsldwi vs10, vs3, vs3, 3
-; CHECK-P9-NEXT:    xxswapd vs11, vs3
-; CHECK-P9-NEXT:    xxsldwi vs12, vs3, vs3, 1
-; CHECK-P9-NEXT:    xxsldwi vs13, vs2, vs2, 3
-; CHECK-P9-NEXT:    xxswapd v2, vs2
-; CHECK-P9-NEXT:    xxsldwi v3, vs2, vs2, 1
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    lxv vs4, 16(r4)
+; CHECK-P9-NEXT:    xxsldwi vs5, vs4, vs4, 3
+; CHECK-P9-NEXT:    xxswapd vs6, vs4
+; CHECK-P9-NEXT:    lxv vs0, 0(r4)
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs2, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f5, vs5
 ; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xxmrghd vs5, vs6, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs4
+; CHECK-P9-NEXT:    xxsldwi vs4, vs4, vs4, 1
+; CHECK-P9-NEXT:    lxv vs3, 32(r4)
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xxswapd vs7, vs3
 ; CHECK-P9-NEXT:    xscvspdpn f7, vs7
-; CHECK-P9-NEXT:    xscvspdpn f8, vs8
-; CHECK-P9-NEXT:    xscvspdpn f9, vs9
-; CHECK-P9-NEXT:    xscvspdpn f10, vs10
-; CHECK-P9-NEXT:    xscvspdpn f11, vs11
-; CHECK-P9-NEXT:    xscvspdpn f12, vs12
-; CHECK-P9-NEXT:    xscvspdpn f13, vs13
-; CHECK-P9-NEXT:    xscvspdpn f31, v2
-; CHECK-P9-NEXT:    xscvspdpn f30, v3
-; CHECK-P9-NEXT:    xxmrghd vs4, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs1, vs1, vs6
-; CHECK-P9-NEXT:    xxmrghd vs5, vs8, vs7
-; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs9
-; CHECK-P9-NEXT:    xxmrghd vs6, vs11, vs10
-; CHECK-P9-NEXT:    xxmrghd vs3, vs3, vs12
-; CHECK-P9-NEXT:    xxmrghd vs7, vs31, vs13
-; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xxmrghd vs2, vs2, vs30
-; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xvcvdpuxds vs4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xxmrghd vs1, vs2, vs1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xxmrghd vs0, vs2, vs0
+; CHECK-P9-NEXT:    xxmrghd vs4, vs6, vs4
+; CHECK-P9-NEXT:    xxsldwi vs6, vs3, vs3, 3
 ; CHECK-P9-NEXT:    xvcvdpuxds vs1, vs1
 ; CHECK-P9-NEXT:    xvcvdpuxds vs5, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xxmrghd vs6, vs7, vs6
+; CHECK-P9-NEXT:    xscvspdpn f7, vs3
+; CHECK-P9-NEXT:    xxsldwi vs3, vs3, vs3, 1
+; CHECK-P9-NEXT:    lxv vs2, 48(r4)
+; CHECK-P9-NEXT:    xxswapd vs8, vs2
+; CHECK-P9-NEXT:    xscvspdpn f8, vs8
 ; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT:    stxv vs5, 32(r3)
 ; CHECK-P9-NEXT:    xvcvdpuxds vs6, vs6
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs3
+; CHECK-P9-NEXT:    xxsldwi vs7, vs2, vs2, 3
+; CHECK-P9-NEXT:    xscvspdpn f7, vs7
+; CHECK-P9-NEXT:    xxmrghd vs7, vs8, vs7
+; CHECK-P9-NEXT:    xscvspdpn f8, vs2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 1
+; CHECK-P9-NEXT:    stxv vs6, 64(r3)
+; CHECK-P9-NEXT:    xvcvdpuxds vs4, vs4
 ; CHECK-P9-NEXT:    xvcvdpuxds vs3, vs3
 ; CHECK-P9-NEXT:    xvcvdpuxds vs7, vs7
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    stxv vs3, 80(r3)
+; CHECK-P9-NEXT:    xxmrghd vs2, vs8, vs2
 ; CHECK-P9-NEXT:    xvcvdpuxds vs2, vs2
-; CHECK-P9-NEXT:    stxv vs0, 48(r3)
-; CHECK-P9-NEXT:    stxv vs5, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs4, 0(r3)
 ; CHECK-P9-NEXT:    stxv vs2, 112(r3)
 ; CHECK-P9-NEXT:    stxv vs7, 96(r3)
-; CHECK-P9-NEXT:    stxv vs3, 80(r3)
-; CHECK-P9-NEXT:    stxv vs6, 64(r3)
+; CHECK-P9-NEXT:    stxv vs4, 48(r3)
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
+; CHECK-P9-NEXT:    stxv vs1, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 16(r4)
-; CHECK-BE-NEXT:    lxv vs1, 0(r4)
-; CHECK-BE-NEXT:    lxv vs2, 48(r4)
-; CHECK-BE-NEXT:    lxv vs3, 32(r4)
-; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    xxsldwi vs4, vs1, vs1, 1
-; CHECK-BE-NEXT:    xxsldwi vs5, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs6, vs1
-; CHECK-BE-NEXT:    xxsldwi vs7, vs0, vs0, 1
-; CHECK-BE-NEXT:    xxsldwi vs8, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxswapd vs9, vs0
-; CHECK-BE-NEXT:    xxsldwi vs10, vs3, vs3, 1
-; CHECK-BE-NEXT:    xxsldwi vs11, vs3, vs3, 3
-; CHECK-BE-NEXT:    xxswapd vs12, vs3
-; CHECK-BE-NEXT:    xxsldwi vs13, vs2, vs2, 1
-; CHECK-BE-NEXT:    xxsldwi v2, vs2, vs2, 3
-; CHECK-BE-NEXT:    xxswapd v3, vs2
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    lxv vs0, 0(r4)
+; CHECK-BE-NEXT:    lxv vs4, 16(r4)
+; CHECK-BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
 ; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xxsldwi vs6, vs4, vs4, 1
 ; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs5
+; CHECK-BE-NEXT:    xscvspdpn f5, vs4
+; CHECK-BE-NEXT:    lxv vs3, 32(r4)
+; CHECK-BE-NEXT:    xxsldwi vs7, vs3, vs3, 1
 ; CHECK-BE-NEXT:    xscvspdpn f7, vs7
-; CHECK-BE-NEXT:    xscvspdpn f8, vs8
-; CHECK-BE-NEXT:    xscvspdpn f9, vs9
-; CHECK-BE-NEXT:    xscvspdpn f10, vs10
-; CHECK-BE-NEXT:    xscvspdpn f11, vs11
-; CHECK-BE-NEXT:    xscvspdpn f12, vs12
-; CHECK-BE-NEXT:    xscvspdpn f13, vs13
-; CHECK-BE-NEXT:    xscvspdpn f31, v2
-; CHECK-BE-NEXT:    xscvspdpn f30, v3
-; CHECK-BE-NEXT:    xxmrghd vs1, vs1, vs4
-; CHECK-BE-NEXT:    xxmrghd vs4, vs6, vs5
-; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs7
-; CHECK-BE-NEXT:    xxmrghd vs5, vs9, vs8
-; CHECK-BE-NEXT:    xxmrghd vs3, vs3, vs10
-; CHECK-BE-NEXT:    xxmrghd vs6, vs12, vs11
-; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs13
-; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    xxmrghd vs5, vs5, vs6
+; CHECK-BE-NEXT:    xxsldwi vs6, vs4, vs4, 3
+; CHECK-BE-NEXT:    xxswapd vs4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xxmrghd vs1, vs1, vs2
+; CHECK-BE-NEXT:    lxv vs2, 48(r4)
+; CHECK-BE-NEXT:    xxsldwi vs8, vs2, vs2, 1
 ; CHECK-BE-NEXT:    xvcvdpuxds vs1, vs1
-; CHECK-BE-NEXT:    xvcvdpuxds vs4, vs4
 ; CHECK-BE-NEXT:    xvcvdpuxds vs0, vs0
 ; CHECK-BE-NEXT:    xvcvdpuxds vs5, vs5
-; CHECK-BE-NEXT:    xvcvdpuxds vs3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f8, vs8
+; CHECK-BE-NEXT:    xxmrghd vs4, vs4, vs6
+; CHECK-BE-NEXT:    xscvspdpn f6, vs3
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    xxmrghd vs6, vs6, vs7
+; CHECK-BE-NEXT:    xxsldwi vs7, vs3, vs3, 3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xxmrghd vs3, vs3, vs7
+; CHECK-BE-NEXT:    xscvspdpn f7, vs2
+; CHECK-BE-NEXT:    xxmrghd vs7, vs7, vs8
+; CHECK-BE-NEXT:    xxsldwi vs8, vs2, vs2, 3
+; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f8, vs8
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs8
+; CHECK-BE-NEXT:    stxv vs5, 32(r3)
+; CHECK-BE-NEXT:    xvcvdpuxds vs4, vs4
 ; CHECK-BE-NEXT:    xvcvdpuxds vs6, vs6
-; CHECK-BE-NEXT:    xvcvdpuxds vs2, vs2
+; CHECK-BE-NEXT:    xvcvdpuxds vs3, vs3
 ; CHECK-BE-NEXT:    xvcvdpuxds vs7, vs7
-; CHECK-BE-NEXT:    stxv vs5, 48(r3)
-; CHECK-BE-NEXT:    stxv vs0, 32(r3)
-; CHECK-BE-NEXT:    stxv vs4, 16(r3)
+; CHECK-BE-NEXT:    stxv vs3, 80(r3)
+; CHECK-BE-NEXT:    stxv vs7, 96(r3)
+; CHECK-BE-NEXT:    xvcvdpuxds vs2, vs2
+; CHECK-BE-NEXT:    stxv vs2, 112(r3)
+; CHECK-BE-NEXT:    stxv vs6, 64(r3)
+; CHECK-BE-NEXT:    stxv vs4, 48(r3)
 ; CHECK-BE-NEXT:    stxv vs1, 0(r3)
-; CHECK-BE-NEXT:    stxv vs7, 112(r3)
-; CHECK-BE-NEXT:    stxv vs2, 96(r3)
-; CHECK-BE-NEXT:    stxv vs6, 80(r3)
-; CHECK-BE-NEXT:    stxv vs3, 64(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x float>, <16 x float>* %0, align 64
@@ -453,10 +445,10 @@ define <2 x i64> @test2elt_signed(i64 %a
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrd f0, r3
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
 ; CHECK-BE-NEXT:    xvcvdpuxds v2, vs0
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -490,34 +482,34 @@ define void @test4elt_signed(<4 x i64>*
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
 ; CHECK-P9-NEXT:    xxswapd vs1, v2
-; CHECK-P9-NEXT:    xxsldwi vs2, v2, v2, 1
-; CHECK-P9-NEXT:    xscvspdpn f3, v2
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xxsldwi vs2, v2, v2, 1
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs1, vs3, vs2
+; CHECK-P9-NEXT:    xscvspdpn f1, v2
+; CHECK-P9-NEXT:    xxmrghd vs1, vs1, vs2
 ; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
 ; CHECK-P9-NEXT:    xvcvdpuxds vs1, vs1
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 1
+; CHECK-BE-NEXT:    xscvspdpn f0, v2
 ; CHECK-BE-NEXT:    xxswapd vs2, v2
-; CHECK-BE-NEXT:    xscvspdpn f3, v2
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, v2, v2, 3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xxmrghd vs0, vs3, vs0
-; CHECK-BE-NEXT:    xxmrghd vs1, vs2, vs1
 ; CHECK-BE-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT:    xxmrghd vs1, vs2, vs1
 ; CHECK-BE-NEXT:    xvcvdpuxds vs1, vs1
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = fptoui <4 x float> %a to <4 x i64>
@@ -567,66 +559,66 @@ define void @test8elt_signed(<8 x i64>*
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r4)
-; CHECK-P9-NEXT:    lxv vs1, 0(r4)
-; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxswapd vs3, vs1
-; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 1
-; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxswapd vs6, vs0
-; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 1
+; CHECK-P9-NEXT:    lxv vs0, 0(r4)
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs2, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvspdpn f3, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xxmrghd vs1, vs2, vs1
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    xxmrghd vs0, vs3, vs0
+; CHECK-P9-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT:    xxsldwi vs3, vs2, vs2, 3
+; CHECK-P9-NEXT:    xxswapd vs4, vs2
 ; CHECK-P9-NEXT:    xscvspdpn f3, vs3
 ; CHECK-P9-NEXT:    xscvspdpn f4, vs4
-; CHECK-P9-NEXT:    xscvspdpn f5, vs5
-; CHECK-P9-NEXT:    xscvspdpn f6, vs6
-; CHECK-P9-NEXT:    xscvspdpn f7, vs7
-; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs1, vs1, vs4
-; CHECK-P9-NEXT:    xxmrghd vs3, vs6, vs5
-; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs7
-; CHECK-P9-NEXT:    xvcvdpuxds vs2, vs2
-; CHECK-P9-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
+; CHECK-P9-NEXT:    xxmrghd vs3, vs4, vs3
+; CHECK-P9-NEXT:    xscvspdpn f4, vs2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
 ; CHECK-P9-NEXT:    xvcvdpuxds vs3, vs3
-; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
-; CHECK-P9-NEXT:    stxv vs0, 48(r3)
+; CHECK-P9-NEXT:    xxmrghd vs2, vs4, vs2
+; CHECK-P9-NEXT:    xvcvdpuxds vs2, vs2
 ; CHECK-P9-NEXT:    stxv vs3, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs2, 0(r3)
+; CHECK-P9-NEXT:    stxv vs2, 48(r3)
+; CHECK-P9-NEXT:    stxv vs1, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 16(r4)
 ; CHECK-BE-NEXT:    lxv vs1, 0(r4)
-; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 1
+; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    xxsldwi vs4, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs3
 ; CHECK-BE-NEXT:    xxsldwi vs3, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs4, vs1
-; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 1
-; CHECK-BE-NEXT:    xxsldwi vs6, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxswapd vs7, vs0
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
 ; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xxmrghd vs1, vs1, vs3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs0
+; CHECK-BE-NEXT:    xxmrghd vs3, vs3, vs4
+; CHECK-BE-NEXT:    xxsldwi vs4, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
 ; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvspdpn f6, vs6
-; CHECK-BE-NEXT:    xscvspdpn f7, vs7
-; CHECK-BE-NEXT:    xxmrghd vs1, vs1, vs2
-; CHECK-BE-NEXT:    xxmrghd vs2, vs4, vs3
-; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs5
-; CHECK-BE-NEXT:    xxmrghd vs3, vs7, vs6
-; CHECK-BE-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs4
 ; CHECK-BE-NEXT:    xvcvdpuxds vs2, vs2
-; CHECK-BE-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT:    xvcvdpuxds vs1, vs1
 ; CHECK-BE-NEXT:    xvcvdpuxds vs3, vs3
-; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs0, 32(r3)
-; CHECK-BE-NEXT:    stxv vs2, 16(r3)
-; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    stxv vs0, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x float>, <8 x float>* %0, align 32
@@ -713,130 +705,122 @@ define void @test16elt_signed(<16 x i64>
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r4)
-; CHECK-P9-NEXT:    lxv vs1, 0(r4)
-; CHECK-P9-NEXT:    lxv vs2, 48(r4)
-; CHECK-P9-NEXT:    lxv vs3, 32(r4)
-; CHECK-P9-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxswapd vs5, vs1
-; CHECK-P9-NEXT:    xxsldwi vs6, vs1, vs1, 1
-; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxswapd vs8, vs0
-; CHECK-P9-NEXT:    xxsldwi vs9, vs0, vs0, 1
-; CHECK-P9-NEXT:    xxsldwi vs10, vs3, vs3, 3
-; CHECK-P9-NEXT:    xxswapd vs11, vs3
-; CHECK-P9-NEXT:    xxsldwi vs12, vs3, vs3, 1
-; CHECK-P9-NEXT:    xxsldwi vs13, vs2, vs2, 3
-; CHECK-P9-NEXT:    xxswapd v2, vs2
-; CHECK-P9-NEXT:    xxsldwi v3, vs2, vs2, 1
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    lxv vs4, 16(r4)
+; CHECK-P9-NEXT:    xxsldwi vs5, vs4, vs4, 3
+; CHECK-P9-NEXT:    xxswapd vs6, vs4
+; CHECK-P9-NEXT:    lxv vs0, 0(r4)
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT:    xxswapd vs2, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f5, vs5
 ; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xxmrghd vs5, vs6, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs4
+; CHECK-P9-NEXT:    xxsldwi vs4, vs4, vs4, 1
+; CHECK-P9-NEXT:    lxv vs3, 32(r4)
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xxswapd vs7, vs3
 ; CHECK-P9-NEXT:    xscvspdpn f7, vs7
-; CHECK-P9-NEXT:    xscvspdpn f8, vs8
-; CHECK-P9-NEXT:    xscvspdpn f9, vs9
-; CHECK-P9-NEXT:    xscvspdpn f10, vs10
-; CHECK-P9-NEXT:    xscvspdpn f11, vs11
-; CHECK-P9-NEXT:    xscvspdpn f12, vs12
-; CHECK-P9-NEXT:    xscvspdpn f13, vs13
-; CHECK-P9-NEXT:    xscvspdpn f31, v2
-; CHECK-P9-NEXT:    xscvspdpn f30, v3
-; CHECK-P9-NEXT:    xxmrghd vs4, vs5, vs4
-; CHECK-P9-NEXT:    xxmrghd vs1, vs1, vs6
-; CHECK-P9-NEXT:    xxmrghd vs5, vs8, vs7
-; CHECK-P9-NEXT:    xxmrghd vs0, vs0, vs9
-; CHECK-P9-NEXT:    xxmrghd vs6, vs11, vs10
-; CHECK-P9-NEXT:    xxmrghd vs3, vs3, vs12
-; CHECK-P9-NEXT:    xxmrghd vs7, vs31, vs13
-; CHECK-P9-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xxmrghd vs2, vs2, vs30
-; CHECK-P9-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xvcvdpuxds vs4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f4, vs4
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xxmrghd vs1, vs2, vs1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xxmrghd vs0, vs2, vs0
+; CHECK-P9-NEXT:    xxmrghd vs4, vs6, vs4
+; CHECK-P9-NEXT:    xxsldwi vs6, vs3, vs3, 3
 ; CHECK-P9-NEXT:    xvcvdpuxds vs1, vs1
 ; CHECK-P9-NEXT:    xvcvdpuxds vs5, vs5
+; CHECK-P9-NEXT:    xscvspdpn f6, vs6
+; CHECK-P9-NEXT:    xxmrghd vs6, vs7, vs6
+; CHECK-P9-NEXT:    xscvspdpn f7, vs3
+; CHECK-P9-NEXT:    xxsldwi vs3, vs3, vs3, 1
+; CHECK-P9-NEXT:    lxv vs2, 48(r4)
+; CHECK-P9-NEXT:    xxswapd vs8, vs2
+; CHECK-P9-NEXT:    xscvspdpn f8, vs8
 ; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
+; CHECK-P9-NEXT:    stxv vs5, 32(r3)
 ; CHECK-P9-NEXT:    xvcvdpuxds vs6, vs6
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xxmrghd vs3, vs7, vs3
+; CHECK-P9-NEXT:    xxsldwi vs7, vs2, vs2, 3
+; CHECK-P9-NEXT:    xscvspdpn f7, vs7
+; CHECK-P9-NEXT:    xxmrghd vs7, vs8, vs7
+; CHECK-P9-NEXT:    xscvspdpn f8, vs2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 1
+; CHECK-P9-NEXT:    stxv vs6, 64(r3)
+; CHECK-P9-NEXT:    xvcvdpuxds vs4, vs4
 ; CHECK-P9-NEXT:    xvcvdpuxds vs3, vs3
 ; CHECK-P9-NEXT:    xvcvdpuxds vs7, vs7
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    stxv vs3, 80(r3)
+; CHECK-P9-NEXT:    xxmrghd vs2, vs8, vs2
 ; CHECK-P9-NEXT:    xvcvdpuxds vs2, vs2
-; CHECK-P9-NEXT:    stxv vs0, 48(r3)
-; CHECK-P9-NEXT:    stxv vs5, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs4, 0(r3)
 ; CHECK-P9-NEXT:    stxv vs2, 112(r3)
 ; CHECK-P9-NEXT:    stxv vs7, 96(r3)
-; CHECK-P9-NEXT:    stxv vs3, 80(r3)
-; CHECK-P9-NEXT:    stxv vs6, 64(r3)
+; CHECK-P9-NEXT:    stxv vs4, 48(r3)
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
+; CHECK-P9-NEXT:    stxv vs1, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 16(r4)
-; CHECK-BE-NEXT:    lxv vs1, 0(r4)
-; CHECK-BE-NEXT:    lxv vs2, 48(r4)
-; CHECK-BE-NEXT:    lxv vs3, 32(r4)
-; CHECK-BE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    xxsldwi vs4, vs1, vs1, 1
-; CHECK-BE-NEXT:    xxsldwi vs5, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs6, vs1
-; CHECK-BE-NEXT:    xxsldwi vs7, vs0, vs0, 1
-; CHECK-BE-NEXT:    xxsldwi vs8, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxswapd vs9, vs0
-; CHECK-BE-NEXT:    xxsldwi vs10, vs3, vs3, 1
-; CHECK-BE-NEXT:    xxsldwi vs11, vs3, vs3, 3
-; CHECK-BE-NEXT:    xxswapd vs12, vs3
-; CHECK-BE-NEXT:    xxsldwi vs13, vs2, vs2, 1
-; CHECK-BE-NEXT:    xxsldwi v2, vs2, vs2, 3
-; CHECK-BE-NEXT:    xxswapd v3, vs2
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    lxv vs0, 0(r4)
+; CHECK-BE-NEXT:    lxv vs4, 16(r4)
+; CHECK-BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 3
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
 ; CHECK-BE-NEXT:    xscvspdpn f5, vs5
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    xxsldwi vs6, vs4, vs4, 1
 ; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs5
+; CHECK-BE-NEXT:    xscvspdpn f5, vs4
+; CHECK-BE-NEXT:    lxv vs3, 32(r4)
+; CHECK-BE-NEXT:    xxsldwi vs7, vs3, vs3, 1
 ; CHECK-BE-NEXT:    xscvspdpn f7, vs7
-; CHECK-BE-NEXT:    xscvspdpn f8, vs8
-; CHECK-BE-NEXT:    xscvspdpn f9, vs9
-; CHECK-BE-NEXT:    xscvspdpn f10, vs10
-; CHECK-BE-NEXT:    xscvspdpn f11, vs11
-; CHECK-BE-NEXT:    xscvspdpn f12, vs12
-; CHECK-BE-NEXT:    xscvspdpn f13, vs13
-; CHECK-BE-NEXT:    xscvspdpn f31, v2
-; CHECK-BE-NEXT:    xscvspdpn f30, v3
-; CHECK-BE-NEXT:    xxmrghd vs1, vs1, vs4
-; CHECK-BE-NEXT:    xxmrghd vs4, vs6, vs5
-; CHECK-BE-NEXT:    xxmrghd vs0, vs0, vs7
-; CHECK-BE-NEXT:    xxmrghd vs5, vs9, vs8
-; CHECK-BE-NEXT:    xxmrghd vs3, vs3, vs10
-; CHECK-BE-NEXT:    xxmrghd vs6, vs12, vs11
-; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs13
-; CHECK-BE-NEXT:    xxmrghd vs7, vs30, vs31
-; CHECK-BE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT:    xxmrghd vs5, vs5, vs6
+; CHECK-BE-NEXT:    xxsldwi vs6, vs4, vs4, 3
+; CHECK-BE-NEXT:    xxswapd vs4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f6, vs6
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xxmrghd vs1, vs1, vs2
+; CHECK-BE-NEXT:    lxv vs2, 48(r4)
+; CHECK-BE-NEXT:    xxsldwi vs8, vs2, vs2, 1
 ; CHECK-BE-NEXT:    xvcvdpuxds vs1, vs1
-; CHECK-BE-NEXT:    xvcvdpuxds vs4, vs4
 ; CHECK-BE-NEXT:    xvcvdpuxds vs0, vs0
 ; CHECK-BE-NEXT:    xvcvdpuxds vs5, vs5
-; CHECK-BE-NEXT:    xvcvdpuxds vs3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f8, vs8
+; CHECK-BE-NEXT:    xxmrghd vs4, vs4, vs6
+; CHECK-BE-NEXT:    xscvspdpn f6, vs3
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    xxmrghd vs6, vs6, vs7
+; CHECK-BE-NEXT:    xxsldwi vs7, vs3, vs3, 3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
+; CHECK-BE-NEXT:    xscvspdpn f7, vs7
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xxmrghd vs3, vs3, vs7
+; CHECK-BE-NEXT:    xscvspdpn f7, vs2
+; CHECK-BE-NEXT:    xxmrghd vs7, vs7, vs8
+; CHECK-BE-NEXT:    xxsldwi vs8, vs2, vs2, 3
+; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    xscvspdpn f8, vs8
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs2, vs8
+; CHECK-BE-NEXT:    stxv vs5, 32(r3)
+; CHECK-BE-NEXT:    xvcvdpuxds vs4, vs4
 ; CHECK-BE-NEXT:    xvcvdpuxds vs6, vs6
-; CHECK-BE-NEXT:    xvcvdpuxds vs2, vs2
+; CHECK-BE-NEXT:    xvcvdpuxds vs3, vs3
 ; CHECK-BE-NEXT:    xvcvdpuxds vs7, vs7
-; CHECK-BE-NEXT:    stxv vs5, 48(r3)
-; CHECK-BE-NEXT:    stxv vs0, 32(r3)
-; CHECK-BE-NEXT:    stxv vs4, 16(r3)
+; CHECK-BE-NEXT:    stxv vs3, 80(r3)
+; CHECK-BE-NEXT:    stxv vs7, 96(r3)
+; CHECK-BE-NEXT:    xvcvdpuxds vs2, vs2
+; CHECK-BE-NEXT:    stxv vs2, 112(r3)
+; CHECK-BE-NEXT:    stxv vs6, 64(r3)
+; CHECK-BE-NEXT:    stxv vs4, 48(r3)
 ; CHECK-BE-NEXT:    stxv vs1, 0(r3)
-; CHECK-BE-NEXT:    stxv vs7, 112(r3)
-; CHECK-BE-NEXT:    stxv vs2, 96(r3)
-; CHECK-BE-NEXT:    stxv vs6, 80(r3)
-; CHECK-BE-NEXT:    stxv vs3, 64(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x float>, <16 x float>* %0, align 64

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll Wed Jan  2 21:04:18 2019
@@ -39,18 +39,18 @@ define i16 @test2elt(i64 %a.coerce) loca
 ; CHECK-P9-NEXT:    xxswapd v2, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xxsldwi vs1, v2, v2, 3
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mfvsrwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    addi r3, r1, -2
-; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v2, vs1
+; CHECK-P9-NEXT:    xxswapd v3, vs0
 ; CHECK-P9-NEXT:    vmrglb v2, v3, v2
 ; CHECK-P9-NEXT:    vsldoi v2, v2, v2, 8
+; CHECK-P9-NEXT:    addi r3, r1, -2
 ; CHECK-P9-NEXT:    stxsihx v2, 0, r3
 ; CHECK-P9-NEXT:    lhz r3, -2(r1)
 ; CHECK-P9-NEXT:    blr
@@ -58,18 +58,18 @@ define i16 @test2elt(i64 %a.coerce) loca
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrd f0, r3
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
 ; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mfvsrwz r4, f1
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r4
 ; CHECK-BE-NEXT:    vmrghb v2, v2, v3
 ; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    stxsihx v2, 0, r3
@@ -118,64 +118,64 @@ define i32 @test4elt(<4 x float> %a) loc
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P9-NEXT:    xxswapd vs1, v2
-; CHECK-P9-NEXT:    xxsldwi vs2, v2, v2, 1
-; CHECK-P9-NEXT:    xscvspdpn f3, v2
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mfvsrwz r5, f3
 ; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mfvsrwz r4, f1
-; CHECK-P9-NEXT:    mfvsrwz r6, f2
-; CHECK-P9-NEXT:    mtvsrd f2, r5
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    xxswapd vs0, v2
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, v2
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglb v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
 ; CHECK-P9-NEXT:    li r3, 0
 ; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
-; CHECK-P9-NEXT:    vmrglb v2, v3, v2
-; CHECK-P9-NEXT:    vmrglb v3, v4, v5
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vmrglb v2, v4, v2
+; CHECK-P9-NEXT:    vmrglh v2, v2, v3
 ; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-BE-NEXT:    xxswapd vs1, v2
-; CHECK-BE-NEXT:    xxsldwi vs2, v2, v2, 1
-; CHECK-BE-NEXT:    xscvspdpn f3, v2
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mfvsrwz r5, f3
-; CHECK-BE-NEXT:    sldi r5, r5, 56
 ; CHECK-BE-NEXT:    mfvsrwz r3, f0
-; CHECK-BE-NEXT:    mfvsrwz r4, f1
-; CHECK-BE-NEXT:    mfvsrwz r6, f2
-; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    xscvspdpn f0, v2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghb v3, v4, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    sldi r6, r6, 56
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vmrghb v2, v3, v2
-; CHECK-BE-NEXT:    vmrghb v3, v4, v5
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghb v2, v4, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -249,120 +249,120 @@ define i64 @test8elt(<8 x float>* nocapt
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxswapd vs3, vs1
-; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 1
-; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxswapd vs6, vs0
-; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 1
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvspdpn f4, vs4
-; CHECK-P9-NEXT:    xscvspdpn f5, vs5
-; CHECK-P9-NEXT:    xscvspdpn f6, vs6
-; CHECK-P9-NEXT:    xscvspdpn f7, vs7
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    xscvdpsxws f5, f5
-; CHECK-P9-NEXT:    xscvdpsxws f6, f6
-; CHECK-P9-NEXT:    xscvdpsxws f7, f7
-; CHECK-P9-NEXT:    mfvsrwz r5, f1
-; CHECK-P9-NEXT:    mfvsrwz r9, f0
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    mfvsrwz r3, f2
-; CHECK-P9-NEXT:    mfvsrwz r4, f3
-; CHECK-P9-NEXT:    mfvsrwz r6, f4
-; CHECK-P9-NEXT:    mfvsrwz r7, f5
-; CHECK-P9-NEXT:    mfvsrwz r8, f6
-; CHECK-P9-NEXT:    mfvsrwz r10, f7
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    mtvsrd f6, r9
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    mtvsrd f4, r7
-; CHECK-P9-NEXT:    mtvsrd f5, r8
-; CHECK-P9-NEXT:    mtvsrd f7, r10
-; CHECK-P9-NEXT:    xxswapd v4, vs2
-; CHECK-P9-NEXT:    xxswapd v6, vs6
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
-; CHECK-P9-NEXT:    xxswapd v0, vs4
-; CHECK-P9-NEXT:    xxswapd v1, vs5
-; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs2
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    xscvspdpn f2, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    vmrglb v2, v3, v2
-; CHECK-P9-NEXT:    vmrglb v3, v4, v5
-; CHECK-P9-NEXT:    vmrglb v4, v1, v0
-; CHECK-P9-NEXT:    vmrglb v5, v6, v7
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
 ; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    vmrglh v3, v5, v4
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd vs1, vs0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglb v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
 ; CHECK-P9-NEXT:    vmrglw v2, v3, v2
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    xxsldwi vs4, vs1, vs1, 1
-; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxswapd vs6, vs0
-; CHECK-BE-NEXT:    xxsldwi vs7, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvspdpn f6, vs6
-; CHECK-BE-NEXT:    xscvspdpn f7, vs7
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    mfvsrwz r5, f1
-; CHECK-BE-NEXT:    mfvsrwz r9, f0
-; CHECK-BE-NEXT:    sldi r5, r5, 56
-; CHECK-BE-NEXT:    sldi r9, r9, 56
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    mfvsrwz r3, f2
-; CHECK-BE-NEXT:    mfvsrwz r4, f3
-; CHECK-BE-NEXT:    mfvsrwz r6, f4
-; CHECK-BE-NEXT:    mfvsrwz r7, f5
-; CHECK-BE-NEXT:    mfvsrwz r8, f6
-; CHECK-BE-NEXT:    mfvsrwz r10, f7
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mtvsrd v6, r9
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    sldi r6, r6, 56
-; CHECK-BE-NEXT:    sldi r7, r7, 56
-; CHECK-BE-NEXT:    sldi r8, r8, 56
-; CHECK-BE-NEXT:    sldi r10, r10, 56
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    mtvsrd v0, r7
-; CHECK-BE-NEXT:    mtvsrd v1, r8
-; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    vmrghb v2, v3, v2
-; CHECK-BE-NEXT:    vmrghb v3, v4, v5
-; CHECK-BE-NEXT:    vmrghb v4, v1, v0
-; CHECK-BE-NEXT:    vmrghb v5, v6, v7
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    vmrghh v3, v5, v4
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    vmrghb v3, v4, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-BE-NEXT:    vmrghw v2, v3, v2
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
@@ -494,251 +494,231 @@ define <16 x i8> @test16elt(<16 x float>
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs2, 16(r3)
-; CHECK-P9-NEXT:    lxv vs3, 0(r3)
-; CHECK-P9-NEXT:    lxv vs0, 48(r3)
-; CHECK-P9-NEXT:    lxv vs1, 32(r3)
-; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    xxsldwi vs4, vs3, vs3, 3
-; CHECK-P9-NEXT:    xxswapd vs5, vs3
-; CHECK-P9-NEXT:    xxsldwi vs6, vs3, vs3, 1
-; CHECK-P9-NEXT:    xxsldwi vs7, vs2, vs2, 3
-; CHECK-P9-NEXT:    xxswapd vs8, vs2
-; CHECK-P9-NEXT:    xxsldwi vs9, vs2, vs2, 1
-; CHECK-P9-NEXT:    xxsldwi vs10, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxswapd vs11, vs1
-; CHECK-P9-NEXT:    xxsldwi vs12, vs1, vs1, 1
-; CHECK-P9-NEXT:    xxsldwi vs13, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 1
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r4, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    xxswapd v2, vs1
+; CHECK-P9-NEXT:    xxswapd vs1, vs0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    lxv vs2, 48(r3)
+; CHECK-P9-NEXT:    lxv vs3, 32(r3)
+; CHECK-P9-NEXT:    lxv vs4, 16(r3)
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvspdpn f4, vs4
-; CHECK-P9-NEXT:    xscvspdpn f5, vs5
-; CHECK-P9-NEXT:    xscvspdpn f6, vs6
-; CHECK-P9-NEXT:    xscvspdpn f7, vs7
-; CHECK-P9-NEXT:    xscvspdpn f8, vs8
-; CHECK-P9-NEXT:    xscvspdpn f9, vs9
-; CHECK-P9-NEXT:    xscvspdpn f10, vs10
-; CHECK-P9-NEXT:    xscvspdpn f11, vs11
-; CHECK-P9-NEXT:    xscvspdpn f12, vs12
-; CHECK-P9-NEXT:    xscvspdpn f13, vs13
-; CHECK-P9-NEXT:    xscvspdpn v2, v2
-; CHECK-P9-NEXT:    xscvspdpn v3, v3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    xscvdpsxws f5, f5
-; CHECK-P9-NEXT:    xscvdpsxws f6, f6
-; CHECK-P9-NEXT:    xscvdpsxws f7, f7
-; CHECK-P9-NEXT:    xscvdpsxws f8, f8
-; CHECK-P9-NEXT:    xscvdpsxws f9, f9
-; CHECK-P9-NEXT:    xscvdpsxws f10, f10
-; CHECK-P9-NEXT:    xscvdpsxws f11, f11
-; CHECK-P9-NEXT:    xscvdpsxws f12, f12
-; CHECK-P9-NEXT:    xscvdpsxws f13, f13
-; CHECK-P9-NEXT:    xscvdpsxws v2, v2
-; CHECK-P9-NEXT:    xscvdpsxws v3, v3
-; CHECK-P9-NEXT:    mfvsrwz r3, f3
-; CHECK-P9-NEXT:    mfvsrwz r4, f2
-; CHECK-P9-NEXT:    mfvsrwz r11, f1
-; CHECK-P9-NEXT:    mfvsrwz r12, f0
-; CHECK-P9-NEXT:    mfvsrwz r5, f4
-; CHECK-P9-NEXT:    mfvsrwz r6, f5
-; CHECK-P9-NEXT:    mfvsrwz r7, f6
-; CHECK-P9-NEXT:    mfvsrwz r8, f7
-; CHECK-P9-NEXT:    mfvsrwz r9, f8
-; CHECK-P9-NEXT:    mfvsrwz r10, f9
-; CHECK-P9-NEXT:    mfvsrwz r0, f10
-; CHECK-P9-NEXT:    mfvsrwz r30, f11
-; CHECK-P9-NEXT:    mfvsrwz r29, f12
-; CHECK-P9-NEXT:    mfvsrwz r28, f13
-; CHECK-P9-NEXT:    mfvsrwz r27, v2
-; CHECK-P9-NEXT:    mfvsrwz r26, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f8, r11
-; CHECK-P9-NEXT:    mtvsrd f9, r12
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    mtvsrd f4, r7
-; CHECK-P9-NEXT:    mtvsrd f5, r8
-; CHECK-P9-NEXT:    mtvsrd f6, r9
-; CHECK-P9-NEXT:    mtvsrd f7, r10
-; CHECK-P9-NEXT:    mtvsrd f10, r0
-; CHECK-P9-NEXT:    mtvsrd f11, r30
-; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd f12, r29
-; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd f13, r28
-; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd v2, r27
-; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd v3, r26
-; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
 ; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xxswapd v5, vs2
-; CHECK-P9-NEXT:    xxswapd v0, vs3
-; CHECK-P9-NEXT:    xxswapd v1, vs4
-; CHECK-P9-NEXT:    xxswapd v6, vs5
-; CHECK-P9-NEXT:    xxswapd v7, vs6
-; CHECK-P9-NEXT:    xxswapd v8, vs1
-; CHECK-P9-NEXT:    xxswapd v9, vs7
-; CHECK-P9-NEXT:    xxswapd v10, vs10
-; CHECK-P9-NEXT:    xxswapd v11, vs11
-; CHECK-P9-NEXT:    xxswapd v12, vs8
-; CHECK-P9-NEXT:    xxswapd v13, vs12
-; CHECK-P9-NEXT:    xxswapd v14, vs13
-; CHECK-P9-NEXT:    xxswapd v2, v2
-; CHECK-P9-NEXT:    xxswapd v15, vs9
-; CHECK-P9-NEXT:    xxswapd v3, v3
-; CHECK-P9-NEXT:    vmrglb v5, v0, v5
-; CHECK-P9-NEXT:    vmrglb v4, v4, v1
-; CHECK-P9-NEXT:    vmrglb v0, v7, v6
-; CHECK-P9-NEXT:    vmrglb v1, v8, v9
-; CHECK-P9-NEXT:    vmrglb v6, v11, v10
-; CHECK-P9-NEXT:    vmrglb v7, v12, v13
-; CHECK-P9-NEXT:    vmrglb v2, v2, v14
-; CHECK-P9-NEXT:    vmrglb v3, v15, v3
-; CHECK-P9-NEXT:    vmrglh v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v5, v1, v0
-; CHECK-P9-NEXT:    vmrglh v0, v7, v6
+; CHECK-P9-NEXT:    xxsldwi vs0, vs4, vs4, 3
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
 ; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    vmrglw v3, v5, v4
-; CHECK-P9-NEXT:    vmrglw v2, v2, v0
-; CHECK-P9-NEXT:    xxmrgld v2, v2, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    xxswapd vs0, vs4
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, vs4
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglb v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs4, vs4, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs3, vs3, 3
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    xxswapd vs0, vs3
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglb v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs3, vs3, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs2, vs2, 3
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxswapd vs0, vs2
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglb v4, v5, v4
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs2, vs2, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v0, vs0
+; CHECK-P9-NEXT:    vmrglb v5, v5, v0
+; CHECK-P9-NEXT:    vmrglh v4, v5, v4
+; CHECK-P9-NEXT:    vmrglw v3, v4, v3
+; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
+; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
-; CHECK-BE-NEXT:    xxswapd vs5, vs3
-; CHECK-BE-NEXT:    xxsldwi vs6, vs3, vs3, 1
-; CHECK-BE-NEXT:    xxsldwi vs7, vs2, vs2, 3
-; CHECK-BE-NEXT:    xxswapd vs8, vs2
-; CHECK-BE-NEXT:    xxsldwi vs9, vs2, vs2, 1
-; CHECK-BE-NEXT:    xxsldwi vs10, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs11, vs1
-; CHECK-BE-NEXT:    xxsldwi vs12, vs1, vs1, 1
-; CHECK-BE-NEXT:    xxsldwi vs13, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxswapd v2, vs0
-; CHECK-BE-NEXT:    xxsldwi v3, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    mfvsrwz r3, f4
+; CHECK-BE-NEXT:    xxswapd vs4, vs3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvspdpn f6, vs6
-; CHECK-BE-NEXT:    xscvspdpn f7, vs7
-; CHECK-BE-NEXT:    xscvspdpn f8, vs8
-; CHECK-BE-NEXT:    xscvspdpn f9, vs9
-; CHECK-BE-NEXT:    xscvspdpn f10, vs10
-; CHECK-BE-NEXT:    xscvspdpn f11, vs11
-; CHECK-BE-NEXT:    xscvspdpn f12, vs12
-; CHECK-BE-NEXT:    xscvspdpn f13, vs13
-; CHECK-BE-NEXT:    xscvspdpn v2, v2
-; CHECK-BE-NEXT:    xscvspdpn v3, v3
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mfvsrwz r3, f4
+; CHECK-BE-NEXT:    xscvspdpn f4, vs3
+; CHECK-BE-NEXT:    xxsldwi vs3, vs3, vs3, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    vmrghb v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrwz r3, f4
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    xxsldwi vs3, vs2, vs2, 3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs2
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vmrghb v3, v4, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    mtvsrd v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vmrghb v3, v4, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    xscvdpsxws f8, f8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f9
-; CHECK-BE-NEXT:    xscvdpsxws f10, f10
-; CHECK-BE-NEXT:    xscvdpsxws f11, f11
-; CHECK-BE-NEXT:    xscvdpsxws f12, f12
-; CHECK-BE-NEXT:    xscvdpsxws f13, f13
-; CHECK-BE-NEXT:    xscvdpsxws v2, v2
-; CHECK-BE-NEXT:    xscvdpsxws v3, v3
-; CHECK-BE-NEXT:    mfvsrwz r3, f3
-; CHECK-BE-NEXT:    mfvsrwz r4, f2
-; CHECK-BE-NEXT:    mfvsrwz r11, f1
-; CHECK-BE-NEXT:    mfvsrwz r12, f0
-; CHECK-BE-NEXT:    mfvsrwz r5, f4
-; CHECK-BE-NEXT:    mfvsrwz r6, f5
-; CHECK-BE-NEXT:    mfvsrwz r7, f6
-; CHECK-BE-NEXT:    mfvsrwz r8, f7
-; CHECK-BE-NEXT:    mfvsrwz r9, f8
-; CHECK-BE-NEXT:    mfvsrwz r10, f9
-; CHECK-BE-NEXT:    mfvsrwz r0, f10
-; CHECK-BE-NEXT:    mfvsrwz r30, f11
-; CHECK-BE-NEXT:    mfvsrwz r29, f12
-; CHECK-BE-NEXT:    mfvsrwz r28, f13
-; CHECK-BE-NEXT:    mfvsrwz r27, v2
-; CHECK-BE-NEXT:    mfvsrwz r26, v3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    sldi r11, r11, 56
-; CHECK-BE-NEXT:    sldi r12, r12, 56
-; CHECK-BE-NEXT:    sldi r5, r5, 56
-; CHECK-BE-NEXT:    sldi r6, r6, 56
-; CHECK-BE-NEXT:    sldi r7, r7, 56
-; CHECK-BE-NEXT:    sldi r8, r8, 56
-; CHECK-BE-NEXT:    sldi r9, r9, 56
-; CHECK-BE-NEXT:    sldi r10, r10, 56
-; CHECK-BE-NEXT:    sldi r0, r0, 56
-; CHECK-BE-NEXT:    sldi r30, r30, 56
-; CHECK-BE-NEXT:    sldi r29, r29, 56
-; CHECK-BE-NEXT:    sldi r28, r28, 56
-; CHECK-BE-NEXT:    sldi r27, r27, 56
-; CHECK-BE-NEXT:    sldi r26, r26, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v10, r11
-; CHECK-BE-NEXT:    mtvsrd v14, r12
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    mtvsrd v0, r7
-; CHECK-BE-NEXT:    mtvsrd v1, r8
-; CHECK-BE-NEXT:    mtvsrd v6, r9
-; CHECK-BE-NEXT:    mtvsrd v7, r10
-; CHECK-BE-NEXT:    mtvsrd v8, r0
-; CHECK-BE-NEXT:    mtvsrd v9, r30
-; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v11, r29
-; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v12, r28
-; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v13, r27
-; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v15, r26
-; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
 ; CHECK-BE-NEXT:    vmrghb v4, v5, v4
-; CHECK-BE-NEXT:    vmrghb v2, v2, v0
-; CHECK-BE-NEXT:    vmrghb v5, v6, v1
-; CHECK-BE-NEXT:    vmrghb v3, v3, v7
-; CHECK-BE-NEXT:    vmrghb v0, v9, v8
-; CHECK-BE-NEXT:    vmrghb v1, v10, v11
-; CHECK-BE-NEXT:    vmrghb v6, v13, v12
-; CHECK-BE-NEXT:    vmrghb v7, v14, v15
-; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    vmrghh v3, v3, v5
-; CHECK-BE-NEXT:    vmrghh v4, v1, v0
-; CHECK-BE-NEXT:    vmrghh v5, v7, v6
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v0, r3
+; CHECK-BE-NEXT:    vmrghb v5, v5, v0
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
 ; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -777,18 +757,18 @@ define i16 @test2elt_signed(i64 %a.coerc
 ; CHECK-P9-NEXT:    xxswapd v2, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xxsldwi vs1, v2, v2, 3
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    mfvsrwz r3, f1
-; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    addi r3, r1, -2
-; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxswapd v2, vs1
+; CHECK-P9-NEXT:    xxswapd v3, vs0
 ; CHECK-P9-NEXT:    vmrglb v2, v3, v2
 ; CHECK-P9-NEXT:    vsldoi v2, v2, v2, 8
+; CHECK-P9-NEXT:    addi r3, r1, -2
 ; CHECK-P9-NEXT:    stxsihx v2, 0, r3
 ; CHECK-P9-NEXT:    lhz r3, -2(r1)
 ; CHECK-P9-NEXT:    blr
@@ -796,18 +776,18 @@ define i16 @test2elt_signed(i64 %a.coerc
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrd f0, r3
-; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
 ; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mfvsrwz r4, f1
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r4
 ; CHECK-BE-NEXT:    vmrghb v2, v2, v3
 ; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    stxsihx v2, 0, r3
@@ -856,64 +836,64 @@ define i32 @test4elt_signed(<4 x float>
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P9-NEXT:    xxswapd vs1, v2
-; CHECK-P9-NEXT:    xxsldwi vs2, v2, v2, 1
-; CHECK-P9-NEXT:    xscvspdpn f3, v2
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    mfvsrwz r5, f3
 ; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mfvsrwz r4, f1
-; CHECK-P9-NEXT:    mfvsrwz r6, f2
-; CHECK-P9-NEXT:    mtvsrd f2, r5
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    xxswapd vs0, v2
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, v2
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglb v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
 ; CHECK-P9-NEXT:    li r3, 0
 ; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
-; CHECK-P9-NEXT:    vmrglb v2, v3, v2
-; CHECK-P9-NEXT:    vmrglb v3, v4, v5
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    vmrglb v2, v4, v2
+; CHECK-P9-NEXT:    vmrglh v2, v2, v3
 ; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-BE-NEXT:    xxswapd vs1, v2
-; CHECK-BE-NEXT:    xxsldwi vs2, v2, v2, 1
-; CHECK-BE-NEXT:    xscvspdpn f3, v2
 ; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    mfvsrwz r5, f3
-; CHECK-BE-NEXT:    sldi r5, r5, 56
 ; CHECK-BE-NEXT:    mfvsrwz r3, f0
-; CHECK-BE-NEXT:    mfvsrwz r4, f1
-; CHECK-BE-NEXT:    mfvsrwz r6, f2
-; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    xxswapd vs0, v2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    xscvspdpn f0, v2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghb v3, v4, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    sldi r6, r6, 56
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    vmrghb v2, v3, v2
-; CHECK-BE-NEXT:    vmrghb v3, v4, v5
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    vmrghb v2, v4, v2
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -987,120 +967,120 @@ define i64 @test8elt_signed(<8 x float>*
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    lxv vs1, 0(r3)
 ; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxswapd vs3, vs1
-; CHECK-P9-NEXT:    xxsldwi vs4, vs1, vs1, 1
-; CHECK-P9-NEXT:    xxsldwi vs5, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxswapd vs6, vs0
-; CHECK-P9-NEXT:    xxsldwi vs7, vs0, vs0, 1
-; CHECK-P9-NEXT:    xscvspdpn f1, vs1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f2, vs2
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvspdpn f4, vs4
-; CHECK-P9-NEXT:    xscvspdpn f5, vs5
-; CHECK-P9-NEXT:    xscvspdpn f6, vs6
-; CHECK-P9-NEXT:    xscvspdpn f7, vs7
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    xscvdpsxws f5, f5
-; CHECK-P9-NEXT:    xscvdpsxws f6, f6
-; CHECK-P9-NEXT:    xscvdpsxws f7, f7
-; CHECK-P9-NEXT:    mfvsrwz r5, f1
-; CHECK-P9-NEXT:    mfvsrwz r9, f0
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    mfvsrwz r3, f2
-; CHECK-P9-NEXT:    mfvsrwz r4, f3
-; CHECK-P9-NEXT:    mfvsrwz r6, f4
-; CHECK-P9-NEXT:    mfvsrwz r7, f5
-; CHECK-P9-NEXT:    mfvsrwz r8, f6
-; CHECK-P9-NEXT:    mfvsrwz r10, f7
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    mtvsrd f6, r9
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    mtvsrd f4, r7
-; CHECK-P9-NEXT:    mtvsrd f5, r8
-; CHECK-P9-NEXT:    mtvsrd f7, r10
-; CHECK-P9-NEXT:    xxswapd v4, vs2
-; CHECK-P9-NEXT:    xxswapd v6, vs6
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
-; CHECK-P9-NEXT:    xxswapd v0, vs4
-; CHECK-P9-NEXT:    xxswapd v1, vs5
-; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs2
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    xscvspdpn f2, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    vmrglb v2, v3, v2
-; CHECK-P9-NEXT:    vmrglb v3, v4, v5
-; CHECK-P9-NEXT:    vmrglb v4, v1, v0
-; CHECK-P9-NEXT:    vmrglb v5, v6, v7
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
 ; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    vmrglh v3, v5, v4
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd vs1, vs0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglb v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
 ; CHECK-P9-NEXT:    vmrglw v2, v3, v2
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs3, vs1
-; CHECK-BE-NEXT:    xxsldwi vs4, vs1, vs1, 1
-; CHECK-BE-NEXT:    xxsldwi vs5, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxswapd vs6, vs0
-; CHECK-BE-NEXT:    xxsldwi vs7, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
 ; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvspdpn f6, vs6
-; CHECK-BE-NEXT:    xscvspdpn f7, vs7
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    mfvsrwz r5, f1
-; CHECK-BE-NEXT:    mfvsrwz r9, f0
-; CHECK-BE-NEXT:    sldi r5, r5, 56
-; CHECK-BE-NEXT:    sldi r9, r9, 56
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    mfvsrwz r3, f2
-; CHECK-BE-NEXT:    mfvsrwz r4, f3
-; CHECK-BE-NEXT:    mfvsrwz r6, f4
-; CHECK-BE-NEXT:    mfvsrwz r7, f5
-; CHECK-BE-NEXT:    mfvsrwz r8, f6
-; CHECK-BE-NEXT:    mfvsrwz r10, f7
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mtvsrd v6, r9
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    sldi r6, r6, 56
-; CHECK-BE-NEXT:    sldi r7, r7, 56
-; CHECK-BE-NEXT:    sldi r8, r8, 56
-; CHECK-BE-NEXT:    sldi r10, r10, 56
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    mtvsrd v0, r7
-; CHECK-BE-NEXT:    mtvsrd v1, r8
-; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
 ; CHECK-BE-NEXT:    vmrghb v2, v3, v2
-; CHECK-BE-NEXT:    vmrghb v3, v4, v5
-; CHECK-BE-NEXT:    vmrghb v4, v1, v0
-; CHECK-BE-NEXT:    vmrghb v5, v6, v7
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    vmrghh v3, v5, v4
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    vmrghb v3, v4, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-BE-NEXT:    vmrghw v2, v3, v2
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
@@ -1232,251 +1212,231 @@ define <16 x i8> @test16elt_signed(<16 x
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs2, 16(r3)
-; CHECK-P9-NEXT:    lxv vs3, 0(r3)
-; CHECK-P9-NEXT:    lxv vs0, 48(r3)
-; CHECK-P9-NEXT:    lxv vs1, 32(r3)
-; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    xxsldwi vs4, vs3, vs3, 3
-; CHECK-P9-NEXT:    xxswapd vs5, vs3
-; CHECK-P9-NEXT:    xxsldwi vs6, vs3, vs3, 1
-; CHECK-P9-NEXT:    xxsldwi vs7, vs2, vs2, 3
-; CHECK-P9-NEXT:    xxswapd vs8, vs2
-; CHECK-P9-NEXT:    xxsldwi vs9, vs2, vs2, 1
-; CHECK-P9-NEXT:    xxsldwi vs10, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxswapd vs11, vs1
-; CHECK-P9-NEXT:    xxsldwi vs12, vs1, vs1, 1
-; CHECK-P9-NEXT:    xxsldwi vs13, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 1
-; CHECK-P9-NEXT:    xscvspdpn f3, vs3
-; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r4, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    xxswapd v2, vs1
+; CHECK-P9-NEXT:    xxswapd vs1, vs0
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    lxv vs2, 48(r3)
+; CHECK-P9-NEXT:    lxv vs3, 32(r3)
+; CHECK-P9-NEXT:    lxv vs4, 16(r3)
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvspdpn f4, vs4
-; CHECK-P9-NEXT:    xscvspdpn f5, vs5
-; CHECK-P9-NEXT:    xscvspdpn f6, vs6
-; CHECK-P9-NEXT:    xscvspdpn f7, vs7
-; CHECK-P9-NEXT:    xscvspdpn f8, vs8
-; CHECK-P9-NEXT:    xscvspdpn f9, vs9
-; CHECK-P9-NEXT:    xscvspdpn f10, vs10
-; CHECK-P9-NEXT:    xscvspdpn f11, vs11
-; CHECK-P9-NEXT:    xscvspdpn f12, vs12
-; CHECK-P9-NEXT:    xscvspdpn f13, vs13
-; CHECK-P9-NEXT:    xscvspdpn v2, v2
-; CHECK-P9-NEXT:    xscvspdpn v3, v3
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    xscvdpsxws f5, f5
-; CHECK-P9-NEXT:    xscvdpsxws f6, f6
-; CHECK-P9-NEXT:    xscvdpsxws f7, f7
-; CHECK-P9-NEXT:    xscvdpsxws f8, f8
-; CHECK-P9-NEXT:    xscvdpsxws f9, f9
-; CHECK-P9-NEXT:    xscvdpsxws f10, f10
-; CHECK-P9-NEXT:    xscvdpsxws f11, f11
-; CHECK-P9-NEXT:    xscvdpsxws f12, f12
-; CHECK-P9-NEXT:    xscvdpsxws f13, f13
-; CHECK-P9-NEXT:    xscvdpsxws v2, v2
-; CHECK-P9-NEXT:    xscvdpsxws v3, v3
-; CHECK-P9-NEXT:    mfvsrwz r3, f3
-; CHECK-P9-NEXT:    mfvsrwz r4, f2
-; CHECK-P9-NEXT:    mfvsrwz r11, f1
-; CHECK-P9-NEXT:    mfvsrwz r12, f0
-; CHECK-P9-NEXT:    mfvsrwz r5, f4
-; CHECK-P9-NEXT:    mfvsrwz r6, f5
-; CHECK-P9-NEXT:    mfvsrwz r7, f6
-; CHECK-P9-NEXT:    mfvsrwz r8, f7
-; CHECK-P9-NEXT:    mfvsrwz r9, f8
-; CHECK-P9-NEXT:    mfvsrwz r10, f9
-; CHECK-P9-NEXT:    mfvsrwz r0, f10
-; CHECK-P9-NEXT:    mfvsrwz r30, f11
-; CHECK-P9-NEXT:    mfvsrwz r29, f12
-; CHECK-P9-NEXT:    mfvsrwz r28, f13
-; CHECK-P9-NEXT:    mfvsrwz r27, v2
-; CHECK-P9-NEXT:    mfvsrwz r26, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f8, r11
-; CHECK-P9-NEXT:    mtvsrd f9, r12
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    mtvsrd f4, r7
-; CHECK-P9-NEXT:    mtvsrd f5, r8
-; CHECK-P9-NEXT:    mtvsrd f6, r9
-; CHECK-P9-NEXT:    mtvsrd f7, r10
-; CHECK-P9-NEXT:    mtvsrd f10, r0
-; CHECK-P9-NEXT:    mtvsrd f11, r30
-; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd f12, r29
-; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd f13, r28
-; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd v2, r27
-; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    mtvsrd v3, r26
-; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
 ; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xxswapd v5, vs2
-; CHECK-P9-NEXT:    xxswapd v0, vs3
-; CHECK-P9-NEXT:    xxswapd v1, vs4
-; CHECK-P9-NEXT:    xxswapd v6, vs5
-; CHECK-P9-NEXT:    xxswapd v7, vs6
-; CHECK-P9-NEXT:    xxswapd v8, vs1
-; CHECK-P9-NEXT:    xxswapd v9, vs7
-; CHECK-P9-NEXT:    xxswapd v10, vs10
-; CHECK-P9-NEXT:    xxswapd v11, vs11
-; CHECK-P9-NEXT:    xxswapd v12, vs8
-; CHECK-P9-NEXT:    xxswapd v13, vs12
-; CHECK-P9-NEXT:    xxswapd v14, vs13
-; CHECK-P9-NEXT:    xxswapd v2, v2
-; CHECK-P9-NEXT:    xxswapd v15, vs9
-; CHECK-P9-NEXT:    xxswapd v3, v3
-; CHECK-P9-NEXT:    vmrglb v5, v0, v5
-; CHECK-P9-NEXT:    vmrglb v4, v4, v1
-; CHECK-P9-NEXT:    vmrglb v0, v7, v6
-; CHECK-P9-NEXT:    vmrglb v1, v8, v9
-; CHECK-P9-NEXT:    vmrglb v6, v11, v10
-; CHECK-P9-NEXT:    vmrglb v7, v12, v13
-; CHECK-P9-NEXT:    vmrglb v2, v2, v14
-; CHECK-P9-NEXT:    vmrglb v3, v15, v3
-; CHECK-P9-NEXT:    vmrglh v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v5, v1, v0
-; CHECK-P9-NEXT:    vmrglh v0, v7, v6
+; CHECK-P9-NEXT:    xxsldwi vs0, vs4, vs4, 3
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
 ; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    vmrglw v3, v5, v4
-; CHECK-P9-NEXT:    vmrglw v2, v2, v0
-; CHECK-P9-NEXT:    xxmrgld v2, v2, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    xxswapd vs0, vs4
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, vs4
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglb v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs4, vs4, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs3, vs3, 3
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    xxswapd vs0, vs3
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglb v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs3, vs3, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs2, vs2, 3
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    xxswapd vs0, vs2
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    xscvspdpn f0, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglb v4, v5, v4
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    xxsldwi vs0, vs2, vs2, 1
+; CHECK-P9-NEXT:    xscvspdpn f0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v0, vs0
+; CHECK-P9-NEXT:    vmrglb v5, v5, v0
+; CHECK-P9-NEXT:    vmrglh v4, v5, v4
+; CHECK-P9-NEXT:    vmrglw v3, v4, v3
+; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
+; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
+; CHECK-BE-NEXT:    xscvspdpn f4, vs4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    xxsldwi vs4, vs3, vs3, 3
-; CHECK-BE-NEXT:    xxswapd vs5, vs3
-; CHECK-BE-NEXT:    xxsldwi vs6, vs3, vs3, 1
-; CHECK-BE-NEXT:    xxsldwi vs7, vs2, vs2, 3
-; CHECK-BE-NEXT:    xxswapd vs8, vs2
-; CHECK-BE-NEXT:    xxsldwi vs9, vs2, vs2, 1
-; CHECK-BE-NEXT:    xxsldwi vs10, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxswapd vs11, vs1
-; CHECK-BE-NEXT:    xxsldwi vs12, vs1, vs1, 1
-; CHECK-BE-NEXT:    xxsldwi vs13, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxswapd v2, vs0
-; CHECK-BE-NEXT:    xxsldwi v3, vs0, vs0, 1
-; CHECK-BE-NEXT:    xscvspdpn f3, vs3
-; CHECK-BE-NEXT:    xscvspdpn f2, vs2
-; CHECK-BE-NEXT:    xscvspdpn f1, vs1
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    mfvsrwz r3, f4
+; CHECK-BE-NEXT:    xxswapd vs4, vs3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvspdpn f4, vs4
-; CHECK-BE-NEXT:    xscvspdpn f5, vs5
-; CHECK-BE-NEXT:    xscvspdpn f6, vs6
-; CHECK-BE-NEXT:    xscvspdpn f7, vs7
-; CHECK-BE-NEXT:    xscvspdpn f8, vs8
-; CHECK-BE-NEXT:    xscvspdpn f9, vs9
-; CHECK-BE-NEXT:    xscvspdpn f10, vs10
-; CHECK-BE-NEXT:    xscvspdpn f11, vs11
-; CHECK-BE-NEXT:    xscvspdpn f12, vs12
-; CHECK-BE-NEXT:    xscvspdpn f13, vs13
-; CHECK-BE-NEXT:    xscvspdpn v2, v2
-; CHECK-BE-NEXT:    xscvspdpn v3, v3
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mfvsrwz r3, f4
+; CHECK-BE-NEXT:    xscvspdpn f4, vs3
+; CHECK-BE-NEXT:    xxsldwi vs3, vs3, vs3, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    vmrghb v2, v3, v2
+; CHECK-BE-NEXT:    mfvsrwz r3, f4
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    xxsldwi vs3, vs2, vs2, 3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs2
+; CHECK-BE-NEXT:    xscvspdpn f3, vs3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    xscvspdpn f3, vs2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs2, vs2, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vmrghb v3, v4, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs1
+; CHECK-BE-NEXT:    xscvspdpn f2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xscvspdpn f2, vs1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    mtvsrd v4, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vmrghb v3, v4, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xxsldwi vs1, vs0, vs0, 3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs0
+; CHECK-BE-NEXT:    xscvspdpn f1, vs1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvspdpn f1, vs0
+; CHECK-BE-NEXT:    xxsldwi vs0, vs0, vs0, 1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    xscvspdpn f0, vs0
+; CHECK-BE-NEXT:    mtvsrd v5, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    xscvdpsxws f8, f8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f9
-; CHECK-BE-NEXT:    xscvdpsxws f10, f10
-; CHECK-BE-NEXT:    xscvdpsxws f11, f11
-; CHECK-BE-NEXT:    xscvdpsxws f12, f12
-; CHECK-BE-NEXT:    xscvdpsxws f13, f13
-; CHECK-BE-NEXT:    xscvdpsxws v2, v2
-; CHECK-BE-NEXT:    xscvdpsxws v3, v3
-; CHECK-BE-NEXT:    mfvsrwz r3, f3
-; CHECK-BE-NEXT:    mfvsrwz r4, f2
-; CHECK-BE-NEXT:    mfvsrwz r11, f1
-; CHECK-BE-NEXT:    mfvsrwz r12, f0
-; CHECK-BE-NEXT:    mfvsrwz r5, f4
-; CHECK-BE-NEXT:    mfvsrwz r6, f5
-; CHECK-BE-NEXT:    mfvsrwz r7, f6
-; CHECK-BE-NEXT:    mfvsrwz r8, f7
-; CHECK-BE-NEXT:    mfvsrwz r9, f8
-; CHECK-BE-NEXT:    mfvsrwz r10, f9
-; CHECK-BE-NEXT:    mfvsrwz r0, f10
-; CHECK-BE-NEXT:    mfvsrwz r30, f11
-; CHECK-BE-NEXT:    mfvsrwz r29, f12
-; CHECK-BE-NEXT:    mfvsrwz r28, f13
-; CHECK-BE-NEXT:    mfvsrwz r27, v2
-; CHECK-BE-NEXT:    mfvsrwz r26, v3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    sldi r11, r11, 56
-; CHECK-BE-NEXT:    sldi r12, r12, 56
-; CHECK-BE-NEXT:    sldi r5, r5, 56
-; CHECK-BE-NEXT:    sldi r6, r6, 56
-; CHECK-BE-NEXT:    sldi r7, r7, 56
-; CHECK-BE-NEXT:    sldi r8, r8, 56
-; CHECK-BE-NEXT:    sldi r9, r9, 56
-; CHECK-BE-NEXT:    sldi r10, r10, 56
-; CHECK-BE-NEXT:    sldi r0, r0, 56
-; CHECK-BE-NEXT:    sldi r30, r30, 56
-; CHECK-BE-NEXT:    sldi r29, r29, 56
-; CHECK-BE-NEXT:    sldi r28, r28, 56
-; CHECK-BE-NEXT:    sldi r27, r27, 56
-; CHECK-BE-NEXT:    sldi r26, r26, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v10, r11
-; CHECK-BE-NEXT:    mtvsrd v14, r12
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    mtvsrd v0, r7
-; CHECK-BE-NEXT:    mtvsrd v1, r8
-; CHECK-BE-NEXT:    mtvsrd v6, r9
-; CHECK-BE-NEXT:    mtvsrd v7, r10
-; CHECK-BE-NEXT:    mtvsrd v8, r0
-; CHECK-BE-NEXT:    mtvsrd v9, r30
-; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v11, r29
-; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v12, r28
-; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v13, r27
-; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v15, r26
-; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
 ; CHECK-BE-NEXT:    vmrghb v4, v5, v4
-; CHECK-BE-NEXT:    vmrghb v2, v2, v0
-; CHECK-BE-NEXT:    vmrghb v5, v6, v1
-; CHECK-BE-NEXT:    vmrghb v3, v3, v7
-; CHECK-BE-NEXT:    vmrghb v0, v9, v8
-; CHECK-BE-NEXT:    vmrghb v1, v10, v11
-; CHECK-BE-NEXT:    vmrghb v6, v13, v12
-; CHECK-BE-NEXT:    vmrghb v7, v14, v15
-; CHECK-BE-NEXT:    vmrghh v2, v2, v4
-; CHECK-BE-NEXT:    vmrghh v3, v3, v5
-; CHECK-BE-NEXT:    vmrghh v4, v1, v0
-; CHECK-BE-NEXT:    vmrghh v5, v7, v6
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v0, r3
+; CHECK-BE-NEXT:    vmrghb v5, v5, v0
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
 ; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
 ; CHECK-BE-NEXT:    blr
 entry:

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll Wed Jan  2 21:04:18 2019
@@ -28,33 +28,33 @@ define i32 @test2elt(<2 x double> %a) lo
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xscvdpsxws f0, v2
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    xscvdpsxws f1, v2
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f1
-; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
 ; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    mtvsrd f1, r4
 ; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
 ; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xscvdpsxws f0, v2
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, v2
+; CHECK-BE-NEXT:    sldi r3, r3, 48
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mfvsrwz r4, f0
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -96,56 +96,56 @@ define i64 @test4elt(<4 x double>* nocap
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    lxv vs1, 0(r3)
-; CHECK-P9-NEXT:    xxswapd vs2, vs1
-; CHECK-P9-NEXT:    xxswapd vs3, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    xxswapd v2, vs2
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f0
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    mfvsrwz r3, f1
-; CHECK-P9-NEXT:    mfvsrwz r5, f0
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mfvsrwz r4, f2
-; CHECK-P9-NEXT:    mfvsrwz r6, f3
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    xxswapd v4, vs2
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
 ; CHECK-P9-NEXT:    vmrglh v2, v2, v3
-; CHECK-P9-NEXT:    vmrglh v3, v4, v5
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    vmrglh v3, v3, v4
 ; CHECK-P9-NEXT:    vmrglw v2, v3, v2
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    xxswapd vs3, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mfvsrwz r3, f1
-; CHECK-BE-NEXT:    mfvsrwz r5, f0
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
 ; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    mfvsrwz r4, f2
-; CHECK-BE-NEXT:    mfvsrwz r6, f3
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    sldi r6, r6, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    vmrghh v2, v2, v3
-; CHECK-BE-NEXT:    vmrghh v3, v4, v5
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghh v3, v3, v4
 ; CHECK-BE-NEXT:    vmrghw v2, v3, v2
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
@@ -213,103 +213,103 @@ define <8 x i16> @test8elt(<8 x double>*
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs3, 0(r3)
+; CHECK-P9-NEXT:    xscvdpsxws f4, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    lxv vs0, 48(r3)
 ; CHECK-P9-NEXT:    lxv vs1, 32(r3)
 ; CHECK-P9-NEXT:    lxv vs2, 16(r3)
-; CHECK-P9-NEXT:    lxv vs3, 0(r3)
-; CHECK-P9-NEXT:    xxswapd vs4, vs3
-; CHECK-P9-NEXT:    xxswapd vs5, vs2
-; CHECK-P9-NEXT:    xxswapd vs6, vs1
-; CHECK-P9-NEXT:    xxswapd vs7, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mfvsrwz r3, f4
+; CHECK-P9-NEXT:    mtvsrd f4, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    xxswapd v2, vs4
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    xscvdpsxws f5, f5
-; CHECK-P9-NEXT:    xscvdpsxws f6, f6
-; CHECK-P9-NEXT:    xscvdpsxws f7, f7
 ; CHECK-P9-NEXT:    mfvsrwz r3, f3
-; CHECK-P9-NEXT:    mfvsrwz r5, f2
-; CHECK-P9-NEXT:    mfvsrwz r7, f1
-; CHECK-P9-NEXT:    mfvsrwz r9, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    mfvsrwz r4, f4
-; CHECK-P9-NEXT:    mfvsrwz r6, f5
-; CHECK-P9-NEXT:    mfvsrwz r8, f6
-; CHECK-P9-NEXT:    mfvsrwz r10, f7
-; CHECK-P9-NEXT:    mtvsrd f4, r7
-; CHECK-P9-NEXT:    mtvsrd f6, r9
-; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
 ; CHECK-P9-NEXT:    xxswapd v4, vs2
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    mtvsrd f5, r8
-; CHECK-P9-NEXT:    mtvsrd f7, r10
-; CHECK-P9-NEXT:    xxswapd v0, vs4
-; CHECK-P9-NEXT:    xxswapd v6, vs6
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
-; CHECK-P9-NEXT:    xxswapd v1, vs5
-; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    xscvdpsxws f2, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
 ; CHECK-P9-NEXT:    vmrglh v2, v2, v3
-; CHECK-P9-NEXT:    vmrglh v3, v4, v5
-; CHECK-P9-NEXT:    vmrglh v4, v0, v1
-; CHECK-P9-NEXT:    vmrglh v5, v6, v7
+; CHECK-P9-NEXT:    xxswapd v3, vs3
+; CHECK-P9-NEXT:    vmrglh v3, v3, v4
 ; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f0
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglh v3, v3, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    vmrglh v4, v4, v5
+; CHECK-P9-NEXT:    vmrglw v3, v4, v3
 ; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    xxswapd vs4, vs3
-; CHECK-BE-NEXT:    xxswapd vs5, vs2
-; CHECK-BE-NEXT:    xxswapd vs6, vs1
-; CHECK-BE-NEXT:    xxswapd vs7, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    mfvsrwz r3, f3
-; CHECK-BE-NEXT:    mfvsrwz r5, f2
-; CHECK-BE-NEXT:    mfvsrwz r7, f1
-; CHECK-BE-NEXT:    mfvsrwz r9, f0
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    mfvsrwz r3, f4
 ; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    sldi r7, r7, 48
-; CHECK-BE-NEXT:    sldi r9, r9, 48
-; CHECK-BE-NEXT:    mfvsrwz r4, f4
-; CHECK-BE-NEXT:    mfvsrwz r6, f5
-; CHECK-BE-NEXT:    mfvsrwz r8, f6
-; CHECK-BE-NEXT:    mfvsrwz r10, f7
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mtvsrd v0, r7
-; CHECK-BE-NEXT:    mtvsrd v6, r9
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    sldi r6, r6, 48
-; CHECK-BE-NEXT:    sldi r8, r8, 48
-; CHECK-BE-NEXT:    sldi r10, r10, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    mtvsrd v1, r8
-; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    vmrghh v2, v2, v3
-; CHECK-BE-NEXT:    vmrghh v3, v4, v5
-; CHECK-BE-NEXT:    vmrghh v4, v0, v1
-; CHECK-BE-NEXT:    vmrghh v5, v6, v7
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    sldi r3, r3, 48
 ; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vmrghh v4, v4, v5
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
 ; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -428,226 +428,202 @@ define void @test16elt(<16 x i16>* noali
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs2, 48(r4)
-; CHECK-P9-NEXT:    lxv vs4, 32(r4)
-; CHECK-P9-NEXT:    lxv vs5, 16(r4)
-; CHECK-P9-NEXT:    lxv vs6, 0(r4)
-; CHECK-P9-NEXT:    lxv vs0, 112(r4)
-; CHECK-P9-NEXT:    lxv vs1, 96(r4)
-; CHECK-P9-NEXT:    lxv vs3, 80(r4)
-; CHECK-P9-NEXT:    lxv vs7, 64(r4)
-; CHECK-P9-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    xxswapd vs8, vs6
-; CHECK-P9-NEXT:    xxswapd vs9, vs5
-; CHECK-P9-NEXT:    xxswapd vs10, vs4
-; CHECK-P9-NEXT:    xxswapd vs11, vs2
-; CHECK-P9-NEXT:    xxswapd vs12, vs7
-; CHECK-P9-NEXT:    xxswapd vs13, vs3
-; CHECK-P9-NEXT:    xxswapd v2, vs1
-; CHECK-P9-NEXT:    xxswapd v3, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f6, f6
-; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    lxv vs4, 0(r4)
+; CHECK-P9-NEXT:    lxv vs3, 16(r4)
+; CHECK-P9-NEXT:    lxv vs2, 32(r4)
+; CHECK-P9-NEXT:    xscvdpsxws f5, f4
+; CHECK-P9-NEXT:    lxv vs1, 48(r4)
+; CHECK-P9-NEXT:    xscvdpsxws f6, f3
+; CHECK-P9-NEXT:    lxv vs0, 64(r4)
+; CHECK-P9-NEXT:    xscvdpsxws f7, f2
+; CHECK-P9-NEXT:    xscvdpsxws f8, f1
+; CHECK-P9-NEXT:    xxswapd vs4, vs4
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    mfvsrwz r5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f9, f0
+; CHECK-P9-NEXT:    xxswapd vs3, vs3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mtvsrd f5, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f6
+; CHECK-P9-NEXT:    xxswapd vs2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mtvsrd f6, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f7
+; CHECK-P9-NEXT:    mtvsrd f7, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f8
+; CHECK-P9-NEXT:    mtvsrd f8, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f9
+; CHECK-P9-NEXT:    mtvsrd f9, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f4
+; CHECK-P9-NEXT:    mtvsrd f4, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f3
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xxswapd v2, vs5
+; CHECK-P9-NEXT:    xxswapd v5, vs8
+; CHECK-P9-NEXT:    xxswapd v0, vs9
+; CHECK-P9-NEXT:    mtvsrd f3, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f8, f8
-; CHECK-P9-NEXT:    xscvdpsxws f9, f9
-; CHECK-P9-NEXT:    xscvdpsxws f10, f10
-; CHECK-P9-NEXT:    xscvdpsxws f11, f11
-; CHECK-P9-NEXT:    xscvdpsxws f12, f12
-; CHECK-P9-NEXT:    xscvdpsxws f13, f13
-; CHECK-P9-NEXT:    xscvdpsxws v2, v2
-; CHECK-P9-NEXT:    xscvdpsxws v3, v3
-; CHECK-P9-NEXT:    mfvsrwz r4, f6
-; CHECK-P9-NEXT:    mfvsrwz r5, f5
-; CHECK-P9-NEXT:    mfvsrwz r6, f4
-; CHECK-P9-NEXT:    mfvsrwz r7, f2
-; CHECK-P9-NEXT:    mfvsrwz r12, f7
-; CHECK-P9-NEXT:    mfvsrwz r0, f3
-; CHECK-P9-NEXT:    mfvsrwz r30, f1
-; CHECK-P9-NEXT:    mfvsrwz r29, f0
-; CHECK-P9-NEXT:    mfvsrwz r8, f8
-; CHECK-P9-NEXT:    mfvsrwz r9, f9
-; CHECK-P9-NEXT:    mfvsrwz r10, f10
-; CHECK-P9-NEXT:    mfvsrwz r11, f11
-; CHECK-P9-NEXT:    mfvsrwz r28, f12
-; CHECK-P9-NEXT:    mfvsrwz r27, f13
-; CHECK-P9-NEXT:    mfvsrwz r26, v2
-; CHECK-P9-NEXT:    mfvsrwz r25, v3
-; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    xxswapd v1, vs2
+; CHECK-P9-NEXT:    lxv vs2, 80(r4)
+; CHECK-P9-NEXT:    xxswapd v3, vs4
+; CHECK-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-P9-NEXT:    xxswapd v3, vs6
+; CHECK-P9-NEXT:    xxswapd v4, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs2
+; CHECK-P9-NEXT:    mfvsrwz r5, f1
+; CHECK-P9-NEXT:    vmrglh v3, v3, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs7
 ; CHECK-P9-NEXT:    mtvsrd f1, r5
-; CHECK-P9-NEXT:    mtvsrd f2, r6
-; CHECK-P9-NEXT:    mtvsrd f3, r7
-; CHECK-P9-NEXT:    mtvsrd f8, r12
-; CHECK-P9-NEXT:    mtvsrd f9, r0
-; CHECK-P9-NEXT:    mtvsrd f10, r30
-; CHECK-P9-NEXT:    mtvsrd f11, r29
-; CHECK-P9-NEXT:    mtvsrd f4, r8
-; CHECK-P9-NEXT:    mtvsrd f5, r9
-; CHECK-P9-NEXT:    mtvsrd f6, r10
-; CHECK-P9-NEXT:    mtvsrd f7, r11
-; CHECK-P9-NEXT:    mtvsrd f12, r28
-; CHECK-P9-NEXT:    mtvsrd f13, r27
-; CHECK-P9-NEXT:    mtvsrd v2, r26
-; CHECK-P9-NEXT:    mtvsrd v3, r25
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xxswapd v5, vs1
-; CHECK-P9-NEXT:    xxswapd v0, vs2
-; CHECK-P9-NEXT:    xxswapd v1, vs3
-; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xxswapd v6, vs4
-; CHECK-P9-NEXT:    xxswapd v7, vs5
-; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xxswapd v8, vs6
-; CHECK-P9-NEXT:    xxswapd v9, vs7
-; CHECK-P9-NEXT:    xxswapd v10, vs8
-; CHECK-P9-NEXT:    xxswapd v11, vs12
-; CHECK-P9-NEXT:    xxswapd v12, vs9
-; CHECK-P9-NEXT:    xxswapd v13, vs13
-; CHECK-P9-NEXT:    xxswapd v14, vs10
-; CHECK-P9-NEXT:    xxswapd v2, v2
-; CHECK-P9-NEXT:    xxswapd v15, vs11
-; CHECK-P9-NEXT:    xxswapd v3, v3
-; CHECK-P9-NEXT:    vmrglh v4, v4, v6
-; CHECK-P9-NEXT:    vmrglh v5, v5, v7
-; CHECK-P9-NEXT:    vmrglh v0, v0, v8
-; CHECK-P9-NEXT:    vmrglh v1, v1, v9
-; CHECK-P9-NEXT:    vmrglh v6, v10, v11
-; CHECK-P9-NEXT:    vmrglh v7, v12, v13
-; CHECK-P9-NEXT:    vmrglh v2, v14, v2
-; CHECK-P9-NEXT:    vmrglh v3, v15, v3
-; CHECK-P9-NEXT:    vmrglw v4, v5, v4
-; CHECK-P9-NEXT:    vmrglw v5, v1, v0
-; CHECK-P9-NEXT:    vmrglw v0, v7, v6
+; CHECK-P9-NEXT:    mfvsrwz r5, f0
+; CHECK-P9-NEXT:    vmrglh v4, v4, v1
+; CHECK-P9-NEXT:    xxswapd v1, vs1
+; CHECK-P9-NEXT:    mtvsrd f0, r5
+; CHECK-P9-NEXT:    vmrglh v5, v5, v1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xxswapd v1, vs0
+; CHECK-P9-NEXT:    lxv vs0, 112(r4)
+; CHECK-P9-NEXT:    lxv vs1, 96(r4)
+; CHECK-P9-NEXT:    mfvsrwz r4, f3
+; CHECK-P9-NEXT:    mtvsrd f3, r4
+; CHECK-P9-NEXT:    mfvsrwz r4, f2
 ; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    xxmrgld vs0, v5, v4
-; CHECK-P9-NEXT:    xxmrgld vs1, v2, v0
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    xxmrgld vs4, v3, v2
+; CHECK-P9-NEXT:    xxswapd v2, vs3
+; CHECK-P9-NEXT:    vmrglh v0, v0, v1
+; CHECK-P9-NEXT:    mtvsrd f2, r4
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r4, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r4
+; CHECK-P9-NEXT:    mfvsrwz r4, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f0
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r4, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    vmrglh v3, v3, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    vmrglw v2, v2, v0
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    vmrglh v4, v4, v5
+; CHECK-P9-NEXT:    vmrglw v3, v4, v3
+; CHECK-P9-NEXT:    xxmrgld vs0, v3, v2
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
+; CHECK-P9-NEXT:    stxv vs4, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs2, 0(r4)
-; CHECK-BE-NEXT:    lxv vs4, 16(r4)
-; CHECK-BE-NEXT:    lxv vs5, 32(r4)
-; CHECK-BE-NEXT:    lxv vs6, 48(r4)
-; CHECK-BE-NEXT:    lxv vs0, 64(r4)
-; CHECK-BE-NEXT:    lxv vs1, 80(r4)
-; CHECK-BE-NEXT:    lxv vs3, 96(r4)
-; CHECK-BE-NEXT:    lxv vs7, 112(r4)
-; CHECK-BE-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    xxswapd vs8, vs6
-; CHECK-BE-NEXT:    xxswapd vs9, vs5
-; CHECK-BE-NEXT:    xxswapd vs10, vs4
-; CHECK-BE-NEXT:    xxswapd vs11, vs2
-; CHECK-BE-NEXT:    xxswapd vs12, vs7
-; CHECK-BE-NEXT:    xxswapd vs13, vs3
-; CHECK-BE-NEXT:    xxswapd v2, vs1
-; CHECK-BE-NEXT:    xxswapd v3, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    lxv vs4, 48(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f5, f4
+; CHECK-BE-NEXT:    xxswapd vs4, vs4
+; CHECK-BE-NEXT:    lxv vs3, 32(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f6, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    mfvsrwz r5, f5
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f7, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mtvsrd v2, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f4
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f4, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f8, f8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f9
-; CHECK-BE-NEXT:    xscvdpsxws f10, f10
-; CHECK-BE-NEXT:    xscvdpsxws f11, f11
-; CHECK-BE-NEXT:    xscvdpsxws f12, f12
-; CHECK-BE-NEXT:    xscvdpsxws f13, f13
-; CHECK-BE-NEXT:    xscvdpsxws v2, v2
-; CHECK-BE-NEXT:    xscvdpsxws v3, v3
-; CHECK-BE-NEXT:    mfvsrwz r4, f6
-; CHECK-BE-NEXT:    mfvsrwz r5, f5
-; CHECK-BE-NEXT:    mfvsrwz r6, f4
-; CHECK-BE-NEXT:    mfvsrwz r7, f2
-; CHECK-BE-NEXT:    mfvsrwz r12, f7
-; CHECK-BE-NEXT:    mfvsrwz r0, f3
-; CHECK-BE-NEXT:    mfvsrwz r30, f1
-; CHECK-BE-NEXT:    mfvsrwz r29, f0
-; CHECK-BE-NEXT:    mfvsrwz r8, f8
-; CHECK-BE-NEXT:    mfvsrwz r9, f9
-; CHECK-BE-NEXT:    mfvsrwz r10, f10
-; CHECK-BE-NEXT:    mfvsrwz r11, f11
-; CHECK-BE-NEXT:    mfvsrwz r28, f12
-; CHECK-BE-NEXT:    mfvsrwz r27, f13
-; CHECK-BE-NEXT:    mfvsrwz r26, v2
-; CHECK-BE-NEXT:    mfvsrwz r25, v3
-; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f6
 ; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    sldi r6, r6, 48
-; CHECK-BE-NEXT:    sldi r7, r7, 48
-; CHECK-BE-NEXT:    sldi r12, r12, 48
-; CHECK-BE-NEXT:    sldi r0, r0, 48
-; CHECK-BE-NEXT:    sldi r30, r30, 48
-; CHECK-BE-NEXT:    sldi r29, r29, 48
-; CHECK-BE-NEXT:    sldi r8, r8, 48
-; CHECK-BE-NEXT:    sldi r9, r9, 48
-; CHECK-BE-NEXT:    sldi r10, r10, 48
-; CHECK-BE-NEXT:    sldi r11, r11, 48
-; CHECK-BE-NEXT:    sldi r28, r28, 48
-; CHECK-BE-NEXT:    sldi r27, r27, 48
-; CHECK-BE-NEXT:    sldi r26, r26, 48
-; CHECK-BE-NEXT:    sldi r25, r25, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    lxv vs0, 112(r4)
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
 ; CHECK-BE-NEXT:    mtvsrd v3, r5
-; CHECK-BE-NEXT:    mtvsrd v4, r6
-; CHECK-BE-NEXT:    mtvsrd v5, r7
-; CHECK-BE-NEXT:    mtvsrd v8, r12
-; CHECK-BE-NEXT:    mtvsrd v10, r0
-; CHECK-BE-NEXT:    mtvsrd v12, r30
-; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v0, r8
-; CHECK-BE-NEXT:    mtvsrd v1, r9
-; CHECK-BE-NEXT:    mtvsrd v6, r10
-; CHECK-BE-NEXT:    mtvsrd v7, r11
-; CHECK-BE-NEXT:    mtvsrd v9, r28
-; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v11, r27
-; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v13, r26
-; CHECK-BE-NEXT:    mtvsrd v14, r29
-; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v15, r25
-; CHECK-BE-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    vmrghh v2, v2, v0
-; CHECK-BE-NEXT:    vmrghh v3, v3, v1
-; CHECK-BE-NEXT:    vmrghh v4, v4, v6
-; CHECK-BE-NEXT:    vmrghh v5, v5, v7
-; CHECK-BE-NEXT:    vmrghh v0, v8, v9
-; CHECK-BE-NEXT:    vmrghh v1, v10, v11
-; CHECK-BE-NEXT:    vmrghh v6, v12, v13
-; CHECK-BE-NEXT:    vmrghh v7, v14, v15
+; CHECK-BE-NEXT:    mfvsrwz r5, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f0
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f7
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f4
 ; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v5, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f3
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v0, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f2
+; CHECK-BE-NEXT:    lxv vs2, 96(r4)
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v1, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f1
+; CHECK-BE-NEXT:    lxv vs1, 80(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f3, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    vmrghh v4, v4, v1
+; CHECK-BE-NEXT:    mtvsrd v1, r5
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vmrghh v5, v5, v1
+; CHECK-BE-NEXT:    mfvsrwz r5, f0
+; CHECK-BE-NEXT:    lxv vs0, 64(r4)
+; CHECK-BE-NEXT:    mfvsrwz r4, f3
+; CHECK-BE-NEXT:    sldi r4, r4, 48
 ; CHECK-BE-NEXT:    vmrghw v3, v5, v4
-; CHECK-BE-NEXT:    vmrghw v4, v1, v0
-; CHECK-BE-NEXT:    vmrghw v5, v7, v6
+; CHECK-BE-NEXT:    xxmrghd vs3, v3, v2
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    mfvsrwz r4, f2
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f0
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v1, r5
+; CHECK-BE-NEXT:    vmrghh v0, v0, v1
+; CHECK-BE-NEXT:    vmrghw v2, v2, v0
+; CHECK-BE-NEXT:    stxv vs3, 0(r3)
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v5, r4
+; CHECK-BE-NEXT:    vmrghh v4, v4, v5
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
 ; CHECK-BE-NEXT:    xxmrghd vs0, v3, v2
-; CHECK-BE-NEXT:    xxmrghd vs1, v5, v4
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x double>, <16 x double>* %0, align 128
@@ -675,33 +651,33 @@ define i32 @test2elt_signed(<2 x double>
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xscvdpsxws f0, v2
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    xscvdpsxws f1, v2
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f1
-; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
 ; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    mtvsrd f1, r4
 ; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
 ; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xscvdpsxws f0, v2
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, v2
+; CHECK-BE-NEXT:    sldi r3, r3, 48
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    mfvsrwz r4, f0
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -743,56 +719,56 @@ define i64 @test4elt_signed(<4 x double>
 ;
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    lxv vs1, 0(r3)
-; CHECK-P9-NEXT:    xxswapd vs2, vs1
-; CHECK-P9-NEXT:    xxswapd vs3, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    xxswapd v2, vs2
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f0
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    mfvsrwz r3, f1
-; CHECK-P9-NEXT:    mfvsrwz r5, f0
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mfvsrwz r4, f2
-; CHECK-P9-NEXT:    mfvsrwz r6, f3
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    xxswapd v4, vs2
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
 ; CHECK-P9-NEXT:    vmrglh v2, v2, v3
-; CHECK-P9-NEXT:    vmrglh v3, v4, v5
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    vmrglh v3, v3, v4
 ; CHECK-P9-NEXT:    vmrglw v2, v3, v2
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    xxswapd vs3, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    mfvsrwz r3, f1
-; CHECK-BE-NEXT:    mfvsrwz r5, f0
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
 ; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    mfvsrwz r4, f2
-; CHECK-BE-NEXT:    mfvsrwz r6, f3
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    sldi r6, r6, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    vmrghh v2, v2, v3
-; CHECK-BE-NEXT:    vmrghh v3, v4, v5
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghh v3, v3, v4
 ; CHECK-BE-NEXT:    vmrghw v2, v3, v2
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
@@ -860,103 +836,103 @@ define <8 x i16> @test8elt_signed(<8 x d
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs3, 0(r3)
+; CHECK-P9-NEXT:    xscvdpsxws f4, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    lxv vs0, 48(r3)
 ; CHECK-P9-NEXT:    lxv vs1, 32(r3)
 ; CHECK-P9-NEXT:    lxv vs2, 16(r3)
-; CHECK-P9-NEXT:    lxv vs3, 0(r3)
-; CHECK-P9-NEXT:    xxswapd vs4, vs3
-; CHECK-P9-NEXT:    xxswapd vs5, vs2
-; CHECK-P9-NEXT:    xxswapd vs6, vs1
-; CHECK-P9-NEXT:    xxswapd vs7, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mfvsrwz r3, f4
+; CHECK-P9-NEXT:    mtvsrd f4, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    xxswapd v2, vs4
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    xscvdpsxws f5, f5
-; CHECK-P9-NEXT:    xscvdpsxws f6, f6
-; CHECK-P9-NEXT:    xscvdpsxws f7, f7
 ; CHECK-P9-NEXT:    mfvsrwz r3, f3
-; CHECK-P9-NEXT:    mfvsrwz r5, f2
-; CHECK-P9-NEXT:    mfvsrwz r7, f1
-; CHECK-P9-NEXT:    mfvsrwz r9, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    mfvsrwz r4, f4
-; CHECK-P9-NEXT:    mfvsrwz r6, f5
-; CHECK-P9-NEXT:    mfvsrwz r8, f6
-; CHECK-P9-NEXT:    mfvsrwz r10, f7
-; CHECK-P9-NEXT:    mtvsrd f4, r7
-; CHECK-P9-NEXT:    mtvsrd f6, r9
-; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
 ; CHECK-P9-NEXT:    xxswapd v4, vs2
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    mtvsrd f5, r8
-; CHECK-P9-NEXT:    mtvsrd f7, r10
-; CHECK-P9-NEXT:    xxswapd v0, vs4
-; CHECK-P9-NEXT:    xxswapd v6, vs6
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
-; CHECK-P9-NEXT:    xxswapd v1, vs5
-; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    xscvdpsxws f2, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
 ; CHECK-P9-NEXT:    vmrglh v2, v2, v3
-; CHECK-P9-NEXT:    vmrglh v3, v4, v5
-; CHECK-P9-NEXT:    vmrglh v4, v0, v1
-; CHECK-P9-NEXT:    vmrglh v5, v6, v7
+; CHECK-P9-NEXT:    xxswapd v3, vs3
+; CHECK-P9-NEXT:    vmrglh v3, v3, v4
 ; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f0
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglh v3, v3, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    vmrglh v4, v4, v5
+; CHECK-P9-NEXT:    vmrglw v3, v4, v3
 ; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    xxswapd vs4, vs3
-; CHECK-BE-NEXT:    xxswapd vs5, vs2
-; CHECK-BE-NEXT:    xxswapd vs6, vs1
-; CHECK-BE-NEXT:    xxswapd vs7, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    mfvsrwz r3, f3
-; CHECK-BE-NEXT:    mfvsrwz r5, f2
-; CHECK-BE-NEXT:    mfvsrwz r7, f1
-; CHECK-BE-NEXT:    mfvsrwz r9, f0
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    mfvsrwz r3, f4
 ; CHECK-BE-NEXT:    sldi r3, r3, 48
-; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    sldi r7, r7, 48
-; CHECK-BE-NEXT:    sldi r9, r9, 48
-; CHECK-BE-NEXT:    mfvsrwz r4, f4
-; CHECK-BE-NEXT:    mfvsrwz r6, f5
-; CHECK-BE-NEXT:    mfvsrwz r8, f6
-; CHECK-BE-NEXT:    mfvsrwz r10, f7
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mtvsrd v0, r7
-; CHECK-BE-NEXT:    mtvsrd v6, r9
-; CHECK-BE-NEXT:    sldi r4, r4, 48
-; CHECK-BE-NEXT:    sldi r6, r6, 48
-; CHECK-BE-NEXT:    sldi r8, r8, 48
-; CHECK-BE-NEXT:    sldi r10, r10, 48
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    mtvsrd v1, r8
-; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    vmrghh v2, v2, v3
-; CHECK-BE-NEXT:    vmrghh v3, v4, v5
-; CHECK-BE-NEXT:    vmrghh v4, v0, v1
-; CHECK-BE-NEXT:    vmrghh v5, v6, v7
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    sldi r3, r3, 48
 ; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 48
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vmrghh v4, v4, v5
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
 ; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -1075,226 +1051,202 @@ define void @test16elt_signed(<16 x i16>
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs2, 48(r4)
-; CHECK-P9-NEXT:    lxv vs4, 32(r4)
-; CHECK-P9-NEXT:    lxv vs5, 16(r4)
-; CHECK-P9-NEXT:    lxv vs6, 0(r4)
-; CHECK-P9-NEXT:    lxv vs0, 112(r4)
-; CHECK-P9-NEXT:    lxv vs1, 96(r4)
-; CHECK-P9-NEXT:    lxv vs3, 80(r4)
-; CHECK-P9-NEXT:    lxv vs7, 64(r4)
-; CHECK-P9-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    xxswapd vs8, vs6
-; CHECK-P9-NEXT:    xxswapd vs9, vs5
-; CHECK-P9-NEXT:    xxswapd vs10, vs4
-; CHECK-P9-NEXT:    xxswapd vs11, vs2
-; CHECK-P9-NEXT:    xxswapd vs12, vs7
-; CHECK-P9-NEXT:    xxswapd vs13, vs3
-; CHECK-P9-NEXT:    xxswapd v2, vs1
-; CHECK-P9-NEXT:    xxswapd v3, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f6, f6
-; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    lxv vs4, 0(r4)
+; CHECK-P9-NEXT:    lxv vs3, 16(r4)
+; CHECK-P9-NEXT:    lxv vs2, 32(r4)
+; CHECK-P9-NEXT:    xscvdpsxws f5, f4
+; CHECK-P9-NEXT:    lxv vs1, 48(r4)
+; CHECK-P9-NEXT:    xscvdpsxws f6, f3
+; CHECK-P9-NEXT:    lxv vs0, 64(r4)
+; CHECK-P9-NEXT:    xscvdpsxws f7, f2
+; CHECK-P9-NEXT:    xscvdpsxws f8, f1
+; CHECK-P9-NEXT:    xxswapd vs4, vs4
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f7, f7
+; CHECK-P9-NEXT:    mfvsrwz r5, f5
+; CHECK-P9-NEXT:    xscvdpsxws f9, f0
+; CHECK-P9-NEXT:    xxswapd vs3, vs3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mtvsrd f5, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f6
+; CHECK-P9-NEXT:    xxswapd vs2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mtvsrd f6, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f7
+; CHECK-P9-NEXT:    mtvsrd f7, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f8
+; CHECK-P9-NEXT:    mtvsrd f8, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f9
+; CHECK-P9-NEXT:    mtvsrd f9, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f4
+; CHECK-P9-NEXT:    mtvsrd f4, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f3
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    xxswapd v2, vs5
+; CHECK-P9-NEXT:    xxswapd v5, vs8
+; CHECK-P9-NEXT:    xxswapd v0, vs9
+; CHECK-P9-NEXT:    mtvsrd f3, r5
+; CHECK-P9-NEXT:    mfvsrwz r5, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r5
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f8, f8
-; CHECK-P9-NEXT:    xscvdpsxws f9, f9
-; CHECK-P9-NEXT:    xscvdpsxws f10, f10
-; CHECK-P9-NEXT:    xscvdpsxws f11, f11
-; CHECK-P9-NEXT:    xscvdpsxws f12, f12
-; CHECK-P9-NEXT:    xscvdpsxws f13, f13
-; CHECK-P9-NEXT:    xscvdpsxws v2, v2
-; CHECK-P9-NEXT:    xscvdpsxws v3, v3
-; CHECK-P9-NEXT:    mfvsrwz r4, f6
-; CHECK-P9-NEXT:    mfvsrwz r5, f5
-; CHECK-P9-NEXT:    mfvsrwz r6, f4
-; CHECK-P9-NEXT:    mfvsrwz r7, f2
-; CHECK-P9-NEXT:    mfvsrwz r12, f7
-; CHECK-P9-NEXT:    mfvsrwz r0, f3
-; CHECK-P9-NEXT:    mfvsrwz r30, f1
-; CHECK-P9-NEXT:    mfvsrwz r29, f0
-; CHECK-P9-NEXT:    mfvsrwz r8, f8
-; CHECK-P9-NEXT:    mfvsrwz r9, f9
-; CHECK-P9-NEXT:    mfvsrwz r10, f10
-; CHECK-P9-NEXT:    mfvsrwz r11, f11
-; CHECK-P9-NEXT:    mfvsrwz r28, f12
-; CHECK-P9-NEXT:    mfvsrwz r27, f13
-; CHECK-P9-NEXT:    mfvsrwz r26, v2
-; CHECK-P9-NEXT:    mfvsrwz r25, v3
-; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    xxswapd v1, vs2
+; CHECK-P9-NEXT:    lxv vs2, 80(r4)
+; CHECK-P9-NEXT:    xxswapd v3, vs4
+; CHECK-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-P9-NEXT:    xxswapd v3, vs6
+; CHECK-P9-NEXT:    xxswapd v4, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs2
+; CHECK-P9-NEXT:    mfvsrwz r5, f1
+; CHECK-P9-NEXT:    vmrglh v3, v3, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs7
 ; CHECK-P9-NEXT:    mtvsrd f1, r5
-; CHECK-P9-NEXT:    mtvsrd f2, r6
-; CHECK-P9-NEXT:    mtvsrd f3, r7
-; CHECK-P9-NEXT:    mtvsrd f8, r12
-; CHECK-P9-NEXT:    mtvsrd f9, r0
-; CHECK-P9-NEXT:    mtvsrd f10, r30
-; CHECK-P9-NEXT:    mtvsrd f11, r29
-; CHECK-P9-NEXT:    mtvsrd f4, r8
-; CHECK-P9-NEXT:    mtvsrd f5, r9
-; CHECK-P9-NEXT:    mtvsrd f6, r10
-; CHECK-P9-NEXT:    mtvsrd f7, r11
-; CHECK-P9-NEXT:    mtvsrd f12, r28
-; CHECK-P9-NEXT:    mtvsrd f13, r27
-; CHECK-P9-NEXT:    mtvsrd v2, r26
-; CHECK-P9-NEXT:    mtvsrd v3, r25
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xxswapd v5, vs1
-; CHECK-P9-NEXT:    xxswapd v0, vs2
-; CHECK-P9-NEXT:    xxswapd v1, vs3
-; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xxswapd v6, vs4
-; CHECK-P9-NEXT:    xxswapd v7, vs5
-; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xxswapd v8, vs6
-; CHECK-P9-NEXT:    xxswapd v9, vs7
-; CHECK-P9-NEXT:    xxswapd v10, vs8
-; CHECK-P9-NEXT:    xxswapd v11, vs12
-; CHECK-P9-NEXT:    xxswapd v12, vs9
-; CHECK-P9-NEXT:    xxswapd v13, vs13
-; CHECK-P9-NEXT:    xxswapd v14, vs10
-; CHECK-P9-NEXT:    xxswapd v2, v2
-; CHECK-P9-NEXT:    xxswapd v15, vs11
-; CHECK-P9-NEXT:    xxswapd v3, v3
-; CHECK-P9-NEXT:    vmrglh v4, v4, v6
-; CHECK-P9-NEXT:    vmrglh v5, v5, v7
-; CHECK-P9-NEXT:    vmrglh v0, v0, v8
-; CHECK-P9-NEXT:    vmrglh v1, v1, v9
-; CHECK-P9-NEXT:    vmrglh v6, v10, v11
-; CHECK-P9-NEXT:    vmrglh v7, v12, v13
-; CHECK-P9-NEXT:    vmrglh v2, v14, v2
-; CHECK-P9-NEXT:    vmrglh v3, v15, v3
-; CHECK-P9-NEXT:    vmrglw v4, v5, v4
-; CHECK-P9-NEXT:    vmrglw v5, v1, v0
-; CHECK-P9-NEXT:    vmrglw v0, v7, v6
+; CHECK-P9-NEXT:    mfvsrwz r5, f0
+; CHECK-P9-NEXT:    vmrglh v4, v4, v1
+; CHECK-P9-NEXT:    xxswapd v1, vs1
+; CHECK-P9-NEXT:    mtvsrd f0, r5
+; CHECK-P9-NEXT:    vmrglh v5, v5, v1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xxswapd v1, vs0
+; CHECK-P9-NEXT:    lxv vs0, 112(r4)
+; CHECK-P9-NEXT:    lxv vs1, 96(r4)
+; CHECK-P9-NEXT:    mfvsrwz r4, f3
+; CHECK-P9-NEXT:    mtvsrd f3, r4
+; CHECK-P9-NEXT:    mfvsrwz r4, f2
 ; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    xxmrgld vs0, v5, v4
-; CHECK-P9-NEXT:    xxmrgld vs1, v2, v0
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    vmrglw v3, v5, v4
+; CHECK-P9-NEXT:    xxmrgld vs4, v3, v2
+; CHECK-P9-NEXT:    xxswapd v2, vs3
+; CHECK-P9-NEXT:    vmrglh v0, v0, v1
+; CHECK-P9-NEXT:    mtvsrd f2, r4
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r4, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r4
+; CHECK-P9-NEXT:    mfvsrwz r4, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f0
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r4, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r4
+; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    vmrglh v3, v3, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    vmrglw v2, v2, v0
+; CHECK-P9-NEXT:    mtvsrd f0, r4
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    vmrglh v4, v4, v5
+; CHECK-P9-NEXT:    vmrglw v3, v4, v3
+; CHECK-P9-NEXT:    xxmrgld vs0, v3, v2
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
+; CHECK-P9-NEXT:    stxv vs4, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs2, 0(r4)
-; CHECK-BE-NEXT:    lxv vs4, 16(r4)
-; CHECK-BE-NEXT:    lxv vs5, 32(r4)
-; CHECK-BE-NEXT:    lxv vs6, 48(r4)
-; CHECK-BE-NEXT:    lxv vs0, 64(r4)
-; CHECK-BE-NEXT:    lxv vs1, 80(r4)
-; CHECK-BE-NEXT:    lxv vs3, 96(r4)
-; CHECK-BE-NEXT:    lxv vs7, 112(r4)
-; CHECK-BE-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    xxswapd vs8, vs6
-; CHECK-BE-NEXT:    xxswapd vs9, vs5
-; CHECK-BE-NEXT:    xxswapd vs10, vs4
-; CHECK-BE-NEXT:    xxswapd vs11, vs2
-; CHECK-BE-NEXT:    xxswapd vs12, vs7
-; CHECK-BE-NEXT:    xxswapd vs13, vs3
-; CHECK-BE-NEXT:    xxswapd v2, vs1
-; CHECK-BE-NEXT:    xxswapd v3, vs0
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    lxv vs4, 48(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f5, f4
+; CHECK-BE-NEXT:    xxswapd vs4, vs4
+; CHECK-BE-NEXT:    lxv vs3, 32(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f6, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    mfvsrwz r5, f5
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f7, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mtvsrd v2, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f4
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f4, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f8, f8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f9
-; CHECK-BE-NEXT:    xscvdpsxws f10, f10
-; CHECK-BE-NEXT:    xscvdpsxws f11, f11
-; CHECK-BE-NEXT:    xscvdpsxws f12, f12
-; CHECK-BE-NEXT:    xscvdpsxws f13, f13
-; CHECK-BE-NEXT:    xscvdpsxws v2, v2
-; CHECK-BE-NEXT:    xscvdpsxws v3, v3
-; CHECK-BE-NEXT:    mfvsrwz r4, f6
-; CHECK-BE-NEXT:    mfvsrwz r5, f5
-; CHECK-BE-NEXT:    mfvsrwz r6, f4
-; CHECK-BE-NEXT:    mfvsrwz r7, f2
-; CHECK-BE-NEXT:    mfvsrwz r12, f7
-; CHECK-BE-NEXT:    mfvsrwz r0, f3
-; CHECK-BE-NEXT:    mfvsrwz r30, f1
-; CHECK-BE-NEXT:    mfvsrwz r29, f0
-; CHECK-BE-NEXT:    mfvsrwz r8, f8
-; CHECK-BE-NEXT:    mfvsrwz r9, f9
-; CHECK-BE-NEXT:    mfvsrwz r10, f10
-; CHECK-BE-NEXT:    mfvsrwz r11, f11
-; CHECK-BE-NEXT:    mfvsrwz r28, f12
-; CHECK-BE-NEXT:    mfvsrwz r27, f13
-; CHECK-BE-NEXT:    mfvsrwz r26, v2
-; CHECK-BE-NEXT:    mfvsrwz r25, v3
-; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f6
 ; CHECK-BE-NEXT:    sldi r5, r5, 48
-; CHECK-BE-NEXT:    sldi r6, r6, 48
-; CHECK-BE-NEXT:    sldi r7, r7, 48
-; CHECK-BE-NEXT:    sldi r12, r12, 48
-; CHECK-BE-NEXT:    sldi r0, r0, 48
-; CHECK-BE-NEXT:    sldi r30, r30, 48
-; CHECK-BE-NEXT:    sldi r29, r29, 48
-; CHECK-BE-NEXT:    sldi r8, r8, 48
-; CHECK-BE-NEXT:    sldi r9, r9, 48
-; CHECK-BE-NEXT:    sldi r10, r10, 48
-; CHECK-BE-NEXT:    sldi r11, r11, 48
-; CHECK-BE-NEXT:    sldi r28, r28, 48
-; CHECK-BE-NEXT:    sldi r27, r27, 48
-; CHECK-BE-NEXT:    sldi r26, r26, 48
-; CHECK-BE-NEXT:    sldi r25, r25, 48
-; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    lxv vs0, 112(r4)
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
 ; CHECK-BE-NEXT:    mtvsrd v3, r5
-; CHECK-BE-NEXT:    mtvsrd v4, r6
-; CHECK-BE-NEXT:    mtvsrd v5, r7
-; CHECK-BE-NEXT:    mtvsrd v8, r12
-; CHECK-BE-NEXT:    mtvsrd v10, r0
-; CHECK-BE-NEXT:    mtvsrd v12, r30
-; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v0, r8
-; CHECK-BE-NEXT:    mtvsrd v1, r9
-; CHECK-BE-NEXT:    mtvsrd v6, r10
-; CHECK-BE-NEXT:    mtvsrd v7, r11
-; CHECK-BE-NEXT:    mtvsrd v9, r28
-; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v11, r27
-; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v13, r26
-; CHECK-BE-NEXT:    mtvsrd v14, r29
-; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v15, r25
-; CHECK-BE-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    vmrghh v2, v2, v0
-; CHECK-BE-NEXT:    vmrghh v3, v3, v1
-; CHECK-BE-NEXT:    vmrghh v4, v4, v6
-; CHECK-BE-NEXT:    vmrghh v5, v5, v7
-; CHECK-BE-NEXT:    vmrghh v0, v8, v9
-; CHECK-BE-NEXT:    vmrghh v1, v10, v11
-; CHECK-BE-NEXT:    vmrghh v6, v12, v13
-; CHECK-BE-NEXT:    vmrghh v7, v14, v15
+; CHECK-BE-NEXT:    mfvsrwz r5, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f0
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f7
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f4
 ; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v5, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f3
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v0, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f2
+; CHECK-BE-NEXT:    lxv vs2, 96(r4)
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v1, r5
+; CHECK-BE-NEXT:    mfvsrwz r5, f1
+; CHECK-BE-NEXT:    lxv vs1, 80(r4)
+; CHECK-BE-NEXT:    xscvdpsxws f3, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    vmrghh v4, v4, v1
+; CHECK-BE-NEXT:    mtvsrd v1, r5
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    vmrghh v5, v5, v1
+; CHECK-BE-NEXT:    mfvsrwz r5, f0
+; CHECK-BE-NEXT:    lxv vs0, 64(r4)
+; CHECK-BE-NEXT:    mfvsrwz r4, f3
+; CHECK-BE-NEXT:    sldi r4, r4, 48
 ; CHECK-BE-NEXT:    vmrghw v3, v5, v4
-; CHECK-BE-NEXT:    vmrghw v4, v1, v0
-; CHECK-BE-NEXT:    vmrghw v5, v7, v6
+; CHECK-BE-NEXT:    xxmrghd vs3, v3, v2
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-NEXT:    mfvsrwz r4, f2
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v3, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    vmrghh v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r4, f1
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v4, r4
+; CHECK-BE-NEXT:    mfvsrwz r4, f0
+; CHECK-BE-NEXT:    sldi r5, r5, 48
+; CHECK-BE-NEXT:    mtvsrd v1, r5
+; CHECK-BE-NEXT:    vmrghh v0, v0, v1
+; CHECK-BE-NEXT:    vmrghw v2, v2, v0
+; CHECK-BE-NEXT:    stxv vs3, 0(r3)
+; CHECK-BE-NEXT:    sldi r4, r4, 48
+; CHECK-BE-NEXT:    mtvsrd v5, r4
+; CHECK-BE-NEXT:    vmrghh v4, v4, v5
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
 ; CHECK-BE-NEXT:    xxmrghd vs0, v3, v2
-; CHECK-BE-NEXT:    xxmrghd vs1, v5, v4
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x double>, <16 x double>* %0, align 128

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll Wed Jan  2 21:04:18 2019
@@ -28,27 +28,27 @@ define i64 @test2elt(<2 x double> %a) lo
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xscvdpuxws f0, v2
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    xscvdpuxws f1, v2
+; CHECK-P9-NEXT:    mtvsrws v3, r3
 ; CHECK-P9-NEXT:    xscvdpuxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrws v2, r3
-; CHECK-P9-NEXT:    mfvsrwz r4, f0
-; CHECK-P9-NEXT:    mtvsrws v3, r4
-; CHECK-P9-NEXT:    vmrglw v2, v2, v3
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xscvdpuxws f0, v2
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    xscvdpuxws f1, v2
+; CHECK-BE-NEXT:    mtvsrws v3, r3
 ; CHECK-BE-NEXT:    xscvdpuxws f0, f0
-; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    mtvsrws v2, r3
-; CHECK-BE-NEXT:    mfvsrwz r4, f0
-; CHECK-BE-NEXT:    mtvsrws v3, r4
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -129,42 +129,42 @@ define void @test8elt(<8 x i32>* noalias
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 32(r4)
-; CHECK-P9-NEXT:    lxv vs1, 48(r4)
 ; CHECK-P9-NEXT:    lxv vs2, 0(r4)
 ; CHECK-P9-NEXT:    lxv vs3, 16(r4)
 ; CHECK-P9-NEXT:    xxmrgld vs4, vs3, vs2
 ; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-P9-NEXT:    xxmrgld vs3, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    lxv vs0, 32(r4)
+; CHECK-P9-NEXT:    lxv vs1, 48(r4)
 ; CHECK-P9-NEXT:    xvcvdpuxws v2, vs4
 ; CHECK-P9-NEXT:    xvcvdpuxws v3, vs2
-; CHECK-P9-NEXT:    xvcvdpuxws v4, vs3
-; CHECK-P9-NEXT:    xvcvdpuxws v5, vs0
+; CHECK-P9-NEXT:    xxmrgld vs2, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xvcvdpuxws v4, vs0
 ; CHECK-P9-NEXT:    vmrgew v2, v3, v2
-; CHECK-P9-NEXT:    vmrgew v3, v5, v4
-; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    xvcvdpuxws v3, vs2
 ; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    vmrgew v3, v4, v3
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 48(r4)
-; CHECK-BE-NEXT:    lxv vs1, 32(r4)
 ; CHECK-BE-NEXT:    lxv vs2, 16(r4)
 ; CHECK-BE-NEXT:    lxv vs3, 0(r4)
 ; CHECK-BE-NEXT:    xxmrgld vs4, vs3, vs2
 ; CHECK-BE-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-BE-NEXT:    xxmrgld vs3, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
 ; CHECK-BE-NEXT:    xvcvdpuxws v2, vs4
 ; CHECK-BE-NEXT:    xvcvdpuxws v3, vs2
-; CHECK-BE-NEXT:    xvcvdpuxws v4, vs3
-; CHECK-BE-NEXT:    xvcvdpuxws v5, vs0
+; CHECK-BE-NEXT:    xxmrgld vs2, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xvcvdpuxws v4, vs0
 ; CHECK-BE-NEXT:    vmrgew v2, v3, v2
-; CHECK-BE-NEXT:    vmrgew v3, v5, v4
-; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    xvcvdpuxws v3, vs2
 ; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    vmrgew v3, v4, v3
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x double>, <8 x double>* %0, align 64
@@ -227,74 +227,74 @@ define void @test16elt(<16 x i32>* noali
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 32(r4)
-; CHECK-P9-NEXT:    lxv vs1, 48(r4)
-; CHECK-P9-NEXT:    lxv vs2, 0(r4)
-; CHECK-P9-NEXT:    lxv vs3, 16(r4)
-; CHECK-P9-NEXT:    lxv vs4, 96(r4)
-; CHECK-P9-NEXT:    lxv vs5, 112(r4)
-; CHECK-P9-NEXT:    lxv vs6, 64(r4)
-; CHECK-P9-NEXT:    lxv vs7, 80(r4)
-; CHECK-P9-NEXT:    xxmrgld vs8, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-P9-NEXT:    xxmrgld vs3, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrgld vs1, vs7, vs6
+; CHECK-P9-NEXT:    lxv vs6, 0(r4)
+; CHECK-P9-NEXT:    lxv vs7, 16(r4)
+; CHECK-P9-NEXT:    xxmrgld vs8, vs7, vs6
 ; CHECK-P9-NEXT:    xxmrghd vs6, vs7, vs6
+; CHECK-P9-NEXT:    lxv vs4, 32(r4)
+; CHECK-P9-NEXT:    lxv vs5, 48(r4)
 ; CHECK-P9-NEXT:    xxmrgld vs7, vs5, vs4
 ; CHECK-P9-NEXT:    xxmrghd vs4, vs5, vs4
 ; CHECK-P9-NEXT:    xvcvdpuxws v2, vs8
-; CHECK-P9-NEXT:    xvcvdpuxws v3, vs2
-; CHECK-P9-NEXT:    xvcvdpuxws v4, vs3
-; CHECK-P9-NEXT:    xvcvdpuxws v5, vs0
-; CHECK-P9-NEXT:    xvcvdpuxws v0, vs1
-; CHECK-P9-NEXT:    xvcvdpuxws v1, vs6
-; CHECK-P9-NEXT:    xvcvdpuxws v6, vs7
-; CHECK-P9-NEXT:    xvcvdpuxws v7, vs4
+; CHECK-P9-NEXT:    xvcvdpuxws v3, vs6
+; CHECK-P9-NEXT:    lxv vs2, 64(r4)
+; CHECK-P9-NEXT:    lxv vs3, 80(r4)
+; CHECK-P9-NEXT:    xvcvdpuxws v4, vs7
 ; CHECK-P9-NEXT:    vmrgew v2, v3, v2
-; CHECK-P9-NEXT:    vmrgew v3, v5, v4
-; CHECK-P9-NEXT:    vmrgew v4, v1, v0
-; CHECK-P9-NEXT:    vmrgew v5, v7, v6
-; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    xvcvdpuxws v3, vs4
+; CHECK-P9-NEXT:    xxmrgld vs4, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-P9-NEXT:    lxv vs0, 96(r4)
+; CHECK-P9-NEXT:    lxv vs1, 112(r4)
 ; CHECK-P9-NEXT:    stxv v2, 0(r3)
-; CHECK-P9-NEXT:    stxv v5, 48(r3)
+; CHECK-P9-NEXT:    xvcvdpuxws v5, vs2
+; CHECK-P9-NEXT:    xxmrgld vs2, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xvcvdpuxws v0, vs0
+; CHECK-P9-NEXT:    vmrgew v3, v3, v4
+; CHECK-P9-NEXT:    xvcvdpuxws v4, vs4
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    vmrgew v4, v5, v4
 ; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    xvcvdpuxws v5, vs2
+; CHECK-P9-NEXT:    vmrgew v5, v0, v5
+; CHECK-P9-NEXT:    stxv v5, 48(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 48(r4)
-; CHECK-BE-NEXT:    lxv vs1, 32(r4)
-; CHECK-BE-NEXT:    lxv vs2, 16(r4)
-; CHECK-BE-NEXT:    lxv vs3, 0(r4)
-; CHECK-BE-NEXT:    lxv vs4, 112(r4)
-; CHECK-BE-NEXT:    lxv vs5, 96(r4)
-; CHECK-BE-NEXT:    lxv vs6, 80(r4)
-; CHECK-BE-NEXT:    lxv vs7, 64(r4)
-; CHECK-BE-NEXT:    xxmrgld vs8, vs3, vs2
-; CHECK-BE-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-BE-NEXT:    xxmrgld vs3, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrgld vs1, vs7, vs6
+; CHECK-BE-NEXT:    lxv vs6, 16(r4)
+; CHECK-BE-NEXT:    lxv vs7, 0(r4)
+; CHECK-BE-NEXT:    xxmrgld vs8, vs7, vs6
 ; CHECK-BE-NEXT:    xxmrghd vs6, vs7, vs6
+; CHECK-BE-NEXT:    lxv vs4, 48(r4)
+; CHECK-BE-NEXT:    lxv vs5, 32(r4)
 ; CHECK-BE-NEXT:    xxmrgld vs7, vs5, vs4
 ; CHECK-BE-NEXT:    xxmrghd vs4, vs5, vs4
 ; CHECK-BE-NEXT:    xvcvdpuxws v2, vs8
-; CHECK-BE-NEXT:    xvcvdpuxws v3, vs2
-; CHECK-BE-NEXT:    xvcvdpuxws v4, vs3
-; CHECK-BE-NEXT:    xvcvdpuxws v5, vs0
-; CHECK-BE-NEXT:    xvcvdpuxws v0, vs1
-; CHECK-BE-NEXT:    xvcvdpuxws v1, vs6
-; CHECK-BE-NEXT:    xvcvdpuxws v6, vs7
-; CHECK-BE-NEXT:    xvcvdpuxws v7, vs4
+; CHECK-BE-NEXT:    xvcvdpuxws v3, vs6
+; CHECK-BE-NEXT:    lxv vs2, 80(r4)
+; CHECK-BE-NEXT:    lxv vs3, 64(r4)
+; CHECK-BE-NEXT:    xvcvdpuxws v4, vs7
 ; CHECK-BE-NEXT:    vmrgew v2, v3, v2
-; CHECK-BE-NEXT:    vmrgew v3, v5, v4
-; CHECK-BE-NEXT:    vmrgew v4, v1, v0
-; CHECK-BE-NEXT:    vmrgew v5, v7, v6
-; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    xvcvdpuxws v3, vs4
+; CHECK-BE-NEXT:    xxmrgld vs4, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-BE-NEXT:    lxv vs0, 112(r4)
+; CHECK-BE-NEXT:    lxv vs1, 96(r4)
 ; CHECK-BE-NEXT:    stxv v2, 0(r3)
-; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    xvcvdpuxws v5, vs2
+; CHECK-BE-NEXT:    xxmrgld vs2, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xvcvdpuxws v0, vs0
+; CHECK-BE-NEXT:    vmrgew v3, v3, v4
+; CHECK-BE-NEXT:    xvcvdpuxws v4, vs4
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    vmrgew v4, v5, v4
 ; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    xvcvdpuxws v5, vs2
+; CHECK-BE-NEXT:    vmrgew v5, v0, v5
+; CHECK-BE-NEXT:    stxv v5, 48(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x double>, <16 x double>* %0, align 128
@@ -322,27 +322,27 @@ define i64 @test2elt_signed(<2 x double>
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xscvdpsxws f0, v2
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    xscvdpsxws f1, v2
+; CHECK-P9-NEXT:    mtvsrws v3, r3
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrws v2, r3
-; CHECK-P9-NEXT:    mfvsrwz r4, f0
-; CHECK-P9-NEXT:    mtvsrws v3, r4
-; CHECK-P9-NEXT:    vmrglw v2, v2, v3
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xscvdpsxws f0, v2
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, v2
+; CHECK-BE-NEXT:    mtvsrws v3, r3
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    mtvsrws v2, r3
-; CHECK-BE-NEXT:    mfvsrwz r4, f0
-; CHECK-BE-NEXT:    mtvsrws v3, r4
-; CHECK-BE-NEXT:    vmrghw v2, v2, v3
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -423,42 +423,42 @@ define void @test8elt_signed(<8 x i32>*
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 32(r4)
-; CHECK-P9-NEXT:    lxv vs1, 48(r4)
 ; CHECK-P9-NEXT:    lxv vs2, 0(r4)
 ; CHECK-P9-NEXT:    lxv vs3, 16(r4)
 ; CHECK-P9-NEXT:    xxmrgld vs4, vs3, vs2
 ; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-P9-NEXT:    xxmrgld vs3, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    lxv vs0, 32(r4)
+; CHECK-P9-NEXT:    lxv vs1, 48(r4)
 ; CHECK-P9-NEXT:    xvcvdpsxws v2, vs4
 ; CHECK-P9-NEXT:    xvcvdpsxws v3, vs2
-; CHECK-P9-NEXT:    xvcvdpsxws v4, vs3
-; CHECK-P9-NEXT:    xvcvdpsxws v5, vs0
+; CHECK-P9-NEXT:    xxmrgld vs2, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xvcvdpsxws v4, vs0
 ; CHECK-P9-NEXT:    vmrgew v2, v3, v2
-; CHECK-P9-NEXT:    vmrgew v3, v5, v4
-; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    xvcvdpsxws v3, vs2
 ; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    vmrgew v3, v4, v3
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 48(r4)
-; CHECK-BE-NEXT:    lxv vs1, 32(r4)
 ; CHECK-BE-NEXT:    lxv vs2, 16(r4)
 ; CHECK-BE-NEXT:    lxv vs3, 0(r4)
 ; CHECK-BE-NEXT:    xxmrgld vs4, vs3, vs2
 ; CHECK-BE-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-BE-NEXT:    xxmrgld vs3, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    lxv vs0, 48(r4)
+; CHECK-BE-NEXT:    lxv vs1, 32(r4)
 ; CHECK-BE-NEXT:    xvcvdpsxws v2, vs4
 ; CHECK-BE-NEXT:    xvcvdpsxws v3, vs2
-; CHECK-BE-NEXT:    xvcvdpsxws v4, vs3
-; CHECK-BE-NEXT:    xvcvdpsxws v5, vs0
+; CHECK-BE-NEXT:    xxmrgld vs2, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xvcvdpsxws v4, vs0
 ; CHECK-BE-NEXT:    vmrgew v2, v3, v2
-; CHECK-BE-NEXT:    vmrgew v3, v5, v4
-; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    xvcvdpsxws v3, vs2
 ; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    vmrgew v3, v4, v3
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x double>, <8 x double>* %0, align 64
@@ -521,74 +521,74 @@ define void @test16elt_signed(<16 x i32>
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 32(r4)
-; CHECK-P9-NEXT:    lxv vs1, 48(r4)
-; CHECK-P9-NEXT:    lxv vs2, 0(r4)
-; CHECK-P9-NEXT:    lxv vs3, 16(r4)
-; CHECK-P9-NEXT:    lxv vs4, 96(r4)
-; CHECK-P9-NEXT:    lxv vs5, 112(r4)
-; CHECK-P9-NEXT:    lxv vs6, 64(r4)
-; CHECK-P9-NEXT:    lxv vs7, 80(r4)
-; CHECK-P9-NEXT:    xxmrgld vs8, vs3, vs2
-; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-P9-NEXT:    xxmrgld vs3, vs1, vs0
-; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-P9-NEXT:    xxmrgld vs1, vs7, vs6
+; CHECK-P9-NEXT:    lxv vs6, 0(r4)
+; CHECK-P9-NEXT:    lxv vs7, 16(r4)
+; CHECK-P9-NEXT:    xxmrgld vs8, vs7, vs6
 ; CHECK-P9-NEXT:    xxmrghd vs6, vs7, vs6
+; CHECK-P9-NEXT:    lxv vs4, 32(r4)
+; CHECK-P9-NEXT:    lxv vs5, 48(r4)
 ; CHECK-P9-NEXT:    xxmrgld vs7, vs5, vs4
 ; CHECK-P9-NEXT:    xxmrghd vs4, vs5, vs4
 ; CHECK-P9-NEXT:    xvcvdpsxws v2, vs8
-; CHECK-P9-NEXT:    xvcvdpsxws v3, vs2
-; CHECK-P9-NEXT:    xvcvdpsxws v4, vs3
-; CHECK-P9-NEXT:    xvcvdpsxws v5, vs0
-; CHECK-P9-NEXT:    xvcvdpsxws v0, vs1
-; CHECK-P9-NEXT:    xvcvdpsxws v1, vs6
-; CHECK-P9-NEXT:    xvcvdpsxws v6, vs7
-; CHECK-P9-NEXT:    xvcvdpsxws v7, vs4
+; CHECK-P9-NEXT:    xvcvdpsxws v3, vs6
+; CHECK-P9-NEXT:    lxv vs2, 64(r4)
+; CHECK-P9-NEXT:    lxv vs3, 80(r4)
+; CHECK-P9-NEXT:    xvcvdpsxws v4, vs7
 ; CHECK-P9-NEXT:    vmrgew v2, v3, v2
-; CHECK-P9-NEXT:    vmrgew v3, v5, v4
-; CHECK-P9-NEXT:    vmrgew v4, v1, v0
-; CHECK-P9-NEXT:    vmrgew v5, v7, v6
-; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    xvcvdpsxws v3, vs4
+; CHECK-P9-NEXT:    xxmrgld vs4, vs3, vs2
+; CHECK-P9-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-P9-NEXT:    lxv vs0, 96(r4)
+; CHECK-P9-NEXT:    lxv vs1, 112(r4)
 ; CHECK-P9-NEXT:    stxv v2, 0(r3)
-; CHECK-P9-NEXT:    stxv v5, 48(r3)
+; CHECK-P9-NEXT:    xvcvdpsxws v5, vs2
+; CHECK-P9-NEXT:    xxmrgld vs2, vs1, vs0
+; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-P9-NEXT:    xvcvdpsxws v0, vs0
+; CHECK-P9-NEXT:    vmrgew v3, v3, v4
+; CHECK-P9-NEXT:    xvcvdpsxws v4, vs4
+; CHECK-P9-NEXT:    stxv v3, 16(r3)
+; CHECK-P9-NEXT:    vmrgew v4, v5, v4
 ; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    xvcvdpsxws v5, vs2
+; CHECK-P9-NEXT:    vmrgew v5, v0, v5
+; CHECK-P9-NEXT:    stxv v5, 48(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 48(r4)
-; CHECK-BE-NEXT:    lxv vs1, 32(r4)
-; CHECK-BE-NEXT:    lxv vs2, 16(r4)
-; CHECK-BE-NEXT:    lxv vs3, 0(r4)
-; CHECK-BE-NEXT:    lxv vs4, 112(r4)
-; CHECK-BE-NEXT:    lxv vs5, 96(r4)
-; CHECK-BE-NEXT:    lxv vs6, 80(r4)
-; CHECK-BE-NEXT:    lxv vs7, 64(r4)
-; CHECK-BE-NEXT:    xxmrgld vs8, vs3, vs2
-; CHECK-BE-NEXT:    xxmrghd vs2, vs3, vs2
-; CHECK-BE-NEXT:    xxmrgld vs3, vs1, vs0
-; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
-; CHECK-BE-NEXT:    xxmrgld vs1, vs7, vs6
+; CHECK-BE-NEXT:    lxv vs6, 16(r4)
+; CHECK-BE-NEXT:    lxv vs7, 0(r4)
+; CHECK-BE-NEXT:    xxmrgld vs8, vs7, vs6
 ; CHECK-BE-NEXT:    xxmrghd vs6, vs7, vs6
+; CHECK-BE-NEXT:    lxv vs4, 48(r4)
+; CHECK-BE-NEXT:    lxv vs5, 32(r4)
 ; CHECK-BE-NEXT:    xxmrgld vs7, vs5, vs4
 ; CHECK-BE-NEXT:    xxmrghd vs4, vs5, vs4
 ; CHECK-BE-NEXT:    xvcvdpsxws v2, vs8
-; CHECK-BE-NEXT:    xvcvdpsxws v3, vs2
-; CHECK-BE-NEXT:    xvcvdpsxws v4, vs3
-; CHECK-BE-NEXT:    xvcvdpsxws v5, vs0
-; CHECK-BE-NEXT:    xvcvdpsxws v0, vs1
-; CHECK-BE-NEXT:    xvcvdpsxws v1, vs6
-; CHECK-BE-NEXT:    xvcvdpsxws v6, vs7
-; CHECK-BE-NEXT:    xvcvdpsxws v7, vs4
+; CHECK-BE-NEXT:    xvcvdpsxws v3, vs6
+; CHECK-BE-NEXT:    lxv vs2, 80(r4)
+; CHECK-BE-NEXT:    lxv vs3, 64(r4)
+; CHECK-BE-NEXT:    xvcvdpsxws v4, vs7
 ; CHECK-BE-NEXT:    vmrgew v2, v3, v2
-; CHECK-BE-NEXT:    vmrgew v3, v5, v4
-; CHECK-BE-NEXT:    vmrgew v4, v1, v0
-; CHECK-BE-NEXT:    vmrgew v5, v7, v6
-; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    xvcvdpsxws v3, vs4
+; CHECK-BE-NEXT:    xxmrgld vs4, vs3, vs2
+; CHECK-BE-NEXT:    xxmrghd vs2, vs3, vs2
+; CHECK-BE-NEXT:    lxv vs0, 112(r4)
+; CHECK-BE-NEXT:    lxv vs1, 96(r4)
 ; CHECK-BE-NEXT:    stxv v2, 0(r3)
-; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    xvcvdpsxws v5, vs2
+; CHECK-BE-NEXT:    xxmrgld vs2, vs1, vs0
+; CHECK-BE-NEXT:    xxmrghd vs0, vs1, vs0
+; CHECK-BE-NEXT:    xvcvdpsxws v0, vs0
+; CHECK-BE-NEXT:    vmrgew v3, v3, v4
+; CHECK-BE-NEXT:    xvcvdpsxws v4, vs4
+; CHECK-BE-NEXT:    stxv v3, 16(r3)
+; CHECK-BE-NEXT:    vmrgew v4, v5, v4
 ; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    xvcvdpsxws v5, vs2
+; CHECK-BE-NEXT:    vmrgew v5, v0, v5
+; CHECK-BE-NEXT:    stxv v5, 48(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x double>, <16 x double>* %0, align 128

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll Wed Jan  2 21:04:18 2019
@@ -31,17 +31,17 @@ define i16 @test2elt(<2 x double> %a) lo
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xscvdpsxws f0, v2
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    xscvdpsxws f1, v2
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f1
-; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
 ; CHECK-P9-NEXT:    addi r3, r1, -2
-; CHECK-P9-NEXT:    mtvsrd f1, r4
 ; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    vmrglb v2, v2, v3
+; CHECK-P9-NEXT:    vmrglb v2, v3, v2
 ; CHECK-P9-NEXT:    vsldoi v2, v2, v2, 8
 ; CHECK-P9-NEXT:    stxsihx v2, 0, r3
 ; CHECK-P9-NEXT:    lhz r3, -2(r1)
@@ -49,17 +49,17 @@ define i16 @test2elt(<2 x double> %a) lo
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xscvdpsxws f0, v2
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, v2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mfvsrwz r4, f0
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vmrghb v2, v3, v2
 ; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    stxsihx v2, 0, r3
 ; CHECK-BE-NEXT:    lhz r3, -2(r1)
@@ -103,58 +103,58 @@ define i32 @test4elt(<4 x double>* nocap
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    lxv vs1, 0(r3)
-; CHECK-P9-NEXT:    xxswapd vs2, vs1
-; CHECK-P9-NEXT:    xxswapd vs3, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    xxswapd v2, vs2
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f0
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    mfvsrwz r3, f1
-; CHECK-P9-NEXT:    mfvsrwz r5, f0
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    mfvsrwz r4, f2
-; CHECK-P9-NEXT:    mfvsrwz r6, f3
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    xxswapd v4, vs2
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
 ; CHECK-P9-NEXT:    vmrglb v2, v2, v3
-; CHECK-P9-NEXT:    vmrglb v3, v4, v5
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
 ; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    li r3, 0
 ; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    xxswapd vs3, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    vmrghb v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrwz r3, f1
-; CHECK-BE-NEXT:    mfvsrwz r5, f0
 ; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    sldi r5, r5, 56
-; CHECK-BE-NEXT:    mfvsrwz r4, f2
-; CHECK-BE-NEXT:    mfvsrwz r6, f3
-; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v4, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    sldi r6, r6, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    vmrghb v2, v2, v3
-; CHECK-BE-NEXT:    vmrghb v3, v4, v5
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
 ; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
@@ -224,104 +224,104 @@ define i64 @test8elt(<8 x double>* nocap
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs3, 0(r3)
+; CHECK-P9-NEXT:    xscvdpsxws f4, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    lxv vs0, 48(r3)
 ; CHECK-P9-NEXT:    lxv vs1, 32(r3)
 ; CHECK-P9-NEXT:    lxv vs2, 16(r3)
-; CHECK-P9-NEXT:    lxv vs3, 0(r3)
-; CHECK-P9-NEXT:    xxswapd vs4, vs3
-; CHECK-P9-NEXT:    xxswapd vs5, vs2
-; CHECK-P9-NEXT:    xxswapd vs6, vs1
-; CHECK-P9-NEXT:    xxswapd vs7, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mfvsrwz r3, f4
+; CHECK-P9-NEXT:    mtvsrd f4, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    xxswapd v2, vs4
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    xscvdpsxws f5, f5
-; CHECK-P9-NEXT:    xscvdpsxws f6, f6
-; CHECK-P9-NEXT:    xscvdpsxws f7, f7
 ; CHECK-P9-NEXT:    mfvsrwz r3, f3
-; CHECK-P9-NEXT:    mfvsrwz r5, f2
-; CHECK-P9-NEXT:    mfvsrwz r7, f1
-; CHECK-P9-NEXT:    mfvsrwz r9, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    mfvsrwz r4, f4
-; CHECK-P9-NEXT:    mfvsrwz r6, f5
-; CHECK-P9-NEXT:    mfvsrwz r8, f6
-; CHECK-P9-NEXT:    mfvsrwz r10, f7
-; CHECK-P9-NEXT:    mtvsrd f4, r7
-; CHECK-P9-NEXT:    mtvsrd f6, r9
-; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
 ; CHECK-P9-NEXT:    xxswapd v4, vs2
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    mtvsrd f5, r8
-; CHECK-P9-NEXT:    mtvsrd f7, r10
-; CHECK-P9-NEXT:    xxswapd v0, vs4
-; CHECK-P9-NEXT:    xxswapd v6, vs6
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
-; CHECK-P9-NEXT:    xxswapd v1, vs5
-; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    xscvdpsxws f2, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
 ; CHECK-P9-NEXT:    vmrglb v2, v2, v3
-; CHECK-P9-NEXT:    vmrglb v3, v4, v5
-; CHECK-P9-NEXT:    vmrglb v4, v0, v1
-; CHECK-P9-NEXT:    vmrglb v5, v6, v7
+; CHECK-P9-NEXT:    xxswapd v3, vs3
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
 ; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    vmrglh v3, v5, v4
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f0
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
 ; CHECK-P9-NEXT:    vmrglw v2, v3, v2
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    xxswapd vs4, vs3
-; CHECK-BE-NEXT:    xxswapd vs5, vs2
-; CHECK-BE-NEXT:    xxswapd vs6, vs1
-; CHECK-BE-NEXT:    xxswapd vs7, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    mfvsrwz r3, f3
-; CHECK-BE-NEXT:    mfvsrwz r5, f2
-; CHECK-BE-NEXT:    mfvsrwz r7, f1
-; CHECK-BE-NEXT:    mfvsrwz r9, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    sldi r5, r5, 56
-; CHECK-BE-NEXT:    sldi r7, r7, 56
-; CHECK-BE-NEXT:    sldi r9, r9, 56
-; CHECK-BE-NEXT:    mfvsrwz r4, f4
-; CHECK-BE-NEXT:    mfvsrwz r6, f5
-; CHECK-BE-NEXT:    mfvsrwz r8, f6
-; CHECK-BE-NEXT:    mfvsrwz r10, f7
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    mfvsrwz r3, f4
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mtvsrd v0, r7
-; CHECK-BE-NEXT:    mtvsrd v6, r9
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    sldi r6, r6, 56
-; CHECK-BE-NEXT:    sldi r8, r8, 56
-; CHECK-BE-NEXT:    sldi r10, r10, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    mtvsrd v1, r8
-; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    vmrghb v2, v2, v3
-; CHECK-BE-NEXT:    vmrghb v3, v4, v5
-; CHECK-BE-NEXT:    vmrghb v4, v0, v1
-; CHECK-BE-NEXT:    vmrghb v5, v6, v7
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    vmrghh v3, v5, v4
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-BE-NEXT:    vmrghw v2, v3, v2
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
@@ -441,219 +441,199 @@ define <16 x i8> @test16elt(<16 x double
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs2, 48(r3)
-; CHECK-P9-NEXT:    lxv vs3, 32(r3)
-; CHECK-P9-NEXT:    lxv vs4, 16(r3)
-; CHECK-P9-NEXT:    lxv vs5, 0(r3)
+; CHECK-P9-NEXT:    lxv vs7, 0(r3)
+; CHECK-P9-NEXT:    xscvdpsxws f8, f7
+; CHECK-P9-NEXT:    xxswapd vs7, vs7
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
 ; CHECK-P9-NEXT:    lxv vs0, 112(r3)
 ; CHECK-P9-NEXT:    lxv vs1, 96(r3)
-; CHECK-P9-NEXT:    lxv vs6, 80(r3)
-; CHECK-P9-NEXT:    lxv vs7, 64(r3)
-; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    xxswapd vs8, vs5
-; CHECK-P9-NEXT:    xxswapd vs9, vs4
-; CHECK-P9-NEXT:    xxswapd vs10, vs3
-; CHECK-P9-NEXT:    xxswapd vs11, vs2
-; CHECK-P9-NEXT:    xxswapd vs12, vs7
-; CHECK-P9-NEXT:    xxswapd vs13, vs6
-; CHECK-P9-NEXT:    xxswapd v2, vs1
-; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    lxv vs2, 80(r3)
+; CHECK-P9-NEXT:    lxv vs3, 64(r3)
+; CHECK-P9-NEXT:    lxv vs4, 48(r3)
+; CHECK-P9-NEXT:    lxv vs5, 32(r3)
+; CHECK-P9-NEXT:    lxv vs6, 16(r3)
+; CHECK-P9-NEXT:    mfvsrwz r3, f8
+; CHECK-P9-NEXT:    mtvsrd f8, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f7
+; CHECK-P9-NEXT:    xxswapd v2, vs8
+; CHECK-P9-NEXT:    mtvsrd f7, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs7
+; CHECK-P9-NEXT:    xscvdpsxws f7, f6
+; CHECK-P9-NEXT:    xxswapd vs6, vs6
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    mfvsrwz r3, f7
+; CHECK-P9-NEXT:    mtvsrd f7, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f6
+; CHECK-P9-NEXT:    mtvsrd f6, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs6
+; CHECK-P9-NEXT:    xscvdpsxws f6, f5
+; CHECK-P9-NEXT:    xxswapd vs5, vs5
 ; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    mfvsrwz r3, f6
+; CHECK-P9-NEXT:    mtvsrd f6, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f5
+; CHECK-P9-NEXT:    vmrglb v2, v2, v3
+; CHECK-P9-NEXT:    xxswapd v3, vs7
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    xxswapd v3, vs6
+; CHECK-P9-NEXT:    mtvsrd f5, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs5
+; CHECK-P9-NEXT:    xscvdpsxws f5, f4
+; CHECK-P9-NEXT:    xxswapd vs4, vs4
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    mfvsrwz r3, f5
+; CHECK-P9-NEXT:    mtvsrd f5, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f4
+; CHECK-P9-NEXT:    mtvsrd f4, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs4
+; CHECK-P9-NEXT:    xscvdpsxws f4, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs5
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f4
+; CHECK-P9-NEXT:    mtvsrd f4, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f7, f7
-; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    xxswapd v3, vs4
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs3
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f0
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f8, f8
-; CHECK-P9-NEXT:    xscvdpsxws f9, f9
-; CHECK-P9-NEXT:    xscvdpsxws f10, f10
-; CHECK-P9-NEXT:    xscvdpsxws f11, f11
-; CHECK-P9-NEXT:    xscvdpsxws f12, f12
-; CHECK-P9-NEXT:    xscvdpsxws f13, f13
-; CHECK-P9-NEXT:    xscvdpsxws v2, v2
-; CHECK-P9-NEXT:    xscvdpsxws v3, v3
-; CHECK-P9-NEXT:    mfvsrwz r3, f5
-; CHECK-P9-NEXT:    mfvsrwz r4, f4
-; CHECK-P9-NEXT:    mfvsrwz r5, f3
-; CHECK-P9-NEXT:    mfvsrwz r6, f2
-; CHECK-P9-NEXT:    mfvsrwz r11, f7
-; CHECK-P9-NEXT:    mfvsrwz r12, f6
-; CHECK-P9-NEXT:    mfvsrwz r0, f1
-; CHECK-P9-NEXT:    mfvsrwz r30, f0
-; CHECK-P9-NEXT:    mfvsrwz r7, f8
-; CHECK-P9-NEXT:    mfvsrwz r8, f9
-; CHECK-P9-NEXT:    mfvsrwz r9, f10
-; CHECK-P9-NEXT:    mfvsrwz r10, f11
-; CHECK-P9-NEXT:    mfvsrwz r29, f12
-; CHECK-P9-NEXT:    mfvsrwz r28, f13
-; CHECK-P9-NEXT:    mfvsrwz r27, v2
-; CHECK-P9-NEXT:    mfvsrwz r26, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    mtvsrd f8, r11
-; CHECK-P9-NEXT:    mtvsrd f9, r12
-; CHECK-P9-NEXT:    mtvsrd f10, r0
-; CHECK-P9-NEXT:    mtvsrd f11, r30
-; CHECK-P9-NEXT:    mtvsrd f4, r7
-; CHECK-P9-NEXT:    mtvsrd f5, r8
-; CHECK-P9-NEXT:    mtvsrd f6, r9
-; CHECK-P9-NEXT:    mtvsrd f7, r10
-; CHECK-P9-NEXT:    mtvsrd f12, r29
-; CHECK-P9-NEXT:    mtvsrd f13, r28
-; CHECK-P9-NEXT:    mtvsrd v2, r27
-; CHECK-P9-NEXT:    mtvsrd v3, r26
-; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
 ; CHECK-P9-NEXT:    xxswapd v5, vs1
-; CHECK-P9-NEXT:    xxswapd v0, vs2
-; CHECK-P9-NEXT:    xxswapd v1, vs3
-; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xxswapd v6, vs4
-; CHECK-P9-NEXT:    xxswapd v7, vs5
-; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xxswapd v8, vs6
-; CHECK-P9-NEXT:    xxswapd v9, vs7
-; CHECK-P9-NEXT:    xxswapd v10, vs8
-; CHECK-P9-NEXT:    xxswapd v11, vs12
-; CHECK-P9-NEXT:    xxswapd v12, vs9
-; CHECK-P9-NEXT:    xxswapd v13, vs13
-; CHECK-P9-NEXT:    xxswapd v14, vs10
-; CHECK-P9-NEXT:    xxswapd v2, v2
-; CHECK-P9-NEXT:    xxswapd v15, vs11
-; CHECK-P9-NEXT:    xxswapd v3, v3
-; CHECK-P9-NEXT:    vmrglb v4, v4, v6
-; CHECK-P9-NEXT:    vmrglb v5, v5, v7
-; CHECK-P9-NEXT:    vmrglb v0, v0, v8
-; CHECK-P9-NEXT:    vmrglb v1, v1, v9
-; CHECK-P9-NEXT:    vmrglb v6, v10, v11
-; CHECK-P9-NEXT:    vmrglb v7, v12, v13
-; CHECK-P9-NEXT:    vmrglb v2, v14, v2
-; CHECK-P9-NEXT:    vmrglb v3, v15, v3
+; CHECK-P9-NEXT:    xxswapd v0, vs0
+; CHECK-P9-NEXT:    vmrglb v5, v5, v0
 ; CHECK-P9-NEXT:    vmrglh v4, v5, v4
-; CHECK-P9-NEXT:    vmrglh v5, v1, v0
-; CHECK-P9-NEXT:    vmrglh v0, v7, v6
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    vmrglw v3, v5, v4
-; CHECK-P9-NEXT:    vmrglw v2, v2, v0
-; CHECK-P9-NEXT:    xxmrgld v2, v2, v3
+; CHECK-P9-NEXT:    vmrglw v3, v4, v3
+; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs2, 64(r3)
-; CHECK-BE-NEXT:    lxv vs3, 80(r3)
-; CHECK-BE-NEXT:    lxv vs4, 96(r3)
-; CHECK-BE-NEXT:    lxv vs5, 112(r3)
+; CHECK-BE-NEXT:    lxv vs7, 112(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f8, f7
+; CHECK-BE-NEXT:    xxswapd vs7, vs7
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    lxv vs6, 96(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    lxv vs6, 32(r3)
-; CHECK-BE-NEXT:    lxv vs7, 48(r3)
-; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    xxswapd vs8, vs5
-; CHECK-BE-NEXT:    xxswapd vs9, vs4
-; CHECK-BE-NEXT:    xxswapd vs10, vs3
-; CHECK-BE-NEXT:    xxswapd vs11, vs2
-; CHECK-BE-NEXT:    xxswapd vs12, vs7
-; CHECK-BE-NEXT:    xxswapd vs13, vs6
-; CHECK-BE-NEXT:    xxswapd v2, vs1
-; CHECK-BE-NEXT:    xxswapd v3, vs0
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    lxv vs3, 48(r3)
+; CHECK-BE-NEXT:    lxv vs4, 64(r3)
+; CHECK-BE-NEXT:    lxv vs5, 80(r3)
+; CHECK-BE-NEXT:    mfvsrwz r3, f8
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f7
+; CHECK-BE-NEXT:    xscvdpsxws f7, f6
+; CHECK-BE-NEXT:    xxswapd vs6, vs6
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f7
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f6
+; CHECK-BE-NEXT:    xscvdpsxws f6, f5
+; CHECK-BE-NEXT:    xxswapd vs5, vs5
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f6
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f5
+; CHECK-BE-NEXT:    xscvdpsxws f5, f4
+; CHECK-BE-NEXT:    xxswapd vs4, vs4
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f5
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    mfvsrwz r3, f4
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f8, f8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f9
-; CHECK-BE-NEXT:    xscvdpsxws f10, f10
-; CHECK-BE-NEXT:    xscvdpsxws f11, f11
-; CHECK-BE-NEXT:    xscvdpsxws f12, f12
-; CHECK-BE-NEXT:    xscvdpsxws f13, f13
-; CHECK-BE-NEXT:    xscvdpsxws v2, v2
-; CHECK-BE-NEXT:    xscvdpsxws v3, v3
-; CHECK-BE-NEXT:    mfvsrwz r3, f5
-; CHECK-BE-NEXT:    mfvsrwz r4, f4
-; CHECK-BE-NEXT:    mfvsrwz r5, f3
-; CHECK-BE-NEXT:    mfvsrwz r6, f2
-; CHECK-BE-NEXT:    mfvsrwz r11, f7
-; CHECK-BE-NEXT:    mfvsrwz r12, f6
-; CHECK-BE-NEXT:    mfvsrwz r0, f1
-; CHECK-BE-NEXT:    mfvsrwz r30, f0
-; CHECK-BE-NEXT:    mfvsrwz r7, f8
-; CHECK-BE-NEXT:    mfvsrwz r8, f9
-; CHECK-BE-NEXT:    mfvsrwz r9, f10
-; CHECK-BE-NEXT:    mfvsrwz r10, f11
-; CHECK-BE-NEXT:    mfvsrwz r29, f12
-; CHECK-BE-NEXT:    mfvsrwz r28, f13
-; CHECK-BE-NEXT:    mfvsrwz r27, v2
-; CHECK-BE-NEXT:    mfvsrwz r26, v3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    sldi r5, r5, 56
-; CHECK-BE-NEXT:    sldi r6, r6, 56
-; CHECK-BE-NEXT:    sldi r11, r11, 56
-; CHECK-BE-NEXT:    sldi r12, r12, 56
-; CHECK-BE-NEXT:    sldi r0, r0, 56
-; CHECK-BE-NEXT:    sldi r30, r30, 56
-; CHECK-BE-NEXT:    sldi r7, r7, 56
-; CHECK-BE-NEXT:    sldi r8, r8, 56
-; CHECK-BE-NEXT:    sldi r9, r9, 56
-; CHECK-BE-NEXT:    sldi r10, r10, 56
-; CHECK-BE-NEXT:    sldi r29, r29, 56
-; CHECK-BE-NEXT:    sldi r28, r28, 56
-; CHECK-BE-NEXT:    sldi r27, r27, 56
-; CHECK-BE-NEXT:    sldi r26, r26, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    mtvsrd v8, r11
-; CHECK-BE-NEXT:    mtvsrd v10, r12
-; CHECK-BE-NEXT:    mtvsrd v12, r0
-; CHECK-BE-NEXT:    mtvsrd v14, r30
-; CHECK-BE-NEXT:    mtvsrd v0, r7
-; CHECK-BE-NEXT:    mtvsrd v1, r8
-; CHECK-BE-NEXT:    mtvsrd v6, r9
-; CHECK-BE-NEXT:    mtvsrd v7, r10
-; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v9, r29
-; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v11, r28
-; CHECK-BE-NEXT:    mtvsrd v13, r27
-; CHECK-BE-NEXT:    mtvsrd v15, r26
-; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    vmrghb v2, v2, v0
-; CHECK-BE-NEXT:    vmrghb v3, v3, v1
-; CHECK-BE-NEXT:    vmrghb v4, v4, v6
-; CHECK-BE-NEXT:    vmrghb v5, v5, v7
-; CHECK-BE-NEXT:    vmrghb v0, v8, v9
-; CHECK-BE-NEXT:    vmrghb v1, v10, v11
-; CHECK-BE-NEXT:    vmrghb v6, v12, v13
-; CHECK-BE-NEXT:    vmrghb v7, v14, v15
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    vmrghh v3, v5, v4
-; CHECK-BE-NEXT:    vmrghh v4, v1, v0
-; CHECK-BE-NEXT:    vmrghh v5, v7, v6
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v0, r3
+; CHECK-BE-NEXT:    vmrghb v5, v5, v0
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
 ; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -684,17 +664,17 @@ define i16 @test2elt_signed(<2 x double>
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xscvdpsxws f0, v2
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs0
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    xscvdpsxws f1, v2
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f1
-; CHECK-P9-NEXT:    mfvsrwz r4, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
 ; CHECK-P9-NEXT:    addi r3, r1, -2
-; CHECK-P9-NEXT:    mtvsrd f1, r4
 ; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    vmrglb v2, v2, v3
+; CHECK-P9-NEXT:    vmrglb v2, v3, v2
 ; CHECK-P9-NEXT:    vsldoi v2, v2, v2, 8
 ; CHECK-P9-NEXT:    stxsihx v2, 0, r3
 ; CHECK-P9-NEXT:    lhz r3, -2(r1)
@@ -702,17 +682,17 @@ define i16 @test2elt_signed(<2 x double>
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xscvdpsxws f0, v2
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    xscvdpsxws f1, v2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
 ; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    mfvsrwz r4, f0
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
 ; CHECK-BE-NEXT:    addi r3, r1, -2
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    vmrghb v2, v3, v2
 ; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    stxsihx v2, 0, r3
 ; CHECK-BE-NEXT:    lhz r3, -2(r1)
@@ -756,58 +736,58 @@ define i32 @test4elt_signed(<4 x double>
 ;
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r3)
 ; CHECK-P9-NEXT:    lxv vs1, 0(r3)
-; CHECK-P9-NEXT:    xxswapd vs2, vs1
-; CHECK-P9-NEXT:    xxswapd vs3, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f2, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    lxv vs0, 16(r3)
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    xxswapd v2, vs2
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f0
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    mfvsrwz r3, f1
-; CHECK-P9-NEXT:    mfvsrwz r5, f0
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    mfvsrwz r4, f2
-; CHECK-P9-NEXT:    mfvsrwz r6, f3
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    xxswapd v2, vs0
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    xxswapd v4, vs2
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
 ; CHECK-P9-NEXT:    vmrglb v2, v2, v3
-; CHECK-P9-NEXT:    vmrglb v3, v4, v5
+; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
 ; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    li r3, 0
 ; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    xxswapd vs2, vs1
-; CHECK-BE-NEXT:    xxswapd vs3, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    vmrghb v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrwz r3, f1
-; CHECK-BE-NEXT:    mfvsrwz r5, f0
 ; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    sldi r5, r5, 56
-; CHECK-BE-NEXT:    mfvsrwz r4, f2
-; CHECK-BE-NEXT:    mfvsrwz r6, f3
-; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v4, r5
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v4, r3
 ; CHECK-BE-NEXT:    li r3, 0
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    sldi r6, r6, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    vmrghb v2, v2, v3
-; CHECK-BE-NEXT:    vmrghb v3, v4, v5
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
 ; CHECK-BE-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-NEXT:    vextuwlx r3, r3, v2
 ; CHECK-BE-NEXT:    blr
@@ -877,104 +857,104 @@ define i64 @test8elt_signed(<8 x double>
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    lxv vs3, 0(r3)
+; CHECK-P9-NEXT:    xscvdpsxws f4, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
 ; CHECK-P9-NEXT:    lxv vs0, 48(r3)
 ; CHECK-P9-NEXT:    lxv vs1, 32(r3)
 ; CHECK-P9-NEXT:    lxv vs2, 16(r3)
-; CHECK-P9-NEXT:    lxv vs3, 0(r3)
-; CHECK-P9-NEXT:    xxswapd vs4, vs3
-; CHECK-P9-NEXT:    xxswapd vs5, vs2
-; CHECK-P9-NEXT:    xxswapd vs6, vs1
-; CHECK-P9-NEXT:    xxswapd vs7, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mfvsrwz r3, f4
+; CHECK-P9-NEXT:    mtvsrd f4, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    xxswapd v2, vs4
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f4, f4
-; CHECK-P9-NEXT:    xscvdpsxws f5, f5
-; CHECK-P9-NEXT:    xscvdpsxws f6, f6
-; CHECK-P9-NEXT:    xscvdpsxws f7, f7
 ; CHECK-P9-NEXT:    mfvsrwz r3, f3
-; CHECK-P9-NEXT:    mfvsrwz r5, f2
-; CHECK-P9-NEXT:    mfvsrwz r7, f1
-; CHECK-P9-NEXT:    mfvsrwz r9, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    mfvsrwz r4, f4
-; CHECK-P9-NEXT:    mfvsrwz r6, f5
-; CHECK-P9-NEXT:    mfvsrwz r8, f6
-; CHECK-P9-NEXT:    mfvsrwz r10, f7
-; CHECK-P9-NEXT:    mtvsrd f4, r7
-; CHECK-P9-NEXT:    mtvsrd f6, r9
-; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
 ; CHECK-P9-NEXT:    xxswapd v4, vs2
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    mtvsrd f5, r8
-; CHECK-P9-NEXT:    mtvsrd f7, r10
-; CHECK-P9-NEXT:    xxswapd v0, vs4
-; CHECK-P9-NEXT:    xxswapd v6, vs6
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    xxswapd v5, vs3
-; CHECK-P9-NEXT:    xxswapd v1, vs5
-; CHECK-P9-NEXT:    xxswapd v7, vs7
+; CHECK-P9-NEXT:    xscvdpsxws f2, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
 ; CHECK-P9-NEXT:    vmrglb v2, v2, v3
-; CHECK-P9-NEXT:    vmrglb v3, v4, v5
-; CHECK-P9-NEXT:    vmrglb v4, v0, v1
-; CHECK-P9-NEXT:    vmrglb v5, v6, v7
+; CHECK-P9-NEXT:    xxswapd v3, vs3
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
 ; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    vmrglh v3, v5, v4
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f0
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
+; CHECK-P9-NEXT:    xscvdpsxws f0, f0
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
+; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs0
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
 ; CHECK-P9-NEXT:    vmrglw v2, v3, v2
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 0(r3)
-; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    lxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
-; CHECK-BE-NEXT:    xxswapd vs4, vs3
-; CHECK-BE-NEXT:    xxswapd vs5, vs2
-; CHECK-BE-NEXT:    xxswapd vs6, vs1
-; CHECK-BE-NEXT:    xxswapd vs7, vs0
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
-; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f1, f1
-; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f4, f4
-; CHECK-BE-NEXT:    xscvdpsxws f5, f5
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    mfvsrwz r3, f3
-; CHECK-BE-NEXT:    mfvsrwz r5, f2
-; CHECK-BE-NEXT:    mfvsrwz r7, f1
-; CHECK-BE-NEXT:    mfvsrwz r9, f0
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    sldi r5, r5, 56
-; CHECK-BE-NEXT:    sldi r7, r7, 56
-; CHECK-BE-NEXT:    sldi r9, r9, 56
-; CHECK-BE-NEXT:    mfvsrwz r4, f4
-; CHECK-BE-NEXT:    mfvsrwz r6, f5
-; CHECK-BE-NEXT:    mfvsrwz r8, f6
-; CHECK-BE-NEXT:    mfvsrwz r10, f7
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv vs1, 16(r3)
+; CHECK-BE-NEXT:    mfvsrwz r3, f4
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mtvsrd v0, r7
-; CHECK-BE-NEXT:    mtvsrd v6, r9
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    sldi r6, r6, 56
-; CHECK-BE-NEXT:    sldi r8, r8, 56
-; CHECK-BE-NEXT:    sldi r10, r10, 56
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    mtvsrd v1, r8
-; CHECK-BE-NEXT:    mtvsrd v7, r10
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f2, f2
+; CHECK-BE-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-NEXT:    vmrghb v2, v2, v3
-; CHECK-BE-NEXT:    vmrghb v3, v4, v5
-; CHECK-BE-NEXT:    vmrghb v4, v0, v1
-; CHECK-BE-NEXT:    vmrghb v5, v6, v7
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    vmrghh v3, v5, v4
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f0, f0
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
 ; CHECK-BE-NEXT:    vmrghw v2, v3, v2
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
@@ -1094,219 +1074,199 @@ define <16 x i8> @test16elt_signed(<16 x
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs2, 48(r3)
-; CHECK-P9-NEXT:    lxv vs3, 32(r3)
-; CHECK-P9-NEXT:    lxv vs4, 16(r3)
-; CHECK-P9-NEXT:    lxv vs5, 0(r3)
+; CHECK-P9-NEXT:    lxv vs7, 0(r3)
+; CHECK-P9-NEXT:    xscvdpsxws f8, f7
+; CHECK-P9-NEXT:    xxswapd vs7, vs7
+; CHECK-P9-NEXT:    xscvdpsxws f7, f7
 ; CHECK-P9-NEXT:    lxv vs0, 112(r3)
 ; CHECK-P9-NEXT:    lxv vs1, 96(r3)
-; CHECK-P9-NEXT:    lxv vs6, 80(r3)
-; CHECK-P9-NEXT:    lxv vs7, 64(r3)
-; CHECK-P9-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    xxswapd vs8, vs5
-; CHECK-P9-NEXT:    xxswapd vs9, vs4
-; CHECK-P9-NEXT:    xxswapd vs10, vs3
-; CHECK-P9-NEXT:    xxswapd vs11, vs2
-; CHECK-P9-NEXT:    xxswapd vs12, vs7
-; CHECK-P9-NEXT:    xxswapd vs13, vs6
-; CHECK-P9-NEXT:    xxswapd v2, vs1
-; CHECK-P9-NEXT:    xxswapd v3, vs0
+; CHECK-P9-NEXT:    lxv vs2, 80(r3)
+; CHECK-P9-NEXT:    lxv vs3, 64(r3)
+; CHECK-P9-NEXT:    lxv vs4, 48(r3)
+; CHECK-P9-NEXT:    lxv vs5, 32(r3)
+; CHECK-P9-NEXT:    lxv vs6, 16(r3)
+; CHECK-P9-NEXT:    mfvsrwz r3, f8
+; CHECK-P9-NEXT:    mtvsrd f8, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f7
+; CHECK-P9-NEXT:    xxswapd v2, vs8
+; CHECK-P9-NEXT:    mtvsrd f7, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs7
+; CHECK-P9-NEXT:    xscvdpsxws f7, f6
+; CHECK-P9-NEXT:    xxswapd vs6, vs6
+; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    mfvsrwz r3, f7
+; CHECK-P9-NEXT:    mtvsrd f7, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f6
+; CHECK-P9-NEXT:    mtvsrd f6, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs6
+; CHECK-P9-NEXT:    xscvdpsxws f6, f5
+; CHECK-P9-NEXT:    xxswapd vs5, vs5
 ; CHECK-P9-NEXT:    xscvdpsxws f5, f5
+; CHECK-P9-NEXT:    mfvsrwz r3, f6
+; CHECK-P9-NEXT:    mtvsrd f6, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f5
+; CHECK-P9-NEXT:    vmrglb v2, v2, v3
+; CHECK-P9-NEXT:    xxswapd v3, vs7
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    xxswapd v3, vs6
+; CHECK-P9-NEXT:    mtvsrd f5, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs5
+; CHECK-P9-NEXT:    xscvdpsxws f5, f4
+; CHECK-P9-NEXT:    xxswapd vs4, vs4
 ; CHECK-P9-NEXT:    xscvdpsxws f4, f4
+; CHECK-P9-NEXT:    mfvsrwz r3, f5
+; CHECK-P9-NEXT:    mtvsrd f5, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f4
+; CHECK-P9-NEXT:    mtvsrd f4, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs4
+; CHECK-P9-NEXT:    xscvdpsxws f4, f3
+; CHECK-P9-NEXT:    xxswapd vs3, vs3
 ; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs5
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f4
+; CHECK-P9-NEXT:    mtvsrd f4, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f2
+; CHECK-P9-NEXT:    xxswapd vs2, vs2
 ; CHECK-P9-NEXT:    xscvdpsxws f2, f2
-; CHECK-P9-NEXT:    xscvdpsxws f7, f7
-; CHECK-P9-NEXT:    xscvdpsxws f6, f6
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f1
+; CHECK-P9-NEXT:    xxswapd vs1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    xxswapd v3, vs4
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
+; CHECK-P9-NEXT:    xxswapd v4, vs3
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f1, f0
+; CHECK-P9-NEXT:    xxswapd vs0, vs0
 ; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    xscvdpsxws f8, f8
-; CHECK-P9-NEXT:    xscvdpsxws f9, f9
-; CHECK-P9-NEXT:    xscvdpsxws f10, f10
-; CHECK-P9-NEXT:    xscvdpsxws f11, f11
-; CHECK-P9-NEXT:    xscvdpsxws f12, f12
-; CHECK-P9-NEXT:    xscvdpsxws f13, f13
-; CHECK-P9-NEXT:    xscvdpsxws v2, v2
-; CHECK-P9-NEXT:    xscvdpsxws v3, v3
-; CHECK-P9-NEXT:    mfvsrwz r3, f5
-; CHECK-P9-NEXT:    mfvsrwz r4, f4
-; CHECK-P9-NEXT:    mfvsrwz r5, f3
-; CHECK-P9-NEXT:    mfvsrwz r6, f2
-; CHECK-P9-NEXT:    mfvsrwz r11, f7
-; CHECK-P9-NEXT:    mfvsrwz r12, f6
-; CHECK-P9-NEXT:    mfvsrwz r0, f1
-; CHECK-P9-NEXT:    mfvsrwz r30, f0
-; CHECK-P9-NEXT:    mfvsrwz r7, f8
-; CHECK-P9-NEXT:    mfvsrwz r8, f9
-; CHECK-P9-NEXT:    mfvsrwz r9, f10
-; CHECK-P9-NEXT:    mfvsrwz r10, f11
-; CHECK-P9-NEXT:    mfvsrwz r29, f12
-; CHECK-P9-NEXT:    mfvsrwz r28, f13
-; CHECK-P9-NEXT:    mfvsrwz r27, v2
-; CHECK-P9-NEXT:    mfvsrwz r26, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    mtvsrd f2, r5
-; CHECK-P9-NEXT:    mtvsrd f3, r6
-; CHECK-P9-NEXT:    mtvsrd f8, r11
-; CHECK-P9-NEXT:    mtvsrd f9, r12
-; CHECK-P9-NEXT:    mtvsrd f10, r0
-; CHECK-P9-NEXT:    mtvsrd f11, r30
-; CHECK-P9-NEXT:    mtvsrd f4, r7
-; CHECK-P9-NEXT:    mtvsrd f5, r8
-; CHECK-P9-NEXT:    mtvsrd f6, r9
-; CHECK-P9-NEXT:    mtvsrd f7, r10
-; CHECK-P9-NEXT:    mtvsrd f12, r29
-; CHECK-P9-NEXT:    mtvsrd f13, r28
-; CHECK-P9-NEXT:    mtvsrd v2, r27
-; CHECK-P9-NEXT:    mtvsrd v3, r26
-; CHECK-P9-NEXT:    xxswapd v4, vs0
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
 ; CHECK-P9-NEXT:    xxswapd v5, vs1
-; CHECK-P9-NEXT:    xxswapd v0, vs2
-; CHECK-P9-NEXT:    xxswapd v1, vs3
-; CHECK-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xxswapd v6, vs4
-; CHECK-P9-NEXT:    xxswapd v7, vs5
-; CHECK-P9-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-P9-NEXT:    xxswapd v8, vs6
-; CHECK-P9-NEXT:    xxswapd v9, vs7
-; CHECK-P9-NEXT:    xxswapd v10, vs8
-; CHECK-P9-NEXT:    xxswapd v11, vs12
-; CHECK-P9-NEXT:    xxswapd v12, vs9
-; CHECK-P9-NEXT:    xxswapd v13, vs13
-; CHECK-P9-NEXT:    xxswapd v14, vs10
-; CHECK-P9-NEXT:    xxswapd v2, v2
-; CHECK-P9-NEXT:    xxswapd v15, vs11
-; CHECK-P9-NEXT:    xxswapd v3, v3
-; CHECK-P9-NEXT:    vmrglb v4, v4, v6
-; CHECK-P9-NEXT:    vmrglb v5, v5, v7
-; CHECK-P9-NEXT:    vmrglb v0, v0, v8
-; CHECK-P9-NEXT:    vmrglb v1, v1, v9
-; CHECK-P9-NEXT:    vmrglb v6, v10, v11
-; CHECK-P9-NEXT:    vmrglb v7, v12, v13
-; CHECK-P9-NEXT:    vmrglb v2, v14, v2
-; CHECK-P9-NEXT:    vmrglb v3, v15, v3
+; CHECK-P9-NEXT:    xxswapd v0, vs0
+; CHECK-P9-NEXT:    vmrglb v5, v5, v0
 ; CHECK-P9-NEXT:    vmrglh v4, v5, v4
-; CHECK-P9-NEXT:    vmrglh v5, v1, v0
-; CHECK-P9-NEXT:    vmrglh v0, v7, v6
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    vmrglw v3, v5, v4
-; CHECK-P9-NEXT:    vmrglw v2, v2, v0
-; CHECK-P9-NEXT:    xxmrgld v2, v2, v3
+; CHECK-P9-NEXT:    vmrglw v3, v4, v3
+; CHECK-P9-NEXT:    xxmrgld v2, v3, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs2, 64(r3)
-; CHECK-BE-NEXT:    lxv vs3, 80(r3)
-; CHECK-BE-NEXT:    lxv vs4, 96(r3)
-; CHECK-BE-NEXT:    lxv vs5, 112(r3)
+; CHECK-BE-NEXT:    lxv vs7, 112(r3)
+; CHECK-BE-NEXT:    xscvdpsxws f8, f7
+; CHECK-BE-NEXT:    xxswapd vs7, vs7
+; CHECK-BE-NEXT:    xscvdpsxws f7, f7
+; CHECK-BE-NEXT:    lxv vs6, 96(r3)
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs1, 16(r3)
-; CHECK-BE-NEXT:    lxv vs6, 32(r3)
-; CHECK-BE-NEXT:    lxv vs7, 48(r3)
-; CHECK-BE-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    xxswapd vs8, vs5
-; CHECK-BE-NEXT:    xxswapd vs9, vs4
-; CHECK-BE-NEXT:    xxswapd vs10, vs3
-; CHECK-BE-NEXT:    xxswapd vs11, vs2
-; CHECK-BE-NEXT:    xxswapd vs12, vs7
-; CHECK-BE-NEXT:    xxswapd vs13, vs6
-; CHECK-BE-NEXT:    xxswapd v2, vs1
-; CHECK-BE-NEXT:    xxswapd v3, vs0
+; CHECK-BE-NEXT:    lxv vs2, 32(r3)
+; CHECK-BE-NEXT:    lxv vs3, 48(r3)
+; CHECK-BE-NEXT:    lxv vs4, 64(r3)
+; CHECK-BE-NEXT:    lxv vs5, 80(r3)
+; CHECK-BE-NEXT:    mfvsrwz r3, f8
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f7
+; CHECK-BE-NEXT:    xscvdpsxws f7, f6
+; CHECK-BE-NEXT:    xxswapd vs6, vs6
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-NEXT:    mfvsrwz r3, f7
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f6
+; CHECK-BE-NEXT:    xscvdpsxws f6, f5
+; CHECK-BE-NEXT:    xxswapd vs5, vs5
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f5, f5
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f6
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f5
+; CHECK-BE-NEXT:    xscvdpsxws f5, f4
+; CHECK-BE-NEXT:    xxswapd vs4, vs4
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f4, f4
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f5
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f4
+; CHECK-BE-NEXT:    xscvdpsxws f4, f3
+; CHECK-BE-NEXT:    xxswapd vs3, vs3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f3, f3
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    mfvsrwz r3, f4
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    xscvdpsxws f3, f2
+; CHECK-BE-NEXT:    xxswapd vs2, vs2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f2, f2
-; CHECK-BE-NEXT:    xscvdpsxws f7, f7
-; CHECK-BE-NEXT:    xscvdpsxws f6, f6
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    vmrghb v3, v3, v4
+; CHECK-BE-NEXT:    mfvsrwz r3, f3
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    xscvdpsxws f2, f1
+; CHECK-BE-NEXT:    xxswapd vs1, vs1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f1, f1
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    mfvsrwz r3, f2
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    vmrghh v3, v4, v3
+; CHECK-BE-NEXT:    mtvsrd v4, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    xscvdpsxws f1, f0
+; CHECK-BE-NEXT:    xxswapd vs0, vs0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
-; CHECK-BE-NEXT:    xscvdpsxws f8, f8
-; CHECK-BE-NEXT:    xscvdpsxws f9, f9
-; CHECK-BE-NEXT:    xscvdpsxws f10, f10
-; CHECK-BE-NEXT:    xscvdpsxws f11, f11
-; CHECK-BE-NEXT:    xscvdpsxws f12, f12
-; CHECK-BE-NEXT:    xscvdpsxws f13, f13
-; CHECK-BE-NEXT:    xscvdpsxws v2, v2
-; CHECK-BE-NEXT:    xscvdpsxws v3, v3
-; CHECK-BE-NEXT:    mfvsrwz r3, f5
-; CHECK-BE-NEXT:    mfvsrwz r4, f4
-; CHECK-BE-NEXT:    mfvsrwz r5, f3
-; CHECK-BE-NEXT:    mfvsrwz r6, f2
-; CHECK-BE-NEXT:    mfvsrwz r11, f7
-; CHECK-BE-NEXT:    mfvsrwz r12, f6
-; CHECK-BE-NEXT:    mfvsrwz r0, f1
-; CHECK-BE-NEXT:    mfvsrwz r30, f0
-; CHECK-BE-NEXT:    mfvsrwz r7, f8
-; CHECK-BE-NEXT:    mfvsrwz r8, f9
-; CHECK-BE-NEXT:    mfvsrwz r9, f10
-; CHECK-BE-NEXT:    mfvsrwz r10, f11
-; CHECK-BE-NEXT:    mfvsrwz r29, f12
-; CHECK-BE-NEXT:    mfvsrwz r28, f13
-; CHECK-BE-NEXT:    mfvsrwz r27, v2
-; CHECK-BE-NEXT:    mfvsrwz r26, v3
-; CHECK-BE-NEXT:    sldi r3, r3, 56
-; CHECK-BE-NEXT:    sldi r4, r4, 56
-; CHECK-BE-NEXT:    sldi r5, r5, 56
-; CHECK-BE-NEXT:    sldi r6, r6, 56
-; CHECK-BE-NEXT:    sldi r11, r11, 56
-; CHECK-BE-NEXT:    sldi r12, r12, 56
-; CHECK-BE-NEXT:    sldi r0, r0, 56
-; CHECK-BE-NEXT:    sldi r30, r30, 56
-; CHECK-BE-NEXT:    sldi r7, r7, 56
-; CHECK-BE-NEXT:    sldi r8, r8, 56
-; CHECK-BE-NEXT:    sldi r9, r9, 56
-; CHECK-BE-NEXT:    sldi r10, r10, 56
-; CHECK-BE-NEXT:    sldi r29, r29, 56
-; CHECK-BE-NEXT:    sldi r28, r28, 56
-; CHECK-BE-NEXT:    sldi r27, r27, 56
-; CHECK-BE-NEXT:    sldi r26, r26, 56
-; CHECK-BE-NEXT:    mtvsrd v2, r3
-; CHECK-BE-NEXT:    mtvsrd v3, r4
-; CHECK-BE-NEXT:    mtvsrd v4, r5
-; CHECK-BE-NEXT:    mtvsrd v5, r6
-; CHECK-BE-NEXT:    mtvsrd v8, r11
-; CHECK-BE-NEXT:    mtvsrd v10, r12
-; CHECK-BE-NEXT:    mtvsrd v12, r0
-; CHECK-BE-NEXT:    mtvsrd v14, r30
-; CHECK-BE-NEXT:    mtvsrd v0, r7
-; CHECK-BE-NEXT:    mtvsrd v1, r8
-; CHECK-BE-NEXT:    mtvsrd v6, r9
-; CHECK-BE-NEXT:    mtvsrd v7, r10
-; CHECK-BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v9, r29
-; CHECK-BE-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    mtvsrd v11, r28
-; CHECK-BE-NEXT:    mtvsrd v13, r27
-; CHECK-BE-NEXT:    mtvsrd v15, r26
-; CHECK-BE-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT:    vmrghb v2, v2, v0
-; CHECK-BE-NEXT:    vmrghb v3, v3, v1
-; CHECK-BE-NEXT:    vmrghb v4, v4, v6
-; CHECK-BE-NEXT:    vmrghb v5, v5, v7
-; CHECK-BE-NEXT:    vmrghb v0, v8, v9
-; CHECK-BE-NEXT:    vmrghb v1, v10, v11
-; CHECK-BE-NEXT:    vmrghb v6, v12, v13
-; CHECK-BE-NEXT:    vmrghb v7, v14, v15
-; CHECK-BE-NEXT:    vmrghh v2, v3, v2
-; CHECK-BE-NEXT:    vmrghh v3, v5, v4
-; CHECK-BE-NEXT:    vmrghh v4, v1, v0
-; CHECK-BE-NEXT:    vmrghh v5, v7, v6
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
-; CHECK-BE-NEXT:    vmrghw v3, v5, v4
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    vmrghb v4, v4, v5
+; CHECK-BE-NEXT:    mfvsrwz r3, f1
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v5, r3
+; CHECK-BE-NEXT:    mfvsrwz r3, f0
+; CHECK-BE-NEXT:    sldi r3, r3, 56
+; CHECK-BE-NEXT:    mtvsrd v0, r3
+; CHECK-BE-NEXT:    vmrghb v5, v5, v0
+; CHECK-BE-NEXT:    vmrghh v4, v5, v4
+; CHECK-BE-NEXT:    vmrghw v3, v4, v3
 ; CHECK-BE-NEXT:    xxmrghd v2, v3, v2
 ; CHECK-BE-NEXT:    blr
 entry:

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll Wed Jan  2 21:04:18 2019
@@ -163,58 +163,58 @@ define void @test16elt(<16 x i64>* noali
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 48(r4)
-; CHECK-P9-NEXT:    lxv vs1, 32(r4)
-; CHECK-P9-NEXT:    lxv vs2, 16(r4)
-; CHECK-P9-NEXT:    lxv vs3, 0(r4)
-; CHECK-P9-NEXT:    lxv vs4, 112(r4)
-; CHECK-P9-NEXT:    lxv vs5, 96(r4)
-; CHECK-P9-NEXT:    lxv vs6, 80(r4)
-; CHECK-P9-NEXT:    lxv vs7, 64(r4)
+; CHECK-P9-NEXT:    lxv vs0, 112(r4)
+; CHECK-P9-NEXT:    lxv vs1, 96(r4)
+; CHECK-P9-NEXT:    lxv vs2, 80(r4)
+; CHECK-P9-NEXT:    lxv vs3, 64(r4)
+; CHECK-P9-NEXT:    lxv vs4, 48(r4)
+; CHECK-P9-NEXT:    xvcvdpuxds vs4, vs4
+; CHECK-P9-NEXT:    lxv vs5, 32(r4)
+; CHECK-P9-NEXT:    lxv vs6, 16(r4)
+; CHECK-P9-NEXT:    lxv vs7, 0(r4)
+; CHECK-P9-NEXT:    xvcvdpuxds vs7, vs7
+; CHECK-P9-NEXT:    xvcvdpuxds vs6, vs6
+; CHECK-P9-NEXT:    xvcvdpuxds vs5, vs5
 ; CHECK-P9-NEXT:    xvcvdpuxds vs3, vs3
 ; CHECK-P9-NEXT:    xvcvdpuxds vs2, vs2
 ; CHECK-P9-NEXT:    xvcvdpuxds vs1, vs1
 ; CHECK-P9-NEXT:    xvcvdpuxds vs0, vs0
-; CHECK-P9-NEXT:    xvcvdpuxds vs7, vs7
-; CHECK-P9-NEXT:    xvcvdpuxds vs6, vs6
-; CHECK-P9-NEXT:    xvcvdpuxds vs5, vs5
-; CHECK-P9-NEXT:    xvcvdpuxds vs4, vs4
-; CHECK-P9-NEXT:    stxv vs0, 48(r3)
-; CHECK-P9-NEXT:    stxv vs1, 32(r3)
-; CHECK-P9-NEXT:    stxv vs2, 16(r3)
-; CHECK-P9-NEXT:    stxv vs3, 0(r3)
-; CHECK-P9-NEXT:    stxv vs4, 112(r3)
-; CHECK-P9-NEXT:    stxv vs5, 96(r3)
-; CHECK-P9-NEXT:    stxv vs6, 80(r3)
-; CHECK-P9-NEXT:    stxv vs7, 64(r3)
+; CHECK-P9-NEXT:    stxv vs0, 112(r3)
+; CHECK-P9-NEXT:    stxv vs1, 96(r3)
+; CHECK-P9-NEXT:    stxv vs2, 80(r3)
+; CHECK-P9-NEXT:    stxv vs3, 64(r3)
+; CHECK-P9-NEXT:    stxv vs4, 48(r3)
+; CHECK-P9-NEXT:    stxv vs5, 32(r3)
+; CHECK-P9-NEXT:    stxv vs6, 16(r3)
+; CHECK-P9-NEXT:    stxv vs7, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 48(r4)
-; CHECK-BE-NEXT:    lxv vs1, 32(r4)
-; CHECK-BE-NEXT:    lxv vs2, 16(r4)
-; CHECK-BE-NEXT:    lxv vs3, 0(r4)
-; CHECK-BE-NEXT:    lxv vs4, 112(r4)
-; CHECK-BE-NEXT:    lxv vs5, 96(r4)
-; CHECK-BE-NEXT:    lxv vs6, 80(r4)
-; CHECK-BE-NEXT:    lxv vs7, 64(r4)
+; CHECK-BE-NEXT:    lxv vs0, 112(r4)
+; CHECK-BE-NEXT:    lxv vs1, 96(r4)
+; CHECK-BE-NEXT:    lxv vs2, 80(r4)
+; CHECK-BE-NEXT:    lxv vs3, 64(r4)
+; CHECK-BE-NEXT:    lxv vs4, 48(r4)
+; CHECK-BE-NEXT:    xvcvdpuxds vs4, vs4
+; CHECK-BE-NEXT:    lxv vs5, 32(r4)
+; CHECK-BE-NEXT:    lxv vs6, 16(r4)
+; CHECK-BE-NEXT:    lxv vs7, 0(r4)
+; CHECK-BE-NEXT:    xvcvdpuxds vs7, vs7
+; CHECK-BE-NEXT:    xvcvdpuxds vs6, vs6
+; CHECK-BE-NEXT:    xvcvdpuxds vs5, vs5
 ; CHECK-BE-NEXT:    xvcvdpuxds vs3, vs3
 ; CHECK-BE-NEXT:    xvcvdpuxds vs2, vs2
 ; CHECK-BE-NEXT:    xvcvdpuxds vs1, vs1
 ; CHECK-BE-NEXT:    xvcvdpuxds vs0, vs0
-; CHECK-BE-NEXT:    xvcvdpuxds vs7, vs7
-; CHECK-BE-NEXT:    xvcvdpuxds vs6, vs6
-; CHECK-BE-NEXT:    xvcvdpuxds vs5, vs5
-; CHECK-BE-NEXT:    xvcvdpuxds vs4, vs4
-; CHECK-BE-NEXT:    stxv vs0, 48(r3)
-; CHECK-BE-NEXT:    stxv vs1, 32(r3)
-; CHECK-BE-NEXT:    stxv vs2, 16(r3)
-; CHECK-BE-NEXT:    stxv vs3, 0(r3)
-; CHECK-BE-NEXT:    stxv vs4, 112(r3)
-; CHECK-BE-NEXT:    stxv vs5, 96(r3)
-; CHECK-BE-NEXT:    stxv vs6, 80(r3)
-; CHECK-BE-NEXT:    stxv vs7, 64(r3)
+; CHECK-BE-NEXT:    stxv vs0, 112(r3)
+; CHECK-BE-NEXT:    stxv vs1, 96(r3)
+; CHECK-BE-NEXT:    stxv vs2, 80(r3)
+; CHECK-BE-NEXT:    stxv vs3, 64(r3)
+; CHECK-BE-NEXT:    stxv vs4, 48(r3)
+; CHECK-BE-NEXT:    stxv vs5, 32(r3)
+; CHECK-BE-NEXT:    stxv vs6, 16(r3)
+; CHECK-BE-NEXT:    stxv vs7, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x double>, <16 x double>* %0, align 128
@@ -377,58 +377,58 @@ define void @test16elt_signed(<16 x i64>
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 48(r4)
-; CHECK-P9-NEXT:    lxv vs1, 32(r4)
-; CHECK-P9-NEXT:    lxv vs2, 16(r4)
-; CHECK-P9-NEXT:    lxv vs3, 0(r4)
-; CHECK-P9-NEXT:    lxv vs4, 112(r4)
-; CHECK-P9-NEXT:    lxv vs5, 96(r4)
-; CHECK-P9-NEXT:    lxv vs6, 80(r4)
-; CHECK-P9-NEXT:    lxv vs7, 64(r4)
+; CHECK-P9-NEXT:    lxv vs0, 112(r4)
+; CHECK-P9-NEXT:    lxv vs1, 96(r4)
+; CHECK-P9-NEXT:    lxv vs2, 80(r4)
+; CHECK-P9-NEXT:    lxv vs3, 64(r4)
+; CHECK-P9-NEXT:    lxv vs4, 48(r4)
+; CHECK-P9-NEXT:    xvcvdpsxds vs4, vs4
+; CHECK-P9-NEXT:    lxv vs5, 32(r4)
+; CHECK-P9-NEXT:    lxv vs6, 16(r4)
+; CHECK-P9-NEXT:    lxv vs7, 0(r4)
+; CHECK-P9-NEXT:    xvcvdpsxds vs7, vs7
+; CHECK-P9-NEXT:    xvcvdpsxds vs6, vs6
+; CHECK-P9-NEXT:    xvcvdpsxds vs5, vs5
 ; CHECK-P9-NEXT:    xvcvdpsxds vs3, vs3
 ; CHECK-P9-NEXT:    xvcvdpsxds vs2, vs2
 ; CHECK-P9-NEXT:    xvcvdpsxds vs1, vs1
 ; CHECK-P9-NEXT:    xvcvdpsxds vs0, vs0
-; CHECK-P9-NEXT:    xvcvdpsxds vs7, vs7
-; CHECK-P9-NEXT:    xvcvdpsxds vs6, vs6
-; CHECK-P9-NEXT:    xvcvdpsxds vs5, vs5
-; CHECK-P9-NEXT:    xvcvdpsxds vs4, vs4
-; CHECK-P9-NEXT:    stxv vs0, 48(r3)
-; CHECK-P9-NEXT:    stxv vs1, 32(r3)
-; CHECK-P9-NEXT:    stxv vs2, 16(r3)
-; CHECK-P9-NEXT:    stxv vs3, 0(r3)
-; CHECK-P9-NEXT:    stxv vs4, 112(r3)
-; CHECK-P9-NEXT:    stxv vs5, 96(r3)
-; CHECK-P9-NEXT:    stxv vs6, 80(r3)
-; CHECK-P9-NEXT:    stxv vs7, 64(r3)
+; CHECK-P9-NEXT:    stxv vs0, 112(r3)
+; CHECK-P9-NEXT:    stxv vs1, 96(r3)
+; CHECK-P9-NEXT:    stxv vs2, 80(r3)
+; CHECK-P9-NEXT:    stxv vs3, 64(r3)
+; CHECK-P9-NEXT:    stxv vs4, 48(r3)
+; CHECK-P9-NEXT:    stxv vs5, 32(r3)
+; CHECK-P9-NEXT:    stxv vs6, 16(r3)
+; CHECK-P9-NEXT:    stxv vs7, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 48(r4)
-; CHECK-BE-NEXT:    lxv vs1, 32(r4)
-; CHECK-BE-NEXT:    lxv vs2, 16(r4)
-; CHECK-BE-NEXT:    lxv vs3, 0(r4)
-; CHECK-BE-NEXT:    lxv vs4, 112(r4)
-; CHECK-BE-NEXT:    lxv vs5, 96(r4)
-; CHECK-BE-NEXT:    lxv vs6, 80(r4)
-; CHECK-BE-NEXT:    lxv vs7, 64(r4)
+; CHECK-BE-NEXT:    lxv vs0, 112(r4)
+; CHECK-BE-NEXT:    lxv vs1, 96(r4)
+; CHECK-BE-NEXT:    lxv vs2, 80(r4)
+; CHECK-BE-NEXT:    lxv vs3, 64(r4)
+; CHECK-BE-NEXT:    lxv vs4, 48(r4)
+; CHECK-BE-NEXT:    xvcvdpsxds vs4, vs4
+; CHECK-BE-NEXT:    lxv vs5, 32(r4)
+; CHECK-BE-NEXT:    lxv vs6, 16(r4)
+; CHECK-BE-NEXT:    lxv vs7, 0(r4)
+; CHECK-BE-NEXT:    xvcvdpsxds vs7, vs7
+; CHECK-BE-NEXT:    xvcvdpsxds vs6, vs6
+; CHECK-BE-NEXT:    xvcvdpsxds vs5, vs5
 ; CHECK-BE-NEXT:    xvcvdpsxds vs3, vs3
 ; CHECK-BE-NEXT:    xvcvdpsxds vs2, vs2
 ; CHECK-BE-NEXT:    xvcvdpsxds vs1, vs1
 ; CHECK-BE-NEXT:    xvcvdpsxds vs0, vs0
-; CHECK-BE-NEXT:    xvcvdpsxds vs7, vs7
-; CHECK-BE-NEXT:    xvcvdpsxds vs6, vs6
-; CHECK-BE-NEXT:    xvcvdpsxds vs5, vs5
-; CHECK-BE-NEXT:    xvcvdpsxds vs4, vs4
-; CHECK-BE-NEXT:    stxv vs0, 48(r3)
-; CHECK-BE-NEXT:    stxv vs1, 32(r3)
-; CHECK-BE-NEXT:    stxv vs2, 16(r3)
-; CHECK-BE-NEXT:    stxv vs3, 0(r3)
-; CHECK-BE-NEXT:    stxv vs4, 112(r3)
-; CHECK-BE-NEXT:    stxv vs5, 96(r3)
-; CHECK-BE-NEXT:    stxv vs6, 80(r3)
-; CHECK-BE-NEXT:    stxv vs7, 64(r3)
+; CHECK-BE-NEXT:    stxv vs0, 112(r3)
+; CHECK-BE-NEXT:    stxv vs1, 96(r3)
+; CHECK-BE-NEXT:    stxv vs2, 80(r3)
+; CHECK-BE-NEXT:    stxv vs3, 64(r3)
+; CHECK-BE-NEXT:    stxv vs4, 48(r3)
+; CHECK-BE-NEXT:    stxv vs5, 32(r3)
+; CHECK-BE-NEXT:    stxv vs6, 16(r3)
+; CHECK-BE-NEXT:    stxv vs7, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x double>, <16 x double>* %0, align 128

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll Wed Jan  2 21:04:18 2019
@@ -35,20 +35,20 @@ define i64 @test2elt(i32 %a.coerce) loca
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    mtvsrws v2, r3
 ; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    li r4, 2
 ; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
-; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
 ; CHECK-P9-NEXT:    rlwinm r3, r3, 0, 16, 31
-; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 16, 31
 ; CHECK-P9-NEXT:    mtvsrwz f0, r3
-; CHECK-P9-NEXT:    mtvsrwz f1, r4
+; CHECK-P9-NEXT:    li r3, 2
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
+; CHECK-P9-NEXT:    rlwinm r3, r3, 0, 16, 31
+; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT:    mtvsrwz f0, r3
 ; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvuxdsp f1, f1
 ; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xscvdpspn vs1, f1
 ; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 1
-; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 1
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    vmrglw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
@@ -56,18 +56,18 @@ define i64 @test2elt(i32 %a.coerce) loca
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrws v2, r3
 ; CHECK-BE-NEXT:    li r3, 2
-; CHECK-BE-NEXT:    li r4, 0
 ; CHECK-BE-NEXT:    vextuhlx r3, r3, v2
-; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
 ; CHECK-BE-NEXT:    rlwinm r3, r3, 0, 16, 31
-; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 16, 31
 ; CHECK-BE-NEXT:    mtvsrwz f0, r3
-; CHECK-BE-NEXT:    mtvsrwz f1, r4
+; CHECK-BE-NEXT:    li r3, 0
+; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    vextuhlx r3, r3, v2
+; CHECK-BE-NEXT:    rlwinm r3, r3, 0, 16, 31
+; CHECK-BE-NEXT:    xscvdpspn v3, f0
+; CHECK-BE-NEXT:    mtvsrwz f0, r3
 ; CHECK-BE-NEXT:    xscvuxdsp f0, f0
-; CHECK-BE-NEXT:    xscvuxdsp f1, f1
 ; CHECK-BE-NEXT:    xscvdpspn v2, f0
-; CHECK-BE-NEXT:    xscvdpspn v3, f1
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -92,24 +92,24 @@ define <4 x float> @test4elt(i64 %a.coer
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r4, r2, .LCPI1_0 at toc@ha
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs0
 ; CHECK-P9-NEXT:    xxlxor v4, v4, v4
-; CHECK-P9-NEXT:    addi r4, r4, .LCPI1_0 at toc@l
-; CHECK-P9-NEXT:    xxswapd v3, vs0
-; CHECK-P9-NEXT:    lxvx v2, 0, r4
-; CHECK-P9-NEXT:    vperm v2, v4, v3, v2
+; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-P9-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_0 at toc@ha
-; CHECK-BE-NEXT:    mtvsrd v3, r3
+; CHECK-BE-NEXT:    mtvsrd v2, r3
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r3
 ; CHECK-BE-NEXT:    xxlxor v4, v4, v4
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_0 at toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r4
-; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
+; CHECK-BE-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-BE-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -140,35 +140,35 @@ define void @test8elt(<8 x float>* noali
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_0 at toc@ha
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI2_1 at toc@ha
-; CHECK-P9-NEXT:    xxlxor v5, v5, v5
 ; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI2_1 at toc@l
 ; CHECK-P9-NEXT:    lxvx v3, 0, r4
-; CHECK-P9-NEXT:    lxvx v4, 0, r5
-; CHECK-P9-NEXT:    vperm v3, v5, v2, v3
-; CHECK-P9-NEXT:    vperm v2, v5, v2, v4
+; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_1 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_1 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-P9-NEXT:    xvcvuxwsp vs0, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    xvcvuxwsp vs1, v2
 ; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_0 at toc@ha
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI2_1 at toc@ha
-; CHECK-BE-NEXT:    xxlxor v5, v5, v5
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI2_1 at toc@l
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
-; CHECK-BE-NEXT:    lxvx v4, 0, r5
-; CHECK-BE-NEXT:    vperm v3, v2, v5, v3
-; CHECK-BE-NEXT:    vperm v2, v5, v2, v4
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_1 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_1 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
 ; CHECK-BE-NEXT:    xvcvuxwsp vs0, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    xvcvuxwsp vs1, v2
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = uitofp <8 x i16> %a to <8 x float>
@@ -207,52 +207,52 @@ define void @test16elt(<16 x float>* noa
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI3_0 at toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI3_1 at toc@ha
 ; CHECK-P9-NEXT:    lxv v2, 16(r4)
 ; CHECK-P9-NEXT:    lxv v3, 0(r4)
-; CHECK-P9-NEXT:    xxlxor v0, v0, v0
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI3_0 at toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI3_1 at toc@l
-; CHECK-P9-NEXT:    lxvx v4, 0, r5
-; CHECK-P9-NEXT:    lxvx v5, 0, r6
-; CHECK-P9-NEXT:    vperm v1, v0, v3, v4
-; CHECK-P9-NEXT:    vperm v3, v0, v3, v5
-; CHECK-P9-NEXT:    vperm v4, v0, v2, v4
-; CHECK-P9-NEXT:    vperm v2, v0, v2, v5
-; CHECK-P9-NEXT:    xvcvuxwsp vs0, v1
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_0 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_0 at toc@l
+; CHECK-P9-NEXT:    lxvx v4, 0, r4
+; CHECK-P9-NEXT:    xxlxor v5, v5, v5
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_1 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_1 at toc@l
+; CHECK-P9-NEXT:    vperm v0, v5, v3, v4
+; CHECK-P9-NEXT:    xvcvuxwsp vs0, v0
+; CHECK-P9-NEXT:    lxvx v0, 0, r4
+; CHECK-P9-NEXT:    vperm v3, v5, v3, v0
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    xvcvuxwsp vs1, v3
-; CHECK-P9-NEXT:    xvcvuxwsp vs2, v4
+; CHECK-P9-NEXT:    vperm v3, v5, v2, v4
+; CHECK-P9-NEXT:    vperm v2, v5, v2, v0
+; CHECK-P9-NEXT:    xvcvuxwsp vs2, v3
 ; CHECK-P9-NEXT:    xvcvuxwsp vs3, v2
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    stxv vs3, 48(r3)
 ; CHECK-P9-NEXT:    stxv vs2, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI3_0 at toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI3_1 at toc@ha
 ; CHECK-BE-NEXT:    lxv v2, 16(r4)
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
-; CHECK-BE-NEXT:    xxlxor v0, v0, v0
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI3_0 at toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI3_1 at toc@l
-; CHECK-BE-NEXT:    lxvx v4, 0, r5
-; CHECK-BE-NEXT:    lxvx v5, 0, r6
-; CHECK-BE-NEXT:    vperm v1, v3, v0, v4
-; CHECK-BE-NEXT:    vperm v3, v0, v3, v5
-; CHECK-BE-NEXT:    vperm v4, v2, v0, v4
-; CHECK-BE-NEXT:    vperm v2, v0, v2, v5
-; CHECK-BE-NEXT:    xvcvuxwsp vs0, v1
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_0 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0 at toc@l
+; CHECK-BE-NEXT:    lxvx v4, 0, r4
+; CHECK-BE-NEXT:    xxlxor v5, v5, v5
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_1 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_1 at toc@l
+; CHECK-BE-NEXT:    vperm v0, v3, v5, v4
+; CHECK-BE-NEXT:    xvcvuxwsp vs0, v0
+; CHECK-BE-NEXT:    lxvx v0, 0, r4
+; CHECK-BE-NEXT:    vperm v3, v5, v3, v0
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    xvcvuxwsp vs1, v3
-; CHECK-BE-NEXT:    xvcvuxwsp vs2, v4
+; CHECK-BE-NEXT:    vperm v3, v2, v5, v4
+; CHECK-BE-NEXT:    vperm v2, v5, v2, v0
+; CHECK-BE-NEXT:    xvcvuxwsp vs2, v3
 ; CHECK-BE-NEXT:    xvcvuxwsp vs3, v2
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    stxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    stxv vs2, 32(r3)
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i16>, <16 x i16>* %0, align 32
@@ -287,20 +287,20 @@ define i64 @test2elt_signed(i32 %a.coerc
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    mtvsrws v2, r3
 ; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    li r4, 2
 ; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
-; CHECK-P9-NEXT:    vextuhrx r4, r4, v2
 ; CHECK-P9-NEXT:    extsh r3, r3
-; CHECK-P9-NEXT:    extsh r4, r4
 ; CHECK-P9-NEXT:    mtvsrwa f0, r3
-; CHECK-P9-NEXT:    mtvsrwa f1, r4
+; CHECK-P9-NEXT:    li r3, 2
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    vextuhrx r3, r3, v2
+; CHECK-P9-NEXT:    extsh r3, r3
+; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT:    mtvsrwa f0, r3
 ; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvsxdsp f1, f1
 ; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xscvdpspn vs1, f1
 ; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 1
-; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 1
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    vmrglw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
@@ -308,18 +308,18 @@ define i64 @test2elt_signed(i32 %a.coerc
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrws v2, r3
 ; CHECK-BE-NEXT:    li r3, 2
-; CHECK-BE-NEXT:    li r4, 0
 ; CHECK-BE-NEXT:    vextuhlx r3, r3, v2
-; CHECK-BE-NEXT:    vextuhlx r4, r4, v2
 ; CHECK-BE-NEXT:    extsh r3, r3
-; CHECK-BE-NEXT:    extsh r4, r4
 ; CHECK-BE-NEXT:    mtvsrwa f0, r3
-; CHECK-BE-NEXT:    mtvsrwa f1, r4
+; CHECK-BE-NEXT:    li r3, 0
+; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    vextuhlx r3, r3, v2
+; CHECK-BE-NEXT:    extsh r3, r3
+; CHECK-BE-NEXT:    xscvdpspn v3, f0
+; CHECK-BE-NEXT:    mtvsrwa f0, r3
 ; CHECK-BE-NEXT:    xscvsxdsp f0, f0
-; CHECK-BE-NEXT:    xscvsxdsp f1, f1
 ; CHECK-BE-NEXT:    xscvdpspn v2, f0
-; CHECK-BE-NEXT:    xscvdpspn v3, f1
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -397,9 +397,9 @@ define void @test8elt_signed(<8 x float>
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_0 at toc@ha
-; CHECK-BE-NEXT:    xxlxor v4, v4, v4
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_0 at toc@l
 ; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
 ; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-BE-NEXT:    vmrghh v2, v2, v2
 ; CHECK-BE-NEXT:    vextsh2w v3, v3
@@ -449,43 +449,43 @@ define void @test16elt_signed(<16 x floa
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv v2, 16(r4)
 ; CHECK-P9-NEXT:    lxv v3, 0(r4)
+; CHECK-P9-NEXT:    lxv v2, 16(r4)
 ; CHECK-P9-NEXT:    vmrglh v4, v3, v3
 ; CHECK-P9-NEXT:    vmrghh v3, v3, v3
-; CHECK-P9-NEXT:    vmrglh v5, v2, v2
+; CHECK-P9-NEXT:    vextsh2w v3, v3
+; CHECK-P9-NEXT:    xvcvsxwsp vs1, v3
+; CHECK-P9-NEXT:    vmrglh v3, v2, v2
 ; CHECK-P9-NEXT:    vmrghh v2, v2, v2
 ; CHECK-P9-NEXT:    vextsh2w v4, v4
+; CHECK-P9-NEXT:    xvcvsxwsp vs0, v4
 ; CHECK-P9-NEXT:    vextsh2w v3, v3
-; CHECK-P9-NEXT:    vextsh2w v5, v5
 ; CHECK-P9-NEXT:    vextsh2w v2, v2
-; CHECK-P9-NEXT:    xvcvsxwsp vs0, v4
-; CHECK-P9-NEXT:    xvcvsxwsp vs1, v3
-; CHECK-P9-NEXT:    xvcvsxwsp vs2, v5
+; CHECK-P9-NEXT:    xvcvsxwsp vs2, v3
 ; CHECK-P9-NEXT:    xvcvsxwsp vs3, v2
-; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
 ; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_0 at toc@ha
 ; CHECK-BE-NEXT:    lxv v2, 16(r4)
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_0 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_0 at toc@l
+; CHECK-BE-NEXT:    lxvx v4, 0, r4
 ; CHECK-BE-NEXT:    xxlxor v5, v5, v5
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_0 at toc@l
-; CHECK-BE-NEXT:    lxvx v4, 0, r5
 ; CHECK-BE-NEXT:    vperm v0, v5, v3, v4
 ; CHECK-BE-NEXT:    vperm v4, v5, v2, v4
 ; CHECK-BE-NEXT:    vmrghh v3, v3, v3
 ; CHECK-BE-NEXT:    vmrghh v2, v2, v2
-; CHECK-BE-NEXT:    vextsh2w v5, v0
+; CHECK-BE-NEXT:    vextsh2w v0, v0
 ; CHECK-BE-NEXT:    vextsh2w v4, v4
 ; CHECK-BE-NEXT:    vextsh2w v3, v3
 ; CHECK-BE-NEXT:    vextsh2w v2, v2
-; CHECK-BE-NEXT:    xvcvsxwsp vs0, v5
+; CHECK-BE-NEXT:    xvcvsxwsp vs0, v0
 ; CHECK-BE-NEXT:    xvcvsxwsp vs1, v4
 ; CHECK-BE-NEXT:    xvcvsxwsp vs2, v3
 ; CHECK-BE-NEXT:    xvcvsxwsp vs3, v2

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll Wed Jan  2 21:04:18 2019
@@ -24,23 +24,23 @@ define <2 x double> @test2elt(i32 %a.coe
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r4, r2, .LCPI0_0 at toc@ha
-; CHECK-P9-NEXT:    mtvsrws v3, r3
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r3
 ; CHECK-P9-NEXT:    xxlxor v4, v4, v4
-; CHECK-P9-NEXT:    addi r4, r4, .LCPI0_0 at toc@l
-; CHECK-P9-NEXT:    lxvx v2, 0, r4
-; CHECK-P9-NEXT:    vperm v2, v4, v3, v2
+; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-P9-NEXT:    xvcvuxddp v2, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI0_0 at toc@ha
-; CHECK-BE-NEXT:    mtvsrws v3, r3
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r3
 ; CHECK-BE-NEXT:    xxlxor v4, v4, v4
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI0_0 at toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r4
-; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
+; CHECK-BE-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-BE-NEXT:    xvcvuxddp v2, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -74,39 +74,39 @@ define void @test4elt(<4 x double>* noal
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI1_0 at toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI1_1 at toc@ha
 ; CHECK-P9-NEXT:    mtvsrd f0, r4
-; CHECK-P9-NEXT:    xxlxor v5, v5, v5
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI1_0 at toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI1_1 at toc@l
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    lxvx v2, 0, r5
-; CHECK-P9-NEXT:    lxvx v3, 0, r6
-; CHECK-P9-NEXT:    vperm v2, v5, v4, v2
-; CHECK-P9-NEXT:    vperm v3, v5, v4, v3
-; CHECK-P9-NEXT:    xvcvuxddp vs0, v2
-; CHECK-P9-NEXT:    xvcvuxddp vs1, v3
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI1_0 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI1_0 at toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI1_1 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI1_1 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P9-NEXT:    xvcvuxddp vs0, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs1, v2
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI1_0 at toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI1_1 at toc@ha
-; CHECK-BE-NEXT:    mtvsrd v4, r4
-; CHECK-BE-NEXT:    xxlxor v5, v5, v5
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI1_0 at toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI1_1 at toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r5
-; CHECK-BE-NEXT:    lxvx v3, 0, r6
-; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    vperm v3, v5, v4, v3
-; CHECK-BE-NEXT:    xvcvuxddp vs0, v2
-; CHECK-BE-NEXT:    xvcvuxddp vs1, v3
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_0 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_0 at toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_1 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_1 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
+; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs1, v2
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <4 x i16>
@@ -155,59 +155,59 @@ define void @test8elt(<8 x double>* noal
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_0 at toc@ha
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI2_1 at toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI2_2 at toc@ha
-; CHECK-P9-NEXT:    addis r7, r2, .LCPI2_3 at toc@ha
-; CHECK-P9-NEXT:    xxlxor v1, v1, v1
 ; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI2_1 at toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI2_2 at toc@l
-; CHECK-P9-NEXT:    addi r7, r7, .LCPI2_3 at toc@l
-; CHECK-P9-NEXT:    lxvx v3, 0, r4
-; CHECK-P9-NEXT:    lxvx v4, 0, r5
-; CHECK-P9-NEXT:    lxvx v5, 0, r6
-; CHECK-P9-NEXT:    lxvx v0, 0, r7
-; CHECK-P9-NEXT:    vperm v3, v1, v2, v3
-; CHECK-P9-NEXT:    vperm v4, v1, v2, v4
-; CHECK-P9-NEXT:    vperm v5, v1, v2, v5
-; CHECK-P9-NEXT:    vperm v2, v1, v2, v0
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_1 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_1 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-P9-NEXT:    xvcvuxddp vs0, v3
-; CHECK-P9-NEXT:    xvcvuxddp vs1, v4
-; CHECK-P9-NEXT:    xvcvuxddp vs2, v5
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_2 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_2 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs1, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_3 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_3 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs2, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
 ; CHECK-P9-NEXT:    xvcvuxddp vs3, v2
 ; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_0 at toc@ha
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI2_1 at toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI2_2 at toc@ha
-; CHECK-BE-NEXT:    addis r7, r2, .LCPI2_3 at toc@ha
-; CHECK-BE-NEXT:    xxlxor v1, v1, v1
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI2_1 at toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI2_2 at toc@l
-; CHECK-BE-NEXT:    addi r7, r7, .LCPI2_3 at toc@l
-; CHECK-BE-NEXT:    lxvx v3, 0, r4
-; CHECK-BE-NEXT:    lxvx v4, 0, r5
-; CHECK-BE-NEXT:    lxvx v5, 0, r6
-; CHECK-BE-NEXT:    lxvx v0, 0, r7
-; CHECK-BE-NEXT:    vperm v3, v2, v1, v3
-; CHECK-BE-NEXT:    vperm v4, v1, v2, v4
-; CHECK-BE-NEXT:    vperm v5, v1, v2, v5
-; CHECK-BE-NEXT:    vperm v2, v1, v2, v0
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_1 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_1 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
 ; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
-; CHECK-BE-NEXT:    xvcvuxddp vs1, v4
-; CHECK-BE-NEXT:    xvcvuxddp vs2, v5
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_2 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_2 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs1, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_3 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_3 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs2, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    xvcvuxddp vs3, v2
 ; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs2, 32(r3)
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = uitofp <8 x i16> %a to <8 x double>
@@ -276,88 +276,88 @@ define void @test16elt(<16 x double>* no
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI3_0 at toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI3_1 at toc@ha
-; CHECK-P9-NEXT:    addis r7, r2, .LCPI3_2 at toc@ha
-; CHECK-P9-NEXT:    addis r8, r2, .LCPI3_3 at toc@ha
-; CHECK-P9-NEXT:    lxv v0, 0(r4)
-; CHECK-P9-NEXT:    lxv v1, 16(r4)
-; CHECK-P9-NEXT:    xxlxor v6, v6, v6
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI3_0 at toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI3_1 at toc@l
-; CHECK-P9-NEXT:    addi r7, r7, .LCPI3_2 at toc@l
-; CHECK-P9-NEXT:    addi r8, r8, .LCPI3_3 at toc@l
-; CHECK-P9-NEXT:    lxvx v2, 0, r5
-; CHECK-P9-NEXT:    lxvx v3, 0, r6
-; CHECK-P9-NEXT:    lxvx v4, 0, r7
-; CHECK-P9-NEXT:    lxvx v5, 0, r8
-; CHECK-P9-NEXT:    vperm v7, v6, v0, v2
-; CHECK-P9-NEXT:    vperm v8, v6, v0, v3
-; CHECK-P9-NEXT:    vperm v9, v6, v0, v4
-; CHECK-P9-NEXT:    vperm v0, v6, v0, v5
-; CHECK-P9-NEXT:    vperm v2, v6, v1, v2
-; CHECK-P9-NEXT:    vperm v3, v6, v1, v3
-; CHECK-P9-NEXT:    vperm v4, v6, v1, v4
-; CHECK-P9-NEXT:    vperm v5, v6, v1, v5
-; CHECK-P9-NEXT:    xvcvuxddp vs0, v7
-; CHECK-P9-NEXT:    xvcvuxddp vs1, v8
-; CHECK-P9-NEXT:    xvcvuxddp vs2, v9
-; CHECK-P9-NEXT:    xvcvuxddp vs3, v0
-; CHECK-P9-NEXT:    xvcvuxddp vs4, v2
+; CHECK-P9-NEXT:    lxv v2, 16(r4)
+; CHECK-P9-NEXT:    lxv v3, 0(r4)
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_0 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_0 at toc@l
+; CHECK-P9-NEXT:    lxvx v4, 0, r4
+; CHECK-P9-NEXT:    xxlxor v5, v5, v5
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_1 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_1 at toc@l
+; CHECK-P9-NEXT:    vperm v0, v5, v3, v4
+; CHECK-P9-NEXT:    xvcvuxddp vs0, v0
+; CHECK-P9-NEXT:    lxvx v0, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_2 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_2 at toc@l
+; CHECK-P9-NEXT:    vperm v1, v5, v3, v0
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs1, v1
+; CHECK-P9-NEXT:    lxvx v1, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_3 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_3 at toc@l
+; CHECK-P9-NEXT:    vperm v6, v5, v3, v1
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs2, v6
+; CHECK-P9-NEXT:    lxvx v6, 0, r4
+; CHECK-P9-NEXT:    vperm v3, v5, v3, v6
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs3, v3
+; CHECK-P9-NEXT:    vperm v3, v5, v2, v4
+; CHECK-P9-NEXT:    xvcvuxddp vs4, v3
+; CHECK-P9-NEXT:    vperm v3, v5, v2, v0
 ; CHECK-P9-NEXT:    xvcvuxddp vs5, v3
-; CHECK-P9-NEXT:    xvcvuxddp vs6, v4
-; CHECK-P9-NEXT:    xvcvuxddp vs7, v5
+; CHECK-P9-NEXT:    vperm v3, v5, v2, v1
+; CHECK-P9-NEXT:    vperm v2, v5, v2, v6
 ; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs6, v3
+; CHECK-P9-NEXT:    xvcvuxddp vs7, v2
+; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    stxv vs5, 80(r3)
 ; CHECK-P9-NEXT:    stxv vs7, 112(r3)
 ; CHECK-P9-NEXT:    stxv vs6, 96(r3)
-; CHECK-P9-NEXT:    stxv vs5, 80(r3)
-; CHECK-P9-NEXT:    stxv vs4, 64(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI3_0 at toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI3_1 at toc@ha
-; CHECK-BE-NEXT:    addis r7, r2, .LCPI3_2 at toc@ha
-; CHECK-BE-NEXT:    addis r8, r2, .LCPI3_3 at toc@ha
-; CHECK-BE-NEXT:    lxv v0, 0(r4)
-; CHECK-BE-NEXT:    lxv v1, 16(r4)
-; CHECK-BE-NEXT:    xxlxor v6, v6, v6
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI3_0 at toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI3_1 at toc@l
-; CHECK-BE-NEXT:    addi r7, r7, .LCPI3_2 at toc@l
-; CHECK-BE-NEXT:    addi r8, r8, .LCPI3_3 at toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r5
-; CHECK-BE-NEXT:    lxvx v3, 0, r6
-; CHECK-BE-NEXT:    lxvx v4, 0, r7
-; CHECK-BE-NEXT:    lxvx v5, 0, r8
-; CHECK-BE-NEXT:    vperm v7, v0, v6, v2
-; CHECK-BE-NEXT:    vperm v8, v6, v0, v3
-; CHECK-BE-NEXT:    vperm v9, v6, v0, v4
-; CHECK-BE-NEXT:    vperm v0, v6, v0, v5
-; CHECK-BE-NEXT:    vperm v2, v1, v6, v2
-; CHECK-BE-NEXT:    vperm v3, v6, v1, v3
-; CHECK-BE-NEXT:    vperm v4, v6, v1, v4
-; CHECK-BE-NEXT:    vperm v5, v6, v1, v5
-; CHECK-BE-NEXT:    xvcvuxddp vs0, v7
-; CHECK-BE-NEXT:    xvcvuxddp vs1, v8
-; CHECK-BE-NEXT:    xvcvuxddp vs2, v9
-; CHECK-BE-NEXT:    xvcvuxddp vs3, v0
-; CHECK-BE-NEXT:    xvcvuxddp vs4, v2
+; CHECK-BE-NEXT:    lxv v2, 16(r4)
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_0 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0 at toc@l
+; CHECK-BE-NEXT:    lxvx v4, 0, r4
+; CHECK-BE-NEXT:    xxlxor v5, v5, v5
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_1 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_1 at toc@l
+; CHECK-BE-NEXT:    vperm v0, v3, v5, v4
+; CHECK-BE-NEXT:    xvcvuxddp vs0, v0
+; CHECK-BE-NEXT:    lxvx v0, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_2 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_2 at toc@l
+; CHECK-BE-NEXT:    vperm v1, v5, v3, v0
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs1, v1
+; CHECK-BE-NEXT:    lxvx v1, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_3 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_3 at toc@l
+; CHECK-BE-NEXT:    vperm v6, v5, v3, v1
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs2, v6
+; CHECK-BE-NEXT:    lxvx v6, 0, r4
+; CHECK-BE-NEXT:    vperm v3, v5, v3, v6
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs3, v3
+; CHECK-BE-NEXT:    vperm v3, v2, v5, v4
+; CHECK-BE-NEXT:    xvcvuxddp vs4, v3
+; CHECK-BE-NEXT:    vperm v3, v5, v2, v0
 ; CHECK-BE-NEXT:    xvcvuxddp vs5, v3
-; CHECK-BE-NEXT:    xvcvuxddp vs6, v4
-; CHECK-BE-NEXT:    xvcvuxddp vs7, v5
+; CHECK-BE-NEXT:    vperm v3, v5, v2, v1
+; CHECK-BE-NEXT:    vperm v2, v5, v2, v6
 ; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs2, 32(r3)
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs6, v3
+; CHECK-BE-NEXT:    xvcvuxddp vs7, v2
+; CHECK-BE-NEXT:    stxv vs4, 64(r3)
+; CHECK-BE-NEXT:    stxv vs5, 80(r3)
 ; CHECK-BE-NEXT:    stxv vs7, 112(r3)
 ; CHECK-BE-NEXT:    stxv vs6, 96(r3)
-; CHECK-BE-NEXT:    stxv vs5, 80(r3)
-; CHECK-BE-NEXT:    stxv vs4, 64(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i16>, <16 x i16>* %0, align 32
@@ -386,22 +386,22 @@ define <2 x double> @test2elt_signed(i32
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r4, r2, .LCPI4_0 at toc@ha
-; CHECK-P9-NEXT:    mtvsrws v3, r3
-; CHECK-P9-NEXT:    addi r4, r4, .LCPI4_0 at toc@l
-; CHECK-P9-NEXT:    lxvx v2, 0, r4
-; CHECK-P9-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    addis r3, r2, .LCPI4_0 at toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, .LCPI4_0 at toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r3
+; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P9-NEXT:    vextsh2d v2, v2
 ; CHECK-P9-NEXT:    xvcvsxddp v2, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI4_0 at toc@ha
-; CHECK-BE-NEXT:    mtvsrws v3, r3
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI4_0 at toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r4
-; CHECK-BE-NEXT:    vperm v2, v3, v3, v2
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0 at toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0 at toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r3
+; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-BE-NEXT:    vextsh2d v2, v2
 ; CHECK-BE-NEXT:    xvcvsxddp v2, v2
 ; CHECK-BE-NEXT:    blr
@@ -443,41 +443,41 @@ define void @test4elt_signed(<4 x double
 ;
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI5_0 at toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI5_1 at toc@ha
 ; CHECK-P9-NEXT:    mtvsrd f0, r4
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI5_0 at toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI5_1 at toc@l
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    lxvx v2, 0, r5
-; CHECK-P9-NEXT:    lxvx v3, 0, r6
-; CHECK-P9-NEXT:    vperm v2, v4, v4, v2
-; CHECK-P9-NEXT:    vperm v3, v4, v4, v3
-; CHECK-P9-NEXT:    vextsh2d v2, v2
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI5_0 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI5_0 at toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI5_1 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI5_1 at toc@l
 ; CHECK-P9-NEXT:    vextsh2d v3, v3
-; CHECK-P9-NEXT:    xvcvsxddp vs0, v2
-; CHECK-P9-NEXT:    xvcvsxddp vs1, v3
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    xvcvsxddp vs0, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    vextsh2d v2, v2
+; CHECK-P9-NEXT:    xvcvsxddp vs1, v2
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI5_0 at toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI5_1 at toc@ha
-; CHECK-BE-NEXT:    mtvsrd v4, r4
-; CHECK-BE-NEXT:    xxlxor v5, v5, v5
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI5_0 at toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI5_1 at toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r5
-; CHECK-BE-NEXT:    lxvx v3, 0, r6
-; CHECK-BE-NEXT:    vperm v2, v5, v4, v2
-; CHECK-BE-NEXT:    vperm v3, v4, v4, v3
-; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI5_0 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI5_0 at toc@l
+; CHECK-BE-NEXT:    lxvx v4, 0, r4
+; CHECK-BE-NEXT:    xxlxor v3, v3, v3
+; CHECK-BE-NEXT:    vperm v3, v3, v2, v4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI5_1 at toc@ha
 ; CHECK-BE-NEXT:    vextsh2d v3, v3
-; CHECK-BE-NEXT:    xvcvsxddp vs0, v2
-; CHECK-BE-NEXT:    xvcvsxddp vs1, v3
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI5_1 at toc@l
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v2
 ; CHECK-BE-NEXT:    stxv vs1, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -538,66 +538,66 @@ define void @test8elt_signed(<8 x double
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI6_1@toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI6_2@toc@ha
-; CHECK-P9-NEXT:    addis r7, r2, .LCPI6_3@toc@ha
 ; CHECK-P9-NEXT:    addi r4, r4, .LCPI6_0@toc@l
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI6_1@toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI6_2@toc@l
-; CHECK-P9-NEXT:    addi r7, r7, .LCPI6_3@toc@l
-; CHECK-P9-NEXT:    lxvx v3, 0, r4
-; CHECK-P9-NEXT:    lxvx v4, 0, r5
-; CHECK-P9-NEXT:    lxvx v5, 0, r6
-; CHECK-P9-NEXT:    lxvx v0, 0, r7
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI6_1@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI6_1@toc@l
 ; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
-; CHECK-P9-NEXT:    vperm v4, v2, v2, v4
-; CHECK-P9-NEXT:    vperm v5, v2, v2, v5
-; CHECK-P9-NEXT:    vperm v2, v2, v2, v0
 ; CHECK-P9-NEXT:    vextsh2d v3, v3
-; CHECK-P9-NEXT:    vextsh2d v4, v4
-; CHECK-P9-NEXT:    vextsh2d v5, v5
-; CHECK-P9-NEXT:    vextsh2d v2, v2
 ; CHECK-P9-NEXT:    xvcvsxddp vs0, v3
-; CHECK-P9-NEXT:    xvcvsxddp vs1, v4
-; CHECK-P9-NEXT:    xvcvsxddp vs2, v5
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI6_2@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI6_2@toc@l
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    vextsh2d v3, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs1, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI6_3@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI6_3@toc@l
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    vextsh2d v3, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs2, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    vextsh2d v2, v2
 ; CHECK-P9-NEXT:    xvcvsxddp vs3, v2
 ; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI6_1@toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI6_2@toc@ha
-; CHECK-BE-NEXT:    addis r7, r2, .LCPI6_3@toc@ha
-; CHECK-BE-NEXT:    xxlxor v1, v1, v1
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_0@toc@l
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI6_1@toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI6_2@toc@l
-; CHECK-BE-NEXT:    addi r7, r7, .LCPI6_3@toc@l
-; CHECK-BE-NEXT:    lxvx v3, 0, r4
-; CHECK-BE-NEXT:    lxvx v4, 0, r5
-; CHECK-BE-NEXT:    lxvx v5, 0, r6
-; CHECK-BE-NEXT:    lxvx v0, 0, r7
-; CHECK-BE-NEXT:    vperm v3, v1, v2, v3
-; CHECK-BE-NEXT:    vperm v4, v1, v2, v4
-; CHECK-BE-NEXT:    vperm v5, v2, v2, v5
-; CHECK-BE-NEXT:    vperm v2, v2, v2, v0
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_1@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_1@toc@l
 ; CHECK-BE-NEXT:    vextsh2d v3, v3
-; CHECK-BE-NEXT:    vextsh2d v4, v4
-; CHECK-BE-NEXT:    vextsh2d v5, v5
-; CHECK-BE-NEXT:    vextsh2d v2, v2
 ; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
-; CHECK-BE-NEXT:    xvcvsxddp vs1, v4
-; CHECK-BE-NEXT:    xvcvsxddp vs2, v5
-; CHECK-BE-NEXT:    xvcvsxddp vs3, v2
-; CHECK-BE-NEXT:    stxv vs1, 48(r3)
-; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_2@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_2@toc@l
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    vextsh2d v3, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_3@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_3@toc@l
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
+; CHECK-BE-NEXT:    stxv vs1, 48(r3)
+; CHECK-BE-NEXT:    vextsh2d v3, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs2, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-BE-NEXT:    stxv vs2, 0(r3)
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs3, v2
+; CHECK-BE-NEXT:    stxv vs3, 32(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = sitofp <8 x i16> %a to <8 x double>
@@ -686,102 +686,102 @@ define void @test16elt_signed(<16 x doub
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    addis r5, r2, .LCPI7_0@toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI7_1@toc@ha
-; CHECK-P9-NEXT:    addis r7, r2, .LCPI7_2@toc@ha
-; CHECK-P9-NEXT:    addis r8, r2, .LCPI7_3@toc@ha
-; CHECK-P9-NEXT:    lxv v0, 0(r4)
-; CHECK-P9-NEXT:    lxv v1, 16(r4)
 ; CHECK-P9-NEXT:    addi r5, r5, .LCPI7_0@toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI7_1@toc@l
-; CHECK-P9-NEXT:    addi r7, r7, .LCPI7_2@toc@l
-; CHECK-P9-NEXT:    addi r8, r8, .LCPI7_3@toc@l
-; CHECK-P9-NEXT:    lxvx v2, 0, r5
-; CHECK-P9-NEXT:    lxvx v3, 0, r6
-; CHECK-P9-NEXT:    lxvx v4, 0, r7
-; CHECK-P9-NEXT:    lxvx v5, 0, r8
-; CHECK-P9-NEXT:    vperm v6, v0, v0, v2
-; CHECK-P9-NEXT:    vperm v7, v0, v0, v3
-; CHECK-P9-NEXT:    vperm v8, v0, v0, v4
-; CHECK-P9-NEXT:    vperm v0, v0, v0, v5
-; CHECK-P9-NEXT:    vperm v2, v1, v1, v2
-; CHECK-P9-NEXT:    vperm v3, v1, v1, v3
-; CHECK-P9-NEXT:    vperm v4, v1, v1, v4
-; CHECK-P9-NEXT:    vperm v5, v1, v1, v5
-; CHECK-P9-NEXT:    vextsh2d v1, v6
-; CHECK-P9-NEXT:    vextsh2d v6, v7
-; CHECK-P9-NEXT:    vextsh2d v7, v8
-; CHECK-P9-NEXT:    vextsh2d v0, v0
-; CHECK-P9-NEXT:    vextsh2d v2, v2
-; CHECK-P9-NEXT:    vextsh2d v3, v3
+; CHECK-P9-NEXT:    lxv v2, 0(r4)
+; CHECK-P9-NEXT:    lxvx v3, 0, r5
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI7_1@toc@ha
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI7_1@toc@l
+; CHECK-P9-NEXT:    lxvx v5, 0, r5
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI7_2@toc@ha
+; CHECK-P9-NEXT:    vperm v4, v2, v2, v3
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI7_2@toc@l
 ; CHECK-P9-NEXT:    vextsh2d v4, v4
-; CHECK-P9-NEXT:    vextsh2d v5, v5
-; CHECK-P9-NEXT:    xvcvsxddp vs0, v1
-; CHECK-P9-NEXT:    xvcvsxddp vs1, v6
-; CHECK-P9-NEXT:    xvcvsxddp vs2, v7
-; CHECK-P9-NEXT:    xvcvsxddp vs3, v0
-; CHECK-P9-NEXT:    xvcvsxddp vs4, v2
-; CHECK-P9-NEXT:    xvcvsxddp vs5, v3
-; CHECK-P9-NEXT:    xvcvsxddp vs6, v4
-; CHECK-P9-NEXT:    xvcvsxddp vs7, v5
+; CHECK-P9-NEXT:    lxvx v0, 0, r5
+; CHECK-P9-NEXT:    addis r5, r2, .LCPI7_3@toc@ha
+; CHECK-P9-NEXT:    xvcvsxddp vs0, v4
+; CHECK-P9-NEXT:    vperm v4, v2, v2, v5
+; CHECK-P9-NEXT:    addi r5, r5, .LCPI7_3@toc@l
+; CHECK-P9-NEXT:    lxvx v1, 0, r5
+; CHECK-P9-NEXT:    vextsh2d v4, v4
+; CHECK-P9-NEXT:    xvcvsxddp vs1, v4
+; CHECK-P9-NEXT:    vperm v4, v2, v2, v0
+; CHECK-P9-NEXT:    vperm v2, v2, v2, v1
+; CHECK-P9-NEXT:    vextsh2d v4, v4
+; CHECK-P9-NEXT:    xvcvsxddp vs2, v4
+; CHECK-P9-NEXT:    lxv v4, 16(r4)
+; CHECK-P9-NEXT:    vextsh2d v2, v2
+; CHECK-P9-NEXT:    xvcvsxddp vs3, v2
+; CHECK-P9-NEXT:    vperm v2, v4, v4, v3
+; CHECK-P9-NEXT:    vextsh2d v2, v2
 ; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    xvcvsxddp vs4, v2
+; CHECK-P9-NEXT:    vperm v2, v4, v4, v5
+; CHECK-P9-NEXT:    vextsh2d v2, v2
+; CHECK-P9-NEXT:    xvcvsxddp vs5, v2
+; CHECK-P9-NEXT:    vperm v2, v4, v4, v0
+; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    stxv vs5, 80(r3)
+; CHECK-P9-NEXT:    vextsh2d v2, v2
+; CHECK-P9-NEXT:    xvcvsxddp vs6, v2
+; CHECK-P9-NEXT:    vperm v2, v4, v4, v1
+; CHECK-P9-NEXT:    vextsh2d v2, v2
+; CHECK-P9-NEXT:    stxv vs6, 96(r3)
+; CHECK-P9-NEXT:    xvcvsxddp vs7, v2
+; CHECK-P9-NEXT:    stxv vs7, 112(r3)
 ; CHECK-P9-NEXT:    stxv vs2, 32(r3)
 ; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
-; CHECK-P9-NEXT:    stxv vs7, 112(r3)
-; CHECK-P9-NEXT:    stxv vs6, 96(r3)
-; CHECK-P9-NEXT:    stxv vs5, 80(r3)
-; CHECK-P9-NEXT:    stxv vs4, 64(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_0@toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI7_1@toc@ha
-; CHECK-BE-NEXT:    addis r7, r2, .LCPI7_2@toc@ha
-; CHECK-BE-NEXT:    addis r8, r2, .LCPI7_3@toc@ha
-; CHECK-BE-NEXT:    lxv v2, 16(r4)
-; CHECK-BE-NEXT:    lxv v3, 0(r4)
-; CHECK-BE-NEXT:    xxlxor v6, v6, v6
 ; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_0@toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI7_1@toc@l
-; CHECK-BE-NEXT:    addi r7, r7, .LCPI7_2@toc@l
-; CHECK-BE-NEXT:    addi r8, r8, .LCPI7_3@toc@l
-; CHECK-BE-NEXT:    lxvx v4, 0, r5
-; CHECK-BE-NEXT:    lxvx v5, 0, r6
-; CHECK-BE-NEXT:    lxvx v0, 0, r7
-; CHECK-BE-NEXT:    lxvx v1, 0, r8
-; CHECK-BE-NEXT:    vperm v7, v6, v3, v4
-; CHECK-BE-NEXT:    vperm v8, v6, v3, v5
-; CHECK-BE-NEXT:    vperm v4, v6, v2, v4
-; CHECK-BE-NEXT:    vperm v5, v6, v2, v5
-; CHECK-BE-NEXT:    vperm v6, v3, v3, v0
-; CHECK-BE-NEXT:    vperm v3, v3, v3, v1
-; CHECK-BE-NEXT:    vperm v0, v2, v2, v0
-; CHECK-BE-NEXT:    vperm v2, v2, v2, v1
-; CHECK-BE-NEXT:    vextsh2d v1, v7
-; CHECK-BE-NEXT:    vextsh2d v7, v8
-; CHECK-BE-NEXT:    vextsh2d v4, v4
-; CHECK-BE-NEXT:    vextsh2d v5, v5
-; CHECK-BE-NEXT:    vextsh2d v6, v6
-; CHECK-BE-NEXT:    vextsh2d v3, v3
+; CHECK-BE-NEXT:    lxvx v2, 0, r5
+; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    lxv v1, 16(r4)
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_1@toc@ha
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_1@toc@l
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_2@toc@ha
+; CHECK-BE-NEXT:    xxlxor v5, v5, v5
+; CHECK-BE-NEXT:    vperm v0, v5, v4, v2
+; CHECK-BE-NEXT:    lxvx v3, 0, r5
+; CHECK-BE-NEXT:    vperm v2, v5, v1, v2
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_2@toc@l
 ; CHECK-BE-NEXT:    vextsh2d v0, v0
+; CHECK-BE-NEXT:    xvcvsxddp vs2, v2
+; CHECK-BE-NEXT:    vperm v2, v5, v1, v3
 ; CHECK-BE-NEXT:    vextsh2d v2, v2
-; CHECK-BE-NEXT:    xvcvsxddp vs0, v1
-; CHECK-BE-NEXT:    xvcvsxddp vs1, v7
-; CHECK-BE-NEXT:    xvcvsxddp vs2, v4
-; CHECK-BE-NEXT:    xvcvsxddp vs3, v5
-; CHECK-BE-NEXT:    xvcvsxddp vs4, v6
-; CHECK-BE-NEXT:    xvcvsxddp vs5, v3
-; CHECK-BE-NEXT:    xvcvsxddp vs6, v0
-; CHECK-BE-NEXT:    xvcvsxddp vs7, v2
-; CHECK-BE-NEXT:    stxv vs3, 112(r3)
 ; CHECK-BE-NEXT:    stxv vs2, 80(r3)
+; CHECK-BE-NEXT:    xvcvsxddp vs3, v2
+; CHECK-BE-NEXT:    lxvx v2, 0, r4
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v0
+; CHECK-BE-NEXT:    vperm v0, v5, v4, v3
+; CHECK-BE-NEXT:    vperm v3, v4, v4, v2
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_3@toc@ha
+; CHECK-BE-NEXT:    vextsh2d v0, v0
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v0
 ; CHECK-BE-NEXT:    stxv vs1, 48(r3)
+; CHECK-BE-NEXT:    vextsh2d v3, v3
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_3@toc@l
+; CHECK-BE-NEXT:    xvcvsxddp vs4, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v1, v1, v2
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs6, v2
+; CHECK-BE-NEXT:    vperm v2, v1, v1, v3
+; CHECK-BE-NEXT:    vperm v4, v4, v4, v3
+; CHECK-BE-NEXT:    vextsh2d v4, v4
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs7, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs5, v4
+; CHECK-BE-NEXT:    stxv vs3, 112(r3)
+; CHECK-BE-NEXT:    stxv vs6, 64(r3)
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs4, 0(r3)
 ; CHECK-BE-NEXT:    stxv vs7, 96(r3)
-; CHECK-BE-NEXT:    stxv vs6, 64(r3)
 ; CHECK-BE-NEXT:    stxv vs5, 32(r3)
-; CHECK-BE-NEXT:    stxv vs4, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i16>, <16 x i16>* %0, align 32

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll Wed Jan  2 21:04:18 2019
@@ -105,38 +105,38 @@ define void @test8elt(<8 x double>* noal
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r4)
 ; CHECK-P9-NEXT:    lxv vs1, 0(r4)
 ; CHECK-P9-NEXT:    xxmrglw v2, vs1, vs1
-; CHECK-P9-NEXT:    xxmrghw v3, vs1, vs1
-; CHECK-P9-NEXT:    xxmrglw v4, vs0, vs0
-; CHECK-P9-NEXT:    xxmrghw v5, vs0, vs0
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    xvcvuxwdp vs2, v2
+; CHECK-P9-NEXT:    xxmrghw v2, vs1, vs1
+; CHECK-P9-NEXT:    xvcvuxwdp vs1, v2
+; CHECK-P9-NEXT:    xxmrglw v2, vs0, vs0
+; CHECK-P9-NEXT:    xvcvuxwdp vs3, v2
+; CHECK-P9-NEXT:    xxmrghw v2, vs0, vs0
+; CHECK-P9-NEXT:    stxv vs2, 0(r3)
 ; CHECK-P9-NEXT:    xvcvuxwdp vs0, v2
-; CHECK-P9-NEXT:    xvcvuxwdp vs1, v3
-; CHECK-P9-NEXT:    xvcvuxwdp vs2, v4
-; CHECK-P9-NEXT:    xvcvuxwdp vs3, v5
-; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
 ; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs3, 32(r3)
+; CHECK-P9-NEXT:    stxv vs0, 48(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 16(r4)
 ; CHECK-BE-NEXT:    lxv vs1, 0(r4)
 ; CHECK-BE-NEXT:    xxmrghw v2, vs1, vs1
-; CHECK-BE-NEXT:    xxmrglw v3, vs1, vs1
-; CHECK-BE-NEXT:    xxmrghw v4, vs0, vs0
-; CHECK-BE-NEXT:    xxmrglw v5, vs0, vs0
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    xvcvuxwdp vs2, v2
+; CHECK-BE-NEXT:    xxmrglw v2, vs1, vs1
+; CHECK-BE-NEXT:    xvcvuxwdp vs1, v2
+; CHECK-BE-NEXT:    xxmrghw v2, vs0, vs0
+; CHECK-BE-NEXT:    xvcvuxwdp vs3, v2
+; CHECK-BE-NEXT:    xxmrglw v2, vs0, vs0
+; CHECK-BE-NEXT:    stxv vs2, 0(r3)
 ; CHECK-BE-NEXT:    xvcvuxwdp vs0, v2
-; CHECK-BE-NEXT:    xvcvuxwdp vs1, v3
-; CHECK-BE-NEXT:    xvcvuxwdp vs2, v4
-; CHECK-BE-NEXT:    xvcvuxwdp vs3, v5
-; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    stxv vs0, 48(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x i32>, <8 x i32>* %0, align 32
@@ -195,66 +195,66 @@ define void @test16elt(<16 x double>* no
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r4)
-; CHECK-P9-NEXT:    lxv vs1, 0(r4)
-; CHECK-P9-NEXT:    lxv vs2, 48(r4)
-; CHECK-P9-NEXT:    lxv vs3, 32(r4)
-; CHECK-P9-NEXT:    xxmrglw v2, vs1, vs1
-; CHECK-P9-NEXT:    xxmrghw v3, vs1, vs1
-; CHECK-P9-NEXT:    xxmrglw v4, vs0, vs0
-; CHECK-P9-NEXT:    xxmrghw v5, vs0, vs0
-; CHECK-P9-NEXT:    xxmrglw v0, vs3, vs3
-; CHECK-P9-NEXT:    xxmrghw v1, vs3, vs3
-; CHECK-P9-NEXT:    xxmrglw v6, vs2, vs2
-; CHECK-P9-NEXT:    xxmrghw v7, vs2, vs2
+; CHECK-P9-NEXT:    lxv vs0, 0(r4)
+; CHECK-P9-NEXT:    xxmrglw v2, vs0, vs0
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    lxv vs4, 48(r4)
+; CHECK-P9-NEXT:    xvcvuxwdp vs1, v2
+; CHECK-P9-NEXT:    xxmrghw v2, vs0, vs0
+; CHECK-P9-NEXT:    lxv vs5, 32(r4)
 ; CHECK-P9-NEXT:    xvcvuxwdp vs0, v2
-; CHECK-P9-NEXT:    xvcvuxwdp vs1, v3
-; CHECK-P9-NEXT:    xvcvuxwdp vs2, v4
-; CHECK-P9-NEXT:    xvcvuxwdp vs3, v5
-; CHECK-P9-NEXT:    xvcvuxwdp vs4, v0
-; CHECK-P9-NEXT:    xvcvuxwdp vs5, v1
-; CHECK-P9-NEXT:    xvcvuxwdp vs6, v6
-; CHECK-P9-NEXT:    xvcvuxwdp vs7, v7
-; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
-; CHECK-P9-NEXT:    stxv vs7, 112(r3)
-; CHECK-P9-NEXT:    stxv vs6, 96(r3)
+; CHECK-P9-NEXT:    xxmrglw v2, vs2, vs2
+; CHECK-P9-NEXT:    xvcvuxwdp vs3, v2
+; CHECK-P9-NEXT:    xxmrghw v2, vs2, vs2
+; CHECK-P9-NEXT:    stxv vs1, 0(r3)
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
+; CHECK-P9-NEXT:    xvcvuxwdp vs2, v2
+; CHECK-P9-NEXT:    xxmrglw v2, vs5, vs5
+; CHECK-P9-NEXT:    xvcvuxwdp vs6, v2
+; CHECK-P9-NEXT:    xxmrghw v2, vs5, vs5
+; CHECK-P9-NEXT:    stxv vs3, 32(r3)
+; CHECK-P9-NEXT:    stxv vs2, 48(r3)
+; CHECK-P9-NEXT:    xvcvuxwdp vs5, v2
+; CHECK-P9-NEXT:    xxmrglw v2, vs4, vs4
+; CHECK-P9-NEXT:    xvcvuxwdp vs7, v2
+; CHECK-P9-NEXT:    xxmrghw v2, vs4, vs4
+; CHECK-P9-NEXT:    stxv vs6, 64(r3)
 ; CHECK-P9-NEXT:    stxv vs5, 80(r3)
-; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    xvcvuxwdp vs4, v2
+; CHECK-P9-NEXT:    stxv vs7, 96(r3)
+; CHECK-P9-NEXT:    stxv vs4, 112(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 16(r4)
-; CHECK-BE-NEXT:    lxv vs1, 0(r4)
-; CHECK-BE-NEXT:    lxv vs2, 48(r4)
-; CHECK-BE-NEXT:    lxv vs3, 32(r4)
-; CHECK-BE-NEXT:    xxmrghw v2, vs1, vs1
-; CHECK-BE-NEXT:    xxmrglw v3, vs1, vs1
-; CHECK-BE-NEXT:    xxmrghw v4, vs0, vs0
-; CHECK-BE-NEXT:    xxmrglw v5, vs0, vs0
-; CHECK-BE-NEXT:    xxmrghw v0, vs3, vs3
-; CHECK-BE-NEXT:    xxmrglw v1, vs3, vs3
-; CHECK-BE-NEXT:    xxmrghw v6, vs2, vs2
-; CHECK-BE-NEXT:    xxmrglw v7, vs2, vs2
+; CHECK-BE-NEXT:    lxv vs0, 0(r4)
+; CHECK-BE-NEXT:    xxmrghw v2, vs0, vs0
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs4, 48(r4)
+; CHECK-BE-NEXT:    xvcvuxwdp vs1, v2
+; CHECK-BE-NEXT:    xxmrglw v2, vs0, vs0
+; CHECK-BE-NEXT:    lxv vs5, 32(r4)
 ; CHECK-BE-NEXT:    xvcvuxwdp vs0, v2
-; CHECK-BE-NEXT:    xvcvuxwdp vs1, v3
-; CHECK-BE-NEXT:    xvcvuxwdp vs2, v4
-; CHECK-BE-NEXT:    xvcvuxwdp vs3, v5
-; CHECK-BE-NEXT:    xvcvuxwdp vs4, v0
-; CHECK-BE-NEXT:    xvcvuxwdp vs5, v1
-; CHECK-BE-NEXT:    xvcvuxwdp vs6, v6
-; CHECK-BE-NEXT:    xvcvuxwdp vs7, v7
-; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs2, 32(r3)
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
-; CHECK-BE-NEXT:    stxv vs7, 112(r3)
-; CHECK-BE-NEXT:    stxv vs6, 96(r3)
+; CHECK-BE-NEXT:    xxmrghw v2, vs2, vs2
+; CHECK-BE-NEXT:    xvcvuxwdp vs3, v2
+; CHECK-BE-NEXT:    xxmrglw v2, vs2, vs2
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    xvcvuxwdp vs2, v2
+; CHECK-BE-NEXT:    xxmrghw v2, vs5, vs5
+; CHECK-BE-NEXT:    xvcvuxwdp vs6, v2
+; CHECK-BE-NEXT:    xxmrglw v2, vs5, vs5
+; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    stxv vs2, 48(r3)
+; CHECK-BE-NEXT:    xvcvuxwdp vs5, v2
+; CHECK-BE-NEXT:    xxmrghw v2, vs4, vs4
+; CHECK-BE-NEXT:    xvcvuxwdp vs7, v2
+; CHECK-BE-NEXT:    xxmrglw v2, vs4, vs4
+; CHECK-BE-NEXT:    stxv vs6, 64(r3)
 ; CHECK-BE-NEXT:    stxv vs5, 80(r3)
-; CHECK-BE-NEXT:    stxv vs4, 64(r3)
+; CHECK-BE-NEXT:    xvcvuxwdp vs4, v2
+; CHECK-BE-NEXT:    stxv vs7, 96(r3)
+; CHECK-BE-NEXT:    stxv vs4, 112(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i32>, <16 x i32>* %0, align 64
@@ -359,38 +359,38 @@ define void @test8elt_signed(<8 x double
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r4)
 ; CHECK-P9-NEXT:    lxv vs1, 0(r4)
 ; CHECK-P9-NEXT:    xxmrglw v2, vs1, vs1
-; CHECK-P9-NEXT:    xxmrghw v3, vs1, vs1
-; CHECK-P9-NEXT:    xxmrglw v4, vs0, vs0
-; CHECK-P9-NEXT:    xxmrghw v5, vs0, vs0
+; CHECK-P9-NEXT:    lxv vs0, 16(r4)
+; CHECK-P9-NEXT:    xvcvsxwdp vs2, v2
+; CHECK-P9-NEXT:    xxmrghw v2, vs1, vs1
+; CHECK-P9-NEXT:    xvcvsxwdp vs1, v2
+; CHECK-P9-NEXT:    xxmrglw v2, vs0, vs0
+; CHECK-P9-NEXT:    xvcvsxwdp vs3, v2
+; CHECK-P9-NEXT:    xxmrghw v2, vs0, vs0
+; CHECK-P9-NEXT:    stxv vs2, 0(r3)
 ; CHECK-P9-NEXT:    xvcvsxwdp vs0, v2
-; CHECK-P9-NEXT:    xvcvsxwdp vs1, v3
-; CHECK-P9-NEXT:    xvcvsxwdp vs2, v4
-; CHECK-P9-NEXT:    xvcvsxwdp vs3, v5
-; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
 ; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    stxv vs3, 32(r3)
+; CHECK-P9-NEXT:    stxv vs0, 48(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 16(r4)
 ; CHECK-BE-NEXT:    lxv vs1, 0(r4)
 ; CHECK-BE-NEXT:    xxmrghw v2, vs1, vs1
-; CHECK-BE-NEXT:    xxmrglw v3, vs1, vs1
-; CHECK-BE-NEXT:    xxmrghw v4, vs0, vs0
-; CHECK-BE-NEXT:    xxmrglw v5, vs0, vs0
+; CHECK-BE-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-NEXT:    xvcvsxwdp vs2, v2
+; CHECK-BE-NEXT:    xxmrglw v2, vs1, vs1
+; CHECK-BE-NEXT:    xvcvsxwdp vs1, v2
+; CHECK-BE-NEXT:    xxmrghw v2, vs0, vs0
+; CHECK-BE-NEXT:    xvcvsxwdp vs3, v2
+; CHECK-BE-NEXT:    xxmrglw v2, vs0, vs0
+; CHECK-BE-NEXT:    stxv vs2, 0(r3)
 ; CHECK-BE-NEXT:    xvcvsxwdp vs0, v2
-; CHECK-BE-NEXT:    xvcvsxwdp vs1, v3
-; CHECK-BE-NEXT:    xvcvsxwdp vs2, v4
-; CHECK-BE-NEXT:    xvcvsxwdp vs3, v5
-; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    stxv vs0, 48(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x i32>, <8 x i32>* %0, align 32
@@ -449,66 +449,66 @@ define void @test16elt_signed(<16 x doub
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 16(r4)
-; CHECK-P9-NEXT:    lxv vs1, 0(r4)
-; CHECK-P9-NEXT:    lxv vs2, 48(r4)
-; CHECK-P9-NEXT:    lxv vs3, 32(r4)
-; CHECK-P9-NEXT:    xxmrglw v2, vs1, vs1
-; CHECK-P9-NEXT:    xxmrghw v3, vs1, vs1
-; CHECK-P9-NEXT:    xxmrglw v4, vs0, vs0
-; CHECK-P9-NEXT:    xxmrghw v5, vs0, vs0
-; CHECK-P9-NEXT:    xxmrglw v0, vs3, vs3
-; CHECK-P9-NEXT:    xxmrghw v1, vs3, vs3
-; CHECK-P9-NEXT:    xxmrglw v6, vs2, vs2
-; CHECK-P9-NEXT:    xxmrghw v7, vs2, vs2
+; CHECK-P9-NEXT:    lxv vs0, 0(r4)
+; CHECK-P9-NEXT:    xxmrglw v2, vs0, vs0
+; CHECK-P9-NEXT:    lxv vs2, 16(r4)
+; CHECK-P9-NEXT:    lxv vs4, 48(r4)
+; CHECK-P9-NEXT:    xvcvsxwdp vs1, v2
+; CHECK-P9-NEXT:    xxmrghw v2, vs0, vs0
+; CHECK-P9-NEXT:    lxv vs5, 32(r4)
 ; CHECK-P9-NEXT:    xvcvsxwdp vs0, v2
-; CHECK-P9-NEXT:    xvcvsxwdp vs1, v3
-; CHECK-P9-NEXT:    xvcvsxwdp vs2, v4
-; CHECK-P9-NEXT:    xvcvsxwdp vs3, v5
-; CHECK-P9-NEXT:    xvcvsxwdp vs4, v0
-; CHECK-P9-NEXT:    xvcvsxwdp vs5, v1
-; CHECK-P9-NEXT:    xvcvsxwdp vs6, v6
-; CHECK-P9-NEXT:    xvcvsxwdp vs7, v7
-; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
-; CHECK-P9-NEXT:    stxv vs7, 112(r3)
-; CHECK-P9-NEXT:    stxv vs6, 96(r3)
+; CHECK-P9-NEXT:    xxmrglw v2, vs2, vs2
+; CHECK-P9-NEXT:    xvcvsxwdp vs3, v2
+; CHECK-P9-NEXT:    xxmrghw v2, vs2, vs2
+; CHECK-P9-NEXT:    stxv vs1, 0(r3)
+; CHECK-P9-NEXT:    stxv vs0, 16(r3)
+; CHECK-P9-NEXT:    xvcvsxwdp vs2, v2
+; CHECK-P9-NEXT:    xxmrglw v2, vs5, vs5
+; CHECK-P9-NEXT:    xvcvsxwdp vs6, v2
+; CHECK-P9-NEXT:    xxmrghw v2, vs5, vs5
+; CHECK-P9-NEXT:    stxv vs3, 32(r3)
+; CHECK-P9-NEXT:    stxv vs2, 48(r3)
+; CHECK-P9-NEXT:    xvcvsxwdp vs5, v2
+; CHECK-P9-NEXT:    xxmrglw v2, vs4, vs4
+; CHECK-P9-NEXT:    xvcvsxwdp vs7, v2
+; CHECK-P9-NEXT:    xxmrghw v2, vs4, vs4
+; CHECK-P9-NEXT:    stxv vs6, 64(r3)
 ; CHECK-P9-NEXT:    stxv vs5, 80(r3)
-; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    xvcvsxwdp vs4, v2
+; CHECK-P9-NEXT:    stxv vs7, 96(r3)
+; CHECK-P9-NEXT:    stxv vs4, 112(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv vs0, 16(r4)
-; CHECK-BE-NEXT:    lxv vs1, 0(r4)
-; CHECK-BE-NEXT:    lxv vs2, 48(r4)
-; CHECK-BE-NEXT:    lxv vs3, 32(r4)
-; CHECK-BE-NEXT:    xxmrghw v2, vs1, vs1
-; CHECK-BE-NEXT:    xxmrglw v3, vs1, vs1
-; CHECK-BE-NEXT:    xxmrghw v4, vs0, vs0
-; CHECK-BE-NEXT:    xxmrglw v5, vs0, vs0
-; CHECK-BE-NEXT:    xxmrghw v0, vs3, vs3
-; CHECK-BE-NEXT:    xxmrglw v1, vs3, vs3
-; CHECK-BE-NEXT:    xxmrghw v6, vs2, vs2
-; CHECK-BE-NEXT:    xxmrglw v7, vs2, vs2
+; CHECK-BE-NEXT:    lxv vs0, 0(r4)
+; CHECK-BE-NEXT:    xxmrghw v2, vs0, vs0
+; CHECK-BE-NEXT:    lxv vs2, 16(r4)
+; CHECK-BE-NEXT:    lxv vs4, 48(r4)
+; CHECK-BE-NEXT:    xvcvsxwdp vs1, v2
+; CHECK-BE-NEXT:    xxmrglw v2, vs0, vs0
+; CHECK-BE-NEXT:    lxv vs5, 32(r4)
 ; CHECK-BE-NEXT:    xvcvsxwdp vs0, v2
-; CHECK-BE-NEXT:    xvcvsxwdp vs1, v3
-; CHECK-BE-NEXT:    xvcvsxwdp vs2, v4
-; CHECK-BE-NEXT:    xvcvsxwdp vs3, v5
-; CHECK-BE-NEXT:    xvcvsxwdp vs4, v0
-; CHECK-BE-NEXT:    xvcvsxwdp vs5, v1
-; CHECK-BE-NEXT:    xvcvsxwdp vs6, v6
-; CHECK-BE-NEXT:    xvcvsxwdp vs7, v7
-; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs2, 32(r3)
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
-; CHECK-BE-NEXT:    stxv vs7, 112(r3)
-; CHECK-BE-NEXT:    stxv vs6, 96(r3)
+; CHECK-BE-NEXT:    xxmrghw v2, vs2, vs2
+; CHECK-BE-NEXT:    xvcvsxwdp vs3, v2
+; CHECK-BE-NEXT:    xxmrglw v2, vs2, vs2
+; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    xvcvsxwdp vs2, v2
+; CHECK-BE-NEXT:    xxmrghw v2, vs5, vs5
+; CHECK-BE-NEXT:    xvcvsxwdp vs6, v2
+; CHECK-BE-NEXT:    xxmrglw v2, vs5, vs5
+; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    stxv vs2, 48(r3)
+; CHECK-BE-NEXT:    xvcvsxwdp vs5, v2
+; CHECK-BE-NEXT:    xxmrghw v2, vs4, vs4
+; CHECK-BE-NEXT:    xvcvsxwdp vs7, v2
+; CHECK-BE-NEXT:    xxmrglw v2, vs4, vs4
+; CHECK-BE-NEXT:    stxv vs6, 64(r3)
 ; CHECK-BE-NEXT:    stxv vs5, 80(r3)
-; CHECK-BE-NEXT:    stxv vs4, 64(r3)
+; CHECK-BE-NEXT:    xvcvsxwdp vs4, v2
+; CHECK-BE-NEXT:    stxv vs7, 96(r3)
+; CHECK-BE-NEXT:    stxv vs4, 112(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i32>, <16 x i32>* %0, align 64

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll Wed Jan  2 21:04:18 2019
@@ -28,14 +28,14 @@ define i64 @test2elt(<2 x i64> %a) local
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    xxlor vs1, v2, v2
-; CHECK-P9-NEXT:    xscvuxdsp f1, f1
 ; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvdpspn vs1, f1
 ; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT:    xxlor vs0, v2, v2
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
 ; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 1
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    vmrglw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
@@ -73,24 +73,24 @@ define <4 x float> @test4elt(<4 x i64>*
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv v2, 16(r3)
 ; CHECK-P9-NEXT:    lxv v3, 0(r3)
 ; CHECK-P9-NEXT:    xvcvuxdsp vs0, v3
-; CHECK-P9-NEXT:    xvcvuxdsp vs1, v2
+; CHECK-P9-NEXT:    lxv v2, 16(r3)
+; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvuxdsp vs0, v2
 ; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-P9-NEXT:    vpkudum v2, v3, v2
+; CHECK-P9-NEXT:    vpkudum v2, v2, v3
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
 ; CHECK-BE-NEXT:    lxv v3, 16(r3)
 ; CHECK-BE-NEXT:    xvcvuxdsp vs0, v3
-; CHECK-BE-NEXT:    xvcvuxdsp vs1, v2
+; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvuxdsp vs0, v2
 ; CHECK-BE-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-BE-NEXT:    vpkudum v2, v3, v2
+; CHECK-BE-NEXT:    vpkudum v2, v2, v3
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <4 x i64>, <4 x i64>* %0, align 32
@@ -128,42 +128,42 @@ define void @test8elt(<8 x float>* noali
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv v2, 48(r4)
-; CHECK-P9-NEXT:    lxv v3, 32(r4)
-; CHECK-P9-NEXT:    lxv v4, 16(r4)
 ; CHECK-P9-NEXT:    lxv v5, 0(r4)
 ; CHECK-P9-NEXT:    xvcvuxdsp vs0, v5
-; CHECK-P9-NEXT:    xvcvuxdsp vs1, v4
-; CHECK-P9-NEXT:    xvcvuxdsp vs2, v3
-; CHECK-P9-NEXT:    xvcvuxdsp vs3, v2
+; CHECK-P9-NEXT:    lxv v4, 16(r4)
+; CHECK-P9-NEXT:    xxsldwi v5, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvuxdsp vs0, v4
+; CHECK-P9-NEXT:    lxv v3, 32(r4)
+; CHECK-P9-NEXT:    xxsldwi v4, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvuxdsp vs0, v3
+; CHECK-P9-NEXT:    lxv v2, 48(r4)
+; CHECK-P9-NEXT:    vpkudum v3, v4, v5
+; CHECK-P9-NEXT:    stxv v3, 0(r3)
+; CHECK-P9-NEXT:    xxsldwi v4, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvuxdsp vs0, v2
 ; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxsldwi v4, vs2, vs2, 3
-; CHECK-P9-NEXT:    xxsldwi v5, vs3, vs3, 3
-; CHECK-P9-NEXT:    vpkudum v2, v3, v2
-; CHECK-P9-NEXT:    vpkudum v3, v5, v4
-; CHECK-P9-NEXT:    stxv v3, 16(r3)
-; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    vpkudum v2, v2, v4
+; CHECK-P9-NEXT:    stxv v2, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 32(r4)
-; CHECK-BE-NEXT:    lxv v3, 48(r4)
-; CHECK-BE-NEXT:    lxv v4, 0(r4)
 ; CHECK-BE-NEXT:    lxv v5, 16(r4)
 ; CHECK-BE-NEXT:    xvcvuxdsp vs0, v5
-; CHECK-BE-NEXT:    xvcvuxdsp vs1, v4
-; CHECK-BE-NEXT:    xvcvuxdsp vs2, v3
-; CHECK-BE-NEXT:    xvcvuxdsp vs3, v2
+; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    xxsldwi v5, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvuxdsp vs0, v4
+; CHECK-BE-NEXT:    lxv v3, 48(r4)
+; CHECK-BE-NEXT:    xxsldwi v4, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvuxdsp vs0, v3
+; CHECK-BE-NEXT:    lxv v2, 32(r4)
+; CHECK-BE-NEXT:    vpkudum v3, v4, v5
+; CHECK-BE-NEXT:    stxv v3, 0(r3)
+; CHECK-BE-NEXT:    xxsldwi v4, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvuxdsp vs0, v2
 ; CHECK-BE-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxsldwi v4, vs2, vs2, 3
-; CHECK-BE-NEXT:    xxsldwi v5, vs3, vs3, 3
-; CHECK-BE-NEXT:    vpkudum v2, v3, v2
-; CHECK-BE-NEXT:    vpkudum v3, v5, v4
-; CHECK-BE-NEXT:    stxv v3, 16(r3)
-; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    vpkudum v2, v2, v4
+; CHECK-BE-NEXT:    stxv v2, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x i64>, <8 x i64>* %0, align 64
@@ -226,74 +226,74 @@ define void @test16elt(<16 x float>* noa
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv v2, 48(r4)
-; CHECK-P9-NEXT:    lxv v3, 32(r4)
-; CHECK-P9-NEXT:    lxv v4, 16(r4)
-; CHECK-P9-NEXT:    lxv v5, 0(r4)
-; CHECK-P9-NEXT:    lxv v0, 112(r4)
-; CHECK-P9-NEXT:    lxv v1, 96(r4)
-; CHECK-P9-NEXT:    lxv v6, 80(r4)
-; CHECK-P9-NEXT:    lxv v7, 64(r4)
+; CHECK-P9-NEXT:    lxv v7, 0(r4)
+; CHECK-P9-NEXT:    xvcvuxdsp vs0, v7
+; CHECK-P9-NEXT:    lxv v6, 16(r4)
+; CHECK-P9-NEXT:    xxsldwi v7, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvuxdsp vs0, v6
+; CHECK-P9-NEXT:    lxv v1, 32(r4)
+; CHECK-P9-NEXT:    xxsldwi v6, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvuxdsp vs0, v1
+; CHECK-P9-NEXT:    lxv v0, 48(r4)
+; CHECK-P9-NEXT:    vpkudum v1, v6, v7
+; CHECK-P9-NEXT:    xxsldwi v6, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvuxdsp vs0, v0
+; CHECK-P9-NEXT:    lxv v5, 64(r4)
+; CHECK-P9-NEXT:    xxsldwi v0, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xvcvuxdsp vs0, v5
-; CHECK-P9-NEXT:    xvcvuxdsp vs1, v4
-; CHECK-P9-NEXT:    xvcvuxdsp vs2, v3
-; CHECK-P9-NEXT:    xvcvuxdsp vs3, v2
-; CHECK-P9-NEXT:    xvcvuxdsp vs4, v7
-; CHECK-P9-NEXT:    xvcvuxdsp vs5, v6
-; CHECK-P9-NEXT:    xvcvuxdsp vs6, v1
-; CHECK-P9-NEXT:    xvcvuxdsp vs7, v0
-; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxsldwi v4, vs2, vs2, 3
-; CHECK-P9-NEXT:    xxsldwi v5, vs3, vs3, 3
-; CHECK-P9-NEXT:    xxsldwi v0, vs4, vs4, 3
-; CHECK-P9-NEXT:    xxsldwi v1, vs5, vs5, 3
-; CHECK-P9-NEXT:    xxsldwi v6, vs6, vs6, 3
-; CHECK-P9-NEXT:    xxsldwi v7, vs7, vs7, 3
-; CHECK-P9-NEXT:    vpkudum v2, v3, v2
-; CHECK-P9-NEXT:    vpkudum v3, v5, v4
-; CHECK-P9-NEXT:    vpkudum v4, v1, v0
-; CHECK-P9-NEXT:    vpkudum v5, v7, v6
-; CHECK-P9-NEXT:    stxv v3, 16(r3)
-; CHECK-P9-NEXT:    stxv v2, 0(r3)
-; CHECK-P9-NEXT:    stxv v5, 48(r3)
+; CHECK-P9-NEXT:    lxv v4, 80(r4)
+; CHECK-P9-NEXT:    vpkudum v0, v0, v6
+; CHECK-P9-NEXT:    xxsldwi v5, vs0, vs0, 3
+; CHECK-P9-NEXT:    lxv v3, 96(r4)
+; CHECK-P9-NEXT:    xvcvuxdsp vs0, v4
+; CHECK-P9-NEXT:    xxsldwi v4, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvuxdsp vs0, v3
+; CHECK-P9-NEXT:    lxv v2, 112(r4)
+; CHECK-P9-NEXT:    stxv v0, 16(r3)
+; CHECK-P9-NEXT:    stxv v1, 0(r3)
+; CHECK-P9-NEXT:    vpkudum v4, v4, v5
 ; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvuxdsp vs0, v2
+; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT:    vpkudum v2, v2, v3
+; CHECK-P9-NEXT:    stxv v2, 48(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 32(r4)
-; CHECK-BE-NEXT:    lxv v3, 48(r4)
-; CHECK-BE-NEXT:    lxv v4, 0(r4)
-; CHECK-BE-NEXT:    lxv v5, 16(r4)
-; CHECK-BE-NEXT:    lxv v0, 96(r4)
-; CHECK-BE-NEXT:    lxv v1, 112(r4)
-; CHECK-BE-NEXT:    lxv v6, 64(r4)
-; CHECK-BE-NEXT:    lxv v7, 80(r4)
+; CHECK-BE-NEXT:    lxv v7, 16(r4)
+; CHECK-BE-NEXT:    xvcvuxdsp vs0, v7
+; CHECK-BE-NEXT:    lxv v6, 0(r4)
+; CHECK-BE-NEXT:    xxsldwi v7, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvuxdsp vs0, v6
+; CHECK-BE-NEXT:    lxv v1, 48(r4)
+; CHECK-BE-NEXT:    xxsldwi v6, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvuxdsp vs0, v1
+; CHECK-BE-NEXT:    lxv v0, 32(r4)
+; CHECK-BE-NEXT:    vpkudum v1, v6, v7
+; CHECK-BE-NEXT:    xxsldwi v6, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvuxdsp vs0, v0
+; CHECK-BE-NEXT:    lxv v5, 80(r4)
+; CHECK-BE-NEXT:    xxsldwi v0, vs0, vs0, 3
 ; CHECK-BE-NEXT:    xvcvuxdsp vs0, v5
-; CHECK-BE-NEXT:    xvcvuxdsp vs1, v4
-; CHECK-BE-NEXT:    xvcvuxdsp vs2, v3
-; CHECK-BE-NEXT:    xvcvuxdsp vs3, v2
-; CHECK-BE-NEXT:    xvcvuxdsp vs4, v7
-; CHECK-BE-NEXT:    xvcvuxdsp vs5, v6
-; CHECK-BE-NEXT:    xvcvuxdsp vs6, v1
-; CHECK-BE-NEXT:    xvcvuxdsp vs7, v0
-; CHECK-BE-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxsldwi v4, vs2, vs2, 3
-; CHECK-BE-NEXT:    xxsldwi v5, vs3, vs3, 3
-; CHECK-BE-NEXT:    xxsldwi v0, vs4, vs4, 3
-; CHECK-BE-NEXT:    xxsldwi v1, vs5, vs5, 3
-; CHECK-BE-NEXT:    xxsldwi v6, vs6, vs6, 3
-; CHECK-BE-NEXT:    xxsldwi v7, vs7, vs7, 3
-; CHECK-BE-NEXT:    vpkudum v2, v3, v2
-; CHECK-BE-NEXT:    vpkudum v3, v5, v4
-; CHECK-BE-NEXT:    vpkudum v4, v1, v0
-; CHECK-BE-NEXT:    vpkudum v5, v7, v6
-; CHECK-BE-NEXT:    stxv v3, 16(r3)
-; CHECK-BE-NEXT:    stxv v2, 0(r3)
-; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    lxv v4, 64(r4)
+; CHECK-BE-NEXT:    vpkudum v0, v0, v6
+; CHECK-BE-NEXT:    xxsldwi v5, vs0, vs0, 3
+; CHECK-BE-NEXT:    lxv v3, 112(r4)
+; CHECK-BE-NEXT:    xvcvuxdsp vs0, v4
+; CHECK-BE-NEXT:    xxsldwi v4, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvuxdsp vs0, v3
+; CHECK-BE-NEXT:    lxv v2, 96(r4)
+; CHECK-BE-NEXT:    stxv v0, 16(r3)
+; CHECK-BE-NEXT:    stxv v1, 0(r3)
+; CHECK-BE-NEXT:    vpkudum v4, v4, v5
 ; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvuxdsp vs0, v2
+; CHECK-BE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-BE-NEXT:    vpkudum v2, v2, v3
+; CHECK-BE-NEXT:    stxv v2, 48(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i64>, <16 x i64>* %0, align 128
@@ -321,14 +321,14 @@ define i64 @test2elt_signed(<2 x i64> %a
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
-; CHECK-P9-NEXT:    xxlor vs1, v2, v2
-; CHECK-P9-NEXT:    xscvsxdsp f1, f1
 ; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvdpspn vs1, f1
 ; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 1
+; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT:    xxlor vs0, v2, v2
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
 ; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 1
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    vmrglw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
@@ -366,24 +366,24 @@ define <4 x float> @test4elt_signed(<4 x
 ;
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv v2, 16(r3)
 ; CHECK-P9-NEXT:    lxv v3, 0(r3)
 ; CHECK-P9-NEXT:    xvcvsxdsp vs0, v3
-; CHECK-P9-NEXT:    xvcvsxdsp vs1, v2
+; CHECK-P9-NEXT:    lxv v2, 16(r3)
+; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvsxdsp vs0, v2
 ; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-P9-NEXT:    vpkudum v2, v3, v2
+; CHECK-P9-NEXT:    vpkudum v2, v2, v3
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
 ; CHECK-BE-NEXT:    lxv v3, 16(r3)
 ; CHECK-BE-NEXT:    xvcvsxdsp vs0, v3
-; CHECK-BE-NEXT:    xvcvsxdsp vs1, v2
+; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvsxdsp vs0, v2
 ; CHECK-BE-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-BE-NEXT:    vpkudum v2, v3, v2
+; CHECK-BE-NEXT:    vpkudum v2, v2, v3
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <4 x i64>, <4 x i64>* %0, align 32
@@ -421,42 +421,42 @@ define void @test8elt_signed(<8 x float>
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv v2, 48(r4)
-; CHECK-P9-NEXT:    lxv v3, 32(r4)
-; CHECK-P9-NEXT:    lxv v4, 16(r4)
 ; CHECK-P9-NEXT:    lxv v5, 0(r4)
 ; CHECK-P9-NEXT:    xvcvsxdsp vs0, v5
-; CHECK-P9-NEXT:    xvcvsxdsp vs1, v4
-; CHECK-P9-NEXT:    xvcvsxdsp vs2, v3
-; CHECK-P9-NEXT:    xvcvsxdsp vs3, v2
+; CHECK-P9-NEXT:    lxv v4, 16(r4)
+; CHECK-P9-NEXT:    xxsldwi v5, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvsxdsp vs0, v4
+; CHECK-P9-NEXT:    lxv v3, 32(r4)
+; CHECK-P9-NEXT:    xxsldwi v4, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvsxdsp vs0, v3
+; CHECK-P9-NEXT:    lxv v2, 48(r4)
+; CHECK-P9-NEXT:    vpkudum v3, v4, v5
+; CHECK-P9-NEXT:    stxv v3, 0(r3)
+; CHECK-P9-NEXT:    xxsldwi v4, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvsxdsp vs0, v2
 ; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxsldwi v4, vs2, vs2, 3
-; CHECK-P9-NEXT:    xxsldwi v5, vs3, vs3, 3
-; CHECK-P9-NEXT:    vpkudum v2, v3, v2
-; CHECK-P9-NEXT:    vpkudum v3, v5, v4
-; CHECK-P9-NEXT:    stxv v3, 16(r3)
-; CHECK-P9-NEXT:    stxv v2, 0(r3)
+; CHECK-P9-NEXT:    vpkudum v2, v2, v4
+; CHECK-P9-NEXT:    stxv v2, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 32(r4)
-; CHECK-BE-NEXT:    lxv v3, 48(r4)
-; CHECK-BE-NEXT:    lxv v4, 0(r4)
 ; CHECK-BE-NEXT:    lxv v5, 16(r4)
 ; CHECK-BE-NEXT:    xvcvsxdsp vs0, v5
-; CHECK-BE-NEXT:    xvcvsxdsp vs1, v4
-; CHECK-BE-NEXT:    xvcvsxdsp vs2, v3
-; CHECK-BE-NEXT:    xvcvsxdsp vs3, v2
+; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    xxsldwi v5, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvsxdsp vs0, v4
+; CHECK-BE-NEXT:    lxv v3, 48(r4)
+; CHECK-BE-NEXT:    xxsldwi v4, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvsxdsp vs0, v3
+; CHECK-BE-NEXT:    lxv v2, 32(r4)
+; CHECK-BE-NEXT:    vpkudum v3, v4, v5
+; CHECK-BE-NEXT:    stxv v3, 0(r3)
+; CHECK-BE-NEXT:    xxsldwi v4, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvsxdsp vs0, v2
 ; CHECK-BE-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxsldwi v4, vs2, vs2, 3
-; CHECK-BE-NEXT:    xxsldwi v5, vs3, vs3, 3
-; CHECK-BE-NEXT:    vpkudum v2, v3, v2
-; CHECK-BE-NEXT:    vpkudum v3, v5, v4
-; CHECK-BE-NEXT:    stxv v3, 16(r3)
-; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    vpkudum v2, v2, v4
+; CHECK-BE-NEXT:    stxv v2, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <8 x i64>, <8 x i64>* %0, align 64
@@ -519,74 +519,74 @@ define void @test16elt_signed(<16 x floa
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv v2, 48(r4)
-; CHECK-P9-NEXT:    lxv v3, 32(r4)
-; CHECK-P9-NEXT:    lxv v4, 16(r4)
-; CHECK-P9-NEXT:    lxv v5, 0(r4)
-; CHECK-P9-NEXT:    lxv v0, 112(r4)
-; CHECK-P9-NEXT:    lxv v1, 96(r4)
-; CHECK-P9-NEXT:    lxv v6, 80(r4)
-; CHECK-P9-NEXT:    lxv v7, 64(r4)
+; CHECK-P9-NEXT:    lxv v7, 0(r4)
+; CHECK-P9-NEXT:    xvcvsxdsp vs0, v7
+; CHECK-P9-NEXT:    lxv v6, 16(r4)
+; CHECK-P9-NEXT:    xxsldwi v7, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvsxdsp vs0, v6
+; CHECK-P9-NEXT:    lxv v1, 32(r4)
+; CHECK-P9-NEXT:    xxsldwi v6, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvsxdsp vs0, v1
+; CHECK-P9-NEXT:    lxv v0, 48(r4)
+; CHECK-P9-NEXT:    vpkudum v1, v6, v7
+; CHECK-P9-NEXT:    xxsldwi v6, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvsxdsp vs0, v0
+; CHECK-P9-NEXT:    lxv v5, 64(r4)
+; CHECK-P9-NEXT:    xxsldwi v0, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xvcvsxdsp vs0, v5
-; CHECK-P9-NEXT:    xvcvsxdsp vs1, v4
-; CHECK-P9-NEXT:    xvcvsxdsp vs2, v3
-; CHECK-P9-NEXT:    xvcvsxdsp vs3, v2
-; CHECK-P9-NEXT:    xvcvsxdsp vs4, v7
-; CHECK-P9-NEXT:    xvcvsxdsp vs5, v6
-; CHECK-P9-NEXT:    xvcvsxdsp vs6, v1
-; CHECK-P9-NEXT:    xvcvsxdsp vs7, v0
-; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-P9-NEXT:    xxsldwi v4, vs2, vs2, 3
-; CHECK-P9-NEXT:    xxsldwi v5, vs3, vs3, 3
-; CHECK-P9-NEXT:    xxsldwi v0, vs4, vs4, 3
-; CHECK-P9-NEXT:    xxsldwi v1, vs5, vs5, 3
-; CHECK-P9-NEXT:    xxsldwi v6, vs6, vs6, 3
-; CHECK-P9-NEXT:    xxsldwi v7, vs7, vs7, 3
-; CHECK-P9-NEXT:    vpkudum v2, v3, v2
-; CHECK-P9-NEXT:    vpkudum v3, v5, v4
-; CHECK-P9-NEXT:    vpkudum v4, v1, v0
-; CHECK-P9-NEXT:    vpkudum v5, v7, v6
-; CHECK-P9-NEXT:    stxv v3, 16(r3)
-; CHECK-P9-NEXT:    stxv v2, 0(r3)
-; CHECK-P9-NEXT:    stxv v5, 48(r3)
+; CHECK-P9-NEXT:    lxv v4, 80(r4)
+; CHECK-P9-NEXT:    vpkudum v0, v0, v6
+; CHECK-P9-NEXT:    xxsldwi v5, vs0, vs0, 3
+; CHECK-P9-NEXT:    lxv v3, 96(r4)
+; CHECK-P9-NEXT:    xvcvsxdsp vs0, v4
+; CHECK-P9-NEXT:    xxsldwi v4, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvsxdsp vs0, v3
+; CHECK-P9-NEXT:    lxv v2, 112(r4)
+; CHECK-P9-NEXT:    stxv v0, 16(r3)
+; CHECK-P9-NEXT:    stxv v1, 0(r3)
+; CHECK-P9-NEXT:    vpkudum v4, v4, v5
 ; CHECK-P9-NEXT:    stxv v4, 32(r3)
+; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-P9-NEXT:    xvcvsxdsp vs0, v2
+; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P9-NEXT:    vpkudum v2, v2, v3
+; CHECK-P9-NEXT:    stxv v2, 48(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 32(r4)
-; CHECK-BE-NEXT:    lxv v3, 48(r4)
-; CHECK-BE-NEXT:    lxv v4, 0(r4)
-; CHECK-BE-NEXT:    lxv v5, 16(r4)
-; CHECK-BE-NEXT:    lxv v0, 96(r4)
-; CHECK-BE-NEXT:    lxv v1, 112(r4)
-; CHECK-BE-NEXT:    lxv v6, 64(r4)
-; CHECK-BE-NEXT:    lxv v7, 80(r4)
+; CHECK-BE-NEXT:    lxv v7, 16(r4)
+; CHECK-BE-NEXT:    xvcvsxdsp vs0, v7
+; CHECK-BE-NEXT:    lxv v6, 0(r4)
+; CHECK-BE-NEXT:    xxsldwi v7, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvsxdsp vs0, v6
+; CHECK-BE-NEXT:    lxv v1, 48(r4)
+; CHECK-BE-NEXT:    xxsldwi v6, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvsxdsp vs0, v1
+; CHECK-BE-NEXT:    lxv v0, 32(r4)
+; CHECK-BE-NEXT:    vpkudum v1, v6, v7
+; CHECK-BE-NEXT:    xxsldwi v6, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvsxdsp vs0, v0
+; CHECK-BE-NEXT:    lxv v5, 80(r4)
+; CHECK-BE-NEXT:    xxsldwi v0, vs0, vs0, 3
 ; CHECK-BE-NEXT:    xvcvsxdsp vs0, v5
-; CHECK-BE-NEXT:    xvcvsxdsp vs1, v4
-; CHECK-BE-NEXT:    xvcvsxdsp vs2, v3
-; CHECK-BE-NEXT:    xvcvsxdsp vs3, v2
-; CHECK-BE-NEXT:    xvcvsxdsp vs4, v7
-; CHECK-BE-NEXT:    xvcvsxdsp vs5, v6
-; CHECK-BE-NEXT:    xvcvsxdsp vs6, v1
-; CHECK-BE-NEXT:    xvcvsxdsp vs7, v0
-; CHECK-BE-NEXT:    xxsldwi v2, vs0, vs0, 3
-; CHECK-BE-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-BE-NEXT:    xxsldwi v4, vs2, vs2, 3
-; CHECK-BE-NEXT:    xxsldwi v5, vs3, vs3, 3
-; CHECK-BE-NEXT:    xxsldwi v0, vs4, vs4, 3
-; CHECK-BE-NEXT:    xxsldwi v1, vs5, vs5, 3
-; CHECK-BE-NEXT:    xxsldwi v6, vs6, vs6, 3
-; CHECK-BE-NEXT:    xxsldwi v7, vs7, vs7, 3
-; CHECK-BE-NEXT:    vpkudum v2, v3, v2
-; CHECK-BE-NEXT:    vpkudum v3, v5, v4
-; CHECK-BE-NEXT:    vpkudum v4, v1, v0
-; CHECK-BE-NEXT:    vpkudum v5, v7, v6
-; CHECK-BE-NEXT:    stxv v3, 16(r3)
-; CHECK-BE-NEXT:    stxv v2, 0(r3)
-; CHECK-BE-NEXT:    stxv v5, 48(r3)
+; CHECK-BE-NEXT:    lxv v4, 64(r4)
+; CHECK-BE-NEXT:    vpkudum v0, v0, v6
+; CHECK-BE-NEXT:    xxsldwi v5, vs0, vs0, 3
+; CHECK-BE-NEXT:    lxv v3, 112(r4)
+; CHECK-BE-NEXT:    xvcvsxdsp vs0, v4
+; CHECK-BE-NEXT:    xxsldwi v4, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvsxdsp vs0, v3
+; CHECK-BE-NEXT:    lxv v2, 96(r4)
+; CHECK-BE-NEXT:    stxv v0, 16(r3)
+; CHECK-BE-NEXT:    stxv v1, 0(r3)
+; CHECK-BE-NEXT:    vpkudum v4, v4, v5
 ; CHECK-BE-NEXT:    stxv v4, 32(r3)
+; CHECK-BE-NEXT:    xxsldwi v3, vs0, vs0, 3
+; CHECK-BE-NEXT:    xvcvsxdsp vs0, v2
+; CHECK-BE-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-BE-NEXT:    vpkudum v2, v2, v3
+; CHECK-BE-NEXT:    stxv v2, 48(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i64>, <16 x i64>* %0, align 128

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll Wed Jan  2 21:04:18 2019
@@ -35,20 +35,20 @@ define i64 @test2elt(i16 %a.coerce) loca
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    mtvsrws v2, r3
 ; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    li r4, 1
 ; CHECK-P9-NEXT:    vextubrx r3, r3, v2
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
 ; CHECK-P9-NEXT:    rlwinm r3, r3, 0, 24, 31
-; CHECK-P9-NEXT:    rlwinm r4, r4, 0, 24, 31
 ; CHECK-P9-NEXT:    mtvsrwz f0, r3
-; CHECK-P9-NEXT:    mtvsrwz f1, r4
+; CHECK-P9-NEXT:    li r3, 1
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    vextubrx r3, r3, v2
+; CHECK-P9-NEXT:    rlwinm r3, r3, 0, 24, 31
+; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT:    mtvsrwz f0, r3
 ; CHECK-P9-NEXT:    xscvuxdsp f0, f0
-; CHECK-P9-NEXT:    xscvuxdsp f1, f1
 ; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xscvdpspn vs1, f1
 ; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 1
-; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 1
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    vmrglw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
@@ -56,18 +56,18 @@ define i64 @test2elt(i16 %a.coerce) loca
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrws v2, r3
 ; CHECK-BE-NEXT:    li r3, 1
-; CHECK-BE-NEXT:    li r4, 0
 ; CHECK-BE-NEXT:    vextublx r3, r3, v2
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
 ; CHECK-BE-NEXT:    rlwinm r3, r3, 0, 24, 31
-; CHECK-BE-NEXT:    rlwinm r4, r4, 0, 24, 31
 ; CHECK-BE-NEXT:    mtvsrwz f0, r3
-; CHECK-BE-NEXT:    mtvsrwz f1, r4
+; CHECK-BE-NEXT:    li r3, 0
+; CHECK-BE-NEXT:    xscvuxdsp f0, f0
+; CHECK-BE-NEXT:    vextublx r3, r3, v2
+; CHECK-BE-NEXT:    rlwinm r3, r3, 0, 24, 31
+; CHECK-BE-NEXT:    xscvdpspn v3, f0
+; CHECK-BE-NEXT:    mtvsrwz f0, r3
 ; CHECK-BE-NEXT:    xscvuxdsp f0, f0
-; CHECK-BE-NEXT:    xscvuxdsp f1, f1
 ; CHECK-BE-NEXT:    xscvdpspn v2, f0
-; CHECK-BE-NEXT:    xscvdpspn v3, f1
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -92,23 +92,23 @@ define <4 x float> @test4elt(i32 %a.coer
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
-; CHECK-P9-NEXT:    mtvsrws v3, r3
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r3
 ; CHECK-P9-NEXT:    xxlxor v4, v4, v4
-; CHECK-P9-NEXT:    addi r4, r4, .LCPI1_0@toc@l
-; CHECK-P9-NEXT:    lxvx v2, 0, r4
-; CHECK-P9-NEXT:    vperm v2, v4, v3, v2
+; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-P9-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
-; CHECK-BE-NEXT:    mtvsrws v3, r3
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r3
 ; CHECK-BE-NEXT:    xxlxor v4, v4, v4
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_0@toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r4
-; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
+; CHECK-BE-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-BE-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -140,39 +140,39 @@ define void @test8elt(<8 x float>* noali
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI2_1@toc@ha
 ; CHECK-P9-NEXT:    mtvsrd f0, r4
-; CHECK-P9-NEXT:    xxlxor v5, v5, v5
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI2_0@toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI2_1@toc@l
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    lxvx v2, 0, r5
-; CHECK-P9-NEXT:    lxvx v3, 0, r6
-; CHECK-P9-NEXT:    vperm v2, v5, v4, v2
-; CHECK-P9-NEXT:    vperm v3, v5, v4, v3
-; CHECK-P9-NEXT:    xvcvuxwsp vs0, v2
-; CHECK-P9-NEXT:    xvcvuxwsp vs1, v3
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_0@toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_1@toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P9-NEXT:    xvcvuxwsp vs0, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    xvcvuxwsp vs1, v2
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI2_1@toc@ha
-; CHECK-BE-NEXT:    mtvsrd v4, r4
-; CHECK-BE-NEXT:    xxlxor v5, v5, v5
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI2_0@toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI2_1@toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r5
-; CHECK-BE-NEXT:    lxvx v3, 0, r6
-; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    vperm v3, v5, v4, v3
-; CHECK-BE-NEXT:    xvcvuxwsp vs0, v2
-; CHECK-BE-NEXT:    xvcvuxwsp vs1, v3
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_1@toc@l
+; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
+; CHECK-BE-NEXT:    xvcvuxwsp vs0, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xvcvuxwsp vs1, v2
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <8 x i8>
@@ -217,59 +217,59 @@ define void @test16elt(<16 x float>* noa
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI3_1@toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI3_2@toc@ha
-; CHECK-P9-NEXT:    addis r7, r2, .LCPI3_3@toc@ha
-; CHECK-P9-NEXT:    xxlxor v1, v1, v1
 ; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_0@toc@l
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI3_1@toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI3_2@toc@l
-; CHECK-P9-NEXT:    addi r7, r7, .LCPI3_3@toc@l
-; CHECK-P9-NEXT:    lxvx v3, 0, r4
-; CHECK-P9-NEXT:    lxvx v4, 0, r5
-; CHECK-P9-NEXT:    lxvx v5, 0, r6
-; CHECK-P9-NEXT:    lxvx v0, 0, r7
-; CHECK-P9-NEXT:    vperm v3, v1, v2, v3
-; CHECK-P9-NEXT:    vperm v4, v1, v2, v4
-; CHECK-P9-NEXT:    vperm v5, v1, v2, v5
-; CHECK-P9-NEXT:    vperm v2, v1, v2, v0
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_1@toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-P9-NEXT:    xvcvuxwsp vs0, v3
-; CHECK-P9-NEXT:    xvcvuxwsp vs1, v4
-; CHECK-P9-NEXT:    xvcvuxwsp vs2, v5
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_2@toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    xvcvuxwsp vs1, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_3@toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    xvcvuxwsp vs2, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
 ; CHECK-P9-NEXT:    xvcvuxwsp vs3, v2
 ; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI3_1@toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI3_2@toc@ha
-; CHECK-BE-NEXT:    addis r7, r2, .LCPI3_3@toc@ha
-; CHECK-BE-NEXT:    xxlxor v1, v1, v1
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI3_1@toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI3_2@toc@l
-; CHECK-BE-NEXT:    addi r7, r7, .LCPI3_3@toc@l
-; CHECK-BE-NEXT:    lxvx v3, 0, r4
-; CHECK-BE-NEXT:    lxvx v4, 0, r5
-; CHECK-BE-NEXT:    lxvx v5, 0, r6
-; CHECK-BE-NEXT:    lxvx v0, 0, r7
-; CHECK-BE-NEXT:    vperm v3, v2, v1, v3
-; CHECK-BE-NEXT:    vperm v4, v1, v2, v4
-; CHECK-BE-NEXT:    vperm v5, v1, v2, v5
-; CHECK-BE-NEXT:    vperm v2, v1, v2, v0
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_1@toc@l
+; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
 ; CHECK-BE-NEXT:    xvcvuxwsp vs0, v3
-; CHECK-BE-NEXT:    xvcvuxwsp vs1, v4
-; CHECK-BE-NEXT:    xvcvuxwsp vs2, v5
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_2@toc@l
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xvcvuxwsp vs1, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_3@toc@l
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xvcvuxwsp vs2, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    xvcvuxwsp vs3, v2
 ; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs2, 32(r3)
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = uitofp <16 x i8> %a to <16 x float>
@@ -303,20 +303,20 @@ define i64 @test2elt_signed(i16 %a.coerc
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    mtvsrws v2, r3
 ; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    li r4, 1
 ; CHECK-P9-NEXT:    vextubrx r3, r3, v2
-; CHECK-P9-NEXT:    vextubrx r4, r4, v2
 ; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    extsb r4, r4
 ; CHECK-P9-NEXT:    mtvsrwa f0, r3
-; CHECK-P9-NEXT:    mtvsrwa f1, r4
+; CHECK-P9-NEXT:    li r3, 1
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvdpspn vs0, f0
+; CHECK-P9-NEXT:    vextubrx r3, r3, v2
+; CHECK-P9-NEXT:    extsb r3, r3
+; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 1
+; CHECK-P9-NEXT:    mtvsrwa f0, r3
 ; CHECK-P9-NEXT:    xscvsxdsp f0, f0
-; CHECK-P9-NEXT:    xscvsxdsp f1, f1
 ; CHECK-P9-NEXT:    xscvdpspn vs0, f0
-; CHECK-P9-NEXT:    xscvdpspn vs1, f1
 ; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 1
-; CHECK-P9-NEXT:    xxsldwi v3, vs1, vs1, 1
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    vmrglw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
 ; CHECK-P9-NEXT:    blr
 ;
@@ -324,18 +324,18 @@ define i64 @test2elt_signed(i16 %a.coerc
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrws v2, r3
 ; CHECK-BE-NEXT:    li r3, 1
-; CHECK-BE-NEXT:    li r4, 0
 ; CHECK-BE-NEXT:    vextublx r3, r3, v2
-; CHECK-BE-NEXT:    vextublx r4, r4, v2
 ; CHECK-BE-NEXT:    extsb r3, r3
-; CHECK-BE-NEXT:    extsb r4, r4
 ; CHECK-BE-NEXT:    mtvsrwa f0, r3
-; CHECK-BE-NEXT:    mtvsrwa f1, r4
+; CHECK-BE-NEXT:    li r3, 0
+; CHECK-BE-NEXT:    xscvsxdsp f0, f0
+; CHECK-BE-NEXT:    vextublx r3, r3, v2
+; CHECK-BE-NEXT:    extsb r3, r3
+; CHECK-BE-NEXT:    xscvdpspn v3, f0
+; CHECK-BE-NEXT:    mtvsrwa f0, r3
 ; CHECK-BE-NEXT:    xscvsxdsp f0, f0
-; CHECK-BE-NEXT:    xscvsxdsp f1, f1
 ; CHECK-BE-NEXT:    xscvdpspn v2, f0
-; CHECK-BE-NEXT:    xscvdpspn v3, f1
-; CHECK-BE-NEXT:    vmrghw v2, v3, v2
+; CHECK-BE-NEXT:    vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:    mfvsrd r3, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -363,22 +363,22 @@ define <4 x float> @test4elt_signed(i32
 ;
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
-; CHECK-P9-NEXT:    mtvsrws v3, r3
-; CHECK-P9-NEXT:    addi r4, r4, .LCPI5_0@toc@l
-; CHECK-P9-NEXT:    lxvx v2, 0, r4
-; CHECK-P9-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, .LCPI5_0@toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r3
+; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P9-NEXT:    vextsb2w v2, v2
 ; CHECK-P9-NEXT:    xvcvsxwsp v2, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
-; CHECK-BE-NEXT:    mtvsrws v3, r3
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI5_0@toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r4
-; CHECK-BE-NEXT:    vperm v2, v3, v3, v2
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r3
+; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-BE-NEXT:    vextsb2w v2, v2
 ; CHECK-BE-NEXT:    xvcvsxwsp v2, v2
 ; CHECK-BE-NEXT:    blr
@@ -416,41 +416,41 @@ define void @test8elt_signed(<8 x float>
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI6_1@toc@ha
 ; CHECK-P9-NEXT:    mtvsrd f0, r4
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI6_0@toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI6_1@toc@l
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    lxvx v2, 0, r5
-; CHECK-P9-NEXT:    lxvx v3, 0, r6
-; CHECK-P9-NEXT:    vperm v2, v4, v4, v2
-; CHECK-P9-NEXT:    vperm v3, v4, v4, v3
-; CHECK-P9-NEXT:    vextsb2w v2, v2
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI6_0@toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI6_1@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI6_1@toc@l
 ; CHECK-P9-NEXT:    vextsb2w v3, v3
-; CHECK-P9-NEXT:    xvcvsxwsp vs0, v2
-; CHECK-P9-NEXT:    xvcvsxwsp vs1, v3
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    xvcvsxwsp vs0, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    vextsb2w v2, v2
+; CHECK-P9-NEXT:    xvcvsxwsp vs1, v2
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI6_1@toc@ha
-; CHECK-BE-NEXT:    mtvsrd v4, r4
-; CHECK-BE-NEXT:    xxlxor v5, v5, v5
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI6_0@toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI6_1@toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r5
-; CHECK-BE-NEXT:    lxvx v3, 0, r6
-; CHECK-BE-NEXT:    vperm v2, v5, v4, v2
-; CHECK-BE-NEXT:    vperm v3, v4, v4, v3
-; CHECK-BE-NEXT:    vextsb2w v2, v2
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_0@toc@l
+; CHECK-BE-NEXT:    lxvx v4, 0, r4
+; CHECK-BE-NEXT:    xxlxor v3, v3, v3
+; CHECK-BE-NEXT:    vperm v3, v3, v2, v4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_1@toc@ha
 ; CHECK-BE-NEXT:    vextsb2w v3, v3
-; CHECK-BE-NEXT:    xvcvsxwsp vs0, v2
-; CHECK-BE-NEXT:    xvcvsxwsp vs1, v3
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_1@toc@l
+; CHECK-BE-NEXT:    xvcvsxwsp vs0, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    vextsb2w v2, v2
+; CHECK-BE-NEXT:    xvcvsxwsp vs1, v2
 ; CHECK-BE-NEXT:    stxv vs1, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -505,66 +505,66 @@ define void @test16elt_signed(<16 x floa
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_0 at toc@ha
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI7_1 at toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI7_2 at toc@ha
-; CHECK-P9-NEXT:    addis r7, r2, .LCPI7_3 at toc@ha
 ; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_0 at toc@l
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI7_1 at toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI7_2 at toc@l
-; CHECK-P9-NEXT:    addi r7, r7, .LCPI7_3 at toc@l
-; CHECK-P9-NEXT:    lxvx v3, 0, r4
-; CHECK-P9-NEXT:    lxvx v4, 0, r5
-; CHECK-P9-NEXT:    lxvx v5, 0, r6
-; CHECK-P9-NEXT:    lxvx v0, 0, r7
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_1 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_1 at toc@l
 ; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
-; CHECK-P9-NEXT:    vperm v4, v2, v2, v4
-; CHECK-P9-NEXT:    vperm v5, v2, v2, v5
-; CHECK-P9-NEXT:    vperm v2, v2, v2, v0
 ; CHECK-P9-NEXT:    vextsb2w v3, v3
-; CHECK-P9-NEXT:    vextsb2w v4, v4
-; CHECK-P9-NEXT:    vextsb2w v5, v5
-; CHECK-P9-NEXT:    vextsb2w v2, v2
 ; CHECK-P9-NEXT:    xvcvsxwsp vs0, v3
-; CHECK-P9-NEXT:    xvcvsxwsp vs1, v4
-; CHECK-P9-NEXT:    xvcvsxwsp vs2, v5
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_2 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_2 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    vextsb2w v3, v3
+; CHECK-P9-NEXT:    xvcvsxwsp vs1, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_3 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_3 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    vextsb2w v3, v3
+; CHECK-P9-NEXT:    xvcvsxwsp vs2, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    vextsb2w v2, v2
 ; CHECK-P9-NEXT:    xvcvsxwsp vs3, v2
 ; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_0 at toc@ha
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_1 at toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI7_2 at toc@ha
-; CHECK-BE-NEXT:    addis r7, r2, .LCPI7_3 at toc@ha
-; CHECK-BE-NEXT:    xxlxor v1, v1, v1
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_0 at toc@l
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_1 at toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI7_2 at toc@l
-; CHECK-BE-NEXT:    addi r7, r7, .LCPI7_3 at toc@l
-; CHECK-BE-NEXT:    lxvx v3, 0, r4
-; CHECK-BE-NEXT:    lxvx v4, 0, r5
-; CHECK-BE-NEXT:    lxvx v5, 0, r6
-; CHECK-BE-NEXT:    lxvx v0, 0, r7
-; CHECK-BE-NEXT:    vperm v3, v1, v2, v3
-; CHECK-BE-NEXT:    vperm v4, v1, v2, v4
-; CHECK-BE-NEXT:    vperm v5, v2, v2, v5
-; CHECK-BE-NEXT:    vperm v2, v2, v2, v0
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_1 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_1 at toc@l
 ; CHECK-BE-NEXT:    vextsb2w v3, v3
-; CHECK-BE-NEXT:    vextsb2w v4, v4
-; CHECK-BE-NEXT:    vextsb2w v5, v5
-; CHECK-BE-NEXT:    vextsb2w v2, v2
 ; CHECK-BE-NEXT:    xvcvsxwsp vs0, v3
-; CHECK-BE-NEXT:    xvcvsxwsp vs1, v4
-; CHECK-BE-NEXT:    xvcvsxwsp vs2, v5
-; CHECK-BE-NEXT:    xvcvsxwsp vs3, v2
-; CHECK-BE-NEXT:    stxv vs1, 48(r3)
-; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_2 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_2 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    vextsb2w v3, v3
+; CHECK-BE-NEXT:    xvcvsxwsp vs1, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_3 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_3 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
+; CHECK-BE-NEXT:    stxv vs1, 48(r3)
+; CHECK-BE-NEXT:    vextsb2w v3, v3
+; CHECK-BE-NEXT:    xvcvsxwsp vs2, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-BE-NEXT:    stxv vs2, 0(r3)
+; CHECK-BE-NEXT:    vextsb2w v2, v2
+; CHECK-BE-NEXT:    xvcvsxwsp vs3, v2
+; CHECK-BE-NEXT:    stxv vs3, 32(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = sitofp <16 x i8> %a to <16 x float>

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll Wed Jan  2 21:04:18 2019
@@ -24,23 +24,23 @@ define <2 x double> @test2elt(i16 %a.coe
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r4, r2, .LCPI0_0 at toc@ha
-; CHECK-P9-NEXT:    mtvsrws v3, r3
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r3
 ; CHECK-P9-NEXT:    xxlxor v4, v4, v4
-; CHECK-P9-NEXT:    addi r4, r4, .LCPI0_0 at toc@l
-; CHECK-P9-NEXT:    lxvx v2, 0, r4
-; CHECK-P9-NEXT:    vperm v2, v4, v3, v2
+; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-P9-NEXT:    xvcvuxddp v2, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI0_0 at toc@ha
-; CHECK-BE-NEXT:    mtvsrws v3, r3
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r3
 ; CHECK-BE-NEXT:    xxlxor v4, v4, v4
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI0_0 at toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r4
-; CHECK-BE-NEXT:    vperm v2, v3, v4, v2
+; CHECK-BE-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-BE-NEXT:    xvcvuxddp v2, v2
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -74,38 +74,38 @@ define void @test4elt(<4 x double>* noal
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI1_0 at toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI1_1 at toc@ha
-; CHECK-P9-NEXT:    mtvsrws v4, r4
-; CHECK-P9-NEXT:    xxlxor v5, v5, v5
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI1_0 at toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI1_1 at toc@l
-; CHECK-P9-NEXT:    lxvx v2, 0, r5
-; CHECK-P9-NEXT:    lxvx v3, 0, r6
-; CHECK-P9-NEXT:    vperm v2, v5, v4, v2
-; CHECK-P9-NEXT:    vperm v3, v5, v4, v3
-; CHECK-P9-NEXT:    xvcvuxddp vs0, v2
-; CHECK-P9-NEXT:    xvcvuxddp vs1, v3
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    mtvsrws v2, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI1_0 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI1_0 at toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI1_1 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI1_1 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P9-NEXT:    xvcvuxddp vs0, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs1, v2
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI1_0 at toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI1_1 at toc@ha
-; CHECK-BE-NEXT:    mtvsrws v4, r4
-; CHECK-BE-NEXT:    xxlxor v5, v5, v5
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI1_0 at toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI1_1 at toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r5
-; CHECK-BE-NEXT:    lxvx v3, 0, r6
-; CHECK-BE-NEXT:    vperm v2, v4, v5, v2
-; CHECK-BE-NEXT:    vperm v3, v5, v4, v3
-; CHECK-BE-NEXT:    xvcvuxddp vs0, v2
-; CHECK-BE-NEXT:    xvcvuxddp vs1, v3
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    mtvsrws v2, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_0 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_0 at toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_1 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_1 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
+; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs1, v2
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i32 %a.coerce to <4 x i8>
@@ -155,63 +155,63 @@ define void @test8elt(<8 x double>* noal
 ;
 ; CHECK-P9-LABEL: test8elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI2_0 at toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI2_1 at toc@ha
-; CHECK-P9-NEXT:    addis r7, r2, .LCPI2_2 at toc@ha
-; CHECK-P9-NEXT:    addis r8, r2, .LCPI2_3 at toc@ha
 ; CHECK-P9-NEXT:    mtvsrd f0, r4
-; CHECK-P9-NEXT:    xxlxor v1, v1, v1
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI2_0 at toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI2_1 at toc@l
-; CHECK-P9-NEXT:    addi r7, r7, .LCPI2_2 at toc@l
-; CHECK-P9-NEXT:    addi r8, r8, .LCPI2_3 at toc@l
-; CHECK-P9-NEXT:    xxswapd v0, vs0
-; CHECK-P9-NEXT:    lxvx v2, 0, r5
-; CHECK-P9-NEXT:    lxvx v3, 0, r6
-; CHECK-P9-NEXT:    lxvx v4, 0, r7
-; CHECK-P9-NEXT:    lxvx v5, 0, r8
-; CHECK-P9-NEXT:    vperm v2, v1, v0, v2
-; CHECK-P9-NEXT:    vperm v3, v1, v0, v3
-; CHECK-P9-NEXT:    vperm v4, v1, v0, v4
-; CHECK-P9-NEXT:    vperm v5, v1, v0, v5
-; CHECK-P9-NEXT:    xvcvuxddp vs0, v2
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_0 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_1 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_1 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P9-NEXT:    xvcvuxddp vs0, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_2 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_2 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    xvcvuxddp vs1, v3
-; CHECK-P9-NEXT:    xvcvuxddp vs2, v4
-; CHECK-P9-NEXT:    xvcvuxddp vs3, v5
-; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_3 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_3 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs2, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs3, v2
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI2_0 at toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI2_1 at toc@ha
-; CHECK-BE-NEXT:    addis r7, r2, .LCPI2_2 at toc@ha
-; CHECK-BE-NEXT:    addis r8, r2, .LCPI2_3 at toc@ha
-; CHECK-BE-NEXT:    mtvsrd v0, r4
-; CHECK-BE-NEXT:    xxlxor v1, v1, v1
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI2_0 at toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI2_1 at toc@l
-; CHECK-BE-NEXT:    addi r7, r7, .LCPI2_2 at toc@l
-; CHECK-BE-NEXT:    addi r8, r8, .LCPI2_3 at toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r5
-; CHECK-BE-NEXT:    lxvx v3, 0, r6
-; CHECK-BE-NEXT:    lxvx v4, 0, r7
-; CHECK-BE-NEXT:    lxvx v5, 0, r8
-; CHECK-BE-NEXT:    vperm v2, v0, v1, v2
-; CHECK-BE-NEXT:    vperm v3, v1, v0, v3
-; CHECK-BE-NEXT:    vperm v4, v1, v0, v4
-; CHECK-BE-NEXT:    vperm v5, v1, v0, v5
-; CHECK-BE-NEXT:    xvcvuxddp vs0, v2
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_0 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_1 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_1 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
+; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_2 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_2 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    xvcvuxddp vs1, v3
-; CHECK-BE-NEXT:    xvcvuxddp vs2, v4
-; CHECK-BE-NEXT:    xvcvuxddp vs3, v5
-; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_3 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_3 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs2, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs3, v2
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <8 x i8>
@@ -292,107 +292,107 @@ define void @test16elt(<16 x double>* no
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_0 at toc@ha
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI3_1 at toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI3_2 at toc@ha
-; CHECK-P9-NEXT:    addis r7, r2, .LCPI3_3 at toc@ha
-; CHECK-P9-NEXT:    addis r8, r2, .LCPI3_4 at toc@ha
-; CHECK-P9-NEXT:    addis r9, r2, .LCPI3_5 at toc@ha
-; CHECK-P9-NEXT:    addis r10, r2, .LCPI3_6 at toc@ha
-; CHECK-P9-NEXT:    addis r11, r2, .LCPI3_7 at toc@ha
-; CHECK-P9-NEXT:    xxlxor v9, v9, v9
 ; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_0 at toc@l
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI3_1 at toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI3_2 at toc@l
-; CHECK-P9-NEXT:    addi r7, r7, .LCPI3_3 at toc@l
-; CHECK-P9-NEXT:    addi r8, r8, .LCPI3_4 at toc@l
-; CHECK-P9-NEXT:    addi r9, r9, .LCPI3_5 at toc@l
-; CHECK-P9-NEXT:    addi r10, r10, .LCPI3_6 at toc@l
-; CHECK-P9-NEXT:    addi r11, r11, .LCPI3_7 at toc@l
-; CHECK-P9-NEXT:    lxvx v3, 0, r4
-; CHECK-P9-NEXT:    lxvx v4, 0, r5
-; CHECK-P9-NEXT:    lxvx v5, 0, r6
-; CHECK-P9-NEXT:    lxvx v0, 0, r7
-; CHECK-P9-NEXT:    lxvx v1, 0, r8
-; CHECK-P9-NEXT:    lxvx v6, 0, r9
-; CHECK-P9-NEXT:    lxvx v7, 0, r10
-; CHECK-P9-NEXT:    lxvx v8, 0, r11
-; CHECK-P9-NEXT:    vperm v3, v9, v2, v3
-; CHECK-P9-NEXT:    vperm v4, v9, v2, v4
-; CHECK-P9-NEXT:    vperm v5, v9, v2, v5
-; CHECK-P9-NEXT:    vperm v0, v9, v2, v0
-; CHECK-P9-NEXT:    vperm v1, v9, v2, v1
-; CHECK-P9-NEXT:    vperm v6, v9, v2, v6
-; CHECK-P9-NEXT:    vperm v7, v9, v2, v7
-; CHECK-P9-NEXT:    vperm v2, v9, v2, v8
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    xxlxor v4, v4, v4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_1 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_1 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-P9-NEXT:    xvcvuxddp vs0, v3
-; CHECK-P9-NEXT:    xvcvuxddp vs1, v4
-; CHECK-P9-NEXT:    xvcvuxddp vs2, v5
-; CHECK-P9-NEXT:    xvcvuxddp vs3, v0
-; CHECK-P9-NEXT:    xvcvuxddp vs4, v1
-; CHECK-P9-NEXT:    xvcvuxddp vs5, v6
-; CHECK-P9-NEXT:    xvcvuxddp vs6, v7
-; CHECK-P9-NEXT:    xvcvuxddp vs7, v2
-; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_2 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_2 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
-; CHECK-P9-NEXT:    stxv vs7, 112(r3)
-; CHECK-P9-NEXT:    stxv vs6, 96(r3)
-; CHECK-P9-NEXT:    stxv vs5, 80(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs1, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_3 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_3 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs2, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_4 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_4 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs3, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_5 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_5 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs4, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_6 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_6 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs5, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_7 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_7 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
+; CHECK-P9-NEXT:    stxv vs5, 80(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs6, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
+; CHECK-P9-NEXT:    stxv vs6, 96(r3)
+; CHECK-P9-NEXT:    xvcvuxddp vs7, v2
+; CHECK-P9-NEXT:    stxv vs7, 112(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_0 at toc@ha
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI3_1 at toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI3_2 at toc@ha
-; CHECK-BE-NEXT:    addis r7, r2, .LCPI3_3 at toc@ha
-; CHECK-BE-NEXT:    addis r8, r2, .LCPI3_4 at toc@ha
-; CHECK-BE-NEXT:    addis r9, r2, .LCPI3_5 at toc@ha
-; CHECK-BE-NEXT:    addis r10, r2, .LCPI3_6 at toc@ha
-; CHECK-BE-NEXT:    addis r11, r2, .LCPI3_7 at toc@ha
-; CHECK-BE-NEXT:    xxlxor v9, v9, v9
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0 at toc@l
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI3_1 at toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI3_2 at toc@l
-; CHECK-BE-NEXT:    addi r7, r7, .LCPI3_3 at toc@l
-; CHECK-BE-NEXT:    addi r8, r8, .LCPI3_4 at toc@l
-; CHECK-BE-NEXT:    addi r9, r9, .LCPI3_5 at toc@l
-; CHECK-BE-NEXT:    addi r10, r10, .LCPI3_6 at toc@l
-; CHECK-BE-NEXT:    addi r11, r11, .LCPI3_7 at toc@l
-; CHECK-BE-NEXT:    lxvx v3, 0, r4
-; CHECK-BE-NEXT:    lxvx v4, 0, r5
-; CHECK-BE-NEXT:    lxvx v5, 0, r6
-; CHECK-BE-NEXT:    lxvx v0, 0, r7
-; CHECK-BE-NEXT:    lxvx v1, 0, r8
-; CHECK-BE-NEXT:    lxvx v6, 0, r9
-; CHECK-BE-NEXT:    lxvx v7, 0, r10
-; CHECK-BE-NEXT:    lxvx v8, 0, r11
-; CHECK-BE-NEXT:    vperm v3, v2, v9, v3
-; CHECK-BE-NEXT:    vperm v4, v9, v2, v4
-; CHECK-BE-NEXT:    vperm v5, v9, v2, v5
-; CHECK-BE-NEXT:    vperm v0, v9, v2, v0
-; CHECK-BE-NEXT:    vperm v1, v9, v2, v1
-; CHECK-BE-NEXT:    vperm v6, v9, v2, v6
-; CHECK-BE-NEXT:    vperm v7, v9, v2, v7
-; CHECK-BE-NEXT:    vperm v2, v9, v2, v8
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_1 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_1 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
 ; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
-; CHECK-BE-NEXT:    xvcvuxddp vs1, v4
-; CHECK-BE-NEXT:    xvcvuxddp vs2, v5
-; CHECK-BE-NEXT:    xvcvuxddp vs3, v0
-; CHECK-BE-NEXT:    xvcvuxddp vs4, v1
-; CHECK-BE-NEXT:    xvcvuxddp vs5, v6
-; CHECK-BE-NEXT:    xvcvuxddp vs6, v7
-; CHECK-BE-NEXT:    xvcvuxddp vs7, v2
-; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs2, 32(r3)
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_2 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_2 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
-; CHECK-BE-NEXT:    stxv vs7, 112(r3)
-; CHECK-BE-NEXT:    stxv vs6, 96(r3)
-; CHECK-BE-NEXT:    stxv vs5, 80(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs1, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_3 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_3 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs2, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_4 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_4 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs3, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_5 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_5 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs4, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_6 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_6 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-BE-NEXT:    stxv vs4, 64(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs5, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_7 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_7 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    stxv vs5, 80(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs6, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
+; CHECK-BE-NEXT:    stxv vs6, 96(r3)
+; CHECK-BE-NEXT:    xvcvuxddp vs7, v2
+; CHECK-BE-NEXT:    stxv vs7, 112(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = uitofp <16 x i8> %a to <16 x double>
@@ -420,22 +420,22 @@ define <2 x double> @test2elt_signed(i16
 ;
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r4, r2, .LCPI4_0 at toc@ha
-; CHECK-P9-NEXT:    mtvsrws v3, r3
-; CHECK-P9-NEXT:    addi r4, r4, .LCPI4_0 at toc@l
-; CHECK-P9-NEXT:    lxvx v2, 0, r4
-; CHECK-P9-NEXT:    vperm v2, v3, v3, v2
+; CHECK-P9-NEXT:    mtvsrws v2, r3
+; CHECK-P9-NEXT:    addis r3, r2, .LCPI4_0 at toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, .LCPI4_0 at toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r3
+; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P9-NEXT:    vextsb2d v2, v2
 ; CHECK-P9-NEXT:    xvcvsxddp v2, v2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI4_0 at toc@ha
-; CHECK-BE-NEXT:    mtvsrws v3, r3
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI4_0 at toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r4
-; CHECK-BE-NEXT:    vperm v2, v3, v3, v2
+; CHECK-BE-NEXT:    mtvsrws v2, r3
+; CHECK-BE-NEXT:    addis r3, r2, .LCPI4_0 at toc@ha
+; CHECK-BE-NEXT:    addi r3, r3, .LCPI4_0 at toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r3
+; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-BE-NEXT:    vextsb2d v2, v2
 ; CHECK-BE-NEXT:    xvcvsxddp v2, v2
 ; CHECK-BE-NEXT:    blr
@@ -477,40 +477,40 @@ define void @test4elt_signed(<4 x double
 ;
 ; CHECK-P9-LABEL: test4elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI5_0 at toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI5_1 at toc@ha
-; CHECK-P9-NEXT:    mtvsrws v4, r4
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI5_0 at toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI5_1 at toc@l
-; CHECK-P9-NEXT:    lxvx v2, 0, r5
-; CHECK-P9-NEXT:    lxvx v3, 0, r6
-; CHECK-P9-NEXT:    vperm v2, v4, v4, v2
-; CHECK-P9-NEXT:    vperm v3, v4, v4, v3
-; CHECK-P9-NEXT:    vextsb2d v2, v2
+; CHECK-P9-NEXT:    mtvsrws v2, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI5_0 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI5_0 at toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI5_1 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI5_1 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-P9-NEXT:    vextsb2d v3, v3
-; CHECK-P9-NEXT:    xvcvsxddp vs0, v2
-; CHECK-P9-NEXT:    xvcvsxddp vs1, v3
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    xvcvsxddp vs0, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    vextsb2d v2, v2
+; CHECK-P9-NEXT:    xvcvsxddp vs1, v2
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI5_0 at toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI5_1 at toc@ha
-; CHECK-BE-NEXT:    mtvsrws v4, r4
-; CHECK-BE-NEXT:    xxlxor v5, v5, v5
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI5_0 at toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI5_1 at toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r5
-; CHECK-BE-NEXT:    lxvx v3, 0, r6
-; CHECK-BE-NEXT:    vperm v2, v5, v4, v2
-; CHECK-BE-NEXT:    vperm v3, v4, v4, v3
-; CHECK-BE-NEXT:    vextsb2d v2, v2
+; CHECK-BE-NEXT:    mtvsrws v2, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI5_0 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI5_0 at toc@l
+; CHECK-BE-NEXT:    lxvx v4, 0, r4
+; CHECK-BE-NEXT:    xxlxor v3, v3, v3
+; CHECK-BE-NEXT:    vperm v3, v3, v2, v4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI5_1 at toc@ha
 ; CHECK-BE-NEXT:    vextsb2d v3, v3
-; CHECK-BE-NEXT:    xvcvsxddp vs0, v2
-; CHECK-BE-NEXT:    xvcvsxddp vs1, v3
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI5_1 at toc@l
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    vextsb2d v2, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v2
 ; CHECK-BE-NEXT:    stxv vs1, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
@@ -572,70 +572,70 @@ define void @test8elt_signed(<8 x double
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI6_0 at toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI6_1 at toc@ha
-; CHECK-P9-NEXT:    addis r7, r2, .LCPI6_2 at toc@ha
-; CHECK-P9-NEXT:    addis r8, r2, .LCPI6_3 at toc@ha
 ; CHECK-P9-NEXT:    mtvsrd f0, r4
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI6_0 at toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI6_1 at toc@l
-; CHECK-P9-NEXT:    addi r7, r7, .LCPI6_2 at toc@l
-; CHECK-P9-NEXT:    addi r8, r8, .LCPI6_3 at toc@l
-; CHECK-P9-NEXT:    xxswapd v0, vs0
-; CHECK-P9-NEXT:    lxvx v2, 0, r5
-; CHECK-P9-NEXT:    lxvx v3, 0, r6
-; CHECK-P9-NEXT:    lxvx v4, 0, r7
-; CHECK-P9-NEXT:    lxvx v5, 0, r8
-; CHECK-P9-NEXT:    vperm v2, v0, v0, v2
-; CHECK-P9-NEXT:    vperm v3, v0, v0, v3
-; CHECK-P9-NEXT:    vperm v4, v0, v0, v4
-; CHECK-P9-NEXT:    vperm v5, v0, v0, v5
-; CHECK-P9-NEXT:    vextsb2d v2, v2
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI6_0 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI6_0 at toc@l
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    xxswapd v2, vs0
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI6_1 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI6_1 at toc@l
+; CHECK-P9-NEXT:    vextsb2d v3, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs0, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI6_2 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI6_2 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    vextsb2d v3, v3
-; CHECK-P9-NEXT:    vextsb2d v4, v4
-; CHECK-P9-NEXT:    vextsb2d v5, v5
-; CHECK-P9-NEXT:    xvcvsxddp vs0, v2
 ; CHECK-P9-NEXT:    xvcvsxddp vs1, v3
-; CHECK-P9-NEXT:    xvcvsxddp vs2, v4
-; CHECK-P9-NEXT:    xvcvsxddp vs3, v5
-; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI6_3 at toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI6_3 at toc@l
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    vextsb2d v3, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs2, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    vextsb2d v2, v2
+; CHECK-P9-NEXT:    xvcvsxddp vs3, v2
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI6_0 at toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI6_1 at toc@ha
-; CHECK-BE-NEXT:    addis r7, r2, .LCPI6_2 at toc@ha
-; CHECK-BE-NEXT:    addis r8, r2, .LCPI6_3 at toc@ha
-; CHECK-BE-NEXT:    mtvsrd v0, r4
-; CHECK-BE-NEXT:    xxlxor v1, v1, v1
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI6_0 at toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI6_1 at toc@l
-; CHECK-BE-NEXT:    addi r7, r7, .LCPI6_2 at toc@l
-; CHECK-BE-NEXT:    addi r8, r8, .LCPI6_3 at toc@l
-; CHECK-BE-NEXT:    lxvx v2, 0, r5
-; CHECK-BE-NEXT:    lxvx v3, 0, r6
-; CHECK-BE-NEXT:    lxvx v4, 0, r7
-; CHECK-BE-NEXT:    lxvx v5, 0, r8
-; CHECK-BE-NEXT:    vperm v2, v1, v0, v2
-; CHECK-BE-NEXT:    vperm v3, v1, v0, v3
-; CHECK-BE-NEXT:    vperm v4, v0, v0, v4
-; CHECK-BE-NEXT:    vperm v5, v0, v0, v5
-; CHECK-BE-NEXT:    vextsb2d v2, v2
+; CHECK-BE-NEXT:    mtvsrd v2, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_0 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_0 at toc@l
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    xxlxor v4, v4, v4
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_1 at toc@ha
+; CHECK-BE-NEXT:    vextsb2d v3, v3
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_1 at toc@l
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_2 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_2 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    vextsb2d v3, v3
-; CHECK-BE-NEXT:    vextsb2d v4, v4
-; CHECK-BE-NEXT:    vextsb2d v5, v5
-; CHECK-BE-NEXT:    xvcvsxddp vs0, v2
 ; CHECK-BE-NEXT:    xvcvsxddp vs1, v3
-; CHECK-BE-NEXT:    xvcvsxddp vs2, v4
-; CHECK-BE-NEXT:    xvcvsxddp vs3, v5
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_3 at toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_3 at toc@l
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-BE-NEXT:    stxv vs1, 48(r3)
-; CHECK-BE-NEXT:    stxv vs3, 32(r3)
-; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    vextsb2d v3, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs2, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-BE-NEXT:    stxv vs2, 0(r3)
+; CHECK-BE-NEXT:    vextsb2d v2, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs3, v2
+; CHECK-BE-NEXT:    stxv vs3, 32(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <8 x i8>
@@ -735,122 +735,122 @@ define void @test16elt_signed(<16 x doub
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
-; CHECK-P9-NEXT:    addis r5, r2, .LCPI7_1@toc@ha
-; CHECK-P9-NEXT:    addis r6, r2, .LCPI7_2@toc@ha
-; CHECK-P9-NEXT:    addis r7, r2, .LCPI7_3@toc@ha
-; CHECK-P9-NEXT:    addis r8, r2, .LCPI7_4@toc@ha
-; CHECK-P9-NEXT:    addis r9, r2, .LCPI7_5@toc@ha
-; CHECK-P9-NEXT:    addis r10, r2, .LCPI7_6@toc@ha
-; CHECK-P9-NEXT:    addis r11, r2, .LCPI7_7@toc@ha
 ; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_0@toc@l
-; CHECK-P9-NEXT:    addi r5, r5, .LCPI7_1@toc@l
-; CHECK-P9-NEXT:    addi r6, r6, .LCPI7_2@toc@l
-; CHECK-P9-NEXT:    addi r7, r7, .LCPI7_3@toc@l
-; CHECK-P9-NEXT:    addi r8, r8, .LCPI7_4@toc@l
-; CHECK-P9-NEXT:    addi r9, r9, .LCPI7_5@toc@l
-; CHECK-P9-NEXT:    addi r10, r10, .LCPI7_6@toc@l
-; CHECK-P9-NEXT:    addi r11, r11, .LCPI7_7@toc@l
-; CHECK-P9-NEXT:    lxvx v3, 0, r4
-; CHECK-P9-NEXT:    lxvx v4, 0, r5
-; CHECK-P9-NEXT:    lxvx v5, 0, r6
-; CHECK-P9-NEXT:    lxvx v0, 0, r7
-; CHECK-P9-NEXT:    lxvx v1, 0, r8
-; CHECK-P9-NEXT:    lxvx v6, 0, r9
-; CHECK-P9-NEXT:    lxvx v7, 0, r10
-; CHECK-P9-NEXT:    lxvx v8, 0, r11
-; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
-; CHECK-P9-NEXT:    vperm v4, v2, v2, v4
-; CHECK-P9-NEXT:    vperm v5, v2, v2, v5
-; CHECK-P9-NEXT:    vperm v0, v2, v2, v0
-; CHECK-P9-NEXT:    vperm v1, v2, v2, v1
-; CHECK-P9-NEXT:    vperm v6, v2, v2, v6
-; CHECK-P9-NEXT:    vperm v7, v2, v2, v7
-; CHECK-P9-NEXT:    vperm v2, v2, v2, v8
-; CHECK-P9-NEXT:    vextsb2d v3, v3
-; CHECK-P9-NEXT:    vextsb2d v4, v4
-; CHECK-P9-NEXT:    vextsb2d v5, v5
-; CHECK-P9-NEXT:    vextsb2d v0, v0
-; CHECK-P9-NEXT:    vextsb2d v1, v1
-; CHECK-P9-NEXT:    vextsb2d v6, v6
-; CHECK-P9-NEXT:    vextsb2d v7, v7
-; CHECK-P9-NEXT:    vextsb2d v2, v2
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_1@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_1@toc@l
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    vextsb2d v3, v3
 ; CHECK-P9-NEXT:    xvcvsxddp vs0, v3
-; CHECK-P9-NEXT:    xvcvsxddp vs1, v4
-; CHECK-P9-NEXT:    xvcvsxddp vs2, v5
-; CHECK-P9-NEXT:    xvcvsxddp vs3, v0
-; CHECK-P9-NEXT:    xvcvsxddp vs4, v1
-; CHECK-P9-NEXT:    xvcvsxddp vs5, v6
-; CHECK-P9-NEXT:    xvcvsxddp vs6, v7
-; CHECK-P9-NEXT:    xvcvsxddp vs7, v2
-; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_2@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_2@toc@l
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
-; CHECK-P9-NEXT:    stxv vs7, 112(r3)
-; CHECK-P9-NEXT:    stxv vs6, 96(r3)
-; CHECK-P9-NEXT:    stxv vs5, 80(r3)
+; CHECK-P9-NEXT:    vextsb2d v3, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs1, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_3@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_3@toc@l
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    vextsb2d v3, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs2, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_4@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_4@toc@l
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    vextsb2d v3, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs3, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_5@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_5@toc@l
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    vextsb2d v3, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs4, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_6@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_6@toc@l
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    vextsb2d v3, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs5, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_7@toc@ha
+; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_7@toc@l
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    stxv vs5, 80(r3)
+; CHECK-P9-NEXT:    vextsb2d v3, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs6, v3
+; CHECK-P9-NEXT:    lxvx v3, 0, r4
+; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
+; CHECK-P9-NEXT:    stxv vs6, 96(r3)
+; CHECK-P9-NEXT:    vextsb2d v2, v2
+; CHECK-P9-NEXT:    xvcvsxddp vs7, v2
+; CHECK-P9-NEXT:    stxv vs7, 112(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
-; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_1@toc@ha
-; CHECK-BE-NEXT:    addis r6, r2, .LCPI7_2@toc@ha
-; CHECK-BE-NEXT:    addis r7, r2, .LCPI7_3@toc@ha
-; CHECK-BE-NEXT:    addis r8, r2, .LCPI7_4@toc@ha
-; CHECK-BE-NEXT:    addis r9, r2, .LCPI7_5@toc@ha
-; CHECK-BE-NEXT:    addis r10, r2, .LCPI7_6@toc@ha
-; CHECK-BE-NEXT:    addis r11, r2, .LCPI7_7@toc@ha
-; CHECK-BE-NEXT:    xxlxor v9, v9, v9
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_0@toc@l
-; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_1@toc@l
-; CHECK-BE-NEXT:    addi r6, r6, .LCPI7_2@toc@l
-; CHECK-BE-NEXT:    addi r7, r7, .LCPI7_3@toc@l
-; CHECK-BE-NEXT:    addi r8, r8, .LCPI7_4@toc@l
-; CHECK-BE-NEXT:    addi r9, r9, .LCPI7_5@toc@l
-; CHECK-BE-NEXT:    addi r10, r10, .LCPI7_6@toc@l
-; CHECK-BE-NEXT:    addi r11, r11, .LCPI7_7@toc@l
-; CHECK-BE-NEXT:    lxvx v3, 0, r4
-; CHECK-BE-NEXT:    lxvx v4, 0, r5
-; CHECK-BE-NEXT:    lxvx v5, 0, r6
-; CHECK-BE-NEXT:    lxvx v0, 0, r7
-; CHECK-BE-NEXT:    lxvx v1, 0, r8
-; CHECK-BE-NEXT:    lxvx v6, 0, r9
-; CHECK-BE-NEXT:    lxvx v7, 0, r10
-; CHECK-BE-NEXT:    lxvx v8, 0, r11
-; CHECK-BE-NEXT:    vperm v3, v9, v2, v3
-; CHECK-BE-NEXT:    vperm v4, v9, v2, v4
-; CHECK-BE-NEXT:    vperm v5, v9, v2, v5
-; CHECK-BE-NEXT:    vperm v0, v9, v2, v0
-; CHECK-BE-NEXT:    vperm v1, v2, v2, v1
-; CHECK-BE-NEXT:    vperm v6, v2, v2, v6
-; CHECK-BE-NEXT:    vperm v7, v2, v2, v7
-; CHECK-BE-NEXT:    vperm v2, v2, v2, v8
-; CHECK-BE-NEXT:    vextsb2d v3, v3
+; CHECK-BE-NEXT:    lxvx v4, 0, r4
+; CHECK-BE-NEXT:    xxlxor v3, v3, v3
+; CHECK-BE-NEXT:    vperm v4, v3, v2, v4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_1@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_1@toc@l
+; CHECK-BE-NEXT:    vextsb2d v4, v4
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v4
+; CHECK-BE-NEXT:    lxvx v4, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_2@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_2@toc@l
+; CHECK-BE-NEXT:    vperm v4, v3, v2, v4
+; CHECK-BE-NEXT:    stxv vs0, 16(r3)
 ; CHECK-BE-NEXT:    vextsb2d v4, v4
-; CHECK-BE-NEXT:    vextsb2d v5, v5
-; CHECK-BE-NEXT:    vextsb2d v0, v0
-; CHECK-BE-NEXT:    vextsb2d v1, v1
-; CHECK-BE-NEXT:    vextsb2d v6, v6
-; CHECK-BE-NEXT:    vextsb2d v7, v7
-; CHECK-BE-NEXT:    vextsb2d v2, v2
-; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs1, v4
-; CHECK-BE-NEXT:    xvcvsxddp vs2, v5
-; CHECK-BE-NEXT:    xvcvsxddp vs3, v0
-; CHECK-BE-NEXT:    xvcvsxddp vs4, v1
-; CHECK-BE-NEXT:    xvcvsxddp vs5, v6
-; CHECK-BE-NEXT:    xvcvsxddp vs6, v7
-; CHECK-BE-NEXT:    xvcvsxddp vs7, v2
-; CHECK-BE-NEXT:    stxv vs3, 112(r3)
-; CHECK-BE-NEXT:    stxv vs2, 80(r3)
+; CHECK-BE-NEXT:    lxvx v4, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_3@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_3@toc@l
+; CHECK-BE-NEXT:    vperm v4, v3, v2, v4
 ; CHECK-BE-NEXT:    stxv vs1, 48(r3)
-; CHECK-BE-NEXT:    stxv vs0, 16(r3)
-; CHECK-BE-NEXT:    stxv vs7, 96(r3)
-; CHECK-BE-NEXT:    stxv vs6, 64(r3)
-; CHECK-BE-NEXT:    stxv vs5, 32(r3)
+; CHECK-BE-NEXT:    vextsb2d v4, v4
+; CHECK-BE-NEXT:    xvcvsxddp vs2, v4
+; CHECK-BE-NEXT:    lxvx v4, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_4@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_4@toc@l
+; CHECK-BE-NEXT:    vperm v3, v3, v2, v4
+; CHECK-BE-NEXT:    stxv vs2, 80(r3)
+; CHECK-BE-NEXT:    vextsb2d v3, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs3, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_5@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_5@toc@l
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
+; CHECK-BE-NEXT:    stxv vs3, 112(r3)
+; CHECK-BE-NEXT:    vextsb2d v3, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs4, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_6@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_6@toc@l
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-BE-NEXT:    stxv vs4, 0(r3)
+; CHECK-BE-NEXT:    vextsb2d v3, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs5, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_7@toc@ha
+; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_7@toc@l
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
+; CHECK-BE-NEXT:    stxv vs5, 32(r3)
+; CHECK-BE-NEXT:    vextsb2d v3, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs6, v3
+; CHECK-BE-NEXT:    lxvx v3, 0, r4
+; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-BE-NEXT:    stxv vs6, 64(r3)
+; CHECK-BE-NEXT:    vextsb2d v2, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs7, v2
+; CHECK-BE-NEXT:    stxv vs7, 96(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = sitofp <16 x i8> %a to <16 x double>

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll Wed Jan  2 21:04:18 2019
@@ -163,58 +163,58 @@ define void @test16elt(<16 x double>* no
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv v2, 48(r4)
-; CHECK-P9-NEXT:    lxv v3, 32(r4)
-; CHECK-P9-NEXT:    lxv v4, 16(r4)
-; CHECK-P9-NEXT:    lxv v5, 0(r4)
-; CHECK-P9-NEXT:    lxv v0, 112(r4)
-; CHECK-P9-NEXT:    lxv v1, 96(r4)
-; CHECK-P9-NEXT:    lxv v6, 80(r4)
-; CHECK-P9-NEXT:    lxv v7, 64(r4)
-; CHECK-P9-NEXT:    xvcvuxddp vs0, v5
-; CHECK-P9-NEXT:    xvcvuxddp vs1, v4
-; CHECK-P9-NEXT:    xvcvuxddp vs2, v3
-; CHECK-P9-NEXT:    xvcvuxddp vs3, v2
-; CHECK-P9-NEXT:    xvcvuxddp vs4, v7
-; CHECK-P9-NEXT:    xvcvuxddp vs5, v6
-; CHECK-P9-NEXT:    xvcvuxddp vs6, v1
-; CHECK-P9-NEXT:    xvcvuxddp vs7, v0
-; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    lxv v2, 112(r4)
+; CHECK-P9-NEXT:    lxv v3, 96(r4)
+; CHECK-P9-NEXT:    lxv v4, 80(r4)
+; CHECK-P9-NEXT:    lxv v5, 64(r4)
+; CHECK-P9-NEXT:    lxv v0, 48(r4)
+; CHECK-P9-NEXT:    xvcvuxddp vs3, v0
+; CHECK-P9-NEXT:    lxv v1, 32(r4)
+; CHECK-P9-NEXT:    lxv v6, 16(r4)
+; CHECK-P9-NEXT:    lxv v7, 0(r4)
+; CHECK-P9-NEXT:    xvcvuxddp vs0, v7
+; CHECK-P9-NEXT:    xvcvuxddp vs1, v6
+; CHECK-P9-NEXT:    xvcvuxddp vs2, v1
+; CHECK-P9-NEXT:    xvcvuxddp vs4, v5
+; CHECK-P9-NEXT:    xvcvuxddp vs5, v4
+; CHECK-P9-NEXT:    xvcvuxddp vs6, v3
+; CHECK-P9-NEXT:    xvcvuxddp vs7, v2
 ; CHECK-P9-NEXT:    stxv vs7, 112(r3)
 ; CHECK-P9-NEXT:    stxv vs6, 96(r3)
 ; CHECK-P9-NEXT:    stxv vs5, 80(r3)
 ; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 48(r4)
-; CHECK-BE-NEXT:    lxv v3, 32(r4)
-; CHECK-BE-NEXT:    lxv v4, 16(r4)
-; CHECK-BE-NEXT:    lxv v5, 0(r4)
-; CHECK-BE-NEXT:    lxv v0, 112(r4)
-; CHECK-BE-NEXT:    lxv v1, 96(r4)
-; CHECK-BE-NEXT:    lxv v6, 80(r4)
-; CHECK-BE-NEXT:    lxv v7, 64(r4)
-; CHECK-BE-NEXT:    xvcvuxddp vs0, v5
-; CHECK-BE-NEXT:    xvcvuxddp vs1, v4
-; CHECK-BE-NEXT:    xvcvuxddp vs2, v3
-; CHECK-BE-NEXT:    xvcvuxddp vs3, v2
-; CHECK-BE-NEXT:    xvcvuxddp vs4, v7
-; CHECK-BE-NEXT:    xvcvuxddp vs5, v6
-; CHECK-BE-NEXT:    xvcvuxddp vs6, v1
-; CHECK-BE-NEXT:    xvcvuxddp vs7, v0
-; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs2, 32(r3)
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv v2, 112(r4)
+; CHECK-BE-NEXT:    lxv v3, 96(r4)
+; CHECK-BE-NEXT:    lxv v4, 80(r4)
+; CHECK-BE-NEXT:    lxv v5, 64(r4)
+; CHECK-BE-NEXT:    lxv v0, 48(r4)
+; CHECK-BE-NEXT:    xvcvuxddp vs3, v0
+; CHECK-BE-NEXT:    lxv v1, 32(r4)
+; CHECK-BE-NEXT:    lxv v6, 16(r4)
+; CHECK-BE-NEXT:    lxv v7, 0(r4)
+; CHECK-BE-NEXT:    xvcvuxddp vs0, v7
+; CHECK-BE-NEXT:    xvcvuxddp vs1, v6
+; CHECK-BE-NEXT:    xvcvuxddp vs2, v1
+; CHECK-BE-NEXT:    xvcvuxddp vs4, v5
+; CHECK-BE-NEXT:    xvcvuxddp vs5, v4
+; CHECK-BE-NEXT:    xvcvuxddp vs6, v3
+; CHECK-BE-NEXT:    xvcvuxddp vs7, v2
 ; CHECK-BE-NEXT:    stxv vs7, 112(r3)
 ; CHECK-BE-NEXT:    stxv vs6, 96(r3)
 ; CHECK-BE-NEXT:    stxv vs5, 80(r3)
 ; CHECK-BE-NEXT:    stxv vs4, 64(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i64>, <16 x i64>* %0, align 128
@@ -377,58 +377,58 @@ define void @test16elt_signed(<16 x doub
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv v2, 48(r4)
-; CHECK-P9-NEXT:    lxv v3, 32(r4)
-; CHECK-P9-NEXT:    lxv v4, 16(r4)
-; CHECK-P9-NEXT:    lxv v5, 0(r4)
-; CHECK-P9-NEXT:    lxv v0, 112(r4)
-; CHECK-P9-NEXT:    lxv v1, 96(r4)
-; CHECK-P9-NEXT:    lxv v6, 80(r4)
-; CHECK-P9-NEXT:    lxv v7, 64(r4)
-; CHECK-P9-NEXT:    xvcvsxddp vs0, v5
-; CHECK-P9-NEXT:    xvcvsxddp vs1, v4
-; CHECK-P9-NEXT:    xvcvsxddp vs2, v3
-; CHECK-P9-NEXT:    xvcvsxddp vs3, v2
-; CHECK-P9-NEXT:    xvcvsxddp vs4, v7
-; CHECK-P9-NEXT:    xvcvsxddp vs5, v6
-; CHECK-P9-NEXT:    xvcvsxddp vs6, v1
-; CHECK-P9-NEXT:    xvcvsxddp vs7, v0
-; CHECK-P9-NEXT:    stxv vs3, 48(r3)
-; CHECK-P9-NEXT:    stxv vs2, 32(r3)
-; CHECK-P9-NEXT:    stxv vs1, 16(r3)
-; CHECK-P9-NEXT:    stxv vs0, 0(r3)
+; CHECK-P9-NEXT:    lxv v2, 112(r4)
+; CHECK-P9-NEXT:    lxv v3, 96(r4)
+; CHECK-P9-NEXT:    lxv v4, 80(r4)
+; CHECK-P9-NEXT:    lxv v5, 64(r4)
+; CHECK-P9-NEXT:    lxv v0, 48(r4)
+; CHECK-P9-NEXT:    xvcvsxddp vs3, v0
+; CHECK-P9-NEXT:    lxv v1, 32(r4)
+; CHECK-P9-NEXT:    lxv v6, 16(r4)
+; CHECK-P9-NEXT:    lxv v7, 0(r4)
+; CHECK-P9-NEXT:    xvcvsxddp vs0, v7
+; CHECK-P9-NEXT:    xvcvsxddp vs1, v6
+; CHECK-P9-NEXT:    xvcvsxddp vs2, v1
+; CHECK-P9-NEXT:    xvcvsxddp vs4, v5
+; CHECK-P9-NEXT:    xvcvsxddp vs5, v4
+; CHECK-P9-NEXT:    xvcvsxddp vs6, v3
+; CHECK-P9-NEXT:    xvcvsxddp vs7, v2
 ; CHECK-P9-NEXT:    stxv vs7, 112(r3)
 ; CHECK-P9-NEXT:    stxv vs6, 96(r3)
 ; CHECK-P9-NEXT:    stxv vs5, 80(r3)
 ; CHECK-P9-NEXT:    stxv vs4, 64(r3)
+; CHECK-P9-NEXT:    stxv vs3, 48(r3)
+; CHECK-P9-NEXT:    stxv vs2, 32(r3)
+; CHECK-P9-NEXT:    stxv vs1, 16(r3)
+; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lxv v2, 48(r4)
-; CHECK-BE-NEXT:    lxv v3, 32(r4)
-; CHECK-BE-NEXT:    lxv v4, 16(r4)
-; CHECK-BE-NEXT:    lxv v5, 0(r4)
-; CHECK-BE-NEXT:    lxv v0, 112(r4)
-; CHECK-BE-NEXT:    lxv v1, 96(r4)
-; CHECK-BE-NEXT:    lxv v6, 80(r4)
-; CHECK-BE-NEXT:    lxv v7, 64(r4)
-; CHECK-BE-NEXT:    xvcvsxddp vs0, v5
-; CHECK-BE-NEXT:    xvcvsxddp vs1, v4
-; CHECK-BE-NEXT:    xvcvsxddp vs2, v3
-; CHECK-BE-NEXT:    xvcvsxddp vs3, v2
-; CHECK-BE-NEXT:    xvcvsxddp vs4, v7
-; CHECK-BE-NEXT:    xvcvsxddp vs5, v6
-; CHECK-BE-NEXT:    xvcvsxddp vs6, v1
-; CHECK-BE-NEXT:    xvcvsxddp vs7, v0
-; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs2, 32(r3)
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    lxv v2, 112(r4)
+; CHECK-BE-NEXT:    lxv v3, 96(r4)
+; CHECK-BE-NEXT:    lxv v4, 80(r4)
+; CHECK-BE-NEXT:    lxv v5, 64(r4)
+; CHECK-BE-NEXT:    lxv v0, 48(r4)
+; CHECK-BE-NEXT:    xvcvsxddp vs3, v0
+; CHECK-BE-NEXT:    lxv v1, 32(r4)
+; CHECK-BE-NEXT:    lxv v6, 16(r4)
+; CHECK-BE-NEXT:    lxv v7, 0(r4)
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v7
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v6
+; CHECK-BE-NEXT:    xvcvsxddp vs2, v1
+; CHECK-BE-NEXT:    xvcvsxddp vs4, v5
+; CHECK-BE-NEXT:    xvcvsxddp vs5, v4
+; CHECK-BE-NEXT:    xvcvsxddp vs6, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs7, v2
 ; CHECK-BE-NEXT:    stxv vs7, 112(r3)
 ; CHECK-BE-NEXT:    stxv vs6, 96(r3)
 ; CHECK-BE-NEXT:    stxv vs5, 80(r3)
 ; CHECK-BE-NEXT:    stxv vs4, 64(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i64>, <16 x i64>* %0, align 128

Modified: llvm/trunk/test/CodeGen/PowerPC/vsx-p9.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsx-p9.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx-p9.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vsx-p9.ll Wed Jan  2 21:04:18 2019
@@ -37,7 +37,7 @@ entry:
   %add.i = add <16 x i8> %1, %0
   tail call void (...) @sink(<16 x i8> %add.i)
 ; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3 
 ; CHECK: vaddubm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -46,7 +46,7 @@ entry:
   %add.i22 = add <16 x i8> %3, %2
   tail call void (...) @sink(<16 x i8> %add.i22)
 ; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3 
 ; CHECK: vaddubm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -55,7 +55,7 @@ entry:
   %add.i21 = add <8 x i16> %5, %4
   tail call void (...) @sink(<8 x i16> %add.i21)
 ; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3 
 ; CHECK: vadduhm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -64,7 +64,7 @@ entry:
   %add.i20 = add <8 x i16> %7, %6
   tail call void (...) @sink(<8 x i16> %add.i20)
 ; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3 
 ; CHECK: vadduhm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -73,7 +73,7 @@ entry:
   %add.i19 = add <4 x i32> %9, %8
   tail call void (...) @sink(<4 x i32> %add.i19)
 ; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3 
 ; CHECK: vadduwm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -82,7 +82,7 @@ entry:
   %add.i18 = add <4 x i32> %11, %10
   tail call void (...) @sink(<4 x i32> %add.i18)
 ; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3 
 ; CHECK: vadduwm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -91,7 +91,7 @@ entry:
   %add.i17 = add <2 x i64> %13, %12
   tail call void (...) @sink(<2 x i64> %add.i17)
 ; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3 
 ; CHECK: vaddudm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -100,7 +100,7 @@ entry:
   %add.i16 = add <2 x i64> %15, %14
   tail call void (...) @sink(<2 x i64> %add.i16)
 ; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3 
 ; CHECK: vaddudm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -109,7 +109,7 @@ entry:
   %add.i15 = add <1 x i128> %17, %16
   tail call void (...) @sink(<1 x i128> %add.i15)
 ; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3 
 ; CHECK: vadduqm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -118,7 +118,7 @@ entry:
   %add.i14 = add <1 x i128> %19, %18
   tail call void (...) @sink(<1 x i128> %add.i14)
 ; CHECK: lxvx 34, 0, 3
-; CHECK: lxvx 35, 0, 4
+; CHECK: lxvx 35, 0, 3 
 ; CHECK: vadduqm 2, 3, 2
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -127,7 +127,7 @@ entry:
   %add.i13 = fadd <4 x float> %20, %21
   tail call void (...) @sink(<4 x float> %add.i13)
 ; CHECK: lxvx 0, 0, 3
-; CHECK: lxvx 1, 0, 4
+; CHECK: lxvx 1, 0, 3 
 ; CHECK: xvaddsp 34, 0, 1
 ; CHECK: stxv 34,
 ; CHECK: bl sink
@@ -136,7 +136,7 @@ entry:
   %add.i12 = fadd <2 x double> %22, %23
   tail call void (...) @sink(<2 x double> %add.i12)
 ; CHECK: lxvx 0, 0, 3
-; CHECK: lxvx 1, 0, 4
+; CHECK: lxvx 1, 0, 3 
 ; CHECK: xvadddp 0, 0, 1
 ; CHECK: stxv 0,
 ; CHECK: bl sink

Modified: llvm/trunk/test/CodeGen/PowerPC/vsx-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsx-spill.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx-spill.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vsx-spill.ll Wed Jan  2 21:04:18 2019
@@ -93,8 +93,8 @@ entry:
 
 ; CHECK-P9-REG-LABEL: foo3
 ; CHECK-P9-REG: stdu r1, -400(r1)
-; CHECK-P9-REG: lfd f30, 384(r1)
-; CHECK-P9-REG: xsadddp f1, f0, f0
+; CHECK-P9-REG-DAG: lfd f30, 384(r1)
+; CHECK-P9-REG-DAG: xsadddp f1, f0, f0
 
 ; CHECK-P9-FISL-LABEL: foo3
 ; CHECK-P9-FISL: stdu r1, -400(r1)

Modified: llvm/trunk/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsx_insert_extract_le.ll?rev=350285&r1=350284&r2=350285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx_insert_extract_le.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vsx_insert_extract_le.ll Wed Jan  2 21:04:18 2019
@@ -31,10 +31,10 @@ define <2 x double> @testi0(<2 x double>
 ;
 ; CHECK-P9-LABEL: testi0:
 ; CHECK-P9:       # %bb.0:
-; CHECK-P9-NEXT:    lfd f0, 0(r4)
-; CHECK-P9-NEXT:    lxv vs1, 0(r3)
-; CHECK-P9-NEXT:    xxpermdi vs0, f0, f0, 2
-; CHECK-P9-NEXT:    xxpermdi v2, vs1, vs0, 1
+; CHECK-P9-NEXT:    lfd [[REG:f[0-9]+]], 0(r4)
+; CHECK-P9-NEXT:    lxv [[REG1:vs[0-9]+]], 0(r3)
+; CHECK-P9-NEXT:    xxpermdi [[REG2:vs[0-9]+]], [[REG]], [[REG]], 2
+; CHECK-P9-NEXT:    xxpermdi v2, [[REG1]], [[REG2]], 1
 ; CHECK-P9-NEXT:    blr
   %v = load <2 x double>, <2 x double>* %p1
   %s = load double, double* %p2
@@ -65,10 +65,10 @@ define <2 x double> @testi1(<2 x double>
 ;
 ; CHECK-P9-LABEL: testi1:
 ; CHECK-P9:       # %bb.0:
-; CHECK-P9-NEXT:    lfd f0, 0(r4)
-; CHECK-P9-NEXT:    lxv vs1, 0(r3)
-; CHECK-P9-NEXT:    xxpermdi vs0, f0, f0, 2
-; CHECK-P9-NEXT:    xxmrgld v2, vs0, vs1
+; CHECK-P9-NEXT:    lfd [[REG:f[0-9]+]], 0(r4)
+; CHECK-P9-NEXT:    lxv [[REG1:vs[0-9]+]], 0(r3)
+; CHECK-P9-NEXT:    xxpermdi [[REG2:vs[0-9]+]], [[REG]], [[REG]], 2
+; CHECK-P9-NEXT:    xxmrgld v2, [[REG2]], [[REG1]] 
 ; CHECK-P9-NEXT:    blr
   %v = load <2 x double>, <2 x double>* %p1
   %s = load double, double* %p2




More information about the llvm-commits mailing list