[llvm] b922a36 - [PowerPC] Define SchedModel for Power8

Qiu Chaofan via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 8 00:45:07 PDT 2023


Author: Qiu Chaofan
Date: 2023-09-08T15:43:21+08:00
New Revision: b922a3621116b404d868af8b74cab25ab78555be

URL: https://github.com/llvm/llvm-project/commit/b922a3621116b404d868af8b74cab25ab78555be
DIFF: https://github.com/llvm/llvm-project/commit/b922a3621116b404d868af8b74cab25ab78555be.diff

LOG: [PowerPC] Define SchedModel for Power8

PowerPC subtargets prior to Power9 use the legacy itinerary mechanism to
provide scheduling information. This patch rewrites the TableGen file to
define the scheduling information with the newer SchedModel mechanism,
which can improve performance on some benchmarks.

Reviewed By: shchenz

Differential Revision: https://reviews.llvm.org/D154488

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCScheduleP8.td
    llvm/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
    llvm/test/CodeGen/PowerPC/BreakableToken-reduced.ll
    llvm/test/CodeGen/PowerPC/CSR-fit.ll
    llvm/test/CodeGen/PowerPC/CompareEliminationSpillIssue.ll
    llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll
    llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
    llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
    llvm/test/CodeGen/PowerPC/aix-dfltabi-rsrvd-reg.ll
    llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll
    llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
    llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
    llvm/test/CodeGen/PowerPC/all-atomics.ll
    llvm/test/CodeGen/PowerPC/and-extend-combine.ll
    llvm/test/CodeGen/PowerPC/asm-template-I.ll
    llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
    llvm/test/CodeGen/PowerPC/atomics-i128.ll
    llvm/test/CodeGen/PowerPC/atomics-i16-ldst.ll
    llvm/test/CodeGen/PowerPC/atomics-i32-ldst.ll
    llvm/test/CodeGen/PowerPC/atomics-i64-ldst.ll
    llvm/test/CodeGen/PowerPC/atomics-i8-ldst.ll
    llvm/test/CodeGen/PowerPC/atomics-regression.ll
    llvm/test/CodeGen/PowerPC/bool-math.ll
    llvm/test/CodeGen/PowerPC/branch_coalesce.ll
    llvm/test/CodeGen/PowerPC/build-vector-tests.ll
    llvm/test/CodeGen/PowerPC/builtins-ppc-p8vector.ll
    llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
    llvm/test/CodeGen/PowerPC/cfence-float.ll
    llvm/test/CodeGen/PowerPC/coldcc2.ll
    llvm/test/CodeGen/PowerPC/combine-fneg.ll
    llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll
    llvm/test/CodeGen/PowerPC/combine_ext_trunc.ll
    llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll
    llvm/test/CodeGen/PowerPC/const-splat-array-init.ll
    llvm/test/CodeGen/PowerPC/constant-combines.ll
    llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
    llvm/test/CodeGen/PowerPC/csr-split.ll
    llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
    llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
    llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll
    llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll
    llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll
    llvm/test/CodeGen/PowerPC/extra-toc-reg-deps.ll
    llvm/test/CodeGen/PowerPC/extract-and-store.ll
    llvm/test/CodeGen/PowerPC/f128-aggregates.ll
    llvm/test/CodeGen/PowerPC/f128-arith.ll
    llvm/test/CodeGen/PowerPC/f128-bitcast.ll
    llvm/test/CodeGen/PowerPC/f128-compare.ll
    llvm/test/CodeGen/PowerPC/f128-conv.ll
    llvm/test/CodeGen/PowerPC/f128-fma.ll
    llvm/test/CodeGen/PowerPC/f128-passByValue.ll
    llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll
    llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll
    llvm/test/CodeGen/PowerPC/fma-combine.ll
    llvm/test/CodeGen/PowerPC/fmf-propagation.ll
    llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll
    llvm/test/CodeGen/PowerPC/fp-classify.ll
    llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
    llvm/test/CodeGen/PowerPC/fp-strict-conv.ll
    llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll
    llvm/test/CodeGen/PowerPC/fp-strict-round.ll
    llvm/test/CodeGen/PowerPC/fp-strict.ll
    llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
    llvm/test/CodeGen/PowerPC/fpscr-intrinsics.ll
    llvm/test/CodeGen/PowerPC/frounds.ll
    llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
    llvm/test/CodeGen/PowerPC/funnel-shift.ll
    llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
    llvm/test/CodeGen/PowerPC/huge-frame-call.ll
    llvm/test/CodeGen/PowerPC/huge-frame-size.ll
    llvm/test/CodeGen/PowerPC/int128_ldst.ll
    llvm/test/CodeGen/PowerPC/legalize-vaarg.ll
    llvm/test/CodeGen/PowerPC/licm-remat.ll
    llvm/test/CodeGen/PowerPC/licm-tocReg.ll
    llvm/test/CodeGen/PowerPC/load-and-splat.ll
    llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll
    llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
    llvm/test/CodeGen/PowerPC/memcmp.ll
    llvm/test/CodeGen/PowerPC/memset-tail.ll
    llvm/test/CodeGen/PowerPC/mergeable-string-pool-large.ll
    llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll
    llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
    llvm/test/CodeGen/PowerPC/mulld.ll
    llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll
    llvm/test/CodeGen/PowerPC/non-debug-mi-search-frspxsrsp.ll
    llvm/test/CodeGen/PowerPC/p8-isel-sched.ll
    llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
    llvm/test/CodeGen/PowerPC/peephole-align.ll
    llvm/test/CodeGen/PowerPC/pow-025-075-intrinsic-scalar-mass-fast.ll
    llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
    llvm/test/CodeGen/PowerPC/ppc-clear-before-return.ll
    llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll
    llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
    llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
    llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
    llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
    llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll
    llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll
    llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll
    llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
    llvm/test/CodeGen/PowerPC/ppc64-varargs.ll
    llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
    llvm/test/CodeGen/PowerPC/pr25080.ll
    llvm/test/CodeGen/PowerPC/pr27078.ll
    llvm/test/CodeGen/PowerPC/pr33093.ll
    llvm/test/CodeGen/PowerPC/pr33547.ll
    llvm/test/CodeGen/PowerPC/pr35402.ll
    llvm/test/CodeGen/PowerPC/pr36292.ll
    llvm/test/CodeGen/PowerPC/pr45628.ll
    llvm/test/CodeGen/PowerPC/pr46759.ll
    llvm/test/CodeGen/PowerPC/pr47707.ll
    llvm/test/CodeGen/PowerPC/pr47830.ll
    llvm/test/CodeGen/PowerPC/pr47891.ll
    llvm/test/CodeGen/PowerPC/pr48388.ll
    llvm/test/CodeGen/PowerPC/pr48519.ll
    llvm/test/CodeGen/PowerPC/pr48527.ll
    llvm/test/CodeGen/PowerPC/pr52894-32bit.ll
    llvm/test/CodeGen/PowerPC/pr52894.ll
    llvm/test/CodeGen/PowerPC/pr61882.ll
    llvm/test/CodeGen/PowerPC/recipest.ll
    llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
    llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll
    llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
    llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
    llvm/test/CodeGen/PowerPC/sat-add.ll
    llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll
    llvm/test/CodeGen/PowerPC/scalar-equal.ll
    llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll
    llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll
    llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll
    llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll
    llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll
    llvm/test/CodeGen/PowerPC/scalar_cmp.ll
    llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
    llvm/test/CodeGen/PowerPC/scalars-in-altivec-regs.ll
    llvm/test/CodeGen/PowerPC/scheduling-mem-dependency.ll
    llvm/test/CodeGen/PowerPC/select-constant-xor.ll
    llvm/test/CodeGen/PowerPC/select.ll
    llvm/test/CodeGen/PowerPC/select_const.ll
    llvm/test/CodeGen/PowerPC/setcc-logic.ll
    llvm/test/CodeGen/PowerPC/setcc-vector.ll
    llvm/test/CodeGen/PowerPC/sext-vector-inreg.ll
    llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll
    llvm/test/CodeGen/PowerPC/signbit-shift.ll
    llvm/test/CodeGen/PowerPC/sms-remark.ll
    llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
    llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
    llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll
    llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
    llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
    llvm/test/CodeGen/PowerPC/store-constant.ll
    llvm/test/CodeGen/PowerPC/stwu-sched.ll
    llvm/test/CodeGen/PowerPC/swap-reduction.ll
    llvm/test/CodeGen/PowerPC/swaps-le-5.ll
    llvm/test/CodeGen/PowerPC/swaps-le-6.ll
    llvm/test/CodeGen/PowerPC/swaps-le-7.ll
    llvm/test/CodeGen/PowerPC/test-vector-insert.ll
    llvm/test/CodeGen/PowerPC/testBitReverse.ll
    llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
    llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
    llvm/test/CodeGen/PowerPC/testComparesieqsc.ll
    llvm/test/CodeGen/PowerPC/testComparesieqsi.ll
    llvm/test/CodeGen/PowerPC/testComparesieqsll.ll
    llvm/test/CodeGen/PowerPC/testComparesieqss.ll
    llvm/test/CodeGen/PowerPC/testComparesiequc.ll
    llvm/test/CodeGen/PowerPC/testComparesiequi.ll
    llvm/test/CodeGen/PowerPC/testComparesiequll.ll
    llvm/test/CodeGen/PowerPC/testComparesiequs.ll
    llvm/test/CodeGen/PowerPC/testComparesigesc.ll
    llvm/test/CodeGen/PowerPC/testComparesigesi.ll
    llvm/test/CodeGen/PowerPC/testComparesigesll.ll
    llvm/test/CodeGen/PowerPC/testComparesigess.ll
    llvm/test/CodeGen/PowerPC/testComparesigeuc.ll
    llvm/test/CodeGen/PowerPC/testComparesigeui.ll
    llvm/test/CodeGen/PowerPC/testComparesigeull.ll
    llvm/test/CodeGen/PowerPC/testComparesigeus.ll
    llvm/test/CodeGen/PowerPC/testComparesigtsc.ll
    llvm/test/CodeGen/PowerPC/testComparesigtsi.ll
    llvm/test/CodeGen/PowerPC/testComparesigtsll.ll
    llvm/test/CodeGen/PowerPC/testComparesigtss.ll
    llvm/test/CodeGen/PowerPC/testComparesigtuc.ll
    llvm/test/CodeGen/PowerPC/testComparesigtui.ll
    llvm/test/CodeGen/PowerPC/testComparesigtus.ll
    llvm/test/CodeGen/PowerPC/testComparesilesc.ll
    llvm/test/CodeGen/PowerPC/testComparesilesi.ll
    llvm/test/CodeGen/PowerPC/testComparesilesll.ll
    llvm/test/CodeGen/PowerPC/testComparesiless.ll
    llvm/test/CodeGen/PowerPC/testComparesileuc.ll
    llvm/test/CodeGen/PowerPC/testComparesileui.ll
    llvm/test/CodeGen/PowerPC/testComparesileull.ll
    llvm/test/CodeGen/PowerPC/testComparesileus.ll
    llvm/test/CodeGen/PowerPC/testComparesiltsc.ll
    llvm/test/CodeGen/PowerPC/testComparesiltsi.ll
    llvm/test/CodeGen/PowerPC/testComparesiltsll.ll
    llvm/test/CodeGen/PowerPC/testComparesiltss.ll
    llvm/test/CodeGen/PowerPC/testComparesiltuc.ll
    llvm/test/CodeGen/PowerPC/testComparesiltui.ll
    llvm/test/CodeGen/PowerPC/testComparesiltus.ll
    llvm/test/CodeGen/PowerPC/testComparesinesc.ll
    llvm/test/CodeGen/PowerPC/testComparesinesi.ll
    llvm/test/CodeGen/PowerPC/testComparesinesll.ll
    llvm/test/CodeGen/PowerPC/testComparesiness.ll
    llvm/test/CodeGen/PowerPC/testComparesineuc.ll
    llvm/test/CodeGen/PowerPC/testComparesineui.ll
    llvm/test/CodeGen/PowerPC/testComparesineull.ll
    llvm/test/CodeGen/PowerPC/testComparesineus.ll
    llvm/test/CodeGen/PowerPC/testCompareslleqsc.ll
    llvm/test/CodeGen/PowerPC/testCompareslleqsi.ll
    llvm/test/CodeGen/PowerPC/testCompareslleqsll.ll
    llvm/test/CodeGen/PowerPC/testCompareslleqss.ll
    llvm/test/CodeGen/PowerPC/testComparesllequc.ll
    llvm/test/CodeGen/PowerPC/testComparesllequi.ll
    llvm/test/CodeGen/PowerPC/testComparesllequll.ll
    llvm/test/CodeGen/PowerPC/testComparesllequs.ll
    llvm/test/CodeGen/PowerPC/testComparesllgesc.ll
    llvm/test/CodeGen/PowerPC/testComparesllgesi.ll
    llvm/test/CodeGen/PowerPC/testComparesllgesll.ll
    llvm/test/CodeGen/PowerPC/testComparesllgess.ll
    llvm/test/CodeGen/PowerPC/testComparesllgeuc.ll
    llvm/test/CodeGen/PowerPC/testComparesllgeui.ll
    llvm/test/CodeGen/PowerPC/testComparesllgeull.ll
    llvm/test/CodeGen/PowerPC/testComparesllgeus.ll
    llvm/test/CodeGen/PowerPC/testComparesllgtsll.ll
    llvm/test/CodeGen/PowerPC/testComparesllgtuc.ll
    llvm/test/CodeGen/PowerPC/testComparesllgtui.ll
    llvm/test/CodeGen/PowerPC/testComparesllgtus.ll
    llvm/test/CodeGen/PowerPC/testCompareslllesc.ll
    llvm/test/CodeGen/PowerPC/testCompareslllesi.ll
    llvm/test/CodeGen/PowerPC/testCompareslllesll.ll
    llvm/test/CodeGen/PowerPC/testComparesllless.ll
    llvm/test/CodeGen/PowerPC/testComparesllleuc.ll
    llvm/test/CodeGen/PowerPC/testComparesllleui.ll
    llvm/test/CodeGen/PowerPC/testComparesllleull.ll
    llvm/test/CodeGen/PowerPC/testComparesllleus.ll
    llvm/test/CodeGen/PowerPC/testComparesllltsll.ll
    llvm/test/CodeGen/PowerPC/testComparesllltuc.ll
    llvm/test/CodeGen/PowerPC/testComparesllltui.ll
    llvm/test/CodeGen/PowerPC/testComparesllltus.ll
    llvm/test/CodeGen/PowerPC/testComparesllnesll.ll
    llvm/test/CodeGen/PowerPC/testComparesllneull.ll
    llvm/test/CodeGen/PowerPC/tls-pie-xform.ll
    llvm/test/CodeGen/PowerPC/toc-float.ll
    llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll
    llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
    llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
    llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
    llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
    llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
    llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
    llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
    llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
    llvm/test/CodeGen/PowerPC/vavg.ll
    llvm/test/CodeGen/PowerPC/vec-itofp.ll
    llvm/test/CodeGen/PowerPC/vec-min-max.ll
    llvm/test/CodeGen/PowerPC/vec-promote.ll
    llvm/test/CodeGen/PowerPC/vec-trunc.ll
    llvm/test/CodeGen/PowerPC/vec-trunc2.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll
    llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
    llvm/test/CodeGen/PowerPC/vec_select.ll
    llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll
    llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
    llvm/test/CodeGen/PowerPC/vector-ldst.ll
    llvm/test/CodeGen/PowerPC/vperm-swap.ll
    llvm/test/CodeGen/PowerPC/vselect-constants.ll
    llvm/test/CodeGen/PowerPC/vsx.ll
    llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
    llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/llvm/lib/Target/PowerPC/PPCScheduleP8.td
index 70a58f42a98ab9f..3a2d9d9b3bc19f6 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP8.td
@@ -6,408 +6,332 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines the itinerary class data for the POWER8 processor.
+// This file defines the SchedModel for the POWER8 processor.
 //
 //===----------------------------------------------------------------------===//
 
-// Scheduling for the P8 involves tracking two types of resources:
-//  1. The dispatch bundle slots
-//  2. The functional unit resources
-
-// Dispatch units:
-def P8_DU1    : FuncUnit;
-def P8_DU2    : FuncUnit;
-def P8_DU3    : FuncUnit;
-def P8_DU4    : FuncUnit;
-def P8_DU5    : FuncUnit;
-def P8_DU6    : FuncUnit;
-def P8_DU7    : FuncUnit; // Only branch instructions will use DU7,DU8
-def P8_DU8    : FuncUnit;
-
-// 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
-
-def P8_LU1     : FuncUnit; // Loads or fixed-point operations 1
-def P8_LU2     : FuncUnit; // Loads or fixed-point operations 2
-
-// Load/Store pipelines can handle Stores, fixed-point loads, and simple
-// fixed-point operations.
-def P8_LSU1    : FuncUnit; // Load/Store pipeline 1
-def P8_LSU2    : FuncUnit; // Load/Store pipeline 2
-
-// Fixed Point unit
-def P8_FXU1    : FuncUnit; // FX pipeline 1
-def P8_FXU2    : FuncUnit; // FX pipeline 2
-
-// The Floating-Point Unit (FPU) and Vector Media Extension (VMX) units
-// are combined on P7 and newer into a Vector Scalar Unit (VSU).
-// The P8 Instruction latency documents still refers to the unit as the
-// FPU, so keep in mind that FPU==VSU.
-// In contrast to the P7, the VMX units on P8 are symmetric, so no need to
-// split vector integer ops or 128-bit load/store/perms to the specific units.
-def P8_FPU1    : FuncUnit; // VS pipeline 1
-def P8_FPU2    : FuncUnit; // VS pipeline 2
-
-def P8_CRU    : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
-def P8_BRU    : FuncUnit; // BR unit
-
-def P8Itineraries : ProcessorItineraries<
-  [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, P8_DU7, P8_DU8,
-   P8_LU1, P8_LU2, P8_LSU1, P8_LSU2, P8_FXU1, P8_FXU2,
-   P8_FPU1, P8_FPU2, P8_CRU, P8_BRU], [], [
-  InstrItinData<IIC_IntSimple   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2,
-                                                  P8_LU1, P8_LU2,
-                                                  P8_LSU1, P8_LSU2]>],
-                                  [1, 1, 1]>,
-  InstrItinData<IIC_IntGeneral  , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2, P8_LU1,
-                                                  P8_LU2, P8_LSU1, P8_LSU2]>],
-                                  [1, 1, 1]>,
-  InstrItinData<IIC_IntISEL,      [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
-                                   InstrStage<1, [P8_BRU]>],
-                                  [1, 1, 1, 1]>,
-  InstrItinData<IIC_IntCompare  , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [1, 1, 1]>,
-  InstrItinData<IIC_IntDivW     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<15, [P8_FXU1, P8_FXU2]>],
-                                  [15, 1, 1]>,
-  InstrItinData<IIC_IntDivD     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<23, [P8_FXU1, P8_FXU2]>],
-                                  [23, 1, 1]>,
-  InstrItinData<IIC_IntMulHW    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [4, 1, 1]>,
-  InstrItinData<IIC_IntMulHWU   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [4, 1, 1]>,
-  InstrItinData<IIC_IntMulHD    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [4, 1, 1]>,
-  InstrItinData<IIC_IntMulLI    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [4, 1, 1]>,
-  InstrItinData<IIC_IntRotate   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                   [1, 1, 1]>,
-  InstrItinData<IIC_IntRotateD  , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                   [1, 1, 1]>,
-  InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                   [1, 1, 1]>,
-  InstrItinData<IIC_IntShift    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [1, 1, 1]>,
-  InstrItinData<IIC_IntTrapW    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [1, 1]>,
-  InstrItinData<IIC_IntTrapD    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [1, 1]>,
-  InstrItinData<IIC_BrB         , [InstrStage<1, [P8_DU7, P8_DU8], 0>,
-                                   InstrStage<1, [P8_BRU]>],
-                                  [3, 1, 1]>,
-  // FIXME - the Br* groups below are not branch related, so should probably
-  // be renamed.
-  // IIC_BrCR consists of the cr* instructions.  (crand,crnor,creqv, etc).
-  // and should be 'First' in dispatch.
-  InstrItinData<IIC_BrCR        , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_CRU]>],
-                                  [3, 1, 1]>,
-  // IIC_BrMCR consists of the mcrf instruction.
-  InstrItinData<IIC_BrMCR       , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_CRU]>],
-                                  [3, 1, 1]>,
-  // IIC_BrMCRX consists of mcrxr (obsolete instruction) and mtcrf, which
-  // should be first in the dispatch group.
-  InstrItinData<IIC_BrMCRX      , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [3, 1, 1]>,
-  InstrItinData<IIC_BrMCRX      , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [3, 1]>,
-  InstrItinData<IIC_LdStLoad    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2,
-                                                  P8_LU1, P8_LU2]>],
-                                  [2, 1, 1]>,
-  InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2,
-                                                  P8_LU1, P8_LU2 ], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [2, 2, 1, 1]>,
-  // Update-Indexed form loads/stores are no longer first and last in the
-  // dispatch group.  They are simply cracked, so require DU1,DU2.
-  InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2,
-                                                  P8_LU1, P8_LU2], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [3, 3, 1, 1]>,
-  InstrItinData<IIC_LdStLD      , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2,
-                                                  P8_LU1, P8_LU2]>],
-                                  [2, 1, 1]>,
-  InstrItinData<IIC_LdStLDU     , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2,
-                                                  P8_LU1, P8_LU2], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [2, 2, 1, 1]>,
-  InstrItinData<IIC_LdStLDUX    , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2,
-                                                  P8_LU1, P8_LU2], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [3, 3, 1, 1]>,
-  InstrItinData<IIC_LdStLFD     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_LU1, P8_LU2]>],
-                                  [3, 1, 1]>,
-  InstrItinData<IIC_LdStLVecX   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_LU1, P8_LU2]>],
-                                  [3, 1, 1]>,
-  InstrItinData<IIC_LdStLFDU    , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_LU1, P8_LU2], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [3, 3, 1, 1]>,
-  InstrItinData<IIC_LdStLFDUX   , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_LU1, P8_LU2], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [3, 3, 1, 1]>,
-  InstrItinData<IIC_LdStLHA     , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2,
-                                                  P8_LU1, P8_LU2], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2,
-                                                  P8_LU1, P8_LU2]>],
-                                  [3, 1, 1]>,
-  InstrItinData<IIC_LdStLHAU    , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2,
-                                                  P8_LU1, P8_LU2], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [4, 4, 1, 1]>,
-  // first+last in dispatch group.
-  InstrItinData<IIC_LdStLHAUX   , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_DU3], 0>,
-                                   InstrStage<1, [P8_DU4], 0>,
-                                   InstrStage<1, [P8_DU5], 0>,
-                                   InstrStage<1, [P8_DU6], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2,
-                                                  P8_LU1, P8_LU2], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [4, 4, 1, 1]>,
-  InstrItinData<IIC_LdStLWA     , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2,
-                                                  P8_LU1, P8_LU2]>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [3, 1, 1]>,
-  InstrItinData<IIC_LdStLWARX,    [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_DU3], 0>,
-                                   InstrStage<1, [P8_DU4], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2,
-                                                  P8_LU1, P8_LU2]>],
-                                  [3, 1, 1]>,
-  // first+last
-  InstrItinData<IIC_LdStLDARX,    [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_DU3], 0>,
-                                   InstrStage<1, [P8_DU4], 0>,
-                                   InstrStage<1, [P8_DU5], 0>,
-                                   InstrStage<1, [P8_DU6], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2,
-                                                  P8_LU1, P8_LU2]>],
-                                  [3, 1, 1]>,
-  InstrItinData<IIC_LdStLMW     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2,
-                                                  P8_LU1, P8_LU2]>],
-                                  [2, 1, 1]>,
-// Stores are dual-issued from the issue queue, so may only take up one
-// dispatch slot.  The instruction will be broken into two IOPS. The agen
-// op is issued to the LSU, and the data op (register fetch) is issued
-// to either the LU (GPR store) or the VSU (FPR store).
-  InstrItinData<IIC_LdStStore   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2]>,
-                                   InstrStage<1, [P8_LU1, P8_LU2]>],
-                                  [1, 1, 1]>,
-  InstrItinData<IIC_LdStSTD     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_LU1, P8_LU2,
-                                                  P8_LSU1, P8_LSU2]>]
-                                  [1, 1, 1]>,
-  InstrItinData<IIC_LdStSTU     , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_LU1, P8_LU2,
-                                                  P8_LSU1, P8_LSU2], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [2, 1, 1, 1]>,
-  // First+last
-  InstrItinData<IIC_LdStSTUX    , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_DU3], 0>,
-                                   InstrStage<1, [P8_DU4], 0>,
-                                   InstrStage<1, [P8_DU5], 0>,
-                                   InstrStage<1, [P8_DU6], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [2, 1, 1, 1]>,
-  InstrItinData<IIC_LdStSTFD    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [1, 1, 1]>,
-  InstrItinData<IIC_LdStSTFDU   , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [2, 1, 1, 1]>,
-  InstrItinData<IIC_LdStSTVEBX  , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [1, 1, 1]>,
-  InstrItinData<IIC_LdStSTDCX   , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_DU3], 0>,
-                                   InstrStage<1, [P8_DU4], 0>,
-                                   InstrStage<1, [P8_DU5], 0>,
-                                   InstrStage<1, [P8_DU6], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
-                                   InstrStage<1, [P8_LU1, P8_LU2]>],
-                                  [1, 1, 1]>,
-  InstrItinData<IIC_LdStSTWCX   , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_DU2], 0>,
-                                   InstrStage<1, [P8_DU3], 0>,
-                                   InstrStage<1, [P8_DU4], 0>,
-                                   InstrStage<1, [P8_DU5], 0>,
-                                   InstrStage<1, [P8_DU6], 0>,
-                                   InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
-                                   InstrStage<1, [P8_LU1, P8_LU2]>],
-                                  [1, 1, 1]>,
-  InstrItinData<IIC_SprMFCR     , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_CRU]>],
-                                  [6, 1]>,
-  InstrItinData<IIC_SprMFCRF    , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_CRU]>],
-                                  [3, 1]>,
-  InstrItinData<IIC_SprMTSPR    , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_FXU1, P8_FXU2]>],
-                                  [4, 1]>, // mtctr
-  InstrItinData<IIC_FPGeneral   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [5, 1, 1]>,
-  InstrItinData<IIC_FPAddSub    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [5, 1, 1]>,
-  InstrItinData<IIC_FPCompare   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [8, 1, 1]>,
-  InstrItinData<IIC_FPDivD      , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [33, 1, 1]>,
-  InstrItinData<IIC_FPDivS      , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [27, 1, 1]>,
-  InstrItinData<IIC_FPSqrtD     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [44, 1, 1]>,
-  InstrItinData<IIC_FPSqrtS     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [32, 1, 1]>,
-  InstrItinData<IIC_FPFused     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [5, 1, 1, 1]>,
-  InstrItinData<IIC_FPRes       , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
-                                                  P8_DU4, P8_DU5, P8_DU6], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [5, 1, 1]>,
-  InstrItinData<IIC_VecGeneral  , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [2, 1, 1]>,
-  InstrItinData<IIC_VecVSL      , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [2, 1, 1]>,
-  InstrItinData<IIC_VecVSR      , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [2, 1, 1]>,
-  InstrItinData<IIC_VecFP       , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [6, 1, 1]>,
-  InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [6, 1, 1]>,
-  InstrItinData<IIC_VecFPRound  , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [6, 1, 1]>,
-  InstrItinData<IIC_VecComplex  , [InstrStage<1, [P8_DU1], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [7, 1, 1]>,
-  InstrItinData<IIC_VecPerm     , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
-                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
-                                  [3, 1, 1]>
-]>;
-
-// ===---------------------------------------------------------------------===//
-// P8 machine model for scheduling and other instruction cost heuristics.
-// P8 has an 8 insn dispatch group (6 non-branch, 2 branch) and can issue up
-// to 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
-
 def P8Model : SchedMachineModel {
-  let IssueWidth = 8;  // up to 8 instructions dispatched per cycle.
-                       // up to six non-branch instructions.
-                       // up to two branches in a dispatch group.
-
-  let LoadLatency = 3; // Optimistic load latency assuming bypass.
-                       // This is overriden by OperandCycles if the
-                       // Itineraries are queried instead.
+  let IssueWidth = 8;
+  let LoadLatency = 3;
   let MispredictPenalty = 16;
-
-  // Try to make sure we have at least 10 dispatch groups in a loop.
   let LoopMicroOpBufferSize = 60;
-
+  let MicroOpBufferSize = 64;
+  // TODO: Due to a limitation of the instruction definitions, non-P8
+  // instructions are required to be listed here. Change this once that is fixed.
   let CompleteModel = 0;
-
-  let Itineraries = P8Itineraries;
+  let UnsupportedFeatures = [HasSPE, PrefixInstrs, MMA,
+                             PairedVectorMemops, PCRelativeMemops,
+                             IsISA3_0, IsISA3_1, IsISAFuture];
 }
 
+let SchedModel = P8Model in {
+  // Power8 Pipeline Units:
+
+  def P8_LU_LS_FX : ProcResource<6>;
+  def P8_LU_LS : ProcResource<4> { let Super = P8_LU_LS_FX; }
+  def P8_LS : ProcResource<2> { let Super = P8_LU_LS; }
+  def P8_LU : ProcResource<2> { let Super = P8_LU_LS; }
+  def P8_FX : ProcResource<2> { let Super = P8_LU_LS_FX; }
+  def P8_DFU : ProcResource<1>;
+  def P8_BR : ProcResource<1> { let BufferSize = 16; }
+  def P8_CY : ProcResource<1>;
+  def P8_CRL : ProcResource<1>;
+  def P8_VMX : ProcResource<2>;
+  def P8_PM : ProcResource<2> {
+    // This is a workaround to make the scheduler respect the latency of long
+    // permute chains.
+    let BufferSize = 1;
+    let Super = P8_VMX;
+  }
+  def P8_XS : ProcResource<2> { let Super = P8_VMX; }
+  def P8_VX : ProcResource<2> { let Super = P8_VMX; }
+  def P8_FPU : ProcResource<4>;
+  // Units for scalar, 2xDouble and 4xSingle
+  def P8_FP_Scal : ProcResource<2> { let Super = P8_FPU; }
+  def P8_FP_2x64 : ProcResource<2> { let Super = P8_FPU; }
+  def P8_FP_4x32 : ProcResource<2> { let Super = P8_FPU; }
+
+  // Power8 Dispatch Ports:
+  // Two ports to do loads or fixed-point operations.
+  // Two ports to do stores, fixed-point loads, or fixed-point operations.
+  // Two ports for fixed-point operations.
+  // Two issue ports shared by 2 FP/2 VSX/2 VMX/1 CY/1 DFP operations.
+  // One for branch operations.
+  // One for condition register operations.
+
+  // TODO: Model dispatch of cracked instructions.
+
+  // Six ports in total are available for fixed-point operations.
+  def P8_PORT_ALLFX : ProcResource<6>;
+  // Four ports in total are available for fixed-point load operations.
+  def P8_PORT_FXLD : ProcResource<4> { let Super = P8_PORT_ALLFX; }
+  // Two ports to do loads or fixed-point operations.
+  def P8_PORT_LD_FX : ProcResource<2> { let Super = P8_PORT_FXLD; }
+  // Two ports to do stores, fixed-point loads, or fixed-point operations.
+  def P8_PORT_ST_FXLD_FX : ProcResource<2> { let Super = P8_PORT_FXLD; }
+  // Two issue ports shared by two floating-point, two VSX, two VMX, one crypto,
+  // and one DFP operation.
+  def P8_PORT_VMX_FP : ProcResource<2>;
+  // One port for branch operation.
+  def P8_PORT_BR : ProcResource<1>;
+  // One port for condition register operation.
+  def P8_PORT_CR : ProcResource<1>;
+
+  def P8_ISSUE_FX : SchedWriteRes<[P8_PORT_ALLFX]>;
+  def P8_ISSUE_FXLD : SchedWriteRes<[P8_PORT_FXLD]>;
+  def P8_ISSUE_LD : SchedWriteRes<[P8_PORT_LD_FX]>;
+  def P8_ISSUE_ST : SchedWriteRes<[P8_PORT_ST_FXLD_FX]>;
+  def P8_ISSUE_VSX : SchedWriteRes<[P8_PORT_VMX_FP]>;
+  def P8_ISSUE_BR : SchedWriteRes<[P8_PORT_BR]>;
+  def P8_ISSUE_CR : SchedWriteRes<[P8_PORT_CR]>;
+
+  // Power8 Instruction Latency & Port Groups:
+
+  def P8_LS_LU_NONE : SchedWriteRes<[P8_LU, P8_LS]>;
+  def P8_LS_FP_NONE : SchedWriteRes<[P8_LS, P8_FPU]>;
+  def P8_LU_or_LS_3C : SchedWriteRes<[P8_LU_LS]> { let Latency = 3; }
+  def P8_LS_FX_3C : SchedWriteRes<[P8_LS, P8_FX]> { let Latency = 3; }
+  def P8_LU_or_LS_or_FX_2C : SchedWriteRes<[P8_LU_LS_FX]> { let Latency = 2; }
+  def P8_LU_or_LS_FX_3C : SchedWriteRes<[P8_LU_LS, P8_FX]> { let Latency = 3; }
+  def P8_FX_NONE : SchedWriteRes<[P8_FX]>;
+  def P8_FX_1C : SchedWriteRes<[P8_FX]> { let Latency = 1; }
+  def P8_FX_2C : SchedWriteRes<[P8_FX]> { let Latency = 2; }
+  def P8_FX_3C : SchedWriteRes<[P8_FX]> { let Latency = 3; }
+  def P8_FX_5C : SchedWriteRes<[P8_FX]> { let Latency = 5; }
+  def P8_FX_10C : SchedWriteRes<[P8_FX]> { let Latency = 10; }
+  def P8_FX_23C : SchedWriteRes<[P8_FX]> { let Latency = 23; }
+  def P8_FX_15C : SchedWriteRes<[P8_FX]> { let Latency = 15; }
+  def P8_FX_41C : SchedWriteRes<[P8_FX]> { let Latency = 41; }
+  def P8_BR_2C : SchedWriteRes<[P8_BR]> { let Latency = 2; }
+  def P8_CR_NONE : SchedWriteRes<[P8_CRL]>;
+  def P8_CR_3C : SchedWriteRes<[P8_CRL]> { let Latency = 3; }
+  def P8_CR_5C : SchedWriteRes<[P8_CRL]> { let Latency = 5; }
+  def P8_LU_5C : SchedWriteRes<[P8_LU]> { let Latency = 5; }
+  def P8_LU_FX_5C : SchedWriteRes<[P8_LU, P8_FX]> { let Latency = 5; }
+  def P8_LS_FP_FX_2C : SchedWriteRes<[P8_LS, P8_FPU, P8_FX]> { let Latency = 2; }
+  def P8_LS_FP_FX_3C : SchedWriteRes<[P8_LS, P8_FPU, P8_FX]> { let Latency = 3; }
+  def P8_LS_3C : SchedWriteRes<[P8_LS]> { let Latency = 3; }
+  def P8_FP_3C : SchedWriteRes<[P8_FPU]> { let Latency = 3; }
+  def P8_FP_Scal_6C : SchedWriteRes<[P8_FP_Scal]> { let Latency = 6; }
+  def P8_FP_4x32_6C : SchedWriteRes<[P8_FP_4x32]> { let Latency = 6; }
+  def P8_FP_2x64_6C : SchedWriteRes<[P8_FP_2x64]> { let Latency = 6; }
+  def P8_FP_26C : SchedWriteRes<[P8_FP_Scal]> { let Latency = 26; }
+  def P8_FP_28C : SchedWriteRes<[P8_FP_4x32]> { let Latency = 28; }
+  def P8_FP_31C : SchedWriteRes<[P8_FP_Scal]> { let Latency = 31; }
+  def P8_FP_Scal_32C : SchedWriteRes<[P8_FP_Scal]> { let Latency = 32; }
+  def P8_FP_2x64_32C : SchedWriteRes<[P8_FP_2x64]> { let Latency = 32; }
+  def P8_FP_4x32_32C : SchedWriteRes<[P8_FP_4x32]> { let Latency = 32; }
+  def P8_FP_Scal_43C : SchedWriteRes<[P8_FP_Scal]> { let Latency = 43; }
+  def P8_FP_2x64_43C : SchedWriteRes<[P8_FP_2x64]> { let Latency = 43; }
+  def P8_XS_2C : SchedWriteRes<[P8_XS]> { let Latency = 2; }
+  def P8_PM_2C : SchedWriteRes<[P8_PM]> { let Latency = 2; }
+  def P8_XS_4C : SchedWriteRes<[P8_XS]> { let Latency = 4; }
+  def P8_VX_7C : SchedWriteRes<[P8_VX]> { let Latency = 7; }
+  def P8_XS_9C : SchedWriteRes<[P8_XS]> { let Latency = 9; }
+  def P8_CY_6C : SchedWriteRes<[P8_CY]> { let Latency = 6; }
+  def P8_DFU_13C : SchedWriteRes<[P8_DFU]> { let Latency = 13; }
+  def P8_DFU_15C : SchedWriteRes<[P8_DFU]> { let Latency = 15; }
+  def P8_DFU_17C : SchedWriteRes<[P8_DFU]> { let Latency = 17; }
+  def P8_DFU_25C : SchedWriteRes<[P8_DFU]> { let Latency = 25; }
+  def P8_DFU_32C : SchedWriteRes<[P8_DFU]> { let Latency = 32; }
+  def P8_DFU_34C : SchedWriteRes<[P8_DFU]> { let Latency = 34; }
+  def P8_DFU_40C : SchedWriteRes<[P8_DFU]> { let Latency = 40; }
+  def P8_DFU_90C : SchedWriteRes<[P8_DFU]> { let Latency = 90; }
+  def P8_DFU_96C : SchedWriteRes<[P8_DFU]> { let Latency = 96; }
+  def P8_DFU_172C : SchedWriteRes<[P8_DFU]> { let Latency = 172; }
+  // Direct move instructions
+  def P8_DM_5C : SchedWriteRes<[]> { let Latency = 5; }
+
+  // Instructions of CR pipeline
+
+  def : InstRW<[P8_CR_NONE, P8_ISSUE_CR], (instrs MFCR, MFCR8)>;
+  def : InstRW<[P8_CR_3C, P8_ISSUE_CR], (instrs MFOCRF, MFOCRF8)>;
+  def : InstRW<[P8_CR_5C, P8_ISSUE_CR], (instrs MFLR, MFLR8, MFCTR, MFCTR8)>;
+
+  // Instructions of CY pipeline
+
+  def : InstRW<[P8_CY_6C, P8_ISSUE_VSX], (instrs
+    VCIPHER, VCIPHERLAST, VNCIPHER, VNCIPHERLAST, VPMSUMB, VPMSUMD, VPMSUMH, VPMSUMW, VSBOX)>;
+
+  // Instructions of FPU pipeline
+
+  def : InstRW<[P8_FP_26C, P8_ISSUE_VSX], (instrs (instregex "^FDIVS(_rec)?$"), XSDIVSP)>;
+  def : InstRW<[P8_FP_28C, P8_ISSUE_VSX], (instrs XVDIVSP)>;
+  def : InstRW<[P8_FP_31C, P8_ISSUE_VSX], (instregex "^FSQRTS(_rec)?$")>;
+  def : InstRW<[P8_FP_Scal_32C, P8_ISSUE_VSX], (instrs FDIV, FDIV_rec, XSDIVDP)>;
+  def : InstRW<[P8_FP_2x64_32C, P8_ISSUE_VSX], (instrs XVDIVDP)>;
+  def : InstRW<[P8_FP_4x32_32C, P8_ISSUE_VSX], (instrs XVSQRTSP)>;
+  def : InstRW<[P8_FP_Scal_43C, P8_ISSUE_VSX], (instrs FSQRT, FSQRT_rec, XSSQRTDP)>;
+  def : InstRW<[P8_FP_2x64_43C, P8_ISSUE_VSX], (instrs XVSQRTDP)>;
+
+  def : InstRW<[P8_FP_3C, P8_ISSUE_VSX], (instrs
+    MTFSFI_rec, MTFSF_rec, MTFSFI, MTFSFIb, MTFSF, MTFSFb, MTFSB0, MTFSB1)>;
+
+  def : InstRW<[P8_FP_Scal_6C, P8_ISSUE_VSX], (instrs
+    (instregex "^F(N)?M(ADD|SUB)(S)?(_rec)?$"),
+    (instregex "^XS(N)?M(ADD|SUB)(A|M)(D|S)P$"),
+    (instregex "^FC(F|T)I(D|W)(U)?(S|Z)?(_rec)?$"),
+    (instregex "^(F|XS)(ABS|CPSGN|ADD|MUL|NABS|RE|NEG|SUB|SEL|RSQRTE)(D|S)?(P)?(s)?(_rec)?$"),
+    (instregex "^FRI(M|N|P|Z)(D|S)(_rec)?$"),
+    (instregex "^XSCVDP(S|U)X(W|D)S(s)?$"),
+    (instregex "^XSCV(S|U)XD(D|S)P$"),
+    (instregex "^XSCV(D|S)P(S|D)P(N)?$"),
+    (instregex "^XSRDPI(C|M|P|Z)?$"),
+    FMR, FRSP, FMR_rec, FRSP_rec, XSRSP)>;
+
+  def : InstRW<[P8_FP_4x32_6C, P8_ISSUE_VSX], (instrs
+    (instregex "^XV(N)?M(ADD|SUB)(A|M)SP$"),
+    (instregex "^VRFI(M|N|P|Z)$"),
+    XVRSQRTESP, XVSUBSP, VADDFP, VEXPTEFP, VLOGEFP, VMADDFP, VNMSUBFP, VREFP,
+    VRSQRTEFP, VSUBFP, XVCVSXWSP, XVCVUXWSP, XVMULSP, XVNABSSP, XVNEGSP, XVRESP,
+    XVCVDPSP, XVCVSXDSP, XVCVUXDSP, XVABSSP, XVADDSP, XVCPSGNSP)>;
+
+  def : InstRW<[P8_FP_2x64_6C, P8_ISSUE_VSX], (instrs
+    (instregex "^XVR(D|S)PI(C|M|P|Z)?$"),
+    (instregex "^XVCV(S|U)X(D|W)DP$"),
+    (instregex "^XVCV(D|W|S)P(S|U)X(D|W)S$"),
+    (instregex "^XV(N)?(M)?(RSQRTE|CPSGN|SUB|ADD|ABS|UL|NEG|RE)(A|M)?DP$"),
+    XVCVSPDP)>;
+
+  // Instructions of FX, LU or LS pipeline
+
+  def : InstRW<[P8_FX_NONE, P8_ISSUE_FX], (instrs TDI, TWI, TD, TW, MTCRF, MTCRF8, MTOCRF, MTOCRF8)>;
+  def : InstRW<[P8_FX_1C, P8_ISSUE_FX], (instregex "^RLWIMI(8)?$")>;
+  // TODO: Pipeline of logical instructions might be LS or FX
+  def : InstRW<[P8_FX_2C, P8_ISSUE_FX], (instrs
+    (instregex "^(N|X)?(EQV|AND|OR)(I)?(S|C)?(8)?(_rec)?$"),
+    (instregex "^EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
+    (instregex "^RL(D|W)(I)?(NM|C)(L|R)?(8)?(_32)?(_64)?(_rec)?$"),
+    (instregex "^S(L|R)(A)?(W|D)(I)?(8)?(_rec|_32)?$"),
+    (instregex "^(ADD|SUBF)(M|Z)?(C|E)?(4|8)?O(_rec)?$"),
+    (instregex "^(ADD|SUBF)(M|Z)?E(8)?_rec$"),
+    (instregex "^(ADD|SUBF|NEG)(4|8)?_rec$"),
+    NOP, ADDG6S, ADDG6S8, ADDZE, ADDZE8, ADDIC_rec, NEGO_rec, ADDC, ADDC8, SUBFC, SUBFC8,
+    ADDC_rec, ADDC8_rec, SUBFC_rec, SUBFC8_rec, COPY, NEG8O_rec,
+    RLDIMI, RLDIMI_rec, RLWIMI8_rec, RLWIMI_rec)>;
+
+  // Count-leading-zeros / population-count forms: 3-cycle FX-unit operations.
+  // Like every other P8_FX_* mapping, they also consume a fixed-point issue
+  // port (P8_ISSUE_FX) so that port pressure is modelled for them.
+  def : InstRW<[P8_FX_3C, P8_ISSUE_FX], (instregex "^(POP)?CNT(LZ)?(B|W|D)(8)?(_rec)?$")>;
+  def : InstRW<[P8_FX_5C, P8_ISSUE_FX], (instrs
+    (instregex "^MUL(H|L)(I|W|D)(8)?(U|O)?(_rec)?$"),
+    CMPDI,CMPWI,CMPD,CMPW,CMPLDI,CMPLWI,CMPLD,CMPLW,
+    ISEL, ISEL8, MTLR, MTLR8, MTCTR, MTCTR8, MTCTR8loop, MTCTRloop)>;
+
+  // MFTB/MFTB8 use the FX unit (P8_FX_10C), so they are issued through a
+  // fixed-point port like the other P8_FX_* mappings, not through the
+  // VMX/FP issue ports.
+  def : InstRW<[P8_FX_10C, P8_ISSUE_FX], (instregex "^MFTB(8)?$")>;
+  def : InstRW<[P8_FX_15C, P8_ISSUE_FX], (instregex "^DIVW(U)?$")>;
+
+  def : InstRW<[P8_FX_23C, P8_ISSUE_FX], (instregex "^DIV(D|WE)(U)?$")>;
+  // Overflow-setting (O), record-form (_rec) and DIVDE/DIVDEU divides:
+  // 41-cycle FX-unit operations. They consume a fixed-point issue port
+  // (P8_ISSUE_FX), consistent with the other divide mappings above.
+  def : InstRW<[P8_FX_41C, P8_ISSUE_FX], (instrs
+    (instregex "^DIV(D|W)(E)?(U)?O(_rec)?$"),
+    (instregex "^DIV(D|W)(E)?(U)?_rec$"),
+    DIVDE, DIVDEU)>;
+
+  def : InstRW<[P8_LS_3C, P8_ISSUE_FX], (instrs MFSR, MFSRIN)>;
+
+  // Floating-point, VMX and VSX loads: 5-cycle LU operations issued through
+  // the load ports. Each instruction is listed once (LXVD2X was previously
+  // repeated).
+  def : InstRW<[P8_LU_5C, P8_ISSUE_LD], (instrs
+    LFS, LFSX, LFD, LFDX, LFDXTLS, LFDXTLS_, LXVD2X, LXVW4X, LXVDSX, LVEBX, LVEHX, LVEWX,
+    LVX, LVXL, LXSDX, LFIWAX, LFIWZX, LFSXTLS, LFSXTLS_, LXVB16X, LXSIWZX,
+    DFLOADf64, XFLOADf64, LIWZX)>;
+
+  def : InstRW<[P8_LS_FX_3C, P8_ISSUE_FXLD], (instrs LQ)>;
+  def : InstRW<[P8_LU_FX_5C, P8_ISSUE_LD], (instregex "^LF(D|S)U(X)?$")>;
+
+  def : InstRW<[P8_LS_FP_NONE, P8_ISSUE_ST], (instrs
+    STXSDX, STXVD2X, STXVW4X, STFIWX, STFS, STFSX, STFD, STFDX,
+    STFDEPX, STFDXTLS, STFDXTLS_, STFSXTLS, STFSXTLS_, STXSIWX, STXSSP, STXSSPX)>;
+
+  def : InstRW<[P8_LS_FP_FX_2C, P8_ISSUE_ST], (instrs STVEBX, STVEHX, STVEWX, STVX, STVXL)>;
+  def : InstRW<[P8_LS_FP_FX_3C, P8_ISSUE_ST], (instregex "^STF(D|S)U(X)?$")>;
+
+  def : InstRW<[P8_LS_LU_NONE, P8_ISSUE_ST], (instrs
+    (instregex "^ST(B|H|W|D)(U)?(X)?(8|TLS)?(_)?(32)?$"),
+    STBCIX, STBCX, STBEPX, STDBRX, STDCIX, STDCX, STHBRX, STHCIX, STHCX, STHEPX,
+    STMW, STSWI, STWBRX, STWCIX, STWCX, STWEPX)>;
+
+  def : InstRW<[P8_LU_or_LS_FX_3C, P8_ISSUE_FXLD],
+    (instregex "^L(B|H|W|D)(A|Z)?(U)?(X)?(8|TLS)?(_)?(32)?$")>;
+
+  def : InstRW<[P8_LU_or_LS_3C, P8_ISSUE_FXLD], (instrs
+    LBARX, LBARXL, LBEPX, LBZCIX, LDARX, LDARXL, LDBRX, LDCIX, LFDEPX, LHARX, LHARXL, LHBRX, LXSIWAX,
+    LHBRX8, LHEPX, LHZCIX, LMW, LSWI, LVSL, LVSR, LWARX, LWARXL, LWBRX, LWBRX8, LWEPX, LWZCIX)>;
+
+  def : InstRW<[P8_LU_or_LS_or_FX_2C, P8_ISSUE_FX], (instrs
+    (instregex "^ADDI(C)?(dtprel|tlsgd|toc)?(L)?(ADDR)?(32|8)?$"),
+    (instregex "^ADDIS(dtprel|tlsgd|toc|gotTprel)?(HA)?(32|8)?$"),
+    (instregex "^LI(S)?(8)?$"),
+    (instregex "^ADD(M)?(E)?(4|8)?(TLS)?(_)?$"),
+    (instregex "^SUBF(M|Z)?(E)?(IC)?(4|8)?$"),
+    (instregex "^NEG(8)?(O)?$"))>;
+
+  // Instructions of PM pipeline
+
+  def : InstRW<[P8_PM_2C, P8_ISSUE_VSX], (instrs
+    (instregex "^VPK(S|U)(H|W|D)(S|U)(M|S)$"),
+    (instregex "^VUPK(H|L)(P|S)(H|B|W|X)$"),
+    (instregex "^VSPLT(IS)?(B|H|W)(s)?$"),
+    (instregex "^(XX|V)MRG(E|O|H|L)(B|H|W)$"),
+    XXPERMDI, XXPERMDIs, XXSEL, XXSLDWI, XXSLDWIs, XXSPLTW, XXSPLTWs, VPERMXOR,
+    VPKPX, VPERM, VBPERMQ, VGBBD, VSEL, VSL, VSLDOI, VSLO, VSR, VSRO)>;
+
+  def : InstRW<[P8_XS_2C, P8_ISSUE_VSX], (instrs
+    (instregex "^V(ADD|SUB)(S|U)(B|H|W|D)(M|S)$"),
+    (instregex "^X(S|V)(MAX|MIN)(D|S)P$"),
+    (instregex "^V(S)?(R)?(L)?(A)?(B|D|H|W)$"),
+    (instregex "^VAVG(S|U)(B|H|W)$"),
+    (instregex "^VM(AX|IN)(S|U)(B|H|W|D)$"),
+    (instregex "^(XX|V)(L)?(N)?(X)?(AND|OR|EQV)(C)?$"),
+    (instregex "^(X)?VCMP(EQ|GT|GE|B)(F|S|U)?(B|H|W|D|P|S)(P)?(_rec)?$"),
+    (instregex "^VCLZ(B|H|W|D)$"),
+    (instregex "^VPOPCNT(B|H|W)$"),
+    XXLORf, XXLXORdpz, XXLXORspz, XXLXORz, VEQV, VMAXFP, VMINFP,
+    VSHASIGMAD, VSHASIGMAW, VSUBCUW, VADDCUW, MFVSCR, MTVSCR)>;
+
+  def : InstRW<[P8_XS_4C, P8_ISSUE_VSX], (instrs
+    (instregex "^V(ADD|SUB)(E)?(C)?UQ(M)?$"),
+    VPOPCNTD)>;
+
+  // Scalar FP compares and the divide/sqrt test instructions use the XS unit
+  // (P8_XS_9C). XS is a sub-unit of P8_VMX, so these are issued through the
+  // shared VMX/FP ports (P8_ISSUE_VSX) like the other P8_XS_* mappings.
+  // NOTE(review): these instructions write a CR field; confirm against the
+  // POWER8 documentation that they do not occupy the CR issue port.
+  def : InstRW<[P8_XS_9C, P8_ISSUE_VSX], (instrs
+    (instregex "^(F|XS)CMP(O|U)(D|S)(P)?$"),
+    (instregex "^(F|XS|XV)T(DIV|SQRT)((D|S)P)?$"))>;
+
+  // Instructions of VX pipeline
+
+  def : InstRW<[P8_VX_7C, P8_ISSUE_VSX], (instrs
+    (instregex "^V(M)?SUM(2|4)?(M|S|U)(B|H|W)(M|S)$"),
+    (instregex "^VMUL(E|O)?(S|U)(B|H|W)(M)?$"),
+    VMHADDSHS, VMHRADDSHS, VMLADDUHM)>;
+
+  // Instructions of BR pipeline
+
+  def : InstRW<[P8_BR_2C, P8_ISSUE_BR], (instrs
+    (instregex "^(g)?B(C)?(C)?(CTR)?(L)?(A)?(R)?(L)?(8)?(_LD|_LWZ)?(always|into_toc|at)?(_RM)?(n)?$"),
+    (instregex "^BD(N)?Z(L)?(R|A)?(L)?(m|p|8)?$"),
+    (instregex "^BL(R|A)?(8)?(_NOP)?(_TLS)?(_)?(RM)?$"))>;
+
+  // Instructions of DFP pipeline
+  // DFP operations also use float/vector/crypto issue ports.
+  def : InstRW<[P8_DFU_13C, P8_ISSUE_VSX], (instrs
+    (instregex "^DTST(D|S)(C|F|G)(Q)?$"),
+    (instregex "^D(Q|X)EX(Q)?(_rec)?$"),
+    (instregex "^D(ADD|SUB|IEX|QUA|RRND|RINTX|RINTN|CTDP|DEDPD|ENBCD)(_rec)?$"),
+    (instregex "^DSC(L|R)I(_rec)?$"),
+    BCDADD_rec, BCDSUB_rec, DCMPO, DCMPU, DTSTEX, DQUAI)>;
+
+  def : InstRW<[P8_DFU_15C, P8_ISSUE_VSX], (instrs
+    (instregex "^DRINT(N|X)Q(_rec)?$"),
+    DCMPOQ, DCMPUQ, DRRNDQ, DRRNDQ_rec, DIEXQ, DIEXQ_rec, DQUAIQ, DQUAIQ_rec,
+    DTSTEXQ, DDEDPDQ, DDEDPDQ_rec, DENBCDQ, DENBCDQ_rec, DSCLIQ, DSCLIQ_rec,
+    DSCRIQ, DSCRIQ_rec, DCTQPQ, DCTQPQ_rec)>;
+
+  def : InstRW<[P8_DFU_17C, P8_ISSUE_VSX], (instregex "^D(ADD|SUB|QUA)Q(_rec)?$")>;
+  def : InstRW<[P8_DFU_25C, P8_ISSUE_VSX], (instrs DRSP, DRSP_rec, DCTFIX, DCTFIX_rec)>;
+  def : InstRW<[P8_DFU_32C, P8_ISSUE_VSX], (instrs DCFFIX, DCFFIX_rec)>;
+  def : InstRW<[P8_DFU_34C, P8_ISSUE_VSX], (instrs DCFFIXQ, DCFFIXQ_rec)>;
+  def : InstRW<[P8_DFU_40C, P8_ISSUE_VSX], (instrs DMUL, DMUL_rec)>;
+  def : InstRW<[P8_DFU_90C, P8_ISSUE_VSX], (instrs DMULQ, DMULQ_rec)>;
+  def : InstRW<[P8_DFU_96C, P8_ISSUE_VSX], (instrs DDIV, DDIV_rec)>;
+  def : InstRW<[P8_DFU_172C, P8_ISSUE_VSX], (instrs DDIVQ, DDIVQ_rec)>;
+
+  // Direct move instructions
+
+   def : InstRW<[P8_DM_5C, P8_ISSUE_VSX], (instrs
+     MFVRD, MFVSRD, MFVRWZ, MFVSRWZ, MTVRD, MTVSRD, MTVRWA, MTVSRWA, MTVRWZ, MTVSRWZ)>;
+}

diff  --git a/llvm/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll b/llvm/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
index c30efc1434b8c8e..4815b5ac5cebf01 100644
--- a/llvm/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
+++ b/llvm/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
@@ -9,14 +9,14 @@ define i32 @test(i32 %i) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis 4, 2, .LC0 at toc@ha
 ; CHECK-NEXT:    extsw 3, 3
-; CHECK-NEXT:    addis 5, 2, .LC1 at toc@ha
 ; CHECK-NEXT:    ld 4, .LC0 at toc@l(4)
 ; CHECK-NEXT:    ld 4, 0(4)
 ; CHECK-NEXT:    lbzx 3, 4, 3
-; CHECK-NEXT:    ld 4, .LC1 at toc@l(5)
+; CHECK-NEXT:    addis 4, 2, .LC1 at toc@ha
+; CHECK-NEXT:    ld 4, .LC1 at toc@l(4)
 ; CHECK-NEXT:    subfic 3, 3, 1
-; CHECK-NEXT:    ld 4, 0(4)
 ; CHECK-NEXT:    extsw 3, 3
+; CHECK-NEXT:    ld 4, 0(4)
 ; CHECK-NEXT:    sldi 3, 3, 2
 ; CHECK-NEXT:    lwzx 3, 4, 3
 ; CHECK-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/BreakableToken-reduced.ll b/llvm/test/CodeGen/PowerPC/BreakableToken-reduced.ll
index 5302257076071e1..1b62dff024e87fc 100644
--- a/llvm/test/CodeGen/PowerPC/BreakableToken-reduced.ll
+++ b/llvm/test/CodeGen/PowerPC/BreakableToken-reduced.ll
@@ -202,58 +202,52 @@ define void @_ZN5clang6format22BreakableStringLiteral11insertBreakEjjSt4pairImjE
 ; CHECK-LABEL: _ZN5clang6format22BreakableStringLiteral11insertBreakEjjSt4pairImjERNS0_17WhitespaceManagerE:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    ld 10, 56(3)
-; CHECK-NEXT:    lwz 0, 40(3)
+; CHECK-NEXT:    lwz 4, 40(3)
 ; CHECK-NEXT:    mr 12, 8
 ; CHECK-NEXT:    cmpldi 10, 0
 ; CHECK-NEXT:    beq 0, .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %if.end.i.i
 ; CHECK-NEXT:    ld 9, 48(3)
-; CHECK-NEXT:    lbz 4, 0(9)
-; CHECK-NEXT:    cmpwi 4, 64
+; CHECK-NEXT:    lbz 8, 0(9)
+; CHECK-NEXT:    cmpwi 8, 64
 ; CHECK-NEXT:    b .LBB0_3
 ; CHECK-NEXT:  .LBB0_2: # %entry._ZNK4llvm9StringRef10startswithES0_.exit_crit_edge
 ; CHECK-NEXT:    ld 9, 48(3)
 ; CHECK-NEXT:    crxor 2, 2, 2
 ; CHECK-NEXT:  .LBB0_3: # %_ZNK4llvm9StringRef10startswithES0_.exit
-; CHECK-NEXT:    mflr 4
-; CHECK-NEXT:    .cfi_def_cfa_offset 160
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    .cfi_def_cfa_offset 144
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset r28, -32
-; CHECK-NEXT:    .cfi_offset r29, -24
 ; CHECK-NEXT:    .cfi_offset r30, -16
-; CHECK-NEXT:    std 28, -32(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stdu 1, -160(1)
-; CHECK-NEXT:    std 4, 176(1)
+; CHECK-NEXT:    stdu 1, -144(1)
 ; CHECK-NEXT:    li 8, 0
 ; CHECK-NEXT:    li 11, 1
+; CHECK-NEXT:    std 0, 160(1)
 ; CHECK-NEXT:    add 5, 6, 5
-; CHECK-NEXT:    iseleq 30, 11, 8
+; CHECK-NEXT:    lbz 30, 20(3)
+; CHECK-NEXT:    clrldi 6, 7, 32
+; CHECK-NEXT:    iseleq 8, 11, 8
 ; CHECK-NEXT:    ld 11, 64(3)
-; CHECK-NEXT:    lbz 29, 20(3)
-; CHECK-NEXT:    lwz 28, 16(3)
 ; CHECK-NEXT:    add 5, 5, 10
+; CHECK-NEXT:    clrldi 5, 5, 32
+; CHECK-NEXT:    mr 7, 11
+; CHECK-NEXT:    sub 0, 4, 8
 ; CHECK-NEXT:    ld 4, 8(3)
 ; CHECK-NEXT:    ld 8, 72(3)
-; CHECK-NEXT:    sub 3, 0, 30
-; CHECK-NEXT:    clrldi 5, 5, 32
-; CHECK-NEXT:    li 0, 1
-; CHECK-NEXT:    clrldi 6, 7, 32
-; CHECK-NEXT:    extsw 30, 3
+; CHECK-NEXT:    lwz 3, 16(3)
+; CHECK-NEXT:    std 30, 96(1)
+; CHECK-NEXT:    extsw 0, 0
+; CHECK-NEXT:    std 3, 112(1)
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    std 0, 120(1)
+; CHECK-NEXT:    std 3, 104(1)
 ; CHECK-NEXT:    mr 3, 12
-; CHECK-NEXT:    mr 7, 11
-; CHECK-NEXT:    std 0, 104(1)
-; CHECK-NEXT:    std 28, 112(1)
-; CHECK-NEXT:    std 29, 96(1)
-; CHECK-NEXT:    std 30, 120(1)
 ; CHECK-NEXT:    bl _ZN5clang6format17WhitespaceManager24replaceWhitespaceInTokenERKNS0_11FormatTokenEjjN4llvm9StringRefES6_bjji
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    addi 1, 1, 160
+; CHECK-NEXT:    addi 1, 1, 144
 ; CHECK-NEXT:    ld 0, 16(1)
 ; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 28, -32(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    mtlr 0
 ; CHECK-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/CSR-fit.ll b/llvm/test/CodeGen/PowerPC/CSR-fit.ll
index e88f38178e65edd..0c63b029ff15639 100644
--- a/llvm/test/CodeGen/PowerPC/CSR-fit.ll
+++ b/llvm/test/CodeGen/PowerPC/CSR-fit.ll
@@ -21,8 +21,8 @@ define dso_local signext i32 @caller1(i32 signext %a, i32 signext %b) local_unna
 ; CHECK-PWR8-NEXT:    #APP
 ; CHECK-PWR8-NEXT:    add r3, r3, r4
 ; CHECK-PWR8-NEXT:    #NO_APP
-; CHECK-PWR8-NEXT:    std r0, 192(r1)
 ; CHECK-PWR8-NEXT:    extsw r3, r3
+; CHECK-PWR8-NEXT:    std r0, 192(r1)
 ; CHECK-PWR8-NEXT:    bl callee
 ; CHECK-PWR8-NEXT:    nop
 ; CHECK-PWR8-NEXT:    addi r1, r1, 176
@@ -75,8 +75,8 @@ define dso_local signext i32 @caller2(i32 signext %a, i32 signext %b) local_unna
 ; CHECK-PWR8-NEXT:    #APP
 ; CHECK-PWR8-NEXT:    add r3, r3, r4
 ; CHECK-PWR8-NEXT:    #NO_APP
-; CHECK-PWR8-NEXT:    std r0, 192(r1)
 ; CHECK-PWR8-NEXT:    extsw r3, r3
+; CHECK-PWR8-NEXT:    std r0, 192(r1)
 ; CHECK-PWR8-NEXT:    bl callee
 ; CHECK-PWR8-NEXT:    nop
 ; CHECK-PWR8-NEXT:    addi r1, r1, 176

diff  --git a/llvm/test/CodeGen/PowerPC/CompareEliminationSpillIssue.ll b/llvm/test/CodeGen/PowerPC/CompareEliminationSpillIssue.ll
index ac2ae51de62e492..480827b15d64640 100644
--- a/llvm/test/CodeGen/PowerPC/CompareEliminationSpillIssue.ll
+++ b/llvm/test/CodeGen/PowerPC/CompareEliminationSpillIssue.ll
@@ -42,10 +42,10 @@ entry:
 ; CHECK: extsw r3,
 ; CHECK: bl call
 ; CHECK: sub r3,
-; CHECK: rldicl r3, r3, 1, 63
 ; CHECK: std r3, [[OFF:[0-9]+]](r1)
 ; CHECK: #APP
 ; CHECK: ld r3, [[OFF]](r1)
+; CHECK: rldicl r3, r3, 1, 63
 ; CHECK: xori r3, r3, 1
 ; CHECK: blr
 

diff  --git a/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll b/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll
index 5c0f721eed98fc7..7a3ef75f0aae145 100644
--- a/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll
+++ b/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll
@@ -22,15 +22,15 @@ define dso_local signext i32 @test_32byte_vector() nounwind {
 ; CHECK-LE-NEXT:    subfic r0, r0, -96
 ; CHECK-LE-NEXT:    stdux r1, r1, r0
 ; CHECK-LE-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
-; CHECK-LE-NEXT:    addis r4, r2, .LCPI0_1 at toc@ha
 ; CHECK-LE-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
-; CHECK-LE-NEXT:    addi r4, r4, .LCPI0_1 at toc@l
 ; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-LE-NEXT:    addi r4, r1, 48
+; CHECK-LE-NEXT:    addi r3, r1, 48
+; CHECK-LE-NEXT:    stxvd2x vs0, 0, r3
+; CHECK-LE-NEXT:    addis r3, r2, .LCPI0_1 at toc@ha
+; CHECK-LE-NEXT:    addi r3, r3, .LCPI0_1 at toc@l
+; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-NEXT:    addi r3, r1, 32
-; CHECK-LE-NEXT:    stxvd2x vs0, 0, r4
-; CHECK-LE-NEXT:    stxvd2x vs1, 0, r3
+; CHECK-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-NEXT:    bl test
 ; CHECK-LE-NEXT:    nop
 ; CHECK-LE-NEXT:    lwa r3, 32(r1)
@@ -158,25 +158,24 @@ define dso_local void @test_Array() nounwind {
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    mflr r0
 ; CHECK-LE-NEXT:    stdu r1, -176(r1)
-; CHECK-LE-NEXT:    addis r4, r2, Arr1 at toc@ha
 ; CHECK-LE-NEXT:    li r3, 0
-; CHECK-LE-NEXT:    std r0, 192(r1)
+; CHECK-LE-NEXT:    addis r4, r2, Arr1 at toc@ha
 ; CHECK-LE-NEXT:    li r6, 65
+; CHECK-LE-NEXT:    std r0, 192(r1)
 ; CHECK-LE-NEXT:    addi r5, r1, 46
-; CHECK-LE-NEXT:    addi r4, r4, Arr1 at toc@l
 ; CHECK-LE-NEXT:    stw r3, 44(r1)
-; CHECK-LE-NEXT:    addi r4, r4, -1
+; CHECK-LE-NEXT:    addi r4, r4, Arr1 at toc@l
 ; CHECK-LE-NEXT:    mtctr r6
+; CHECK-LE-NEXT:    addi r4, r4, -1
 ; CHECK-LE-NEXT:    bdz .LBB2_2
 ; CHECK-LE-NEXT:    .p2align 5
 ; CHECK-LE-NEXT:  .LBB2_1: # %for.body
 ; CHECK-LE-NEXT:    #
 ; CHECK-LE-NEXT:    lbz r6, 1(r4)
-; CHECK-LE-NEXT:    addi r7, r5, 2
 ; CHECK-LE-NEXT:    addi r4, r4, 1
 ; CHECK-LE-NEXT:    addi r3, r3, 1
 ; CHECK-LE-NEXT:    sth r6, 2(r5)
-; CHECK-LE-NEXT:    mr r5, r7
+; CHECK-LE-NEXT:    addi r5, r5, 2
 ; CHECK-LE-NEXT:    bdnz .LBB2_1
 ; CHECK-LE-NEXT:  .LBB2_2: # %for.cond.cleanup
 ; CHECK-LE-NEXT:    addi r3, r1, 48

diff  --git a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
index 924a2dec8b6996c..b4567f9452a66e0 100644
--- a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
+++ b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
@@ -19,8 +19,8 @@ define signext i32 @main() nounwind {
 ; CHECK-NEXT:    addi 6, 1, 46
 ; CHECK-NEXT:    sth 3, 46(1)
 ; CHECK-NEXT:    lis 3, 0
-; CHECK-NEXT:    ori 3, 3, 33059
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    ori 3, 3, 33059
 ; CHECK-NEXT:  .LBB0_1: # %L.entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 5, 0, 6

diff  --git a/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll b/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
index 6409023adfc53c7..25c91fa076f7401 100644
--- a/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
+++ b/llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll
@@ -15,7 +15,6 @@ define void @testExpandPostRAPseudo(ptr nocapture readonly %ptr) {
 ; CHECK-P8:    ld r4, .LC0@toc@l(r4)
 ; CHECK-P8:    xxspltw vs0, vs0, 1
 ; CHECK-P8;    stxvd2x vs0, 0, r4
-; CHECK-P8:    lis r4, 1024
 ; CHECK-P8:    lfiwax f0, 0, r3
 ; CHECK-P8:    addis r3, r2, .LC1@toc@ha
 ; CHECK-P8:    ld r3, .LC1@toc@l(r3)

diff  --git a/llvm/test/CodeGen/PowerPC/aix-dfltabi-rsrvd-reg.ll b/llvm/test/CodeGen/PowerPC/aix-dfltabi-rsrvd-reg.ll
index 122ceb7b7d6833f..0173a88be7eecb5 100644
--- a/llvm/test/CodeGen/PowerPC/aix-dfltabi-rsrvd-reg.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-dfltabi-rsrvd-reg.ll
@@ -86,12 +86,12 @@ entry:
 ; EXTABI:         bb.0.entry:
 ; EXTABI:         liveins: $f1, $x4
 ; EXTABI-DAG:     renamable $f0 = LFD 0, renamable $x4 :: (volatile load (s64) from %ir.b, align 4)
-; EXTABI-DAG:     renamable $f0 = nofpexcept XSADDDP killed renamable $f0, renamable $f1, implicit $rm
-; EXTABI-DAG:     renamable $vf31 = nofpexcept XSMULDP killed renamable $f1, renamable $f1, implicit $rm
+; EXTABI-DAG:     renamable $f0 = nofpexcept XSADDDP killed renamable $f0, $f1, implicit $rm
 ; EXTABI:         STFD killed renamable $f0, 0, renamable $x4 :: (volatile store (s64) into %ir.b, align 4)
 ; EXTABI-LABEL:   INLINEASM
-; EXTABI-DAG:     renamable $f0 = LFD 0, renamable $x4 :: (volatile load (s64) from %ir.b, align 4)
-; EXTABI-DAG:     renamable $f0 = nofpexcept XSADDDP killed renamable $vf31, killed renamable $f0, implicit $rm
+; EXTABI-DAG:     renamable $f0 = nofpexcept XSMULDP killed renamable $vf31, renamable $vf31, implicit $rm
+; EXTABI-DAG:     renamable $f1 = LFD 0, renamable $x4 :: (volatile load (s64) from %ir.b, align 4)
+; EXTABI-DAG:     renamable $f0 = nofpexcept XSADDDP killed renamable $f0, killed renamable $f1, implicit $rm
 ; EXTABI-DAG:     STFD killed renamable $f0, 0, renamable $x4 :: (volatile store (s64) into %ir.b, align 4)
 ; EXTABI:         renamable $f1 = LFD 0, killed renamable $x4 :: (volatile load (s64) from %ir.b, align 4)
 
@@ -144,12 +144,12 @@ entry:
 ; EXTABI:         body:             |
 ; EXTABI-DAG:     bb.0.entry:
 ; EXTABI-DAG:     liveins: $v2, $x3
-; EXTABI-DAG:     renamable $v3 = LXVW4X $zero8, renamable $x3 :: (volatile load (s128) from %ir.b, align 4)
 ; EXTABI-DAG:     renamable $v31 = COPY $v2
-; EXTABI-DAG:     renamable $v2 = VADDUWM killed renamable $v3, $v2
-; EXTABI-LABEL:   INLINEASM    
 ; EXTABI-DAG:     renamable $v2 = LXVW4X $zero8, renamable $x3 :: (volatile load (s128) from %ir.b, align 4)
-; EXTABI-DAG:     renamable $v3 = VMULUWM killed renamable $v31, renamable $v31
-; EXTABI-DAG:     renamable $v2 = VADDUWM killed renamable $v3, killed renamable $v2
+; EXTABI-DAG:     renamable $v2 = VADDUWM killed renamable $v2, renamable $v31
+; EXTABI-LABEL:   INLINEASM
+; EXTABI-DAG:     renamable $v2 = VMULUWM killed renamable $v31, renamable $v31
+; EXTABI-DAG:     renamable $v3 = LXVW4X $zero8, renamable $x3 :: (volatile load (s128) from %ir.b, align 4)
+; EXTABI-DAG:     renamable $v2 = VADDUWM killed renamable $v2, killed renamable $v3
 ; EXTABI-DAG:     STXVW4X killed renamable $v2, $zero8, renamable $x3 :: (volatile store (s128) into %ir.b, align 4)
 ; EXTABI:         renamable $v2 = LXVW4X $zero8, killed renamable $x3 :: (volatile load (s128) from %ir.b, align 4)

diff  --git a/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll b/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll
index 249bd34cabbb076..be091b0777293f2 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll
@@ -36,20 +36,20 @@ define void @test_aix_splatimm(i32 %arg, i32 %arg1, i32 %arg2) {
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    bclr 12, 20, 0
 ; CHECK-NEXT:  # %bb.1: # %bb3
+; CHECK-NEXT:    slwi 3, 3, 8
 ; CHECK-NEXT:    srwi 4, 4, 16
+; CHECK-NEXT:    neg 3, 3
 ; CHECK-NEXT:    srwi 5, 5, 16
 ; CHECK-NEXT:    mullw 4, 5, 4
 ; CHECK-NEXT:    lwz 5, 0(3)
-; CHECK-NEXT:    slwi 3, 3, 8
-; CHECK-NEXT:    neg 3, 3
-; CHECK-NEXT:    srwi 5, 5, 1
 ; CHECK-NEXT:    mtvsrd 34, 3
 ; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    srwi 5, 5, 1
 ; CHECK-NEXT:    mullw 4, 4, 5
-; CHECK-NEXT:    vsplth 2, 2, 3
-; CHECK-NEXT:    stxvd2x 34, 0, 3
 ; CHECK-NEXT:    neg 4, 4
 ; CHECK-NEXT:    mtvsrd 35, 4
+; CHECK-NEXT:    vsplth 2, 2, 3
+; CHECK-NEXT:    stxvd2x 34, 0, 3
 ; CHECK-NEXT:    vsplth 3, 3, 3
 ; CHECK-NEXT:    stxvd2x 35, 0, 3
 bb:

diff  --git a/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
index dca6f8b0c1a428a..19e298a633e0bd2 100644
--- a/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
@@ -25,9 +25,9 @@ entry:
 define <8 x i16> @builds(i16 zeroext %a) {
 ; CHECK-LABEL: builds:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi 4, 1, -16
 ; CHECK-NEXT:    sth 3, -16(1)
-; CHECK-NEXT:    lxvw4x 34, 0, 4
+; CHECK-NEXT:    addi 3, 1, -16
+; CHECK-NEXT:    lxvw4x 34, 0, 3
 ; CHECK-NEXT:    vsplth 2, 2, 0
 ; CHECK-NEXT:    blr
 entry:
@@ -40,9 +40,9 @@ entry:
 define <4 x i32> @buildi(i32 zeroext %a) {
 ; CHECK-LABEL: buildi:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi 4, 1, -16
 ; CHECK-NEXT:    stw 3, -16(1)
-; CHECK-NEXT:    lxvw4x 0, 0, 4
+; CHECK-NEXT:    addi 3, 1, -16
+; CHECK-NEXT:    lxvw4x 0, 0, 3
 ; CHECK-NEXT:    xxspltw 34, 0, 0
 ; CHECK-NEXT:    blr
 entry:
@@ -55,14 +55,14 @@ entry:
 define <2 x i64> @buildl(i64 %a) {
 ; CHECK-LABEL: buildl:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lwz 5, L..C0(2) # %const.0
-; CHECK-NEXT:    stw 4, -16(1)
 ; CHECK-NEXT:    stw 3, -32(1)
+; CHECK-NEXT:    lwz 3, L..C0(2) # %const.0
+; CHECK-NEXT:    stw 4, -16(1)
+; CHECK-NEXT:    lxvw4x 34, 0, 3
 ; CHECK-NEXT:    addi 3, 1, -16
-; CHECK-NEXT:    addi 4, 1, -32
 ; CHECK-NEXT:    lxvw4x 35, 0, 3
-; CHECK-NEXT:    lxvw4x 36, 0, 4
-; CHECK-NEXT:    lxvw4x 34, 0, 5
+; CHECK-NEXT:    addi 3, 1, -32
+; CHECK-NEXT:    lxvw4x 36, 0, 3
 ; CHECK-NEXT:    vperm 2, 4, 3, 2
 ; CHECK-NEXT:    blr
 entry:
@@ -990,11 +990,11 @@ entry:
 define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) {
 ; CHECK-LABEL: getvelsl:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    add 3, 3, 3
+; CHECK-NEXT:    add 5, 3, 3
 ; CHECK-NEXT:    addi 4, 1, -16
-; CHECK-NEXT:    addi 5, 3, 1
+; CHECK-NEXT:    rlwinm 3, 5, 2, 28, 29
+; CHECK-NEXT:    addi 5, 5, 1
 ; CHECK-NEXT:    stxvw4x 34, 0, 4
-; CHECK-NEXT:    rlwinm 3, 3, 2, 28, 29
 ; CHECK-NEXT:    rlwinm 5, 5, 2, 28, 29
 ; CHECK-NEXT:    lwzx 3, 4, 3
 ; CHECK-NEXT:    lwzx 4, 4, 5
@@ -1008,11 +1008,11 @@ entry:
 define i64 @getvelul(<2 x i64> %vul, i32 signext %i) {
 ; CHECK-LABEL: getvelul:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    add 3, 3, 3
+; CHECK-NEXT:    add 5, 3, 3
 ; CHECK-NEXT:    addi 4, 1, -16
-; CHECK-NEXT:    addi 5, 3, 1
+; CHECK-NEXT:    rlwinm 3, 5, 2, 28, 29
+; CHECK-NEXT:    addi 5, 5, 1
 ; CHECK-NEXT:    stxvw4x 34, 0, 4
-; CHECK-NEXT:    rlwinm 3, 3, 2, 28, 29
 ; CHECK-NEXT:    rlwinm 5, 5, 2, 28, 29
 ; CHECK-NEXT:    lwzx 3, 4, 3
 ; CHECK-NEXT:    lwzx 4, 4, 5

diff  --git a/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll b/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
index c86727167c2dd58..99262694b353773 100644
--- a/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
+++ b/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
@@ -32,17 +32,17 @@ define void @test_f2(ptr %P, ptr %Q, ptr %S) {
 ; AIX-P8-32:       # %bb.0:
 ; AIX-P8-32-NEXT:    li r6, 4
 ; AIX-P8-32-NEXT:    lxsiwzx v3, 0, r3
-; AIX-P8-32-NEXT:    lxsiwzx v5, 0, r4
+; AIX-P8-32-NEXT:    lxsiwzx v4, 0, r4
 ; AIX-P8-32-NEXT:    lxsiwzx v2, r3, r6
-; AIX-P8-32-NEXT:    lxsiwzx v4, r4, r6
 ; AIX-P8-32-NEXT:    vmrgow v2, v3, v2
-; AIX-P8-32-NEXT:    vmrgow v3, v5, v4
+; AIX-P8-32-NEXT:    lxsiwzx v3, r4, r6
+; AIX-P8-32-NEXT:    vmrgow v3, v4, v3
 ; AIX-P8-32-NEXT:    xvaddsp vs0, v2, v3
 ; AIX-P8-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; AIX-P8-32-NEXT:    xscvspdpn f0, vs0
-; AIX-P8-32-NEXT:    xscvspdpn f1, vs1
 ; AIX-P8-32-NEXT:    stfs f0, 0(r5)
-; AIX-P8-32-NEXT:    stfs f1, 4(r5)
+; AIX-P8-32-NEXT:    xscvspdpn f0, vs1
+; AIX-P8-32-NEXT:    stfs f0, 4(r5)
 ; AIX-P8-32-NEXT:    blr
 ;
 ; AIX-P9-64-LABEL: test_f2:

diff  --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 9e8b3a625841d4e..cfd395e0317116d 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -22,12 +22,12 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addis 3, 2, sc@toc@ha
 ; CHECK-NEXT:    std 26, -48(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 27, -40(1) # 8-byte Folded Spill
+; CHECK-NEXT:    addi 4, 3, sc@toc@l
 ; CHECK-NEXT:    std 28, -32(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
-; CHECK-NEXT:    addi 4, 3, sc@toc@l
-; CHECK-NEXT:    li 3, 1
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    li 3, 1
 ; CHECK-NEXT:  .LBB0_1: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbarx 5, 0, 4
@@ -37,8 +37,8 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; CHECK-NEXT:  # %bb.2: # %entry
 ; CHECK-NEXT:    addis 5, 2, uc@toc@ha
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    addi 5, 5, uc@toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 5, 5, uc@toc@l
 ; CHECK-NEXT:  .LBB0_3: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbarx 6, 0, 5
@@ -48,8 +48,8 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; CHECK-NEXT:  # %bb.4: # %entry
 ; CHECK-NEXT:    addis 6, 2, ss@toc@ha
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    addi 6, 6, ss@toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 6, 6, ss@toc@l
 ; CHECK-NEXT:  .LBB0_5: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 7, 0, 6
@@ -59,8 +59,8 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; CHECK-NEXT:  # %bb.6: # %entry
 ; CHECK-NEXT:    addis 7, 2, us at toc@ha
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    addi 8, 7, us at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 8, 7, us at toc@l
 ; CHECK-NEXT:  .LBB0_7: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 7, 0, 8
@@ -70,8 +70,8 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; CHECK-NEXT:  # %bb.8: # %entry
 ; CHECK-NEXT:    addis 7, 2, si at toc@ha
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    addi 9, 7, si at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 9, 7, si at toc@l
 ; CHECK-NEXT:  .LBB0_9: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 7, 0, 9
@@ -81,8 +81,8 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; CHECK-NEXT:  # %bb.10: # %entry
 ; CHECK-NEXT:    addis 7, 2, ui at toc@ha
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    addi 10, 7, ui at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 10, 7, ui at toc@l
 ; CHECK-NEXT:  .LBB0_11: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 7, 0, 10
@@ -92,9 +92,9 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; CHECK-NEXT:  # %bb.12: # %entry
 ; CHECK-NEXT:    addis 7, 2, sll at toc@ha
 ; CHECK-NEXT:    lwsync
+; CHECK-NEXT:    sync
 ; CHECK-NEXT:    addi 11, 7, sll at toc@l
 ; CHECK-NEXT:    li 7, 1
-; CHECK-NEXT:    sync
 ; CHECK-NEXT:  .LBB0_13: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 12, 0, 11
@@ -104,8 +104,8 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; CHECK-NEXT:  # %bb.14: # %entry
 ; CHECK-NEXT:    addis 12, 2, ull at toc@ha
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    addi 12, 12, ull at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 12, 12, ull at toc@l
 ; CHECK-NEXT:  .LBB0_15: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 30, 0, 12
@@ -329,8 +329,8 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; CHECK-NEXT:    stdcx. 0, 0, 12
 ; CHECK-NEXT:    bne 0, .LBB0_63
 ; CHECK-NEXT:  # %bb.64: # %entry
-; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    addis 30, 2, u128 at toc@ha
+; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    addi 0, 30, u128 at toc@l
 ; CHECK-NEXT:    li 30, 0
@@ -342,10 +342,10 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; CHECK-NEXT:    stqcx. 26, 0, 0
 ; CHECK-NEXT:    bne 0, .LBB0_65
 ; CHECK-NEXT:  # %bb.66: # %entry
-; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    addis 29, 2, s128 at toc@ha
-; CHECK-NEXT:    sync
+; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    addi 0, 29, s128 at toc@l
+; CHECK-NEXT:    sync
 ; CHECK-NEXT:  .LBB0_67: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lqarx 28, 0, 0
@@ -1237,9 +1237,9 @@ define dso_local void @test_fetch_and_op() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addis 4, 2, sc at toc@ha
 ; CHECK-NEXT:    std 22, -80(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 23, -72(1) # 8-byte Folded Spill
+; CHECK-NEXT:    li 3, 11
 ; CHECK-NEXT:    std 24, -64(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 25, -56(1) # 8-byte Folded Spill
-; CHECK-NEXT:    li 3, 11
 ; CHECK-NEXT:    addi 6, 4, sc at toc@l
 ; CHECK-NEXT:    std 26, -48(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 27, -40(1) # 8-byte Folded Spill
@@ -1257,8 +1257,8 @@ define dso_local void @test_fetch_and_op() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    stb 5, sc at toc@l(4)
 ; CHECK-NEXT:    addis 5, 2, uc at toc@ha
-; CHECK-NEXT:    addi 8, 5, uc at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 8, 5, uc at toc@l
 ; CHECK-NEXT:  .LBB1_3: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbarx 7, 0, 8
@@ -1269,8 +1269,8 @@ define dso_local void @test_fetch_and_op() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    stb 7, uc at toc@l(5)
 ; CHECK-NEXT:    addis 7, 2, ss at toc@ha
-; CHECK-NEXT:    addi 10, 7, ss at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 10, 7, ss at toc@l
 ; CHECK-NEXT:  .LBB1_5: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 9, 0, 10
@@ -1281,8 +1281,8 @@ define dso_local void @test_fetch_and_op() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    sth 9, ss at toc@l(7)
 ; CHECK-NEXT:    addis 9, 2, us at toc@ha
-; CHECK-NEXT:    addi 0, 9, us at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 0, 9, us at toc@l
 ; CHECK-NEXT:  .LBB1_7: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 11, 0, 0
@@ -1293,8 +1293,8 @@ define dso_local void @test_fetch_and_op() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addis 12, 2, si at toc@ha
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    sth 11, us at toc@l(9)
-; CHECK-NEXT:    addi 29, 12, si at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 29, 12, si at toc@l
 ; CHECK-NEXT:  .LBB1_9: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 11, 0, 29
@@ -1305,8 +1305,8 @@ define dso_local void @test_fetch_and_op() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addis 30, 2, ui at toc@ha
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    stw 11, si at toc@l(12)
-; CHECK-NEXT:    addi 27, 30, ui at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 27, 30, ui at toc@l
 ; CHECK-NEXT:  .LBB1_11: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 11, 0, 27
@@ -1318,8 +1318,8 @@ define dso_local void @test_fetch_and_op() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    stw 11, ui at toc@l(30)
 ; CHECK-NEXT:    li 11, 11
-; CHECK-NEXT:    addi 25, 28, sll at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 25, 28, sll at toc@l
 ; CHECK-NEXT:  .LBB1_13: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 26, 0, 25
@@ -1330,8 +1330,8 @@ define dso_local void @test_fetch_and_op() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    std 26, sll at toc@l(28)
 ; CHECK-NEXT:    addis 26, 2, ull at toc@ha
-; CHECK-NEXT:    addi 24, 26, ull at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 24, 26, ull at toc@l
 ; CHECK-NEXT:  .LBB1_15: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 23, 0, 24
@@ -2591,11 +2591,11 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
 ; CHECK-NEXT:    std 17, -120(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 18, -112(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 19, -104(1) # 8-byte Folded Spill
-; CHECK-NEXT:    addi 4, 3, uc at toc@l
-; CHECK-NEXT:    addi 7, 5, sc at toc@l
 ; CHECK-NEXT:    lbz 6, uc at toc@l(3)
 ; CHECK-NEXT:    std 20, -96(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 21, -88(1) # 8-byte Folded Spill
+; CHECK-NEXT:    addi 4, 3, uc at toc@l
+; CHECK-NEXT:    addi 7, 5, sc at toc@l
 ; CHECK-NEXT:    std 22, -80(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 23, -72(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 24, -64(1) # 8-byte Folded Spill
@@ -2628,8 +2628,8 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    stb 8, uc at toc@l(3)
 ; CHECK-NEXT:    clrlwi 8, 8, 24
-; CHECK-NEXT:    addi 9, 6, ss at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 9, 6, ss at toc@l
 ; CHECK-NEXT:  .LBB2_5: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 10, 0, 9
@@ -2638,11 +2638,11 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
 ; CHECK-NEXT:    bne 0, .LBB2_5
 ; CHECK-NEXT:  # %bb.6: # %entry
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    sth 10, ss at toc@l(6)
 ; CHECK-NEXT:    addis 8, 2, us at toc@ha
+; CHECK-NEXT:    sth 10, ss at toc@l(6)
 ; CHECK-NEXT:    lbz 10, uc at toc@l(3)
-; CHECK-NEXT:    addi 11, 8, us at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 11, 8, us at toc@l
 ; CHECK-NEXT:  .LBB2_7: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 12, 0, 11
@@ -2651,11 +2651,11 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
 ; CHECK-NEXT:    bne 0, .LBB2_7
 ; CHECK-NEXT:  # %bb.8: # %entry
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    sth 12, us at toc@l(8)
 ; CHECK-NEXT:    addis 10, 2, si at toc@ha
+; CHECK-NEXT:    sth 12, us at toc@l(8)
 ; CHECK-NEXT:    lbz 12, uc at toc@l(3)
-; CHECK-NEXT:    addi 0, 10, si at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 0, 10, si at toc@l
 ; CHECK-NEXT:  .LBB2_9: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 30, 0, 0
@@ -2664,11 +2664,11 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
 ; CHECK-NEXT:    bne 0, .LBB2_9
 ; CHECK-NEXT:  # %bb.10: # %entry
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    stw 30, si at toc@l(10)
 ; CHECK-NEXT:    addis 12, 2, ui at toc@ha
+; CHECK-NEXT:    stw 30, si at toc@l(10)
 ; CHECK-NEXT:    lbz 30, uc at toc@l(3)
-; CHECK-NEXT:    addi 29, 12, ui at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 29, 12, ui at toc@l
 ; CHECK-NEXT:  .LBB2_11: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 28, 0, 29
@@ -2677,11 +2677,11 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
 ; CHECK-NEXT:    bne 0, .LBB2_11
 ; CHECK-NEXT:  # %bb.12: # %entry
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    stw 28, ui at toc@l(12)
 ; CHECK-NEXT:    addis 30, 2, sll at toc@ha
+; CHECK-NEXT:    stw 28, ui at toc@l(12)
 ; CHECK-NEXT:    lbz 28, uc at toc@l(3)
-; CHECK-NEXT:    addi 27, 30, sll at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 27, 30, sll at toc@l
 ; CHECK-NEXT:  .LBB2_13: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 26, 0, 27
@@ -2693,8 +2693,8 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addis 28, 2, ull at toc@ha
 ; CHECK-NEXT:    std 26, sll at toc@l(30)
 ; CHECK-NEXT:    lbz 25, uc at toc@l(3)
-; CHECK-NEXT:    addi 26, 28, ull at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 26, 28, ull at toc@l
 ; CHECK-NEXT:  .LBB2_15: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 24, 0, 26
@@ -3070,26 +3070,26 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
 ; CHECK-NEXT:    bne 0, .LBB2_81
 ; CHECK-NEXT:  # %bb.82: # %entry
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    lbz 20, uc at toc@l(3)
 ; CHECK-NEXT:    nand 25, 21, 25
 ; CHECK-NEXT:    li 21, -1
-; CHECK-NEXT:    std 21, 8(23)
 ; CHECK-NEXT:    std 25, u128 at toc@l(24)
-; CHECK-NEXT:    addis 25, 2, s128 at toc@ha
+; CHECK-NEXT:    addis 24, 2, s128 at toc@ha
+; CHECK-NEXT:    lbz 25, uc at toc@l(3)
+; CHECK-NEXT:    std 21, 8(23)
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    addi 24, 25, s128 at toc@l
+; CHECK-NEXT:    addi 23, 24, s128 at toc@l
 ; CHECK-NEXT:  .LBB2_83: # %entry
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lqarx 18, 0, 24
-; CHECK-NEXT:    nand 17, 20, 19
+; CHECK-NEXT:    lqarx 18, 0, 23
+; CHECK-NEXT:    nand 17, 25, 19
 ; CHECK-NEXT:    nand 16, 22, 18
-; CHECK-NEXT:    stqcx. 16, 0, 24
+; CHECK-NEXT:    stqcx. 16, 0, 23
 ; CHECK-NEXT:    bne 0, .LBB2_83
 ; CHECK-NEXT:  # %bb.84: # %entry
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    std 21, 8(24)
-; CHECK-NEXT:    nand 23, 19, 20
-; CHECK-NEXT:    std 23, s128 at toc@l(25)
+; CHECK-NEXT:    std 21, 8(23)
+; CHECK-NEXT:    nand 25, 19, 25
+; CHECK-NEXT:    std 25, s128 at toc@l(24)
 ; CHECK-NEXT:    lbz 25, uc at toc@l(3)
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:  .LBB2_85: # %entry
@@ -4358,10 +4358,10 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    std 28, -32(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT:    addi 6, 3, uc at toc@l
-; CHECK-NEXT:    addi 0, 4, sc at toc@l
 ; CHECK-NEXT:    lbz 5, uc at toc@l(3)
 ; CHECK-NEXT:    lbz 8, sc at toc@l(4)
+; CHECK-NEXT:    addi 6, 3, uc at toc@l
+; CHECK-NEXT:    addi 0, 4, sc at toc@l
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:  .LBB3_1: # %entry
 ; CHECK-NEXT:    #
@@ -4388,12 +4388,12 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    bne 0, .LBB3_4
 ; CHECK-NEXT:  .LBB3_6: # %entry
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    addis 7, 2, ss at toc@ha
 ; CHECK-NEXT:    stb 5, uc at toc@l(3)
-; CHECK-NEXT:    lbz 8, sc at toc@l(4)
-; CHECK-NEXT:    addi 12, 7, ss at toc@l
+; CHECK-NEXT:    lbz 7, sc at toc@l(4)
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    extsb 8, 8
+; CHECK-NEXT:    extsb 8, 7
+; CHECK-NEXT:    addis 7, 2, ss at toc@ha
+; CHECK-NEXT:    addi 12, 7, ss at toc@l
 ; CHECK-NEXT:  .LBB3_7: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 9, 0, 12
@@ -4406,12 +4406,12 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB3_9: # %entry
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    sth 9, ss at toc@l(7)
-; CHECK-NEXT:    addis 7, 2, us at toc@ha
-; CHECK-NEXT:    lbz 8, sc at toc@l(4)
+; CHECK-NEXT:    lbz 7, sc at toc@l(4)
 ; CHECK-NEXT:    lbz 5, uc at toc@l(3)
-; CHECK-NEXT:    addi 11, 7, us at toc@l
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    extsb 8, 8
+; CHECK-NEXT:    extsb 8, 7
+; CHECK-NEXT:    addis 7, 2, us at toc@ha
+; CHECK-NEXT:    addi 11, 7, us at toc@l
 ; CHECK-NEXT:  .LBB3_10: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 9, 0, 11
@@ -4424,12 +4424,12 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB3_12: # %entry
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    sth 9, us at toc@l(7)
-; CHECK-NEXT:    addis 7, 2, si at toc@ha
-; CHECK-NEXT:    lbz 8, sc at toc@l(4)
+; CHECK-NEXT:    lbz 7, sc at toc@l(4)
 ; CHECK-NEXT:    lbz 5, uc at toc@l(3)
-; CHECK-NEXT:    addi 10, 7, si at toc@l
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    extsb 8, 8
+; CHECK-NEXT:    extsb 8, 7
+; CHECK-NEXT:    addis 7, 2, si at toc@ha
+; CHECK-NEXT:    addi 10, 7, si at toc@l
 ; CHECK-NEXT:  .LBB3_13: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 9, 0, 10
@@ -4442,12 +4442,12 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB3_15: # %entry
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    stw 9, si at toc@l(7)
-; CHECK-NEXT:    addis 5, 2, ui at toc@ha
-; CHECK-NEXT:    lbz 8, sc at toc@l(4)
+; CHECK-NEXT:    lbz 5, sc at toc@l(4)
 ; CHECK-NEXT:    lbz 7, uc at toc@l(3)
-; CHECK-NEXT:    addi 9, 5, ui at toc@l
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    extsb 8, 8
+; CHECK-NEXT:    extsb 8, 5
+; CHECK-NEXT:    addis 5, 2, ui at toc@ha
+; CHECK-NEXT:    addi 9, 5, ui at toc@l
 ; CHECK-NEXT:  .LBB3_16: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 30, 0, 9
@@ -4477,8 +4477,8 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    bne 0, .LBB3_19
 ; CHECK-NEXT:  .LBB3_21: # %entry
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    std 28, sll at toc@l(30)
 ; CHECK-NEXT:    addis 29, 2, ull at toc@ha
+; CHECK-NEXT:    std 28, sll at toc@l(30)
 ; CHECK-NEXT:    lbz 7, sc at toc@l(4)
 ; CHECK-NEXT:    lbz 30, uc at toc@l(3)
 ; CHECK-NEXT:    sync
@@ -4511,8 +4511,8 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB3_27: # %entry
 ; CHECK-NEXT:    xor 0, 28, 30
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    cntlzw 0, 0
 ; CHECK-NEXT:    lbz 30, sc at toc@l(4)
+; CHECK-NEXT:    cntlzw 0, 0
 ; CHECK-NEXT:    srwi 0, 0, 5
 ; CHECK-NEXT:    stw 0, ui at toc@l(5)
 ; CHECK-NEXT:    lbz 0, uc at toc@l(3)
@@ -4529,12 +4529,12 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB3_30: # %entry
 ; CHECK-NEXT:    xor 6, 29, 0
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    cntlzw 6, 6
 ; CHECK-NEXT:    lbz 0, sc at toc@l(4)
+; CHECK-NEXT:    cntlzw 6, 6
+; CHECK-NEXT:    extsb 0, 0
 ; CHECK-NEXT:    srwi 6, 6, 5
 ; CHECK-NEXT:    stw 6, ui at toc@l(5)
 ; CHECK-NEXT:    lbz 6, uc at toc@l(3)
-; CHECK-NEXT:    extsb 0, 0
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:  .LBB3_31: # %entry
 ; CHECK-NEXT:    #
@@ -4548,12 +4548,12 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB3_33: # %entry
 ; CHECK-NEXT:    xor 6, 30, 6
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    cntlzw 6, 6
 ; CHECK-NEXT:    lbz 12, sc at toc@l(4)
+; CHECK-NEXT:    cntlzw 6, 6
+; CHECK-NEXT:    extsb 12, 12
 ; CHECK-NEXT:    srwi 6, 6, 5
 ; CHECK-NEXT:    stw 6, ui at toc@l(5)
 ; CHECK-NEXT:    lbz 6, uc at toc@l(3)
-; CHECK-NEXT:    extsb 12, 12
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:  .LBB3_34: # %entry
 ; CHECK-NEXT:    #
@@ -4567,12 +4567,12 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB3_36: # %entry
 ; CHECK-NEXT:    xor 6, 0, 6
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    cntlzw 6, 6
 ; CHECK-NEXT:    lbz 11, sc at toc@l(4)
+; CHECK-NEXT:    cntlzw 6, 6
+; CHECK-NEXT:    extsb 11, 11
 ; CHECK-NEXT:    srwi 6, 6, 5
 ; CHECK-NEXT:    stw 6, ui at toc@l(5)
 ; CHECK-NEXT:    lbz 6, uc at toc@l(3)
-; CHECK-NEXT:    extsb 11, 11
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:  .LBB3_37: # %entry
 ; CHECK-NEXT:    #
@@ -4586,12 +4586,12 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB3_39: # %entry
 ; CHECK-NEXT:    xor 6, 12, 6
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    cntlzw 6, 6
 ; CHECK-NEXT:    lbz 10, sc at toc@l(4)
+; CHECK-NEXT:    cntlzw 6, 6
+; CHECK-NEXT:    extsb 10, 10
 ; CHECK-NEXT:    srwi 6, 6, 5
 ; CHECK-NEXT:    stw 6, ui at toc@l(5)
 ; CHECK-NEXT:    lbz 6, uc at toc@l(3)
-; CHECK-NEXT:    extsb 10, 10
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:  .LBB3_40: # %entry
 ; CHECK-NEXT:    #
@@ -4605,12 +4605,12 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB3_42: # %entry
 ; CHECK-NEXT:    xor 6, 11, 6
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    cntlzw 6, 6
 ; CHECK-NEXT:    lbz 9, sc at toc@l(4)
+; CHECK-NEXT:    cntlzw 6, 6
+; CHECK-NEXT:    extsb 9, 9
 ; CHECK-NEXT:    srwi 6, 6, 5
 ; CHECK-NEXT:    stw 6, ui at toc@l(5)
 ; CHECK-NEXT:    lbz 6, uc at toc@l(3)
-; CHECK-NEXT:    extsb 9, 9
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:  .LBB3_43: # %entry
 ; CHECK-NEXT:    #
@@ -4624,12 +4624,12 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB3_45: # %entry
 ; CHECK-NEXT:    xor 6, 10, 6
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    cntlzd 6, 6
 ; CHECK-NEXT:    lbz 4, sc at toc@l(4)
 ; CHECK-NEXT:    lbz 3, uc at toc@l(3)
+; CHECK-NEXT:    cntlzd 6, 6
+; CHECK-NEXT:    extsb 4, 4
 ; CHECK-NEXT:    rldicl 6, 6, 58, 63
 ; CHECK-NEXT:    stw 6, ui at toc@l(5)
-; CHECK-NEXT:    extsb 4, 4
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:  .LBB3_46: # %entry
 ; CHECK-NEXT:    #
@@ -4643,9 +4643,9 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB3_48: # %entry
 ; CHECK-NEXT:    xor 3, 6, 3
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    cntlzd 3, 3
 ; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
+; CHECK-NEXT:    cntlzd 3, 3
 ; CHECK-NEXT:    ld 28, -32(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld 27, -40(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    rldicl 3, 3, 58, 63
@@ -5169,8 +5169,8 @@ define dso_local void @test_lock() local_unnamed_addr #0 {
 ; CHECK-LABEL: test_lock:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, sc at toc@ha
-; CHECK-NEXT:    li 7, 1
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    li 7, 1
 ; CHECK-NEXT:    addi 4, 3, sc at toc@l
 ; CHECK-NEXT:  .LBB4_1: # %entry
 ; CHECK-NEXT:    #
@@ -5181,8 +5181,8 @@ define dso_local void @test_lock() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addis 4, 2, uc at toc@ha
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    stb 5, sc at toc@l(3)
-; CHECK-NEXT:    addi 6, 4, uc at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 6, 4, uc at toc@l
 ; CHECK-NEXT:  .LBB4_3: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbarx 5, 0, 6
@@ -5192,8 +5192,8 @@ define dso_local void @test_lock() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    stb 5, uc at toc@l(4)
 ; CHECK-NEXT:    addis 5, 2, ss at toc@ha
-; CHECK-NEXT:    addi 8, 5, ss at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 8, 5, ss at toc@l
 ; CHECK-NEXT:  .LBB4_5: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 6, 0, 8
@@ -5203,8 +5203,8 @@ define dso_local void @test_lock() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    sth 6, ss at toc@l(5)
 ; CHECK-NEXT:    addis 6, 2, us at toc@ha
-; CHECK-NEXT:    addi 9, 6, us at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 9, 6, us at toc@l
 ; CHECK-NEXT:  .LBB4_7: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 8, 0, 9
@@ -5214,8 +5214,8 @@ define dso_local void @test_lock() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    sth 8, us at toc@l(6)
 ; CHECK-NEXT:    addis 8, 2, si at toc@ha
-; CHECK-NEXT:    addi 10, 8, si at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 10, 8, si at toc@l
 ; CHECK-NEXT:  .LBB4_9: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 9, 0, 10
@@ -5225,8 +5225,8 @@ define dso_local void @test_lock() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    stw 9, si at toc@l(8)
 ; CHECK-NEXT:    addis 9, 2, ui at toc@ha
-; CHECK-NEXT:    addi 11, 9, ui at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 11, 9, ui at toc@l
 ; CHECK-NEXT:  .LBB4_11: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 10, 0, 11
@@ -5237,8 +5237,8 @@ define dso_local void @test_lock() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    stw 10, ui at toc@l(9)
 ; CHECK-NEXT:    li 11, 1
-; CHECK-NEXT:    addi 10, 7, sll at toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 10, 7, sll at toc@l
 ; CHECK-NEXT:  .LBB4_13: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 12, 0, 10
@@ -5248,8 +5248,8 @@ define dso_local void @test_lock() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addis 10, 2, ull@toc@ha
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    std 12, sll@toc@l(7)
-; CHECK-NEXT:    addi 0, 10, ull@toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 0, 10, ull@toc@l
 ; CHECK-NEXT:  .LBB4_15: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 12, 0, 0
@@ -5506,8 +5506,8 @@ define dso_local void @test_atomic() local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB5_3: # %entry
 ; CHECK-NEXT:    stw 5, ui@toc@l(4)
 ; CHECK-NEXT:    addis 5, 2, si@toc@ha
-; CHECK-NEXT:    addi 7, 5, si@toc@l
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    addi 7, 5, si@toc@l
 ; CHECK-NEXT:  .LBB5_4: # %entry
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 8, 0, 7

diff  --git a/llvm/test/CodeGen/PowerPC/and-extend-combine.ll b/llvm/test/CodeGen/PowerPC/and-extend-combine.ll
index b05d0097154a556..301a28ac64d1999 100644
--- a/llvm/test/CodeGen/PowerPC/and-extend-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/and-extend-combine.ll
@@ -5,10 +5,10 @@ define dso_local ptr @foo(i32 noundef zeroext %arg, ptr nocapture noundef readon
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    rlwinm r3, r3, 31, 17, 28
-; CHECK-NEXT:    ldx r4, r4, r3
-; CHECK-NEXT:    clrldi r3, r4, 56
-; CHECK-NEXT:    add r3, r5, r3
-; CHECK-NEXT:    std r4, 0(r5)
+; CHECK-NEXT:    ldx r3, r4, r3
+; CHECK-NEXT:    clrldi r4, r3, 56
+; CHECK-NEXT:    std r3, 0(r5)
+; CHECK-NEXT:    add r3, r5, r4
 ; CHECK-NEXT:    blr
 bb:
   %i = lshr i32 %arg, 1

diff  --git a/llvm/test/CodeGen/PowerPC/asm-template-I.ll b/llvm/test/CodeGen/PowerPC/asm-template-I.ll
index 7c9f90d0a58f601..902928fa7b5c44a 100644
--- a/llvm/test/CodeGen/PowerPC/asm-template-I.ll
+++ b/llvm/test/CodeGen/PowerPC/asm-template-I.ll
@@ -6,12 +6,12 @@ define dso_local signext i32 @main(i32 signext %argc, ptr %argv) {
 ; CHECK-LABEL: main:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    stw 3, -4(1)
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    addi 4, 1, -4
+; CHECK-NEXT:    addi 3, 1, -4
 ; CHECK-NEXT:    #APP
-; CHECK-NEXT:    .ascii "-1@0(4)"
+; CHECK-NEXT:    .ascii "-1@0(3)"
 ; CHECK-NEXT:    .byte 0
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    blr
 entry:
   call void asm sideeffect " .asciz \22${0:n}@${1:I}$1\22 ", "n,nZr"(i32 1, i32 %argc)

diff  --git a/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll b/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
index 621e18f59a9a99d..98d00b0e01b4bc1 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
@@ -361,9 +361,9 @@ define dso_local i128 @lq_seqcst(ptr %src) {
 ; P8-NEXT:    lq r4, 0(r3)
 ; P8-NEXT:    cmpd cr7, r5, r5
 ; P8-NEXT:    mr r3, r4
-; P8-NEXT:    mr r4, r5
 ; P8-NEXT:    bne- cr7, .+4
 ; P8-NEXT:    isync
+; P8-NEXT:    mr r4, r5
 ; P8-NEXT:    blr
 ;
 ; PWR7-LABEL: lq_seqcst:
@@ -397,9 +397,9 @@ define dso_local i128 @lq_seqcst(ptr %src) {
 ; AIX64-PWR8-NEXT:    lq r4, 0(r3)
 ; AIX64-PWR8-NEXT:    cmpd cr7, r5, r5
 ; AIX64-PWR8-NEXT:    mr r3, r4
-; AIX64-PWR8-NEXT:    mr r4, r5
 ; AIX64-PWR8-NEXT:    bne- cr7, $+4
 ; AIX64-PWR8-NEXT:    isync
+; AIX64-PWR8-NEXT:    mr r4, r5
 ; AIX64-PWR8-NEXT:    blr
 ;
 ; PPC-PWR8-LABEL: lq_seqcst:
@@ -552,10 +552,10 @@ define dso_local void @stqx_unordered(i128 %val, ptr %dst, i64 %idx) {
 ; PPC-PWR8-NEXT:    stw r6, 28(r1)
 ; PPC-PWR8-NEXT:    stw r5, 24(r1)
 ; PPC-PWR8-NEXT:    addi r5, r1, 16
-; PPC-PWR8-NEXT:    add r6, r7, r8
 ; PPC-PWR8-NEXT:    stw r4, 20(r1)
 ; PPC-PWR8-NEXT:    stw r3, 16(r1)
 ; PPC-PWR8-NEXT:    li r3, 16
+; PPC-PWR8-NEXT:    add r6, r7, r8
 ; PPC-PWR8-NEXT:    mr r4, r6
 ; PPC-PWR8-NEXT:    li r6, 0
 ; PPC-PWR8-NEXT:    bl __atomic_store
@@ -572,11 +572,11 @@ entry:
 define dso_local void @stq_big_offset_unordered(i128 %val, ptr %dst) {
 ; P8-LABEL: stq_big_offset_unordered:
 ; P8:       # %bb.0: # %entry
-; P8-NEXT:    lis r6, 32
-; P8-NEXT:    mr r9, r4
-; P8-NEXT:    mr r8, r3
-; P8-NEXT:    add r3, r5, r6
-; P8-NEXT:    stq r8, 0(r3)
+; P8-NEXT:    mr r7, r4
+; P8-NEXT:    mr r6, r3
+; P8-NEXT:    lis r3, 32
+; P8-NEXT:    add r3, r5, r3
+; P8-NEXT:    stq r6, 0(r3)
 ; P8-NEXT:    blr
 ;
 ; PWR7-LABEL: stq_big_offset_unordered:
@@ -600,20 +600,20 @@ define dso_local void @stq_big_offset_unordered(i128 %val, ptr %dst) {
 ;
 ; LE-PWR8-LABEL: stq_big_offset_unordered:
 ; LE-PWR8:       # %bb.0: # %entry
-; LE-PWR8-NEXT:    lis r6, 32
-; LE-PWR8-NEXT:    mr r9, r3
-; LE-PWR8-NEXT:    mr r8, r4
-; LE-PWR8-NEXT:    add r3, r5, r6
-; LE-PWR8-NEXT:    stq r8, 0(r3)
+; LE-PWR8-NEXT:    mr r7, r3
+; LE-PWR8-NEXT:    mr r6, r4
+; LE-PWR8-NEXT:    lis r3, 32
+; LE-PWR8-NEXT:    add r3, r5, r3
+; LE-PWR8-NEXT:    stq r6, 0(r3)
 ; LE-PWR8-NEXT:    blr
 ;
 ; AIX64-PWR8-LABEL: stq_big_offset_unordered:
 ; AIX64-PWR8:       # %bb.0: # %entry
-; AIX64-PWR8-NEXT:    lis r6, 32
-; AIX64-PWR8-NEXT:    mr r9, r4
-; AIX64-PWR8-NEXT:    mr r8, r3
-; AIX64-PWR8-NEXT:    add r3, r5, r6
-; AIX64-PWR8-NEXT:    stq r8, 0(r3)
+; AIX64-PWR8-NEXT:    mr r7, r4
+; AIX64-PWR8-NEXT:    mr r6, r3
+; AIX64-PWR8-NEXT:    lis r3, 32
+; AIX64-PWR8-NEXT:    add r3, r5, r3
+; AIX64-PWR8-NEXT:    stq r6, 0(r3)
 ; AIX64-PWR8-NEXT:    blr
 ;
 ; PPC-PWR8-LABEL: stq_big_offset_unordered:

diff  --git a/llvm/test/CodeGen/PowerPC/atomics-i128.ll b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
index 610e4966f708abb..66d727caed69fad 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-i128.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
@@ -98,10 +98,10 @@ define i128 @swap(ptr %a, i128 %x) {
 ; PPC-PWR8-NEXT:    stw r7, 40(r1)
 ; PPC-PWR8-NEXT:    stw r6, 36(r1)
 ; PPC-PWR8-NEXT:    addi r6, r1, 16
-; PPC-PWR8-NEXT:    li r3, 16
-; PPC-PWR8-NEXT:    li r7, 5
 ; PPC-PWR8-NEXT:    stw r5, 32(r1)
 ; PPC-PWR8-NEXT:    addi r5, r1, 32
+; PPC-PWR8-NEXT:    li r3, 16
+; PPC-PWR8-NEXT:    li r7, 5
 ; PPC-PWR8-NEXT:    stw r8, 44(r1)
 ; PPC-PWR8-NEXT:    bl __atomic_exchange
 ; PPC-PWR8-NEXT:    lwz r6, 28(r1)
@@ -199,18 +199,18 @@ define i128 @add(ptr %a, i128 %x) {
 ; PPC-PWR8-NEXT:    stw r27, 60(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r27, r5
 ; PPC-PWR8-NEXT:    mr r26, r3
-; PPC-PWR8-NEXT:    lwz r5, 8(r3)
-; PPC-PWR8-NEXT:    lwz r4, 4(r3)
 ; PPC-PWR8-NEXT:    stw r28, 64(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r28, r6
 ; PPC-PWR8-NEXT:    lwz r6, 12(r3)
-; PPC-PWR8-NEXT:    lwz r3, 0(r3)
 ; PPC-PWR8-NEXT:    stw r24, 48(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT:    addi r24, r1, 16
+; PPC-PWR8-NEXT:    lwz r5, 8(r3)
+; PPC-PWR8-NEXT:    lwz r4, 4(r3)
 ; PPC-PWR8-NEXT:    stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT:    addi r25, r1, 32
+; PPC-PWR8-NEXT:    lwz r3, 0(r3)
 ; PPC-PWR8-NEXT:    stw r29, 68(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r29, r7
-; PPC-PWR8-NEXT:    addi r25, r1, 32
+; PPC-PWR8-NEXT:    addi r24, r1, 16
 ; PPC-PWR8-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r30, r8
 ; PPC-PWR8-NEXT:    .p2align 4
@@ -223,23 +223,24 @@ define i128 @add(ptr %a, i128 %x) {
 ; PPC-PWR8-NEXT:    stw r5, 40(r1)
 ; PPC-PWR8-NEXT:    stw r6, 44(r1)
 ; PPC-PWR8-NEXT:    mr r5, r25
+; PPC-PWR8-NEXT:    mr r6, r24
 ; PPC-PWR8-NEXT:    adde r4, r4, r28
 ; PPC-PWR8-NEXT:    stw r7, 28(r1)
-; PPC-PWR8-NEXT:    stw r8, 24(r1)
-; PPC-PWR8-NEXT:    mr r6, r24
+; PPC-PWR8-NEXT:    li r7, 5
 ; PPC-PWR8-NEXT:    adde r3, r3, r27
+; PPC-PWR8-NEXT:    stw r8, 24(r1)
+; PPC-PWR8-NEXT:    li r8, 5
 ; PPC-PWR8-NEXT:    stw r4, 20(r1)
 ; PPC-PWR8-NEXT:    mr r4, r26
-; PPC-PWR8-NEXT:    li r7, 5
 ; PPC-PWR8-NEXT:    stw r3, 16(r1)
 ; PPC-PWR8-NEXT:    li r3, 16
-; PPC-PWR8-NEXT:    li r8, 5
 ; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
-; PPC-PWR8-NEXT:    cmplwi r3, 0
+; PPC-PWR8-NEXT:    mr r7, r3
 ; PPC-PWR8-NEXT:    lwz r6, 44(r1)
 ; PPC-PWR8-NEXT:    lwz r5, 40(r1)
 ; PPC-PWR8-NEXT:    lwz r4, 36(r1)
 ; PPC-PWR8-NEXT:    lwz r3, 32(r1)
+; PPC-PWR8-NEXT:    cmplwi r7, 0
 ; PPC-PWR8-NEXT:    beq cr0, .LBB1_1
 ; PPC-PWR8-NEXT:  # %bb.2: # %atomicrmw.end
 ; PPC-PWR8-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
@@ -340,18 +341,18 @@ define i128 @sub(ptr %a, i128 %x) {
 ; PPC-PWR8-NEXT:    stw r27, 60(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r27, r5
 ; PPC-PWR8-NEXT:    mr r26, r3
-; PPC-PWR8-NEXT:    lwz r5, 8(r3)
-; PPC-PWR8-NEXT:    lwz r4, 4(r3)
 ; PPC-PWR8-NEXT:    stw r28, 64(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r28, r6
 ; PPC-PWR8-NEXT:    lwz r6, 12(r3)
-; PPC-PWR8-NEXT:    lwz r3, 0(r3)
 ; PPC-PWR8-NEXT:    stw r24, 48(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT:    addi r24, r1, 16
+; PPC-PWR8-NEXT:    lwz r5, 8(r3)
+; PPC-PWR8-NEXT:    lwz r4, 4(r3)
 ; PPC-PWR8-NEXT:    stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT:    addi r25, r1, 32
+; PPC-PWR8-NEXT:    lwz r3, 0(r3)
 ; PPC-PWR8-NEXT:    stw r29, 68(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r29, r7
-; PPC-PWR8-NEXT:    addi r25, r1, 32
+; PPC-PWR8-NEXT:    addi r24, r1, 16
 ; PPC-PWR8-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r30, r8
 ; PPC-PWR8-NEXT:    .p2align 4
@@ -364,23 +365,24 @@ define i128 @sub(ptr %a, i128 %x) {
 ; PPC-PWR8-NEXT:    stw r5, 40(r1)
 ; PPC-PWR8-NEXT:    stw r6, 44(r1)
 ; PPC-PWR8-NEXT:    mr r5, r25
+; PPC-PWR8-NEXT:    mr r6, r24
 ; PPC-PWR8-NEXT:    subfe r4, r28, r4
 ; PPC-PWR8-NEXT:    stw r7, 28(r1)
-; PPC-PWR8-NEXT:    stw r8, 24(r1)
-; PPC-PWR8-NEXT:    mr r6, r24
+; PPC-PWR8-NEXT:    li r7, 5
 ; PPC-PWR8-NEXT:    subfe r3, r27, r3
+; PPC-PWR8-NEXT:    stw r8, 24(r1)
+; PPC-PWR8-NEXT:    li r8, 5
 ; PPC-PWR8-NEXT:    stw r4, 20(r1)
 ; PPC-PWR8-NEXT:    mr r4, r26
-; PPC-PWR8-NEXT:    li r7, 5
 ; PPC-PWR8-NEXT:    stw r3, 16(r1)
 ; PPC-PWR8-NEXT:    li r3, 16
-; PPC-PWR8-NEXT:    li r8, 5
 ; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
-; PPC-PWR8-NEXT:    cmplwi r3, 0
+; PPC-PWR8-NEXT:    mr r7, r3
 ; PPC-PWR8-NEXT:    lwz r6, 44(r1)
 ; PPC-PWR8-NEXT:    lwz r5, 40(r1)
 ; PPC-PWR8-NEXT:    lwz r4, 36(r1)
 ; PPC-PWR8-NEXT:    lwz r3, 32(r1)
+; PPC-PWR8-NEXT:    cmplwi r7, 0
 ; PPC-PWR8-NEXT:    beq cr0, .LBB2_1
 ; PPC-PWR8-NEXT:  # %bb.2: # %atomicrmw.end
 ; PPC-PWR8-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
@@ -481,47 +483,48 @@ define i128 @and(ptr %a, i128 %x) {
 ; PPC-PWR8-NEXT:    stw r27, 60(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r27, r5
 ; PPC-PWR8-NEXT:    mr r26, r3
-; PPC-PWR8-NEXT:    lwz r5, 8(r3)
-; PPC-PWR8-NEXT:    lwz r4, 4(r3)
 ; PPC-PWR8-NEXT:    stw r28, 64(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r28, r6
 ; PPC-PWR8-NEXT:    lwz r6, 12(r3)
-; PPC-PWR8-NEXT:    lwz r3, 0(r3)
 ; PPC-PWR8-NEXT:    stw r24, 48(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT:    addi r24, r1, 16
+; PPC-PWR8-NEXT:    lwz r5, 8(r3)
+; PPC-PWR8-NEXT:    lwz r4, 4(r3)
 ; PPC-PWR8-NEXT:    stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT:    addi r25, r1, 32
+; PPC-PWR8-NEXT:    lwz r3, 0(r3)
 ; PPC-PWR8-NEXT:    stw r29, 68(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r29, r7
-; PPC-PWR8-NEXT:    addi r25, r1, 32
+; PPC-PWR8-NEXT:    addi r24, r1, 16
 ; PPC-PWR8-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r30, r8
 ; PPC-PWR8-NEXT:    .p2align 4
 ; PPC-PWR8-NEXT:  .LBB3_1: # %atomicrmw.start
 ; PPC-PWR8-NEXT:    #
 ; PPC-PWR8-NEXT:    stw r3, 32(r1)
-; PPC-PWR8-NEXT:    stw r4, 36(r1)
-; PPC-PWR8-NEXT:    and r7, r5, r29
-; PPC-PWR8-NEXT:    and r8, r6, r30
 ; PPC-PWR8-NEXT:    and r3, r3, r27
+; PPC-PWR8-NEXT:    stw r4, 36(r1)
 ; PPC-PWR8-NEXT:    and r4, r4, r28
+; PPC-PWR8-NEXT:    and r7, r5, r29
 ; PPC-PWR8-NEXT:    stw r5, 40(r1)
+; PPC-PWR8-NEXT:    and r5, r6, r30
 ; PPC-PWR8-NEXT:    stw r6, 44(r1)
-; PPC-PWR8-NEXT:    mr r5, r25
-; PPC-PWR8-NEXT:    mr r6, r24
-; PPC-PWR8-NEXT:    stw r8, 28(r1)
+; PPC-PWR8-NEXT:    stw r5, 28(r1)
 ; PPC-PWR8-NEXT:    stw r7, 24(r1)
+; PPC-PWR8-NEXT:    mr r5, r25
 ; PPC-PWR8-NEXT:    li r7, 5
-; PPC-PWR8-NEXT:    li r8, 5
 ; PPC-PWR8-NEXT:    stw r4, 20(r1)
 ; PPC-PWR8-NEXT:    stw r3, 16(r1)
 ; PPC-PWR8-NEXT:    li r3, 16
 ; PPC-PWR8-NEXT:    mr r4, r26
+; PPC-PWR8-NEXT:    mr r6, r24
+; PPC-PWR8-NEXT:    li r8, 5
 ; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
-; PPC-PWR8-NEXT:    cmplwi r3, 0
+; PPC-PWR8-NEXT:    mr r7, r3
 ; PPC-PWR8-NEXT:    lwz r6, 44(r1)
 ; PPC-PWR8-NEXT:    lwz r5, 40(r1)
 ; PPC-PWR8-NEXT:    lwz r4, 36(r1)
 ; PPC-PWR8-NEXT:    lwz r3, 32(r1)
+; PPC-PWR8-NEXT:    cmplwi r7, 0
 ; PPC-PWR8-NEXT:    beq cr0, .LBB3_1
 ; PPC-PWR8-NEXT:  # %bb.2: # %atomicrmw.end
 ; PPC-PWR8-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
@@ -622,47 +625,48 @@ define i128 @or(ptr %a, i128 %x) {
 ; PPC-PWR8-NEXT:    stw r27, 60(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r27, r5
 ; PPC-PWR8-NEXT:    mr r26, r3
-; PPC-PWR8-NEXT:    lwz r5, 8(r3)
-; PPC-PWR8-NEXT:    lwz r4, 4(r3)
 ; PPC-PWR8-NEXT:    stw r28, 64(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r28, r6
 ; PPC-PWR8-NEXT:    lwz r6, 12(r3)
-; PPC-PWR8-NEXT:    lwz r3, 0(r3)
 ; PPC-PWR8-NEXT:    stw r24, 48(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT:    addi r24, r1, 16
+; PPC-PWR8-NEXT:    lwz r5, 8(r3)
+; PPC-PWR8-NEXT:    lwz r4, 4(r3)
 ; PPC-PWR8-NEXT:    stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT:    addi r25, r1, 32
+; PPC-PWR8-NEXT:    lwz r3, 0(r3)
 ; PPC-PWR8-NEXT:    stw r29, 68(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r29, r7
-; PPC-PWR8-NEXT:    addi r25, r1, 32
+; PPC-PWR8-NEXT:    addi r24, r1, 16
 ; PPC-PWR8-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r30, r8
 ; PPC-PWR8-NEXT:    .p2align 4
 ; PPC-PWR8-NEXT:  .LBB4_1: # %atomicrmw.start
 ; PPC-PWR8-NEXT:    #
 ; PPC-PWR8-NEXT:    stw r3, 32(r1)
-; PPC-PWR8-NEXT:    stw r4, 36(r1)
-; PPC-PWR8-NEXT:    or r7, r5, r29
-; PPC-PWR8-NEXT:    or r8, r6, r30
 ; PPC-PWR8-NEXT:    or r3, r3, r27
+; PPC-PWR8-NEXT:    stw r4, 36(r1)
 ; PPC-PWR8-NEXT:    or r4, r4, r28
+; PPC-PWR8-NEXT:    or r7, r5, r29
 ; PPC-PWR8-NEXT:    stw r5, 40(r1)
+; PPC-PWR8-NEXT:    or r5, r6, r30
 ; PPC-PWR8-NEXT:    stw r6, 44(r1)
-; PPC-PWR8-NEXT:    mr r5, r25
-; PPC-PWR8-NEXT:    mr r6, r24
-; PPC-PWR8-NEXT:    stw r8, 28(r1)
+; PPC-PWR8-NEXT:    stw r5, 28(r1)
 ; PPC-PWR8-NEXT:    stw r7, 24(r1)
+; PPC-PWR8-NEXT:    mr r5, r25
 ; PPC-PWR8-NEXT:    li r7, 5
-; PPC-PWR8-NEXT:    li r8, 5
 ; PPC-PWR8-NEXT:    stw r4, 20(r1)
 ; PPC-PWR8-NEXT:    stw r3, 16(r1)
 ; PPC-PWR8-NEXT:    li r3, 16
 ; PPC-PWR8-NEXT:    mr r4, r26
+; PPC-PWR8-NEXT:    mr r6, r24
+; PPC-PWR8-NEXT:    li r8, 5
 ; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
-; PPC-PWR8-NEXT:    cmplwi r3, 0
+; PPC-PWR8-NEXT:    mr r7, r3
 ; PPC-PWR8-NEXT:    lwz r6, 44(r1)
 ; PPC-PWR8-NEXT:    lwz r5, 40(r1)
 ; PPC-PWR8-NEXT:    lwz r4, 36(r1)
 ; PPC-PWR8-NEXT:    lwz r3, 32(r1)
+; PPC-PWR8-NEXT:    cmplwi r7, 0
 ; PPC-PWR8-NEXT:    beq cr0, .LBB4_1
 ; PPC-PWR8-NEXT:  # %bb.2: # %atomicrmw.end
 ; PPC-PWR8-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
@@ -763,47 +767,48 @@ define i128 @xor(ptr %a, i128 %x) {
 ; PPC-PWR8-NEXT:    stw r27, 60(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r27, r5
 ; PPC-PWR8-NEXT:    mr r26, r3
-; PPC-PWR8-NEXT:    lwz r5, 8(r3)
-; PPC-PWR8-NEXT:    lwz r4, 4(r3)
 ; PPC-PWR8-NEXT:    stw r28, 64(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r28, r6
 ; PPC-PWR8-NEXT:    lwz r6, 12(r3)
-; PPC-PWR8-NEXT:    lwz r3, 0(r3)
 ; PPC-PWR8-NEXT:    stw r24, 48(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT:    addi r24, r1, 16
+; PPC-PWR8-NEXT:    lwz r5, 8(r3)
+; PPC-PWR8-NEXT:    lwz r4, 4(r3)
 ; PPC-PWR8-NEXT:    stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT:    addi r25, r1, 32
+; PPC-PWR8-NEXT:    lwz r3, 0(r3)
 ; PPC-PWR8-NEXT:    stw r29, 68(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r29, r7
-; PPC-PWR8-NEXT:    addi r25, r1, 32
+; PPC-PWR8-NEXT:    addi r24, r1, 16
 ; PPC-PWR8-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r30, r8
 ; PPC-PWR8-NEXT:    .p2align 4
 ; PPC-PWR8-NEXT:  .LBB5_1: # %atomicrmw.start
 ; PPC-PWR8-NEXT:    #
 ; PPC-PWR8-NEXT:    stw r3, 32(r1)
-; PPC-PWR8-NEXT:    stw r4, 36(r1)
-; PPC-PWR8-NEXT:    xor r7, r5, r29
-; PPC-PWR8-NEXT:    xor r8, r6, r30
 ; PPC-PWR8-NEXT:    xor r3, r3, r27
+; PPC-PWR8-NEXT:    stw r4, 36(r1)
 ; PPC-PWR8-NEXT:    xor r4, r4, r28
+; PPC-PWR8-NEXT:    xor r7, r5, r29
 ; PPC-PWR8-NEXT:    stw r5, 40(r1)
+; PPC-PWR8-NEXT:    xor r5, r6, r30
 ; PPC-PWR8-NEXT:    stw r6, 44(r1)
-; PPC-PWR8-NEXT:    mr r5, r25
-; PPC-PWR8-NEXT:    mr r6, r24
-; PPC-PWR8-NEXT:    stw r8, 28(r1)
+; PPC-PWR8-NEXT:    stw r5, 28(r1)
 ; PPC-PWR8-NEXT:    stw r7, 24(r1)
+; PPC-PWR8-NEXT:    mr r5, r25
 ; PPC-PWR8-NEXT:    li r7, 5
-; PPC-PWR8-NEXT:    li r8, 5
 ; PPC-PWR8-NEXT:    stw r4, 20(r1)
 ; PPC-PWR8-NEXT:    stw r3, 16(r1)
 ; PPC-PWR8-NEXT:    li r3, 16
 ; PPC-PWR8-NEXT:    mr r4, r26
+; PPC-PWR8-NEXT:    mr r6, r24
+; PPC-PWR8-NEXT:    li r8, 5
 ; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
-; PPC-PWR8-NEXT:    cmplwi r3, 0
+; PPC-PWR8-NEXT:    mr r7, r3
 ; PPC-PWR8-NEXT:    lwz r6, 44(r1)
 ; PPC-PWR8-NEXT:    lwz r5, 40(r1)
 ; PPC-PWR8-NEXT:    lwz r4, 36(r1)
 ; PPC-PWR8-NEXT:    lwz r3, 32(r1)
+; PPC-PWR8-NEXT:    cmplwi r7, 0
 ; PPC-PWR8-NEXT:    beq cr0, .LBB5_1
 ; PPC-PWR8-NEXT:  # %bb.2: # %atomicrmw.end
 ; PPC-PWR8-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
@@ -904,47 +909,48 @@ define i128 @nand(ptr %a, i128 %x) {
 ; PPC-PWR8-NEXT:    stw r27, 60(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r27, r5
 ; PPC-PWR8-NEXT:    mr r26, r3
-; PPC-PWR8-NEXT:    lwz r5, 8(r3)
-; PPC-PWR8-NEXT:    lwz r4, 4(r3)
 ; PPC-PWR8-NEXT:    stw r28, 64(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r28, r6
 ; PPC-PWR8-NEXT:    lwz r6, 12(r3)
-; PPC-PWR8-NEXT:    lwz r3, 0(r3)
 ; PPC-PWR8-NEXT:    stw r24, 48(r1) # 4-byte Folded Spill
-; PPC-PWR8-NEXT:    addi r24, r1, 16
+; PPC-PWR8-NEXT:    lwz r5, 8(r3)
+; PPC-PWR8-NEXT:    lwz r4, 4(r3)
 ; PPC-PWR8-NEXT:    stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT:    addi r25, r1, 32
+; PPC-PWR8-NEXT:    lwz r3, 0(r3)
 ; PPC-PWR8-NEXT:    stw r29, 68(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r29, r7
-; PPC-PWR8-NEXT:    addi r25, r1, 32
+; PPC-PWR8-NEXT:    addi r24, r1, 16
 ; PPC-PWR8-NEXT:    stw r30, 72(r1) # 4-byte Folded Spill
 ; PPC-PWR8-NEXT:    mr r30, r8
 ; PPC-PWR8-NEXT:    .p2align 4
 ; PPC-PWR8-NEXT:  .LBB6_1: # %atomicrmw.start
 ; PPC-PWR8-NEXT:    #
 ; PPC-PWR8-NEXT:    stw r3, 32(r1)
-; PPC-PWR8-NEXT:    stw r4, 36(r1)
-; PPC-PWR8-NEXT:    nand r7, r5, r29
-; PPC-PWR8-NEXT:    nand r8, r6, r30
 ; PPC-PWR8-NEXT:    nand r3, r3, r27
+; PPC-PWR8-NEXT:    stw r4, 36(r1)
 ; PPC-PWR8-NEXT:    nand r4, r4, r28
+; PPC-PWR8-NEXT:    nand r7, r5, r29
 ; PPC-PWR8-NEXT:    stw r5, 40(r1)
+; PPC-PWR8-NEXT:    nand r5, r6, r30
 ; PPC-PWR8-NEXT:    stw r6, 44(r1)
-; PPC-PWR8-NEXT:    mr r5, r25
-; PPC-PWR8-NEXT:    mr r6, r24
-; PPC-PWR8-NEXT:    stw r8, 28(r1)
+; PPC-PWR8-NEXT:    stw r5, 28(r1)
 ; PPC-PWR8-NEXT:    stw r7, 24(r1)
+; PPC-PWR8-NEXT:    mr r5, r25
 ; PPC-PWR8-NEXT:    li r7, 5
-; PPC-PWR8-NEXT:    li r8, 5
 ; PPC-PWR8-NEXT:    stw r4, 20(r1)
 ; PPC-PWR8-NEXT:    stw r3, 16(r1)
 ; PPC-PWR8-NEXT:    li r3, 16
 ; PPC-PWR8-NEXT:    mr r4, r26
+; PPC-PWR8-NEXT:    mr r6, r24
+; PPC-PWR8-NEXT:    li r8, 5
 ; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
-; PPC-PWR8-NEXT:    cmplwi r3, 0
+; PPC-PWR8-NEXT:    mr r7, r3
 ; PPC-PWR8-NEXT:    lwz r6, 44(r1)
 ; PPC-PWR8-NEXT:    lwz r5, 40(r1)
 ; PPC-PWR8-NEXT:    lwz r4, 36(r1)
 ; PPC-PWR8-NEXT:    lwz r3, 32(r1)
+; PPC-PWR8-NEXT:    cmplwi r7, 0
 ; PPC-PWR8-NEXT:    beq cr0, .LBB6_1
 ; PPC-PWR8-NEXT:  # %bb.2: # %atomicrmw.end
 ; PPC-PWR8-NEXT:    lwz r30, 72(r1) # 4-byte Folded Reload
@@ -1068,21 +1074,21 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    mr r4, r3
-; PPC-PWR8-NEXT:    lwz r3, 56(r1)
-; PPC-PWR8-NEXT:    lwz r11, 60(r1)
+; PPC-PWR8-NEXT:    lwz r3, 60(r1)
 ; PPC-PWR8-NEXT:    stw r8, 44(r1)
 ; PPC-PWR8-NEXT:    stw r7, 40(r1)
-; PPC-PWR8-NEXT:    li r7, 2
-; PPC-PWR8-NEXT:    li r8, 2
 ; PPC-PWR8-NEXT:    stw r6, 36(r1)
 ; PPC-PWR8-NEXT:    stw r5, 32(r1)
 ; PPC-PWR8-NEXT:    addi r5, r1, 32
 ; PPC-PWR8-NEXT:    addi r6, r1, 16
-; PPC-PWR8-NEXT:    stw r3, 24(r1)
-; PPC-PWR8-NEXT:    li r3, 16
-; PPC-PWR8-NEXT:    stw r11, 28(r1)
+; PPC-PWR8-NEXT:    li r7, 2
+; PPC-PWR8-NEXT:    li r8, 2
 ; PPC-PWR8-NEXT:    stw r10, 20(r1)
 ; PPC-PWR8-NEXT:    stw r9, 16(r1)
+; PPC-PWR8-NEXT:    stw r3, 28(r1)
+; PPC-PWR8-NEXT:    lwz r3, 56(r1)
+; PPC-PWR8-NEXT:    stw r3, 24(r1)
+; PPC-PWR8-NEXT:    li r3, 16
 ; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
 ; PPC-PWR8-NEXT:    lwz r6, 44(r1)
 ; PPC-PWR8-NEXT:    lwz r5, 40(r1)
@@ -1202,21 +1208,21 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    mr r4, r3
-; PPC-PWR8-NEXT:    lwz r3, 56(r1)
-; PPC-PWR8-NEXT:    lwz r11, 60(r1)
+; PPC-PWR8-NEXT:    lwz r3, 60(r1)
 ; PPC-PWR8-NEXT:    stw r8, 44(r1)
 ; PPC-PWR8-NEXT:    stw r7, 40(r1)
-; PPC-PWR8-NEXT:    li r7, 3
-; PPC-PWR8-NEXT:    li r8, 0
 ; PPC-PWR8-NEXT:    stw r6, 36(r1)
 ; PPC-PWR8-NEXT:    stw r5, 32(r1)
 ; PPC-PWR8-NEXT:    addi r5, r1, 32
 ; PPC-PWR8-NEXT:    addi r6, r1, 16
-; PPC-PWR8-NEXT:    stw r3, 24(r1)
-; PPC-PWR8-NEXT:    li r3, 16
-; PPC-PWR8-NEXT:    stw r11, 28(r1)
+; PPC-PWR8-NEXT:    li r7, 3
+; PPC-PWR8-NEXT:    li r8, 0
 ; PPC-PWR8-NEXT:    stw r10, 20(r1)
 ; PPC-PWR8-NEXT:    stw r9, 16(r1)
+; PPC-PWR8-NEXT:    stw r3, 28(r1)
+; PPC-PWR8-NEXT:    lwz r3, 56(r1)
+; PPC-PWR8-NEXT:    stw r3, 24(r1)
+; PPC-PWR8-NEXT:    li r3, 16
 ; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
 ; PPC-PWR8-NEXT:    lwz r6, 44(r1)
 ; PPC-PWR8-NEXT:    lwz r5, 40(r1)
@@ -1339,21 +1345,21 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    mr r4, r3
-; PPC-PWR8-NEXT:    lwz r3, 56(r1)
-; PPC-PWR8-NEXT:    lwz r11, 60(r1)
+; PPC-PWR8-NEXT:    lwz r3, 60(r1)
 ; PPC-PWR8-NEXT:    stw r8, 44(r1)
 ; PPC-PWR8-NEXT:    stw r7, 40(r1)
-; PPC-PWR8-NEXT:    li r7, 5
-; PPC-PWR8-NEXT:    li r8, 5
 ; PPC-PWR8-NEXT:    stw r6, 36(r1)
 ; PPC-PWR8-NEXT:    stw r5, 32(r1)
 ; PPC-PWR8-NEXT:    addi r5, r1, 32
 ; PPC-PWR8-NEXT:    addi r6, r1, 16
-; PPC-PWR8-NEXT:    stw r3, 24(r1)
-; PPC-PWR8-NEXT:    li r3, 16
-; PPC-PWR8-NEXT:    stw r11, 28(r1)
+; PPC-PWR8-NEXT:    li r7, 5
+; PPC-PWR8-NEXT:    li r8, 5
 ; PPC-PWR8-NEXT:    stw r10, 20(r1)
 ; PPC-PWR8-NEXT:    stw r9, 16(r1)
+; PPC-PWR8-NEXT:    stw r3, 28(r1)
+; PPC-PWR8-NEXT:    lwz r3, 56(r1)
+; PPC-PWR8-NEXT:    stw r3, 24(r1)
+; PPC-PWR8-NEXT:    li r3, 16
 ; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
 ; PPC-PWR8-NEXT:    lwz r6, 44(r1)
 ; PPC-PWR8-NEXT:    lwz r5, 40(r1)
@@ -1476,21 +1482,21 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    mr r4, r3
-; PPC-PWR8-NEXT:    lwz r3, 56(r1)
-; PPC-PWR8-NEXT:    lwz r11, 60(r1)
+; PPC-PWR8-NEXT:    lwz r3, 60(r1)
 ; PPC-PWR8-NEXT:    stw r8, 44(r1)
 ; PPC-PWR8-NEXT:    stw r7, 40(r1)
-; PPC-PWR8-NEXT:    li r7, 4
-; PPC-PWR8-NEXT:    li r8, 2
 ; PPC-PWR8-NEXT:    stw r6, 36(r1)
 ; PPC-PWR8-NEXT:    stw r5, 32(r1)
 ; PPC-PWR8-NEXT:    addi r5, r1, 32
 ; PPC-PWR8-NEXT:    addi r6, r1, 16
-; PPC-PWR8-NEXT:    stw r3, 24(r1)
-; PPC-PWR8-NEXT:    li r3, 16
-; PPC-PWR8-NEXT:    stw r11, 28(r1)
+; PPC-PWR8-NEXT:    li r7, 4
+; PPC-PWR8-NEXT:    li r8, 2
 ; PPC-PWR8-NEXT:    stw r10, 20(r1)
 ; PPC-PWR8-NEXT:    stw r9, 16(r1)
+; PPC-PWR8-NEXT:    stw r3, 28(r1)
+; PPC-PWR8-NEXT:    lwz r3, 56(r1)
+; PPC-PWR8-NEXT:    stw r3, 24(r1)
+; PPC-PWR8-NEXT:    li r3, 16
 ; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
 ; PPC-PWR8-NEXT:    lwz r6, 44(r1)
 ; PPC-PWR8-NEXT:    lwz r5, 40(r1)
@@ -1620,21 +1626,21 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    mr r4, r3
-; PPC-PWR8-NEXT:    lwz r3, 56(r1)
-; PPC-PWR8-NEXT:    lwz r11, 60(r1)
+; PPC-PWR8-NEXT:    lwz r3, 60(r1)
 ; PPC-PWR8-NEXT:    stw r8, 44(r1)
 ; PPC-PWR8-NEXT:    stw r7, 40(r1)
-; PPC-PWR8-NEXT:    li r7, 4
-; PPC-PWR8-NEXT:    li r8, 2
 ; PPC-PWR8-NEXT:    stw r6, 36(r1)
 ; PPC-PWR8-NEXT:    stw r5, 32(r1)
 ; PPC-PWR8-NEXT:    addi r5, r1, 32
 ; PPC-PWR8-NEXT:    addi r6, r1, 16
-; PPC-PWR8-NEXT:    stw r3, 24(r1)
-; PPC-PWR8-NEXT:    li r3, 16
-; PPC-PWR8-NEXT:    stw r11, 28(r1)
+; PPC-PWR8-NEXT:    li r7, 4
+; PPC-PWR8-NEXT:    li r8, 2
 ; PPC-PWR8-NEXT:    stw r10, 20(r1)
 ; PPC-PWR8-NEXT:    stw r9, 16(r1)
+; PPC-PWR8-NEXT:    stw r3, 28(r1)
+; PPC-PWR8-NEXT:    lwz r3, 56(r1)
+; PPC-PWR8-NEXT:    stw r3, 24(r1)
+; PPC-PWR8-NEXT:    li r3, 16
 ; PPC-PWR8-NEXT:    bl __atomic_compare_exchange
 ; PPC-PWR8-NEXT:    lwz r0, 52(r1)
 ; PPC-PWR8-NEXT:    addi r1, r1, 48

diff  --git a/llvm/test/CodeGen/PowerPC/atomics-i16-ldst.ll b/llvm/test/CodeGen/PowerPC/atomics-i16-ldst.ll
index 6bc524b8371aff3..61f06c608cc5b76 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-i16-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i16-ldst.ll
@@ -7,16 +7,16 @@
 ; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P9
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P9
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P8
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P8
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 
 ; Function Attrs: nofree norecurse nounwind uwtable willreturn
 define dso_local signext i16 @ld_0_int16_t_uint8_t(i64 %ptr) {
@@ -174,23 +174,14 @@ define dso_local signext i16 @ld_disjoint_align32_int16_t_uint8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int16_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int16_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int16_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -468,25 +459,15 @@ define dso_local signext i16 @ld_disjoint_align32_int16_t_int8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsb r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int16_t_int8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int16_t_int8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int16_t_int8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -763,25 +744,15 @@ define dso_local signext i16 @ld_disjoint_align32_int16_t_uint16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsh r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int16_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhzx r3, r3, r4
-; CHECK-P9-NEXT:    extsh r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int16_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhzx r3, r3, r4
-; CHECK-P8-NEXT:    extsh r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int16_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsh r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1061,25 +1032,15 @@ define dso_local signext i16 @ld_disjoint_align32_int16_t_uint32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsh r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int16_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lwzx r3, r3, r4
-; CHECK-P9-NEXT:    extsh r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int16_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lwzx r3, r3, r4
-; CHECK-P8-NEXT:    extsh r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int16_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lwzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsh r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1365,25 +1326,15 @@ define dso_local signext i16 @ld_disjoint_align32_int16_t_uint64_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsh r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int16_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    ldx r3, r3, r4
-; CHECK-P9-NEXT:    extsh r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int16_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    ldx r3, r3, r4
-; CHECK-P8-NEXT:    extsh r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int16_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    ldx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsh r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1659,23 +1610,14 @@ define dso_local zeroext i16 @ld_disjoint_align32_uint16_t_uint8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint16_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint16_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint16_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1965,27 +1907,16 @@ define dso_local zeroext i16 @ld_disjoint_align32_uint16_t_int8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 48
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint16_t_int8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    clrldi r3, r3, 48
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint16_t_int8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    clrldi r3, r3, 48
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint16_t_int8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    clrldi r3, r3, 48
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2260,23 +2191,14 @@ define dso_local zeroext i16 @ld_disjoint_align32_uint16_t_uint16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint16_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint16_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint16_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2548,25 +2470,15 @@ define dso_local zeroext i16 @ld_disjoint_align32_uint16_t_uint32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 48
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint16_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lwzx r3, r3, r4
-; CHECK-P9-NEXT:    clrldi r3, r3, 48
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint16_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lwzx r3, r3, r4
-; CHECK-P8-NEXT:    clrldi r3, r3, 48
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint16_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lwzx r3, r3, r4
+; CHECK-PREP10-NEXT:    clrldi r3, r3, 48
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2852,25 +2764,15 @@ define dso_local zeroext i16 @ld_disjoint_align32_uint16_t_uint64_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 48
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint16_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    ldx r3, r3, r4
-; CHECK-P9-NEXT:    clrldi r3, r3, 48
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint16_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    ldx r3, r3, r4
-; CHECK-P8-NEXT:    clrldi r3, r3, 48
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint16_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    ldx r3, r3, r4
+; CHECK-PREP10-NEXT:    clrldi r3, r3, 48
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3144,23 +3046,14 @@ define dso_local void @st_disjoint_align32_uint16_t_uint8_t(i64 %ptr, i16 zeroex
 ; CHECK-P10-NEXT:    pstb r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint16_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stbx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint16_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stbx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint16_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stbx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3417,23 +3310,14 @@ define dso_local void @st_disjoint_align32_uint16_t_uint16_t(i64 %ptr, i16 zeroe
 ; CHECK-P10-NEXT:    psth r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint16_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    sthx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint16_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    sthx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint16_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    sthx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3693,23 +3577,14 @@ define dso_local void @st_disjoint_align32_uint16_t_uint32_t(i64 %ptr, i16 zeroe
 ; CHECK-P10-NEXT:    pstw r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint16_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stwx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint16_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stwx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint16_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stwx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3975,23 +3850,14 @@ define dso_local void @st_disjoint_align32_uint16_t_uint64_t(i64 %ptr, i16 zeroe
 ; CHECK-P10-NEXT:    pstd r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint16_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stdx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint16_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stdx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint16_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stdx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -4257,23 +4123,14 @@ define dso_local void @st_disjoint_align32_int16_t_uint32_t(i64 %ptr, i16 signex
 ; CHECK-P10-NEXT:    pstw r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_int16_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stwx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_int16_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stwx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_int16_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stwx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -4539,23 +4396,14 @@ define dso_local void @st_disjoint_align32_int16_t_uint64_t(i64 %ptr, i16 signex
 ; CHECK-P10-NEXT:    pstd r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_int16_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stdx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_int16_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stdx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_int16_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stdx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000

diff  --git a/llvm/test/CodeGen/PowerPC/atomics-i32-ldst.ll b/llvm/test/CodeGen/PowerPC/atomics-i32-ldst.ll
index b98d8403c94b19d..4ca3a852719e08e 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-i32-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i32-ldst.ll
@@ -7,16 +7,16 @@
 ; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P9
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P9
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P8
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P8
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 
 ; Function Attrs: nofree norecurse nounwind uwtable willreturn
 define dso_local signext i32 @ld_0_int32_t_uint8_t(i64 %ptr) {
@@ -174,23 +174,14 @@ define dso_local signext i32 @ld_disjoint_align32_int32_t_uint8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int32_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int32_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int32_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -468,25 +459,15 @@ define dso_local signext i32 @ld_disjoint_align32_int32_t_int8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsb r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int32_t_int8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int32_t_int8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int32_t_int8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -762,23 +743,14 @@ define dso_local signext i32 @ld_disjoint_align32_int32_t_uint16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int32_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int32_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int32_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1056,25 +1028,15 @@ define dso_local signext i32 @ld_disjoint_align32_int32_t_int16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsh r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int32_t_int16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhzx r3, r3, r4
-; CHECK-P9-NEXT:    extsh r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int32_t_int16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhzx r3, r3, r4
-; CHECK-P8-NEXT:    extsh r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int32_t_int16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsh r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1341,23 +1303,14 @@ define dso_local signext i32 @ld_disjoint_align32_int32_t_uint32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsw r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int32_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lwax r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int32_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lwax r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int32_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lwax r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1629,25 +1582,15 @@ define dso_local signext i32 @ld_disjoint_align32_int32_t_uint64_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsw r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int32_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    ldx r3, r3, r4
-; CHECK-P9-NEXT:    extsw r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int32_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    ldx r3, r3, r4
-; CHECK-P8-NEXT:    extsw r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int32_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    ldx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsw r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1923,23 +1866,14 @@ define dso_local zeroext i32 @ld_disjoint_align32_uint32_t_uint8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint32_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint32_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint32_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2229,27 +2163,16 @@ define dso_local zeroext i32 @ld_disjoint_align32_uint32_t_int8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint32_t_int8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    clrldi r3, r3, 32
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint32_t_int8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    clrldi r3, r3, 32
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint32_t_int8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    clrldi r3, r3, 32
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2533,23 +2456,14 @@ define dso_local zeroext i32 @ld_disjoint_align32_uint32_t_uint16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint32_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint32_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint32_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2829,25 +2743,15 @@ define dso_local zeroext i32 @ld_disjoint_align32_uint32_t_int16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint32_t_int16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhax r3, r3, r4
-; CHECK-P9-NEXT:    clrldi r3, r3, 32
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint32_t_int16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhax r3, r3, r4
-; CHECK-P8-NEXT:    clrldi r3, r3, 32
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint32_t_int16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhax r3, r3, r4
+; CHECK-PREP10-NEXT:    clrldi r3, r3, 32
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3114,23 +3018,14 @@ define dso_local zeroext i32 @ld_disjoint_align32_uint32_t_uint32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint32_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lwzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint32_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lwzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint32_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lwzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3402,25 +3297,15 @@ define dso_local zeroext i32 @ld_disjoint_align32_uint32_t_uint64_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint32_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    ldx r3, r3, r4
-; CHECK-P9-NEXT:    clrldi r3, r3, 32
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint32_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    ldx r3, r3, r4
-; CHECK-P8-NEXT:    clrldi r3, r3, 32
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint32_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    ldx r3, r3, r4
+; CHECK-PREP10-NEXT:    clrldi r3, r3, 32
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3694,23 +3579,14 @@ define dso_local void @st_disjoint_align32_uint32_t_uint8_t(i64 %ptr, i32 zeroex
 ; CHECK-P10-NEXT:    pstb r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint32_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stbx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint32_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stbx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint32_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stbx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3976,23 +3852,14 @@ define dso_local void @st_disjoint_align32_uint32_t_uint16_t(i64 %ptr, i32 zeroe
 ; CHECK-P10-NEXT:    psth r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint32_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    sthx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint32_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    sthx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint32_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    sthx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -4249,23 +4116,14 @@ define dso_local void @st_disjoint_align32_uint32_t_uint32_t(i64 %ptr, i32 zeroe
 ; CHECK-P10-NEXT:    pstw r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint32_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stwx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint32_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stwx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint32_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stwx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -4525,23 +4383,14 @@ define dso_local void @st_disjoint_align32_uint32_t_uint64_t(i64 %ptr, i32 zeroe
 ; CHECK-P10-NEXT:    pstd r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint32_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stdx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint32_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stdx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint32_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stdx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -4807,23 +4656,14 @@ define dso_local void @st_disjoint_align32_int32_t_uint64_t(i64 %ptr, i32 signex
 ; CHECK-P10-NEXT:    pstd r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_int32_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stdx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_int32_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stdx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_int32_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stdx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000

diff  --git a/llvm/test/CodeGen/PowerPC/atomics-i64-ldst.ll b/llvm/test/CodeGen/PowerPC/atomics-i64-ldst.ll
index 61a4353dfcfc7f3..0e53ea8d6cd588e 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-i64-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i64-ldst.ll
@@ -174,23 +174,14 @@ define dso_local i64 @ld_disjoint_align32_int64_t_uint8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int64_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int64_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int64_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -468,25 +459,15 @@ define dso_local i64 @ld_disjoint_align32_int64_t_int8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsb r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int64_t_int8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int64_t_int8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int64_t_int8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -762,23 +743,14 @@ define dso_local i64 @ld_disjoint_align32_int64_t_uint16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int64_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int64_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int64_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1056,25 +1028,15 @@ define dso_local i64 @ld_disjoint_align32_int64_t_int16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsh r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int64_t_int16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhzx r3, r3, r4
-; CHECK-P9-NEXT:    extsh r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int64_t_int16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhzx r3, r3, r4
-; CHECK-P8-NEXT:    extsh r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int64_t_int16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsh r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1350,23 +1312,14 @@ define dso_local i64 @ld_disjoint_align32_int64_t_uint32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int64_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lwzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int64_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lwzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int64_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lwzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1634,23 +1587,14 @@ define dso_local i64 @ld_disjoint_align32_int64_t_int32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsw r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int64_t_int32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lwax r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int64_t_int32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lwax r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int64_t_int32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lwax r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1907,23 +1851,14 @@ define dso_local i64 @ld_disjoint_align32_int64_t_uint64_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    pld r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int64_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    ldx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int64_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    ldx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int64_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    ldx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2185,23 +2120,14 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_uint8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2479,25 +2405,15 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_int8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsb r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_int8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_int8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_int8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2773,23 +2689,14 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_uint16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3067,25 +2974,15 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_int16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsh r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_int16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhzx r3, r3, r4
-; CHECK-P9-NEXT:    extsh r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_int16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhzx r3, r3, r4
-; CHECK-P8-NEXT:    extsh r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_int16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsh r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3361,23 +3258,14 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_uint32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lwzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lwzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lwzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3645,23 +3533,14 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_int32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsw r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_int32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lwax r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_int32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lwax r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_int32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lwax r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3918,23 +3797,14 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_uint64_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    pld r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    ldx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    ldx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    ldx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -4194,23 +4064,14 @@ define dso_local void @st_disjoint_align32_uint64_t_uint8_t(i64 %ptr, i64 %str)
 ; CHECK-P10-NEXT:    pstb r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint64_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stbx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint64_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stbx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint64_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stbx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -4476,23 +4337,14 @@ define dso_local void @st_disjoint_align32_uint64_t_uint16_t(i64 %ptr, i64 %str)
 ; CHECK-P10-NEXT:    psth r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint64_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    sthx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint64_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    sthx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint64_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    sthx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -4758,23 +4610,14 @@ define dso_local void @st_disjoint_align32_uint64_t_uint32_t(i64 %ptr, i64 %str)
 ; CHECK-P10-NEXT:    pstw r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint64_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stwx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint64_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stwx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint64_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stwx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -5031,23 +4874,14 @@ define dso_local void @st_disjoint_align32_uint64_t_uint64_t(i64 %ptr, i64 %str)
 ; CHECK-P10-NEXT:    pstd r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint64_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stdx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint64_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stdx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint64_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stdx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -5152,3 +4986,6 @@ entry:
   store atomic i64 %str, ptr inttoptr (i64 1000000000000 to ptr) monotonic, align 4096
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-P8: {{.*}}
+; CHECK-P9: {{.*}}

diff  --git a/llvm/test/CodeGen/PowerPC/atomics-i8-ldst.ll b/llvm/test/CodeGen/PowerPC/atomics-i8-ldst.ll
index 15ae630eaaeb2f7..2f8f48009bae240 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-i8-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i8-ldst.ll
@@ -7,16 +7,16 @@
 ; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P9
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P9
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P8
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P8
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 
 ; Function Attrs: nofree norecurse nounwind uwtable willreturn
 define dso_local signext i8 @ld_0_int8_t_uint8_t(i64 %ptr) {
@@ -175,25 +175,15 @@ define dso_local signext i8 @ld_disjoint_align32_int8_t_uint8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsb r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int8_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int8_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int8_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -473,25 +463,15 @@ define dso_local signext i8 @ld_disjoint_align32_int8_t_uint16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsb r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int8_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhzx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int8_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhzx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int8_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -777,25 +757,15 @@ define dso_local signext i8 @ld_disjoint_align32_int8_t_uint32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsb r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int8_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lwzx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int8_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lwzx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int8_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lwzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1081,25 +1051,15 @@ define dso_local signext i8 @ld_disjoint_align32_int8_t_uint64_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsb r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int8_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    ldx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int8_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    ldx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int8_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    ldx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1366,23 +1326,14 @@ define dso_local zeroext i8 @ld_disjoint_align32_uint8_t_uint8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint8_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint8_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint8_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1654,25 +1605,15 @@ define dso_local zeroext i8 @ld_disjoint_align32_uint8_t_uint16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 56
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint8_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhzx r3, r3, r4
-; CHECK-P9-NEXT:    clrldi r3, r3, 56
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint8_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhzx r3, r3, r4
-; CHECK-P8-NEXT:    clrldi r3, r3, 56
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint8_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhzx r3, r3, r4
+; CHECK-PREP10-NEXT:    clrldi r3, r3, 56
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1958,25 +1899,15 @@ define dso_local zeroext i8 @ld_disjoint_align32_uint8_t_uint32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 56
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint8_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lwzx r3, r3, r4
-; CHECK-P9-NEXT:    clrldi r3, r3, 56
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint8_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lwzx r3, r3, r4
-; CHECK-P8-NEXT:    clrldi r3, r3, 56
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint8_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lwzx r3, r3, r4
+; CHECK-PREP10-NEXT:    clrldi r3, r3, 56
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2262,25 +2193,15 @@ define dso_local zeroext i8 @ld_disjoint_align32_uint8_t_uint64_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 56
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint8_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    ldx r3, r3, r4
-; CHECK-P9-NEXT:    clrldi r3, r3, 56
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint8_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    ldx r3, r3, r4
-; CHECK-P8-NEXT:    clrldi r3, r3, 56
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint8_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    ldx r3, r3, r4
+; CHECK-PREP10-NEXT:    clrldi r3, r3, 56
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2545,23 +2466,14 @@ define dso_local void @st_disjoint_align32_uint8_t_uint8_t(i64 %ptr, i8 zeroext
 ; CHECK-P10-NEXT:    pstb r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint8_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stbx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint8_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stbx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint8_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stbx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2821,23 +2733,14 @@ define dso_local void @st_disjoint_align32_uint8_t_uint16_t(i64 %ptr, i8 zeroext
 ; CHECK-P10-NEXT:    psth r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint8_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    sthx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint8_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    sthx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint8_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    sthx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3103,23 +3006,14 @@ define dso_local void @st_disjoint_align32_uint8_t_uint32_t(i64 %ptr, i8 zeroext
 ; CHECK-P10-NEXT:    pstw r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint8_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stwx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint8_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stwx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint8_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stwx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3385,23 +3279,14 @@ define dso_local void @st_disjoint_align32_uint8_t_uint64_t(i64 %ptr, i8 zeroext
 ; CHECK-P10-NEXT:    pstd r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint8_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stdx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint8_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stdx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint8_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stdx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3667,23 +3552,14 @@ define dso_local void @st_disjoint_align32_int8_t_uint16_t(i64 %ptr, i8 signext
 ; CHECK-P10-NEXT:    psth r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_int8_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    sthx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_int8_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    sthx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_int8_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    sthx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3949,23 +3825,14 @@ define dso_local void @st_disjoint_align32_int8_t_uint32_t(i64 %ptr, i8 signext
 ; CHECK-P10-NEXT:    pstw r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_int8_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stwx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_int8_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stwx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_int8_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stwx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -4231,23 +4098,14 @@ define dso_local void @st_disjoint_align32_int8_t_uint64_t(i64 %ptr, i8 signext
 ; CHECK-P10-NEXT:    pstd r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_int8_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stdx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_int8_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stdx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_int8_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stdx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000

diff  --git a/llvm/test/CodeGen/PowerPC/atomics-regression.ll b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
index fb5baee3762809e..b31be701454da03 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-regression.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
@@ -453,8 +453,8 @@ define void @test42(ptr %ptr, i8 %cmp, i8 %val) {
 define void @test43(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test43:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:  .LBB43_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -471,8 +471,8 @@ define void @test43(ptr %ptr, i8 %cmp, i8 %val) {
 define void @test44(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test44:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:  .LBB44_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -490,8 +490,8 @@ define void @test44(ptr %ptr, i8 %cmp, i8 %val) {
 define void @test45(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test45:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:  .LBB45_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -509,8 +509,8 @@ define void @test45(ptr %ptr, i8 %cmp, i8 %val) {
 define void @test46(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test46:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:  .LBB46_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -528,8 +528,8 @@ define void @test46(ptr %ptr, i8 %cmp, i8 %val) {
 define void @test47(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test47:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:  .LBB47_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -547,8 +547,8 @@ define void @test47(ptr %ptr, i8 %cmp, i8 %val) {
 define void @test48(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test48:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:  .LBB48_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -566,8 +566,8 @@ define void @test48(ptr %ptr, i8 %cmp, i8 %val) {
 define void @test49(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test49:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:  .LBB49_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -638,8 +638,8 @@ define void @test52(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test53(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test53:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:  .LBB53_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -656,8 +656,8 @@ define void @test53(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test54(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test54:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:  .LBB54_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -675,8 +675,8 @@ define void @test54(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test55(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test55:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:  .LBB55_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -694,8 +694,8 @@ define void @test55(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test56(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test56:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:  .LBB56_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -713,8 +713,8 @@ define void @test56(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test57(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test57:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:  .LBB57_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -732,8 +732,8 @@ define void @test57(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test58(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test58:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:  .LBB58_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -751,8 +751,8 @@ define void @test58(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test59(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test59:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:  .LBB59_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -1173,8 +1173,8 @@ define void @test82(ptr %ptr, i8 %cmp, i8 %val) {
 define void @test83(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test83:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:  .LBB83_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -1191,8 +1191,8 @@ define void @test83(ptr %ptr, i8 %cmp, i8 %val) {
 define void @test84(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test84:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:  .LBB84_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -1210,8 +1210,8 @@ define void @test84(ptr %ptr, i8 %cmp, i8 %val) {
 define void @test85(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test85:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:  .LBB85_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -1229,8 +1229,8 @@ define void @test85(ptr %ptr, i8 %cmp, i8 %val) {
 define void @test86(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test86:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:  .LBB86_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -1248,8 +1248,8 @@ define void @test86(ptr %ptr, i8 %cmp, i8 %val) {
 define void @test87(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test87:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:  .LBB87_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -1267,8 +1267,8 @@ define void @test87(ptr %ptr, i8 %cmp, i8 %val) {
 define void @test88(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test88:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:  .LBB88_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -1286,8 +1286,8 @@ define void @test88(ptr %ptr, i8 %cmp, i8 %val) {
 define void @test89(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-LABEL: test89:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:  .LBB89_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -1358,8 +1358,8 @@ define void @test92(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test93(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test93:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:  .LBB93_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -1376,8 +1376,8 @@ define void @test93(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test94(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test94:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:  .LBB94_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -1395,8 +1395,8 @@ define void @test94(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test95(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test95:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:  .LBB95_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -1414,8 +1414,8 @@ define void @test95(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test96(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test96:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:  .LBB96_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -1433,8 +1433,8 @@ define void @test96(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test97(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test97:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:  .LBB97_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -1452,8 +1452,8 @@ define void @test97(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test98(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test98:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:  .LBB98_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -1471,8 +1471,8 @@ define void @test98(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test99(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-LABEL: test99:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:  .LBB99_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 6, 4
@@ -1890,8 +1890,8 @@ define i8 @test123(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB123_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i8 %val acq_rel
   ret i8 %ret
@@ -1906,8 +1906,8 @@ define i8 @test124(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB124_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i8 %val seq_cst
   ret i8 %ret
@@ -1966,8 +1966,8 @@ define i16 @test128(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB128_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i16 %val acq_rel
   ret i16 %ret
@@ -1982,8 +1982,8 @@ define i16 @test129(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB129_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i16 %val seq_cst
   ret i16 %ret
@@ -2042,8 +2042,8 @@ define i32 @test133(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB133_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i32 %val acq_rel
   ret i32 %ret
@@ -2058,8 +2058,8 @@ define i32 @test134(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB134_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i32 %val seq_cst
   ret i32 %ret
@@ -2118,8 +2118,8 @@ define i64 @test138(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB138_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i64 %val acq_rel
   ret i64 %ret
@@ -2134,8 +2134,8 @@ define i64 @test139(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB139_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i64 %val seq_cst
   ret i64 %ret
@@ -2198,8 +2198,8 @@ define i8 @test143(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB143_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i8 %val acq_rel
   ret i8 %ret
@@ -2215,8 +2215,8 @@ define i8 @test144(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB144_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i8 %val seq_cst
   ret i8 %ret
@@ -2279,8 +2279,8 @@ define i16 @test148(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB148_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i16 %val acq_rel
   ret i16 %ret
@@ -2296,8 +2296,8 @@ define i16 @test149(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB149_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i16 %val seq_cst
   ret i16 %ret
@@ -2360,8 +2360,8 @@ define i32 @test153(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB153_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i32 %val acq_rel
   ret i32 %ret
@@ -2377,8 +2377,8 @@ define i32 @test154(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB154_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i32 %val seq_cst
   ret i32 %ret
@@ -2441,8 +2441,8 @@ define i64 @test158(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB158_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i64 %val acq_rel
   ret i64 %ret
@@ -2458,8 +2458,8 @@ define i64 @test159(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB159_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i64 %val seq_cst
   ret i64 %ret
@@ -2522,8 +2522,8 @@ define i8 @test163(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB163_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i8 %val acq_rel
   ret i8 %ret
@@ -2539,8 +2539,8 @@ define i8 @test164(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB164_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i8 %val seq_cst
   ret i8 %ret
@@ -2603,8 +2603,8 @@ define i16 @test168(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB168_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i16 %val acq_rel
   ret i16 %ret
@@ -2620,8 +2620,8 @@ define i16 @test169(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB169_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i16 %val seq_cst
   ret i16 %ret
@@ -2684,8 +2684,8 @@ define i32 @test173(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB173_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i32 %val acq_rel
   ret i32 %ret
@@ -2701,8 +2701,8 @@ define i32 @test174(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB174_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i32 %val seq_cst
   ret i32 %ret
@@ -2765,8 +2765,8 @@ define i64 @test178(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB178_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i64 %val acq_rel
   ret i64 %ret
@@ -2782,8 +2782,8 @@ define i64 @test179(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB179_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i64 %val seq_cst
   ret i64 %ret
@@ -2846,8 +2846,8 @@ define i8 @test183(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB183_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i8 %val acq_rel
   ret i8 %ret
@@ -2863,8 +2863,8 @@ define i8 @test184(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB184_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i8 %val seq_cst
   ret i8 %ret
@@ -2927,8 +2927,8 @@ define i16 @test188(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB188_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i16 %val acq_rel
   ret i16 %ret
@@ -2944,8 +2944,8 @@ define i16 @test189(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB189_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i16 %val seq_cst
   ret i16 %ret
@@ -3008,8 +3008,8 @@ define i32 @test193(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB193_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i32 %val acq_rel
   ret i32 %ret
@@ -3025,8 +3025,8 @@ define i32 @test194(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB194_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i32 %val seq_cst
   ret i32 %ret
@@ -3089,8 +3089,8 @@ define i64 @test198(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB198_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i64 %val acq_rel
   ret i64 %ret
@@ -3106,8 +3106,8 @@ define i64 @test199(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB199_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i64 %val seq_cst
   ret i64 %ret
@@ -3170,8 +3170,8 @@ define i8 @test203(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB203_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i8 %val acq_rel
   ret i8 %ret
@@ -3187,8 +3187,8 @@ define i8 @test204(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB204_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i8 %val seq_cst
   ret i8 %ret
@@ -3251,8 +3251,8 @@ define i16 @test208(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB208_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i16 %val acq_rel
   ret i16 %ret
@@ -3268,8 +3268,8 @@ define i16 @test209(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB209_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i16 %val seq_cst
   ret i16 %ret
@@ -3332,8 +3332,8 @@ define i32 @test213(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB213_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i32 %val acq_rel
   ret i32 %ret
@@ -3349,8 +3349,8 @@ define i32 @test214(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB214_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i32 %val seq_cst
   ret i32 %ret
@@ -3413,8 +3413,8 @@ define i64 @test218(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB218_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i64 %val acq_rel
   ret i64 %ret
@@ -3430,8 +3430,8 @@ define i64 @test219(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB219_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i64 %val seq_cst
   ret i64 %ret
@@ -3494,8 +3494,8 @@ define i8 @test223(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB223_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i8 %val acq_rel
   ret i8 %ret
@@ -3511,8 +3511,8 @@ define i8 @test224(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB224_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i8 %val seq_cst
   ret i8 %ret
@@ -3575,8 +3575,8 @@ define i16 @test228(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB228_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i16 %val acq_rel
   ret i16 %ret
@@ -3592,8 +3592,8 @@ define i16 @test229(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB229_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i16 %val seq_cst
   ret i16 %ret
@@ -3656,8 +3656,8 @@ define i32 @test233(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB233_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i32 %val acq_rel
   ret i32 %ret
@@ -3673,8 +3673,8 @@ define i32 @test234(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB234_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i32 %val seq_cst
   ret i32 %ret
@@ -3737,8 +3737,8 @@ define i64 @test238(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB238_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i64 %val acq_rel
   ret i64 %ret
@@ -3754,8 +3754,8 @@ define i64 @test239(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB239_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i64 %val seq_cst
   ret i64 %ret
@@ -3818,8 +3818,8 @@ define i8 @test243(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB243_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i8 %val acq_rel
   ret i8 %ret
@@ -3835,8 +3835,8 @@ define i8 @test244(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB244_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i8 %val seq_cst
   ret i8 %ret
@@ -3899,8 +3899,8 @@ define i16 @test248(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB248_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i16 %val acq_rel
   ret i16 %ret
@@ -3916,8 +3916,8 @@ define i16 @test249(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB249_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i16 %val seq_cst
   ret i16 %ret
@@ -3980,8 +3980,8 @@ define i32 @test253(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB253_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i32 %val acq_rel
   ret i32 %ret
@@ -3997,8 +3997,8 @@ define i32 @test254(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB254_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i32 %val seq_cst
   ret i32 %ret
@@ -4061,8 +4061,8 @@ define i64 @test258(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB258_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i64 %val acq_rel
   ret i64 %ret
@@ -4078,8 +4078,8 @@ define i64 @test259(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB259_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i64 %val seq_cst
   ret i64 %ret
@@ -4117,8 +4117,8 @@ define i8 @test261(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB261_1
 ; PPC64LE-NEXT:  .LBB261_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i8 %val acquire
   ret i8 %ret
@@ -4127,8 +4127,8 @@ define i8 @test261(ptr %ptr, i8 %val) {
 define i8 @test262(ptr %ptr, i8 %val) {
 ; PPC64LE-LABEL: test262:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:  .LBB262_1:
 ; PPC64LE-NEXT:    lbarx 4, 0, 3
 ; PPC64LE-NEXT:    extsb 6, 4
@@ -4147,8 +4147,8 @@ define i8 @test262(ptr %ptr, i8 %val) {
 define i8 @test263(ptr %ptr, i8 %val) {
 ; PPC64LE-LABEL: test263:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:  .LBB263_1:
 ; PPC64LE-NEXT:    lbarx 4, 0, 3
 ; PPC64LE-NEXT:    extsb 6, 4
@@ -4158,8 +4158,8 @@ define i8 @test263(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB263_1
 ; PPC64LE-NEXT:  .LBB263_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i8 %val acq_rel
   ret i8 %ret
@@ -4168,8 +4168,8 @@ define i8 @test263(ptr %ptr, i8 %val) {
 define i8 @test264(ptr %ptr, i8 %val) {
 ; PPC64LE-LABEL: test264:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:  .LBB264_1:
 ; PPC64LE-NEXT:    lbarx 4, 0, 3
 ; PPC64LE-NEXT:    extsb 6, 4
@@ -4179,8 +4179,8 @@ define i8 @test264(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB264_1
 ; PPC64LE-NEXT:  .LBB264_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i8 %val seq_cst
   ret i8 %ret
@@ -4218,8 +4218,8 @@ define i16 @test266(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB266_1
 ; PPC64LE-NEXT:  .LBB266_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i16 %val acquire
   ret i16 %ret
@@ -4228,8 +4228,8 @@ define i16 @test266(ptr %ptr, i16 %val) {
 define i16 @test267(ptr %ptr, i16 %val) {
 ; PPC64LE-LABEL: test267:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:  .LBB267_1:
 ; PPC64LE-NEXT:    lharx 4, 0, 3
 ; PPC64LE-NEXT:    extsh 6, 4
@@ -4248,8 +4248,8 @@ define i16 @test267(ptr %ptr, i16 %val) {
 define i16 @test268(ptr %ptr, i16 %val) {
 ; PPC64LE-LABEL: test268:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:  .LBB268_1:
 ; PPC64LE-NEXT:    lharx 4, 0, 3
 ; PPC64LE-NEXT:    extsh 6, 4
@@ -4259,8 +4259,8 @@ define i16 @test268(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB268_1
 ; PPC64LE-NEXT:  .LBB268_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i16 %val acq_rel
   ret i16 %ret
@@ -4269,8 +4269,8 @@ define i16 @test268(ptr %ptr, i16 %val) {
 define i16 @test269(ptr %ptr, i16 %val) {
 ; PPC64LE-LABEL: test269:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:  .LBB269_1:
 ; PPC64LE-NEXT:    lharx 4, 0, 3
 ; PPC64LE-NEXT:    extsh 6, 4
@@ -4280,8 +4280,8 @@ define i16 @test269(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB269_1
 ; PPC64LE-NEXT:  .LBB269_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i16 %val seq_cst
   ret i16 %ret
@@ -4352,8 +4352,8 @@ define i32 @test273(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB273_1
 ; PPC64LE-NEXT:  .LBB273_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i32 %val acq_rel
   ret i32 %ret
@@ -4371,8 +4371,8 @@ define i32 @test274(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB274_1
 ; PPC64LE-NEXT:  .LBB274_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i32 %val seq_cst
   ret i32 %ret
@@ -4443,8 +4443,8 @@ define i64 @test278(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB278_1
 ; PPC64LE-NEXT:  .LBB278_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i64 %val acq_rel
   ret i64 %ret
@@ -4462,8 +4462,8 @@ define i64 @test279(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB279_1
 ; PPC64LE-NEXT:  .LBB279_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i64 %val seq_cst
   ret i64 %ret
@@ -4501,8 +4501,8 @@ define i8 @test281(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB281_1
 ; PPC64LE-NEXT:  .LBB281_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i8 %val acquire
   ret i8 %ret
@@ -4511,8 +4511,8 @@ define i8 @test281(ptr %ptr, i8 %val) {
 define i8 @test282(ptr %ptr, i8 %val) {
 ; PPC64LE-LABEL: test282:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:  .LBB282_1:
 ; PPC64LE-NEXT:    lbarx 4, 0, 3
 ; PPC64LE-NEXT:    extsb 6, 4
@@ -4531,8 +4531,8 @@ define i8 @test282(ptr %ptr, i8 %val) {
 define i8 @test283(ptr %ptr, i8 %val) {
 ; PPC64LE-LABEL: test283:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:  .LBB283_1:
 ; PPC64LE-NEXT:    lbarx 4, 0, 3
 ; PPC64LE-NEXT:    extsb 6, 4
@@ -4542,8 +4542,8 @@ define i8 @test283(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB283_1
 ; PPC64LE-NEXT:  .LBB283_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i8 %val acq_rel
   ret i8 %ret
@@ -4552,8 +4552,8 @@ define i8 @test283(ptr %ptr, i8 %val) {
 define i8 @test284(ptr %ptr, i8 %val) {
 ; PPC64LE-LABEL: test284:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:  .LBB284_1:
 ; PPC64LE-NEXT:    lbarx 4, 0, 3
 ; PPC64LE-NEXT:    extsb 6, 4
@@ -4563,8 +4563,8 @@ define i8 @test284(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB284_1
 ; PPC64LE-NEXT:  .LBB284_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i8 %val seq_cst
   ret i8 %ret
@@ -4602,8 +4602,8 @@ define i16 @test286(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB286_1
 ; PPC64LE-NEXT:  .LBB286_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i16 %val acquire
   ret i16 %ret
@@ -4612,8 +4612,8 @@ define i16 @test286(ptr %ptr, i16 %val) {
 define i16 @test287(ptr %ptr, i16 %val) {
 ; PPC64LE-LABEL: test287:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:  .LBB287_1:
 ; PPC64LE-NEXT:    lharx 4, 0, 3
 ; PPC64LE-NEXT:    extsh 6, 4
@@ -4632,8 +4632,8 @@ define i16 @test287(ptr %ptr, i16 %val) {
 define i16 @test288(ptr %ptr, i16 %val) {
 ; PPC64LE-LABEL: test288:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:  .LBB288_1:
 ; PPC64LE-NEXT:    lharx 4, 0, 3
 ; PPC64LE-NEXT:    extsh 6, 4
@@ -4643,8 +4643,8 @@ define i16 @test288(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB288_1
 ; PPC64LE-NEXT:  .LBB288_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i16 %val acq_rel
   ret i16 %ret
@@ -4653,8 +4653,8 @@ define i16 @test288(ptr %ptr, i16 %val) {
 define i16 @test289(ptr %ptr, i16 %val) {
 ; PPC64LE-LABEL: test289:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:  .LBB289_1:
 ; PPC64LE-NEXT:    lharx 4, 0, 3
 ; PPC64LE-NEXT:    extsh 6, 4
@@ -4664,8 +4664,8 @@ define i16 @test289(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB289_1
 ; PPC64LE-NEXT:  .LBB289_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i16 %val seq_cst
   ret i16 %ret
@@ -4736,8 +4736,8 @@ define i32 @test293(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB293_1
 ; PPC64LE-NEXT:  .LBB293_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i32 %val acq_rel
   ret i32 %ret
@@ -4755,8 +4755,8 @@ define i32 @test294(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB294_1
 ; PPC64LE-NEXT:  .LBB294_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i32 %val seq_cst
   ret i32 %ret
@@ -4827,8 +4827,8 @@ define i64 @test298(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB298_1
 ; PPC64LE-NEXT:  .LBB298_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i64 %val acq_rel
   ret i64 %ret
@@ -4846,8 +4846,8 @@ define i64 @test299(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB299_1
 ; PPC64LE-NEXT:  .LBB299_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i64 %val seq_cst
   ret i64 %ret
@@ -4918,8 +4918,8 @@ define i8 @test303(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB303_1
 ; PPC64LE-NEXT:  .LBB303_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i8 %val acq_rel
   ret i8 %ret
@@ -4937,8 +4937,8 @@ define i8 @test304(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB304_1
 ; PPC64LE-NEXT:  .LBB304_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i8 %val seq_cst
   ret i8 %ret
@@ -5009,8 +5009,8 @@ define i16 @test308(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB308_1
 ; PPC64LE-NEXT:  .LBB308_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i16 %val acq_rel
   ret i16 %ret
@@ -5028,8 +5028,8 @@ define i16 @test309(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB309_1
 ; PPC64LE-NEXT:  .LBB309_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i16 %val seq_cst
   ret i16 %ret
@@ -5100,8 +5100,8 @@ define i32 @test313(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB313_1
 ; PPC64LE-NEXT:  .LBB313_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i32 %val acq_rel
   ret i32 %ret
@@ -5119,8 +5119,8 @@ define i32 @test314(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB314_1
 ; PPC64LE-NEXT:  .LBB314_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i32 %val seq_cst
   ret i32 %ret
@@ -5191,8 +5191,8 @@ define i64 @test318(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB318_1
 ; PPC64LE-NEXT:  .LBB318_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i64 %val acq_rel
   ret i64 %ret
@@ -5210,8 +5210,8 @@ define i64 @test319(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB319_1
 ; PPC64LE-NEXT:  .LBB319_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i64 %val seq_cst
   ret i64 %ret
@@ -5282,8 +5282,8 @@ define i8 @test323(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB323_1
 ; PPC64LE-NEXT:  .LBB323_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i8 %val acq_rel
   ret i8 %ret
@@ -5301,8 +5301,8 @@ define i8 @test324(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB324_1
 ; PPC64LE-NEXT:  .LBB324_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i8 %val seq_cst
   ret i8 %ret
@@ -5373,8 +5373,8 @@ define i16 @test328(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB328_1
 ; PPC64LE-NEXT:  .LBB328_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i16 %val acq_rel
   ret i16 %ret
@@ -5392,8 +5392,8 @@ define i16 @test329(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB329_1
 ; PPC64LE-NEXT:  .LBB329_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i16 %val seq_cst
   ret i16 %ret
@@ -5464,8 +5464,8 @@ define i32 @test333(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB333_1
 ; PPC64LE-NEXT:  .LBB333_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i32 %val acq_rel
   ret i32 %ret
@@ -5483,8 +5483,8 @@ define i32 @test334(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB334_1
 ; PPC64LE-NEXT:  .LBB334_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i32 %val seq_cst
   ret i32 %ret
@@ -5555,8 +5555,8 @@ define i64 @test338(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB338_1
 ; PPC64LE-NEXT:  .LBB338_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i64 %val acq_rel
   ret i64 %ret
@@ -5574,8 +5574,8 @@ define i64 @test339(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB339_1
 ; PPC64LE-NEXT:  .LBB339_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i64 %val seq_cst
   ret i64 %ret
@@ -5634,8 +5634,8 @@ define i8 @test343(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB343_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i8 %val syncscope("singlethread") acq_rel
   ret i8 %ret
@@ -5650,8 +5650,8 @@ define i8 @test344(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB344_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i8 %val syncscope("singlethread") seq_cst
   ret i8 %ret
@@ -5710,8 +5710,8 @@ define i16 @test348(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB348_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i16 %val syncscope("singlethread") acq_rel
   ret i16 %ret
@@ -5726,8 +5726,8 @@ define i16 @test349(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB349_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i16 %val syncscope("singlethread") seq_cst
   ret i16 %ret
@@ -5786,8 +5786,8 @@ define i32 @test353(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB353_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i32 %val syncscope("singlethread") acq_rel
   ret i32 %ret
@@ -5802,8 +5802,8 @@ define i32 @test354(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB354_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i32 %val syncscope("singlethread") seq_cst
   ret i32 %ret
@@ -5862,8 +5862,8 @@ define i64 @test358(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB358_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i64 %val syncscope("singlethread") acq_rel
   ret i64 %ret
@@ -5878,8 +5878,8 @@ define i64 @test359(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB359_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xchg ptr %ptr, i64 %val syncscope("singlethread") seq_cst
   ret i64 %ret
@@ -5942,8 +5942,8 @@ define i8 @test363(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB363_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i8 %val syncscope("singlethread") acq_rel
   ret i8 %ret
@@ -5959,8 +5959,8 @@ define i8 @test364(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB364_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i8 %val syncscope("singlethread") seq_cst
   ret i8 %ret
@@ -6023,8 +6023,8 @@ define i16 @test368(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB368_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i16 %val syncscope("singlethread") acq_rel
   ret i16 %ret
@@ -6040,8 +6040,8 @@ define i16 @test369(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB369_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i16 %val syncscope("singlethread") seq_cst
   ret i16 %ret
@@ -6104,8 +6104,8 @@ define i32 @test373(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB373_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i32 %val syncscope("singlethread") acq_rel
   ret i32 %ret
@@ -6121,8 +6121,8 @@ define i32 @test374(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB374_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i32 %val syncscope("singlethread") seq_cst
   ret i32 %ret
@@ -6185,8 +6185,8 @@ define i64 @test378(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB378_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i64 %val syncscope("singlethread") acq_rel
   ret i64 %ret
@@ -6202,8 +6202,8 @@ define i64 @test379(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB379_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw add ptr %ptr, i64 %val syncscope("singlethread") seq_cst
   ret i64 %ret
@@ -6266,8 +6266,8 @@ define i8 @test383(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB383_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i8 %val syncscope("singlethread") acq_rel
   ret i8 %ret
@@ -6283,8 +6283,8 @@ define i8 @test384(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB384_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i8 %val syncscope("singlethread") seq_cst
   ret i8 %ret
@@ -6347,8 +6347,8 @@ define i16 @test388(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB388_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i16 %val syncscope("singlethread") acq_rel
   ret i16 %ret
@@ -6364,8 +6364,8 @@ define i16 @test389(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB389_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i16 %val syncscope("singlethread") seq_cst
   ret i16 %ret
@@ -6428,8 +6428,8 @@ define i32 @test393(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB393_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i32 %val syncscope("singlethread") acq_rel
   ret i32 %ret
@@ -6445,8 +6445,8 @@ define i32 @test394(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB394_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i32 %val syncscope("singlethread") seq_cst
   ret i32 %ret
@@ -6509,8 +6509,8 @@ define i64 @test398(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB398_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i64 %val syncscope("singlethread") acq_rel
   ret i64 %ret
@@ -6526,8 +6526,8 @@ define i64 @test399(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB399_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw sub ptr %ptr, i64 %val syncscope("singlethread") seq_cst
   ret i64 %ret
@@ -6590,8 +6590,8 @@ define i8 @test403(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB403_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i8 %val syncscope("singlethread") acq_rel
   ret i8 %ret
@@ -6607,8 +6607,8 @@ define i8 @test404(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB404_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i8 %val syncscope("singlethread") seq_cst
   ret i8 %ret
@@ -6671,8 +6671,8 @@ define i16 @test408(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB408_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i16 %val syncscope("singlethread") acq_rel
   ret i16 %ret
@@ -6688,8 +6688,8 @@ define i16 @test409(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB409_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i16 %val syncscope("singlethread") seq_cst
   ret i16 %ret
@@ -6752,8 +6752,8 @@ define i32 @test413(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB413_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i32 %val syncscope("singlethread") acq_rel
   ret i32 %ret
@@ -6769,8 +6769,8 @@ define i32 @test414(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB414_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i32 %val syncscope("singlethread") seq_cst
   ret i32 %ret
@@ -6833,8 +6833,8 @@ define i64 @test418(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB418_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i64 %val syncscope("singlethread") acq_rel
   ret i64 %ret
@@ -6850,8 +6850,8 @@ define i64 @test419(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB419_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw and ptr %ptr, i64 %val syncscope("singlethread") seq_cst
   ret i64 %ret
@@ -6914,8 +6914,8 @@ define i8 @test423(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB423_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i8 %val syncscope("singlethread") acq_rel
   ret i8 %ret
@@ -6931,8 +6931,8 @@ define i8 @test424(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB424_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i8 %val syncscope("singlethread") seq_cst
   ret i8 %ret
@@ -6995,8 +6995,8 @@ define i16 @test428(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB428_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i16 %val syncscope("singlethread") acq_rel
   ret i16 %ret
@@ -7012,8 +7012,8 @@ define i16 @test429(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB429_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i16 %val syncscope("singlethread") seq_cst
   ret i16 %ret
@@ -7076,8 +7076,8 @@ define i32 @test433(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB433_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i32 %val syncscope("singlethread") acq_rel
   ret i32 %ret
@@ -7093,8 +7093,8 @@ define i32 @test434(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB434_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i32 %val syncscope("singlethread") seq_cst
   ret i32 %ret
@@ -7157,8 +7157,8 @@ define i64 @test438(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB438_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i64 %val syncscope("singlethread") acq_rel
   ret i64 %ret
@@ -7174,8 +7174,8 @@ define i64 @test439(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB439_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw nand ptr %ptr, i64 %val syncscope("singlethread") seq_cst
   ret i64 %ret
@@ -7238,8 +7238,8 @@ define i8 @test443(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB443_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i8 %val syncscope("singlethread") acq_rel
   ret i8 %ret
@@ -7255,8 +7255,8 @@ define i8 @test444(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB444_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i8 %val syncscope("singlethread") seq_cst
   ret i8 %ret
@@ -7319,8 +7319,8 @@ define i16 @test448(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB448_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i16 %val syncscope("singlethread") acq_rel
   ret i16 %ret
@@ -7336,8 +7336,8 @@ define i16 @test449(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB449_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i16 %val syncscope("singlethread") seq_cst
   ret i16 %ret
@@ -7400,8 +7400,8 @@ define i32 @test453(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB453_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i32 %val syncscope("singlethread") acq_rel
   ret i32 %ret
@@ -7417,8 +7417,8 @@ define i32 @test454(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB454_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i32 %val syncscope("singlethread") seq_cst
   ret i32 %ret
@@ -7481,8 +7481,8 @@ define i64 @test458(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB458_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i64 %val syncscope("singlethread") acq_rel
   ret i64 %ret
@@ -7498,8 +7498,8 @@ define i64 @test459(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB459_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw or ptr %ptr, i64 %val syncscope("singlethread") seq_cst
   ret i64 %ret
@@ -7562,8 +7562,8 @@ define i8 @test463(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB463_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i8 %val syncscope("singlethread") acq_rel
   ret i8 %ret
@@ -7579,8 +7579,8 @@ define i8 @test464(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB464_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i8 %val syncscope("singlethread") seq_cst
   ret i8 %ret
@@ -7643,8 +7643,8 @@ define i16 @test468(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB468_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i16 %val syncscope("singlethread") acq_rel
   ret i16 %ret
@@ -7660,8 +7660,8 @@ define i16 @test469(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB469_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i16 %val syncscope("singlethread") seq_cst
   ret i16 %ret
@@ -7724,8 +7724,8 @@ define i32 @test473(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB473_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i32 %val syncscope("singlethread") acq_rel
   ret i32 %ret
@@ -7741,8 +7741,8 @@ define i32 @test474(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB474_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i32 %val syncscope("singlethread") seq_cst
   ret i32 %ret
@@ -7805,8 +7805,8 @@ define i64 @test478(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB478_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i64 %val syncscope("singlethread") acq_rel
   ret i64 %ret
@@ -7822,8 +7822,8 @@ define i64 @test479(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB479_1
 ; PPC64LE-NEXT:  # %bb.2:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw xor ptr %ptr, i64 %val syncscope("singlethread") seq_cst
   ret i64 %ret
@@ -7861,8 +7861,8 @@ define i8 @test481(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB481_1
 ; PPC64LE-NEXT:  .LBB481_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i8 %val syncscope("singlethread") acquire
   ret i8 %ret
@@ -7871,8 +7871,8 @@ define i8 @test481(ptr %ptr, i8 %val) {
 define i8 @test482(ptr %ptr, i8 %val) {
 ; PPC64LE-LABEL: test482:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:  .LBB482_1:
 ; PPC64LE-NEXT:    lbarx 4, 0, 3
 ; PPC64LE-NEXT:    extsb 6, 4
@@ -7891,8 +7891,8 @@ define i8 @test482(ptr %ptr, i8 %val) {
 define i8 @test483(ptr %ptr, i8 %val) {
 ; PPC64LE-LABEL: test483:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:  .LBB483_1:
 ; PPC64LE-NEXT:    lbarx 4, 0, 3
 ; PPC64LE-NEXT:    extsb 6, 4
@@ -7902,8 +7902,8 @@ define i8 @test483(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB483_1
 ; PPC64LE-NEXT:  .LBB483_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i8 %val syncscope("singlethread") acq_rel
   ret i8 %ret
@@ -7912,8 +7912,8 @@ define i8 @test483(ptr %ptr, i8 %val) {
 define i8 @test484(ptr %ptr, i8 %val) {
 ; PPC64LE-LABEL: test484:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:  .LBB484_1:
 ; PPC64LE-NEXT:    lbarx 4, 0, 3
 ; PPC64LE-NEXT:    extsb 6, 4
@@ -7923,8 +7923,8 @@ define i8 @test484(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB484_1
 ; PPC64LE-NEXT:  .LBB484_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i8 %val syncscope("singlethread") seq_cst
   ret i8 %ret
@@ -7962,8 +7962,8 @@ define i16 @test486(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB486_1
 ; PPC64LE-NEXT:  .LBB486_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i16 %val syncscope("singlethread") acquire
   ret i16 %ret
@@ -7972,8 +7972,8 @@ define i16 @test486(ptr %ptr, i16 %val) {
 define i16 @test487(ptr %ptr, i16 %val) {
 ; PPC64LE-LABEL: test487:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:  .LBB487_1:
 ; PPC64LE-NEXT:    lharx 4, 0, 3
 ; PPC64LE-NEXT:    extsh 6, 4
@@ -7992,8 +7992,8 @@ define i16 @test487(ptr %ptr, i16 %val) {
 define i16 @test488(ptr %ptr, i16 %val) {
 ; PPC64LE-LABEL: test488:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:  .LBB488_1:
 ; PPC64LE-NEXT:    lharx 4, 0, 3
 ; PPC64LE-NEXT:    extsh 6, 4
@@ -8003,8 +8003,8 @@ define i16 @test488(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB488_1
 ; PPC64LE-NEXT:  .LBB488_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i16 %val syncscope("singlethread") acq_rel
   ret i16 %ret
@@ -8013,8 +8013,8 @@ define i16 @test488(ptr %ptr, i16 %val) {
 define i16 @test489(ptr %ptr, i16 %val) {
 ; PPC64LE-LABEL: test489:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:  .LBB489_1:
 ; PPC64LE-NEXT:    lharx 4, 0, 3
 ; PPC64LE-NEXT:    extsh 6, 4
@@ -8024,8 +8024,8 @@ define i16 @test489(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB489_1
 ; PPC64LE-NEXT:  .LBB489_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i16 %val syncscope("singlethread") seq_cst
   ret i16 %ret
@@ -8096,8 +8096,8 @@ define i32 @test493(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB493_1
 ; PPC64LE-NEXT:  .LBB493_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i32 %val syncscope("singlethread") acq_rel
   ret i32 %ret
@@ -8115,8 +8115,8 @@ define i32 @test494(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB494_1
 ; PPC64LE-NEXT:  .LBB494_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i32 %val syncscope("singlethread") seq_cst
   ret i32 %ret
@@ -8187,8 +8187,8 @@ define i64 @test498(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB498_1
 ; PPC64LE-NEXT:  .LBB498_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i64 %val syncscope("singlethread") acq_rel
   ret i64 %ret
@@ -8206,8 +8206,8 @@ define i64 @test499(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB499_1
 ; PPC64LE-NEXT:  .LBB499_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw max ptr %ptr, i64 %val syncscope("singlethread") seq_cst
   ret i64 %ret
@@ -8245,8 +8245,8 @@ define i8 @test501(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB501_1
 ; PPC64LE-NEXT:  .LBB501_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i8 %val syncscope("singlethread") acquire
   ret i8 %ret
@@ -8255,8 +8255,8 @@ define i8 @test501(ptr %ptr, i8 %val) {
 define i8 @test502(ptr %ptr, i8 %val) {
 ; PPC64LE-LABEL: test502:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:  .LBB502_1:
 ; PPC64LE-NEXT:    lbarx 4, 0, 3
 ; PPC64LE-NEXT:    extsb 6, 4
@@ -8275,8 +8275,8 @@ define i8 @test502(ptr %ptr, i8 %val) {
 define i8 @test503(ptr %ptr, i8 %val) {
 ; PPC64LE-LABEL: test503:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:  .LBB503_1:
 ; PPC64LE-NEXT:    lbarx 4, 0, 3
 ; PPC64LE-NEXT:    extsb 6, 4
@@ -8286,8 +8286,8 @@ define i8 @test503(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB503_1
 ; PPC64LE-NEXT:  .LBB503_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i8 %val syncscope("singlethread") acq_rel
   ret i8 %ret
@@ -8296,8 +8296,8 @@ define i8 @test503(ptr %ptr, i8 %val) {
 define i8 @test504(ptr %ptr, i8 %val) {
 ; PPC64LE-LABEL: test504:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    extsb 5, 4
 ; PPC64LE-NEXT:  .LBB504_1:
 ; PPC64LE-NEXT:    lbarx 4, 0, 3
 ; PPC64LE-NEXT:    extsb 6, 4
@@ -8307,8 +8307,8 @@ define i8 @test504(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB504_1
 ; PPC64LE-NEXT:  .LBB504_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i8 %val syncscope("singlethread") seq_cst
   ret i8 %ret
@@ -8346,8 +8346,8 @@ define i16 @test506(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB506_1
 ; PPC64LE-NEXT:  .LBB506_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i16 %val syncscope("singlethread") acquire
   ret i16 %ret
@@ -8356,8 +8356,8 @@ define i16 @test506(ptr %ptr, i16 %val) {
 define i16 @test507(ptr %ptr, i16 %val) {
 ; PPC64LE-LABEL: test507:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:  .LBB507_1:
 ; PPC64LE-NEXT:    lharx 4, 0, 3
 ; PPC64LE-NEXT:    extsh 6, 4
@@ -8376,8 +8376,8 @@ define i16 @test507(ptr %ptr, i16 %val) {
 define i16 @test508(ptr %ptr, i16 %val) {
 ; PPC64LE-LABEL: test508:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:  .LBB508_1:
 ; PPC64LE-NEXT:    lharx 4, 0, 3
 ; PPC64LE-NEXT:    extsh 6, 4
@@ -8387,8 +8387,8 @@ define i16 @test508(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB508_1
 ; PPC64LE-NEXT:  .LBB508_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i16 %val syncscope("singlethread") acq_rel
   ret i16 %ret
@@ -8397,8 +8397,8 @@ define i16 @test508(ptr %ptr, i16 %val) {
 define i16 @test509(ptr %ptr, i16 %val) {
 ; PPC64LE-LABEL: test509:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    extsh 5, 4
 ; PPC64LE-NEXT:  .LBB509_1:
 ; PPC64LE-NEXT:    lharx 4, 0, 3
 ; PPC64LE-NEXT:    extsh 6, 4
@@ -8408,8 +8408,8 @@ define i16 @test509(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB509_1
 ; PPC64LE-NEXT:  .LBB509_3:
-; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 4
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i16 %val syncscope("singlethread") seq_cst
   ret i16 %ret
@@ -8480,8 +8480,8 @@ define i32 @test513(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB513_1
 ; PPC64LE-NEXT:  .LBB513_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i32 %val syncscope("singlethread") acq_rel
   ret i32 %ret
@@ -8499,8 +8499,8 @@ define i32 @test514(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB514_1
 ; PPC64LE-NEXT:  .LBB514_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i32 %val syncscope("singlethread") seq_cst
   ret i32 %ret
@@ -8571,8 +8571,8 @@ define i64 @test518(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB518_1
 ; PPC64LE-NEXT:  .LBB518_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i64 %val syncscope("singlethread") acq_rel
   ret i64 %ret
@@ -8590,8 +8590,8 @@ define i64 @test519(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB519_1
 ; PPC64LE-NEXT:  .LBB519_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw min ptr %ptr, i64 %val syncscope("singlethread") seq_cst
   ret i64 %ret
@@ -8662,8 +8662,8 @@ define i8 @test523(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB523_1
 ; PPC64LE-NEXT:  .LBB523_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i8 %val syncscope("singlethread") acq_rel
   ret i8 %ret
@@ -8681,8 +8681,8 @@ define i8 @test524(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB524_1
 ; PPC64LE-NEXT:  .LBB524_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i8 %val syncscope("singlethread") seq_cst
   ret i8 %ret
@@ -8753,8 +8753,8 @@ define i16 @test528(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB528_1
 ; PPC64LE-NEXT:  .LBB528_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i16 %val syncscope("singlethread") acq_rel
   ret i16 %ret
@@ -8772,8 +8772,8 @@ define i16 @test529(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB529_1
 ; PPC64LE-NEXT:  .LBB529_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i16 %val syncscope("singlethread") seq_cst
   ret i16 %ret
@@ -8844,8 +8844,8 @@ define i32 @test533(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB533_1
 ; PPC64LE-NEXT:  .LBB533_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i32 %val syncscope("singlethread") acq_rel
   ret i32 %ret
@@ -8863,8 +8863,8 @@ define i32 @test534(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB534_1
 ; PPC64LE-NEXT:  .LBB534_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i32 %val syncscope("singlethread") seq_cst
   ret i32 %ret
@@ -8935,8 +8935,8 @@ define i64 @test538(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB538_1
 ; PPC64LE-NEXT:  .LBB538_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i64 %val syncscope("singlethread") acq_rel
   ret i64 %ret
@@ -8954,8 +8954,8 @@ define i64 @test539(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB539_1
 ; PPC64LE-NEXT:  .LBB539_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umax ptr %ptr, i64 %val syncscope("singlethread") seq_cst
   ret i64 %ret
@@ -9026,8 +9026,8 @@ define i8 @test543(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB543_1
 ; PPC64LE-NEXT:  .LBB543_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i8 %val syncscope("singlethread") acq_rel
   ret i8 %ret
@@ -9045,8 +9045,8 @@ define i8 @test544(ptr %ptr, i8 %val) {
 ; PPC64LE-NEXT:    stbcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB544_1
 ; PPC64LE-NEXT:  .LBB544_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i8 %val syncscope("singlethread") seq_cst
   ret i8 %ret
@@ -9117,8 +9117,8 @@ define i16 @test548(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB548_1
 ; PPC64LE-NEXT:  .LBB548_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i16 %val syncscope("singlethread") acq_rel
   ret i16 %ret
@@ -9136,8 +9136,8 @@ define i16 @test549(ptr %ptr, i16 %val) {
 ; PPC64LE-NEXT:    sthcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB549_1
 ; PPC64LE-NEXT:  .LBB549_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i16 %val syncscope("singlethread") seq_cst
   ret i16 %ret
@@ -9208,8 +9208,8 @@ define i32 @test553(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB553_1
 ; PPC64LE-NEXT:  .LBB553_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i32 %val syncscope("singlethread") acq_rel
   ret i32 %ret
@@ -9227,8 +9227,8 @@ define i32 @test554(ptr %ptr, i32 %val) {
 ; PPC64LE-NEXT:    stwcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB554_1
 ; PPC64LE-NEXT:  .LBB554_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i32 %val syncscope("singlethread") seq_cst
   ret i32 %ret
@@ -9299,8 +9299,8 @@ define i64 @test558(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB558_1
 ; PPC64LE-NEXT:  .LBB558_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i64 %val syncscope("singlethread") acq_rel
   ret i64 %ret
@@ -9318,8 +9318,8 @@ define i64 @test559(ptr %ptr, i64 %val) {
 ; PPC64LE-NEXT:    stdcx. 4, 0, 3
 ; PPC64LE-NEXT:    bne 0, .LBB559_1
 ; PPC64LE-NEXT:  .LBB559_3:
-; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    mr 3, 5
 ; PPC64LE-NEXT:    blr
   %ret = atomicrmw umin ptr %ptr, i64 %val syncscope("singlethread") seq_cst
   ret i64 %ret

diff  --git a/llvm/test/CodeGen/PowerPC/bool-math.ll b/llvm/test/CodeGen/PowerPC/bool-math.ll
index 2016e5ad4ffebf8..244ed2eccb0c4c4 100644
--- a/llvm/test/CodeGen/PowerPC/bool-math.ll
+++ b/llvm/test/CodeGen/PowerPC/bool-math.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=powerpc64le-- -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-- -mcpu=pwr8 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr8 -verify-machineinstrs | FileCheck %s
 
 define i32 @sub_zext_cmp_mask_same_size_result(i32 %x) {
 ; CHECK-LABEL: sub_zext_cmp_mask_same_size_result:
@@ -120,8 +120,8 @@ define i16 @low_bit_select_constants_bigger_false_narrower_result(i32 %x) {
 define i8 @low_bit_select_constants_bigger_true_same_size_result(i8 %x) {
 ; CHECK-LABEL: low_bit_select_constants_bigger_true_same_size_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li 4, -29
 ; CHECK-NEXT:    clrldi 3, 3, 63
+; CHECK-NEXT:    li 4, -29
 ; CHECK-NEXT:    xor 3, 3, 4
 ; CHECK-NEXT:    blr
   %a = and i8 %x, 1

diff  --git a/llvm/test/CodeGen/PowerPC/branch_coalesce.ll b/llvm/test/CodeGen/PowerPC/branch_coalesce.ll
index 5f9c4f3804750c0..61d6ef5a9d48b74 100644
--- a/llvm/test/CodeGen/PowerPC/branch_coalesce.ll
+++ b/llvm/test/CodeGen/PowerPC/branch_coalesce.ll
@@ -12,10 +12,10 @@ define double @testBranchCoal(double %a, double %b, double %c, i32 %x) {
 ; CHECK-NEXT:    beq 0, .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; CHECK-NEXT:    addis 4, 2, .LCPI0_1@toc@ha
 ; CHECK-NEXT:    xxlxor 2, 2, 2
 ; CHECK-NEXT:    lfd 1, .LCPI0_0@toc@l(3)
-; CHECK-NEXT:    lfd 3, .LCPI0_1@toc@l(4)
+; CHECK-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
+; CHECK-NEXT:    lfd 3, .LCPI0_1@toc@l(3)
 ; CHECK-NEXT:  .LBB0_2: # %entry
 ; CHECK-NEXT:    xsadddp 0, 1, 2
 ; CHECK-NEXT:    xsadddp 1, 0, 3

diff  --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index 691738c298d8a4a..6410738af6c6e0e 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -939,21 +939,21 @@ define <4 x i32> @fromDiffMemConsDi(ptr nocapture readonly %arr) {
 ;
 ; P8BE-LABEL: fromDiffMemConsDi:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
 ; P8BE-NEXT:    lxvw4x v2, 0, r3
-; P8BE-NEXT:    addi r4, r4, .LCPI7_0@toc@l
-; P8BE-NEXT:    lxvw4x v3, 0, r4
+; P8BE-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI7_0@toc@l
+; P8BE-NEXT:    lxvw4x v3, 0, r3
 ; P8BE-NEXT:    vperm v2, v2, v2, v3
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fromDiffMemConsDi:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
 ; P8LE-NEXT:    lxvd2x vs0, 0, r3
-; P8LE-NEXT:    addi r4, r4, .LCPI7_0@toc@l
-; P8LE-NEXT:    lxvd2x vs1, 0, r4
+; P8LE-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI7_0@toc@l
 ; P8LE-NEXT:    xxswapd v2, vs0
-; P8LE-NEXT:    xxswapd v3, vs1
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs0
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    blr
 entry:
@@ -1047,26 +1047,26 @@ define <4 x i32> @fromDiffMemVarDi(ptr nocapture readonly %arr, i32 signext %ele
 ; P8BE-LABEL: fromDiffMemVarDi:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    sldi r4, r4, 2
-; P8BE-NEXT:    addis r5, r2, .LCPI9_0@toc@ha
 ; P8BE-NEXT:    add r3, r3, r4
-; P8BE-NEXT:    addi r4, r5, .LCPI9_0@toc@l
 ; P8BE-NEXT:    addi r3, r3, -12
-; P8BE-NEXT:    lxvw4x v3, 0, r4
 ; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI9_0@toc@l
+; P8BE-NEXT:    lxvw4x v3, 0, r3
 ; P8BE-NEXT:    vperm v2, v2, v2, v3
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fromDiffMemVarDi:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    sldi r4, r4, 2
-; P8LE-NEXT:    addis r5, r2, .LCPI9_0@toc@ha
 ; P8LE-NEXT:    add r3, r3, r4
-; P8LE-NEXT:    addi r4, r5, .LCPI9_0@toc@l
 ; P8LE-NEXT:    addi r3, r3, -12
-; P8LE-NEXT:    lxvd2x vs1, 0, r4
 ; P8LE-NEXT:    lxvd2x vs0, 0, r3
-; P8LE-NEXT:    xxswapd v3, vs1
+; P8LE-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI9_0@toc@l
 ; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs0
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    blr
 entry:
@@ -1117,14 +1117,14 @@ define <4 x i32> @fromRandMemConsi(ptr nocapture readonly %arr) {
 ;
 ; P8BE-LABEL: fromRandMemConsi:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    lwz r4, 8(r3)
-; P8BE-NEXT:    lwz r5, 352(r3)
-; P8BE-NEXT:    lwz r6, 16(r3)
-; P8BE-NEXT:    lwz r3, 72(r3)
-; P8BE-NEXT:    rldimi r5, r4, 32, 0
+; P8BE-NEXT:    lwz r4, 16(r3)
+; P8BE-NEXT:    lwz r5, 72(r3)
+; P8BE-NEXT:    lwz r6, 8(r3)
+; P8BE-NEXT:    lwz r3, 352(r3)
 ; P8BE-NEXT:    rldimi r3, r6, 32, 0
-; P8BE-NEXT:    mtfprd f0, r5
-; P8BE-NEXT:    mtfprd f1, r3
+; P8BE-NEXT:    rldimi r5, r4, 32, 0
+; P8BE-NEXT:    mtfprd f0, r3
+; P8BE-NEXT:    mtfprd f1, r5
 ; P8BE-NEXT:    xxmrghd v2, vs1, vs0
 ; P8BE-NEXT:    blr
 ;
@@ -1187,14 +1187,14 @@ define <4 x i32> @fromRandMemVari(ptr nocapture readonly %arr, i32 signext %elem
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    sldi r4, r4, 2
 ; P8BE-NEXT:    add r3, r3, r4
-; P8BE-NEXT:    lwz r4, 8(r3)
-; P8BE-NEXT:    lwz r5, 32(r3)
-; P8BE-NEXT:    lwz r6, 16(r3)
-; P8BE-NEXT:    lwz r3, 4(r3)
-; P8BE-NEXT:    rldimi r5, r4, 32, 0
+; P8BE-NEXT:    lwz r4, 16(r3)
+; P8BE-NEXT:    lwz r5, 4(r3)
+; P8BE-NEXT:    lwz r6, 8(r3)
+; P8BE-NEXT:    lwz r3, 32(r3)
 ; P8BE-NEXT:    rldimi r3, r6, 32, 0
-; P8BE-NEXT:    mtfprd f0, r5
-; P8BE-NEXT:    mtfprd f1, r3
+; P8BE-NEXT:    rldimi r5, r4, 32, 0
+; P8BE-NEXT:    mtfprd f0, r3
+; P8BE-NEXT:    mtfprd f1, r5
 ; P8BE-NEXT:    xxmrghd v2, vs1, vs0
 ; P8BE-NEXT:    blr
 ;
@@ -1346,10 +1346,10 @@ define <4 x i32> @fromRegsConvftoi(float %a, float %b, float %c, float %d) {
 ;
 ; P8BE-LABEL: fromRegsConvftoi:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
 ; P8BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
 ; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs2, vs4
 ; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
 ; P8BE-NEXT:    xvcvdpsxws v2, vs0
@@ -1359,10 +1359,10 @@ define <4 x i32> @fromRegsConvftoi(float %a, float %b, float %c, float %d) {
 ;
 ; P8LE-LABEL: fromRegsConvftoi:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
 ; P8LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
 ; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs3, vs1
 ; P8LE-NEXT:    xxmrghd vs1, vs4, vs2
 ; P8LE-NEXT:    xvcvdpsxws v2, vs0
@@ -1468,22 +1468,22 @@ define <4 x i32> @fromDiffMemConsDConvftoi(ptr nocapture readonly %ptr) {
 ;
 ; P8BE-LABEL: fromDiffMemConsDConvftoi:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    addis r4, r2, .LCPI18_0@toc@ha
 ; P8BE-NEXT:    lxvw4x v2, 0, r3
-; P8BE-NEXT:    addi r4, r4, .LCPI18_0@toc@l
-; P8BE-NEXT:    lxvw4x v3, 0, r4
+; P8BE-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI18_0@toc@l
+; P8BE-NEXT:    lxvw4x v3, 0, r3
 ; P8BE-NEXT:    vperm v2, v2, v2, v3
 ; P8BE-NEXT:    xvcvspsxws v2, v2
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fromDiffMemConsDConvftoi:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addis r4, r2, .LCPI18_0@toc@ha
 ; P8LE-NEXT:    lxvd2x vs0, 0, r3
-; P8LE-NEXT:    addi r4, r4, .LCPI18_0@toc@l
-; P8LE-NEXT:    lxvd2x vs1, 0, r4
+; P8LE-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI18_0@toc@l
 ; P8LE-NEXT:    xxswapd v2, vs0
-; P8LE-NEXT:    xxswapd v3, vs1
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs0
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    xvcvspsxws v2, v2
 ; P8LE-NEXT:    blr
@@ -1543,10 +1543,10 @@ define <4 x i32> @fromDiffMemVarAConvftoi(ptr nocapture readonly %arr, i32 signe
 ; P8BE-NEXT:    lfsux f0, r3, r4
 ; P8BE-NEXT:    lfs f1, 12(r3)
 ; P8BE-NEXT:    lfs f2, 4(r3)
-; P8BE-NEXT:    lfs f3, 8(r3)
 ; P8BE-NEXT:    xxmrghd vs1, vs2, vs1
-; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    lfs f2, 8(r3)
 ; P8BE-NEXT:    xvcvdpsp v2, vs1
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; P8BE-NEXT:    xvcvdpsp v3, vs0
 ; P8BE-NEXT:    vmrgew v2, v3, v2
 ; P8BE-NEXT:    xvcvspsxws v2, v2
@@ -1557,11 +1557,11 @@ define <4 x i32> @fromDiffMemVarAConvftoi(ptr nocapture readonly %arr, i32 signe
 ; P8LE-NEXT:    sldi r4, r4, 2
 ; P8LE-NEXT:    lfsux f0, r3, r4
 ; P8LE-NEXT:    lfs f1, 8(r3)
-; P8LE-NEXT:    lfs f2, 4(r3)
-; P8LE-NEXT:    lfs f3, 12(r3)
 ; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
-; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    lfs f1, 4(r3)
+; P8LE-NEXT:    lfs f2, 12(r3)
 ; P8LE-NEXT:    xvcvdpsp v2, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs2, vs1
 ; P8LE-NEXT:    xvcvdpsp v3, vs1
 ; P8LE-NEXT:    vmrgew v2, v3, v2
 ; P8LE-NEXT:    xvcvspsxws v2, v2
@@ -1630,10 +1630,10 @@ define <4 x i32> @fromDiffMemVarDConvftoi(ptr nocapture readonly %arr, i32 signe
 ; P8BE-NEXT:    lfsux f0, r3, r4
 ; P8BE-NEXT:    lfs f1, -12(r3)
 ; P8BE-NEXT:    lfs f2, -4(r3)
-; P8BE-NEXT:    lfs f3, -8(r3)
 ; P8BE-NEXT:    xxmrghd vs1, vs2, vs1
-; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    lfs f2, -8(r3)
 ; P8BE-NEXT:    xvcvdpsp v2, vs1
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; P8BE-NEXT:    xvcvdpsp v3, vs0
 ; P8BE-NEXT:    vmrgew v2, v3, v2
 ; P8BE-NEXT:    xvcvspsxws v2, v2
@@ -1644,11 +1644,11 @@ define <4 x i32> @fromDiffMemVarDConvftoi(ptr nocapture readonly %arr, i32 signe
 ; P8LE-NEXT:    sldi r4, r4, 2
 ; P8LE-NEXT:    lfsux f0, r3, r4
 ; P8LE-NEXT:    lfs f1, -8(r3)
-; P8LE-NEXT:    lfs f2, -4(r3)
-; P8LE-NEXT:    lfs f3, -12(r3)
 ; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
-; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    lfs f1, -4(r3)
+; P8LE-NEXT:    lfs f2, -12(r3)
 ; P8LE-NEXT:    xvcvdpsp v2, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs2, vs1
 ; P8LE-NEXT:    xvcvdpsp v3, vs1
 ; P8LE-NEXT:    vmrgew v2, v3, v2
 ; P8LE-NEXT:    xvcvspsxws v2, v2
@@ -1801,10 +1801,10 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
 ;
 ; P8BE-LABEL: fromRegsConvdtoi:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
 ; P8BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
 ; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs2, vs4
 ; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
 ; P8BE-NEXT:    xvcvdpsxws v2, vs0
@@ -1814,10 +1814,10 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
 ;
 ; P8LE-LABEL: fromRegsConvdtoi:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
 ; P8LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
 ; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs3, vs1
 ; P8LE-NEXT:    xxmrghd vs1, vs4, vs2
 ; P8LE-NEXT:    xvcvdpsxws v2, vs0
@@ -1956,25 +1956,25 @@ define <4 x i32> @fromDiffMemConsDConvdtoi(ptr nocapture readonly %ptr) {
 ;
 ; P8BE-LABEL: fromDiffMemConsDConvdtoi:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    lfd f0, 16(r3)
-; P8BE-NEXT:    lfd f1, 0(r3)
-; P8BE-NEXT:    lfd f2, 24(r3)
-; P8BE-NEXT:    lfd f3, 8(r3)
-; P8BE-NEXT:    xxmrghd vs0, vs0, vs1
-; P8BE-NEXT:    xxmrghd vs1, vs2, vs3
-; P8BE-NEXT:    xvcvdpsxws v2, vs0
-; P8BE-NEXT:    xvcvdpsxws v3, vs1
+; P8BE-NEXT:    lfd f0, 24(r3)
+; P8BE-NEXT:    lfd f1, 16(r3)
+; P8BE-NEXT:    lfd f2, 8(r3)
+; P8BE-NEXT:    lfd f3, 0(r3)
+; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs2
+; P8BE-NEXT:    xvcvdpsxws v2, vs1
+; P8BE-NEXT:    xvcvdpsxws v3, vs0
 ; P8BE-NEXT:    vmrgew v2, v3, v2
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fromDiffMemConsDConvdtoi:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    lfd f0, 24(r3)
-; P8LE-NEXT:    lfd f1, 8(r3)
-; P8LE-NEXT:    lfd f2, 16(r3)
+; P8LE-NEXT:    lfd f1, 16(r3)
+; P8LE-NEXT:    lfd f2, 8(r3)
 ; P8LE-NEXT:    lfd f3, 0(r3)
-; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
-; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xxmrghd vs0, vs2, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs1
 ; P8LE-NEXT:    xvcvdpsxws v2, vs0
 ; P8LE-NEXT:    xvcvdpsxws v3, vs1
 ; P8LE-NEXT:    vmrgew v2, v3, v2
@@ -2032,10 +2032,10 @@ define <4 x i32> @fromDiffMemVarAConvdtoi(ptr nocapture readonly %arr, i32 signe
 ; P8BE-NEXT:    sldi r4, r4, 3
 ; P8BE-NEXT:    lfdux f0, r3, r4
 ; P8BE-NEXT:    lfd f1, 8(r3)
-; P8BE-NEXT:    lfd f2, 24(r3)
-; P8BE-NEXT:    lfd f3, 16(r3)
-; P8BE-NEXT:    xxmrghd vs1, vs1, vs2
-; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    lfd f2, 16(r3)
+; P8BE-NEXT:    lfd f3, 24(r3)
+; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; P8BE-NEXT:    xvcvdpsxws v2, vs1
 ; P8BE-NEXT:    xvcvdpsxws v3, vs0
 ; P8BE-NEXT:    vmrgew v2, v3, v2
@@ -2045,11 +2045,11 @@ define <4 x i32> @fromDiffMemVarAConvdtoi(ptr nocapture readonly %arr, i32 signe
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    sldi r4, r4, 3
 ; P8LE-NEXT:    lfdux f0, r3, r4
-; P8LE-NEXT:    lfd f1, 16(r3)
-; P8LE-NEXT:    lfd f2, 8(r3)
+; P8LE-NEXT:    lfd f1, 8(r3)
+; P8LE-NEXT:    lfd f2, 16(r3)
 ; P8LE-NEXT:    lfd f3, 24(r3)
-; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
-; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xxmrghd vs0, vs2, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs1
 ; P8LE-NEXT:    xvcvdpsxws v2, vs0
 ; P8LE-NEXT:    xvcvdpsxws v3, vs1
 ; P8LE-NEXT:    vmrgew v2, v3, v2
@@ -2115,10 +2115,10 @@ define <4 x i32> @fromDiffMemVarDConvdtoi(ptr nocapture readonly %arr, i32 signe
 ; P8BE-NEXT:    sldi r4, r4, 3
 ; P8BE-NEXT:    lfdux f0, r3, r4
 ; P8BE-NEXT:    lfd f1, -8(r3)
-; P8BE-NEXT:    lfd f2, -24(r3)
-; P8BE-NEXT:    lfd f3, -16(r3)
-; P8BE-NEXT:    xxmrghd vs1, vs1, vs2
-; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    lfd f2, -16(r3)
+; P8BE-NEXT:    lfd f3, -24(r3)
+; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; P8BE-NEXT:    xvcvdpsxws v2, vs1
 ; P8BE-NEXT:    xvcvdpsxws v3, vs0
 ; P8BE-NEXT:    vmrgew v2, v3, v2
@@ -2128,11 +2128,11 @@ define <4 x i32> @fromDiffMemVarDConvdtoi(ptr nocapture readonly %arr, i32 signe
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    sldi r4, r4, 3
 ; P8LE-NEXT:    lfdux f0, r3, r4
-; P8LE-NEXT:    lfd f1, -16(r3)
-; P8LE-NEXT:    lfd f2, -8(r3)
+; P8LE-NEXT:    lfd f1, -8(r3)
+; P8LE-NEXT:    lfd f2, -16(r3)
 ; P8LE-NEXT:    lfd f3, -24(r3)
-; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
-; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xxmrghd vs0, vs2, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs1
 ; P8LE-NEXT:    xvcvdpsxws v2, vs0
 ; P8LE-NEXT:    xvcvdpsxws v3, vs1
 ; P8LE-NEXT:    vmrgew v2, v3, v2
@@ -2461,21 +2461,21 @@ define <4 x i32> @fromDiffMemConsDui(ptr nocapture readonly %arr) {
 ;
 ; P8BE-LABEL: fromDiffMemConsDui:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    addis r4, r2, .LCPI39_0@toc@ha
 ; P8BE-NEXT:    lxvw4x v2, 0, r3
-; P8BE-NEXT:    addi r4, r4, .LCPI39_0@toc@l
-; P8BE-NEXT:    lxvw4x v3, 0, r4
+; P8BE-NEXT:    addis r3, r2, .LCPI39_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI39_0@toc@l
+; P8BE-NEXT:    lxvw4x v3, 0, r3
 ; P8BE-NEXT:    vperm v2, v2, v2, v3
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fromDiffMemConsDui:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addis r4, r2, .LCPI39_0@toc@ha
 ; P8LE-NEXT:    lxvd2x vs0, 0, r3
-; P8LE-NEXT:    addi r4, r4, .LCPI39_0@toc@l
-; P8LE-NEXT:    lxvd2x vs1, 0, r4
+; P8LE-NEXT:    addis r3, r2, .LCPI39_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI39_0@toc@l
 ; P8LE-NEXT:    xxswapd v2, vs0
-; P8LE-NEXT:    xxswapd v3, vs1
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs0
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    blr
 entry:
@@ -2569,26 +2569,26 @@ define <4 x i32> @fromDiffMemVarDui(ptr nocapture readonly %arr, i32 signext %el
 ; P8BE-LABEL: fromDiffMemVarDui:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    sldi r4, r4, 2
-; P8BE-NEXT:    addis r5, r2, .LCPI41_0@toc@ha
 ; P8BE-NEXT:    add r3, r3, r4
-; P8BE-NEXT:    addi r4, r5, .LCPI41_0@toc@l
 ; P8BE-NEXT:    addi r3, r3, -12
-; P8BE-NEXT:    lxvw4x v3, 0, r4
 ; P8BE-NEXT:    lxvw4x v2, 0, r3
+; P8BE-NEXT:    addis r3, r2, .LCPI41_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI41_0@toc@l
+; P8BE-NEXT:    lxvw4x v3, 0, r3
 ; P8BE-NEXT:    vperm v2, v2, v2, v3
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fromDiffMemVarDui:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    sldi r4, r4, 2
-; P8LE-NEXT:    addis r5, r2, .LCPI41_0@toc@ha
 ; P8LE-NEXT:    add r3, r3, r4
-; P8LE-NEXT:    addi r4, r5, .LCPI41_0@toc@l
 ; P8LE-NEXT:    addi r3, r3, -12
-; P8LE-NEXT:    lxvd2x vs1, 0, r4
 ; P8LE-NEXT:    lxvd2x vs0, 0, r3
-; P8LE-NEXT:    xxswapd v3, vs1
+; P8LE-NEXT:    addis r3, r2, .LCPI41_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI41_0@toc@l
 ; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs0
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    blr
 entry:
@@ -2639,14 +2639,14 @@ define <4 x i32> @fromRandMemConsui(ptr nocapture readonly %arr) {
 ;
 ; P8BE-LABEL: fromRandMemConsui:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    lwz r4, 8(r3)
-; P8BE-NEXT:    lwz r5, 352(r3)
-; P8BE-NEXT:    lwz r6, 16(r3)
-; P8BE-NEXT:    lwz r3, 72(r3)
-; P8BE-NEXT:    rldimi r5, r4, 32, 0
+; P8BE-NEXT:    lwz r4, 16(r3)
+; P8BE-NEXT:    lwz r5, 72(r3)
+; P8BE-NEXT:    lwz r6, 8(r3)
+; P8BE-NEXT:    lwz r3, 352(r3)
 ; P8BE-NEXT:    rldimi r3, r6, 32, 0
-; P8BE-NEXT:    mtfprd f0, r5
-; P8BE-NEXT:    mtfprd f1, r3
+; P8BE-NEXT:    rldimi r5, r4, 32, 0
+; P8BE-NEXT:    mtfprd f0, r3
+; P8BE-NEXT:    mtfprd f1, r5
 ; P8BE-NEXT:    xxmrghd v2, vs1, vs0
 ; P8BE-NEXT:    blr
 ;
@@ -2709,14 +2709,14 @@ define <4 x i32> @fromRandMemVarui(ptr nocapture readonly %arr, i32 signext %ele
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    sldi r4, r4, 2
 ; P8BE-NEXT:    add r3, r3, r4
-; P8BE-NEXT:    lwz r4, 8(r3)
-; P8BE-NEXT:    lwz r5, 32(r3)
-; P8BE-NEXT:    lwz r6, 16(r3)
-; P8BE-NEXT:    lwz r3, 4(r3)
-; P8BE-NEXT:    rldimi r5, r4, 32, 0
+; P8BE-NEXT:    lwz r4, 16(r3)
+; P8BE-NEXT:    lwz r5, 4(r3)
+; P8BE-NEXT:    lwz r6, 8(r3)
+; P8BE-NEXT:    lwz r3, 32(r3)
 ; P8BE-NEXT:    rldimi r3, r6, 32, 0
-; P8BE-NEXT:    mtfprd f0, r5
-; P8BE-NEXT:    mtfprd f1, r3
+; P8BE-NEXT:    rldimi r5, r4, 32, 0
+; P8BE-NEXT:    mtfprd f0, r3
+; P8BE-NEXT:    mtfprd f1, r5
 ; P8BE-NEXT:    xxmrghd v2, vs1, vs0
 ; P8BE-NEXT:    blr
 ;
@@ -2868,10 +2868,10 @@ define <4 x i32> @fromRegsConvftoui(float %a, float %b, float %c, float %d) {
 ;
 ; P8BE-LABEL: fromRegsConvftoui:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
 ; P8BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
 ; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs2, vs4
 ; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
 ; P8BE-NEXT:    xvcvdpuxws v2, vs0
@@ -2881,10 +2881,10 @@ define <4 x i32> @fromRegsConvftoui(float %a, float %b, float %c, float %d) {
 ;
 ; P8LE-LABEL: fromRegsConvftoui:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
 ; P8LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
 ; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs3, vs1
 ; P8LE-NEXT:    xxmrghd vs1, vs4, vs2
 ; P8LE-NEXT:    xvcvdpuxws v2, vs0
@@ -2990,22 +2990,22 @@ define <4 x i32> @fromDiffMemConsDConvftoui(ptr nocapture readonly %ptr) {
 ;
 ; P8BE-LABEL: fromDiffMemConsDConvftoui:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    addis r4, r2, .LCPI50_0@toc@ha
 ; P8BE-NEXT:    lxvw4x v2, 0, r3
-; P8BE-NEXT:    addi r4, r4, .LCPI50_0@toc@l
-; P8BE-NEXT:    lxvw4x v3, 0, r4
+; P8BE-NEXT:    addis r3, r2, .LCPI50_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI50_0@toc@l
+; P8BE-NEXT:    lxvw4x v3, 0, r3
 ; P8BE-NEXT:    vperm v2, v2, v2, v3
 ; P8BE-NEXT:    xvcvspuxws v2, v2
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fromDiffMemConsDConvftoui:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addis r4, r2, .LCPI50_0@toc@ha
 ; P8LE-NEXT:    lxvd2x vs0, 0, r3
-; P8LE-NEXT:    addi r4, r4, .LCPI50_0@toc@l
-; P8LE-NEXT:    lxvd2x vs1, 0, r4
+; P8LE-NEXT:    addis r3, r2, .LCPI50_0@toc@ha
+; P8LE-NEXT:    addi r3, r3, .LCPI50_0@toc@l
 ; P8LE-NEXT:    xxswapd v2, vs0
-; P8LE-NEXT:    xxswapd v3, vs1
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs0
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    xvcvspuxws v2, v2
 ; P8LE-NEXT:    blr
@@ -3065,10 +3065,10 @@ define <4 x i32> @fromDiffMemVarAConvftoui(ptr nocapture readonly %arr, i32 sign
 ; P8BE-NEXT:    lfsux f0, r3, r4
 ; P8BE-NEXT:    lfs f1, 12(r3)
 ; P8BE-NEXT:    lfs f2, 4(r3)
-; P8BE-NEXT:    lfs f3, 8(r3)
 ; P8BE-NEXT:    xxmrghd vs1, vs2, vs1
-; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    lfs f2, 8(r3)
 ; P8BE-NEXT:    xvcvdpsp v2, vs1
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; P8BE-NEXT:    xvcvdpsp v3, vs0
 ; P8BE-NEXT:    vmrgew v2, v3, v2
 ; P8BE-NEXT:    xvcvspuxws v2, v2
@@ -3079,11 +3079,11 @@ define <4 x i32> @fromDiffMemVarAConvftoui(ptr nocapture readonly %arr, i32 sign
 ; P8LE-NEXT:    sldi r4, r4, 2
 ; P8LE-NEXT:    lfsux f0, r3, r4
 ; P8LE-NEXT:    lfs f1, 8(r3)
-; P8LE-NEXT:    lfs f2, 4(r3)
-; P8LE-NEXT:    lfs f3, 12(r3)
 ; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
-; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    lfs f1, 4(r3)
+; P8LE-NEXT:    lfs f2, 12(r3)
 ; P8LE-NEXT:    xvcvdpsp v2, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs2, vs1
 ; P8LE-NEXT:    xvcvdpsp v3, vs1
 ; P8LE-NEXT:    vmrgew v2, v3, v2
 ; P8LE-NEXT:    xvcvspuxws v2, v2
@@ -3153,10 +3153,10 @@ define <4 x i32> @fromDiffMemVarDConvftoui(ptr nocapture readonly %arr, i32 sign
 ; P8BE-NEXT:    lfsux f0, r3, r4
 ; P8BE-NEXT:    lfs f1, -12(r3)
 ; P8BE-NEXT:    lfs f2, -4(r3)
-; P8BE-NEXT:    lfs f3, -8(r3)
 ; P8BE-NEXT:    xxmrghd vs1, vs2, vs1
-; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    lfs f2, -8(r3)
 ; P8BE-NEXT:    xvcvdpsp v2, vs1
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; P8BE-NEXT:    xvcvdpsp v3, vs0
 ; P8BE-NEXT:    vmrgew v2, v3, v2
 ; P8BE-NEXT:    xvcvspuxws v2, v2
@@ -3167,11 +3167,11 @@ define <4 x i32> @fromDiffMemVarDConvftoui(ptr nocapture readonly %arr, i32 sign
 ; P8LE-NEXT:    sldi r4, r4, 2
 ; P8LE-NEXT:    lfsux f0, r3, r4
 ; P8LE-NEXT:    lfs f1, -8(r3)
-; P8LE-NEXT:    lfs f2, -4(r3)
-; P8LE-NEXT:    lfs f3, -12(r3)
 ; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
-; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    lfs f1, -4(r3)
+; P8LE-NEXT:    lfs f2, -12(r3)
 ; P8LE-NEXT:    xvcvdpsp v2, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs2, vs1
 ; P8LE-NEXT:    xvcvdpsp v3, vs1
 ; P8LE-NEXT:    vmrgew v2, v3, v2
 ; P8LE-NEXT:    xvcvspuxws v2, v2
@@ -3324,10 +3324,10 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d)
 ;
 ; P8BE-LABEL: fromRegsConvdtoui:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
 ; P8BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
 ; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs2, vs4
 ; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
 ; P8BE-NEXT:    xvcvdpuxws v2, vs0
@@ -3337,10 +3337,10 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d)
 ;
 ; P8LE-LABEL: fromRegsConvdtoui:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
 ; P8LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
 ; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs3, vs1
 ; P8LE-NEXT:    xxmrghd vs1, vs4, vs2
 ; P8LE-NEXT:    xvcvdpuxws v2, vs0
@@ -3479,25 +3479,25 @@ define <4 x i32> @fromDiffMemConsDConvdtoui(ptr nocapture readonly %ptr) {
 ;
 ; P8BE-LABEL: fromDiffMemConsDConvdtoui:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    lfd f0, 16(r3)
-; P8BE-NEXT:    lfd f1, 0(r3)
-; P8BE-NEXT:    lfd f2, 24(r3)
-; P8BE-NEXT:    lfd f3, 8(r3)
-; P8BE-NEXT:    xxmrghd vs0, vs0, vs1
-; P8BE-NEXT:    xxmrghd vs1, vs2, vs3
-; P8BE-NEXT:    xvcvdpuxws v2, vs0
-; P8BE-NEXT:    xvcvdpuxws v3, vs1
+; P8BE-NEXT:    lfd f0, 24(r3)
+; P8BE-NEXT:    lfd f1, 16(r3)
+; P8BE-NEXT:    lfd f2, 8(r3)
+; P8BE-NEXT:    lfd f3, 0(r3)
+; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs2
+; P8BE-NEXT:    xvcvdpuxws v2, vs1
+; P8BE-NEXT:    xvcvdpuxws v3, vs0
 ; P8BE-NEXT:    vmrgew v2, v3, v2
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fromDiffMemConsDConvdtoui:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    lfd f0, 24(r3)
-; P8LE-NEXT:    lfd f1, 8(r3)
-; P8LE-NEXT:    lfd f2, 16(r3)
+; P8LE-NEXT:    lfd f1, 16(r3)
+; P8LE-NEXT:    lfd f2, 8(r3)
 ; P8LE-NEXT:    lfd f3, 0(r3)
-; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
-; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xxmrghd vs0, vs2, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs1
 ; P8LE-NEXT:    xvcvdpuxws v2, vs0
 ; P8LE-NEXT:    xvcvdpuxws v3, vs1
 ; P8LE-NEXT:    vmrgew v2, v3, v2
@@ -3555,10 +3555,10 @@ define <4 x i32> @fromDiffMemVarAConvdtoui(ptr nocapture readonly %arr, i32 sign
 ; P8BE-NEXT:    sldi r4, r4, 3
 ; P8BE-NEXT:    lfdux f0, r3, r4
 ; P8BE-NEXT:    lfd f1, 8(r3)
-; P8BE-NEXT:    lfd f2, 24(r3)
-; P8BE-NEXT:    lfd f3, 16(r3)
-; P8BE-NEXT:    xxmrghd vs1, vs1, vs2
-; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    lfd f2, 16(r3)
+; P8BE-NEXT:    lfd f3, 24(r3)
+; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; P8BE-NEXT:    xvcvdpuxws v2, vs1
 ; P8BE-NEXT:    xvcvdpuxws v3, vs0
 ; P8BE-NEXT:    vmrgew v2, v3, v2
@@ -3568,11 +3568,11 @@ define <4 x i32> @fromDiffMemVarAConvdtoui(ptr nocapture readonly %arr, i32 sign
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    sldi r4, r4, 3
 ; P8LE-NEXT:    lfdux f0, r3, r4
-; P8LE-NEXT:    lfd f1, 16(r3)
-; P8LE-NEXT:    lfd f2, 8(r3)
+; P8LE-NEXT:    lfd f1, 8(r3)
+; P8LE-NEXT:    lfd f2, 16(r3)
 ; P8LE-NEXT:    lfd f3, 24(r3)
-; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
-; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xxmrghd vs0, vs2, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs1
 ; P8LE-NEXT:    xvcvdpuxws v2, vs0
 ; P8LE-NEXT:    xvcvdpuxws v3, vs1
 ; P8LE-NEXT:    vmrgew v2, v3, v2
@@ -3638,10 +3638,10 @@ define <4 x i32> @fromDiffMemVarDConvdtoui(ptr nocapture readonly %arr, i32 sign
 ; P8BE-NEXT:    sldi r4, r4, 3
 ; P8BE-NEXT:    lfdux f0, r3, r4
 ; P8BE-NEXT:    lfd f1, -8(r3)
-; P8BE-NEXT:    lfd f2, -24(r3)
-; P8BE-NEXT:    lfd f3, -16(r3)
-; P8BE-NEXT:    xxmrghd vs1, vs1, vs2
-; P8BE-NEXT:    xxmrghd vs0, vs0, vs3
+; P8BE-NEXT:    lfd f2, -16(r3)
+; P8BE-NEXT:    lfd f3, -24(r3)
+; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
+; P8BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; P8BE-NEXT:    xvcvdpuxws v2, vs1
 ; P8BE-NEXT:    xvcvdpuxws v3, vs0
 ; P8BE-NEXT:    vmrgew v2, v3, v2
@@ -3651,11 +3651,11 @@ define <4 x i32> @fromDiffMemVarDConvdtoui(ptr nocapture readonly %arr, i32 sign
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    sldi r4, r4, 3
 ; P8LE-NEXT:    lfdux f0, r3, r4
-; P8LE-NEXT:    lfd f1, -16(r3)
-; P8LE-NEXT:    lfd f2, -8(r3)
+; P8LE-NEXT:    lfd f1, -8(r3)
+; P8LE-NEXT:    lfd f2, -16(r3)
 ; P8LE-NEXT:    lfd f3, -24(r3)
-; P8LE-NEXT:    xxmrghd vs0, vs1, vs0
-; P8LE-NEXT:    xxmrghd vs1, vs3, vs2
+; P8LE-NEXT:    xxmrghd vs0, vs2, vs0
+; P8LE-NEXT:    xxmrghd vs1, vs3, vs1
 ; P8LE-NEXT:    xvcvdpuxws v2, vs0
 ; P8LE-NEXT:    xvcvdpuxws v3, vs1
 ; P8LE-NEXT:    vmrgew v2, v3, v2
@@ -4104,10 +4104,10 @@ define <2 x i64> @fromRandMemConsll(ptr nocapture readonly %arr) {
 ;
 ; P8BE-LABEL: fromRandMemConsll:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    ld r4, 144(r3)
-; P8BE-NEXT:    ld r3, 32(r3)
-; P8BE-NEXT:    mtfprd f0, r4
-; P8BE-NEXT:    mtfprd f1, r3
+; P8BE-NEXT:    ld r4, 32(r3)
+; P8BE-NEXT:    ld r3, 144(r3)
+; P8BE-NEXT:    mtfprd f0, r3
+; P8BE-NEXT:    mtfprd f1, r4
 ; P8BE-NEXT:    xxmrghd v2, vs1, vs0
 ; P8BE-NEXT:    blr
 ;
@@ -4152,10 +4152,10 @@ define <2 x i64> @fromRandMemVarll(ptr nocapture readonly %arr, i32 signext %ele
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    sldi r4, r4, 3
 ; P8BE-NEXT:    add r3, r3, r4
-; P8BE-NEXT:    ld r4, 8(r3)
-; P8BE-NEXT:    ld r3, 32(r3)
-; P8BE-NEXT:    mtfprd f0, r4
-; P8BE-NEXT:    mtfprd f1, r3
+; P8BE-NEXT:    ld r4, 32(r3)
+; P8BE-NEXT:    ld r3, 8(r3)
+; P8BE-NEXT:    mtfprd f0, r3
+; P8BE-NEXT:    mtfprd f1, r4
 ; P8BE-NEXT:    xxmrghd v2, vs1, vs0
 ; P8BE-NEXT:    blr
 ;
@@ -5286,10 +5286,10 @@ define <2 x i64> @fromRandMemConsull(ptr nocapture readonly %arr) {
 ;
 ; P8BE-LABEL: fromRandMemConsull:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    ld r4, 144(r3)
-; P8BE-NEXT:    ld r3, 32(r3)
-; P8BE-NEXT:    mtfprd f0, r4
-; P8BE-NEXT:    mtfprd f1, r3
+; P8BE-NEXT:    ld r4, 32(r3)
+; P8BE-NEXT:    ld r3, 144(r3)
+; P8BE-NEXT:    mtfprd f0, r3
+; P8BE-NEXT:    mtfprd f1, r4
 ; P8BE-NEXT:    xxmrghd v2, vs1, vs0
 ; P8BE-NEXT:    blr
 ;
@@ -5334,10 +5334,10 @@ define <2 x i64> @fromRandMemVarull(ptr nocapture readonly %arr, i32 signext %el
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    sldi r4, r4, 3
 ; P8BE-NEXT:    add r3, r3, r4
-; P8BE-NEXT:    ld r4, 8(r3)
-; P8BE-NEXT:    ld r3, 32(r3)
-; P8BE-NEXT:    mtfprd f0, r4
-; P8BE-NEXT:    mtfprd f1, r3
+; P8BE-NEXT:    ld r4, 32(r3)
+; P8BE-NEXT:    ld r3, 8(r3)
+; P8BE-NEXT:    mtfprd f0, r3
+; P8BE-NEXT:    mtfprd f1, r4
 ; P8BE-NEXT:    xxmrghd v2, vs1, vs0
 ; P8BE-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p8vector.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p8vector.ll
index 09973727eee0b2a..66e287390c902ca 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p8vector.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p8vector.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+power8-vector -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+power8-vector -mattr=-vsx < %s | FileCheck %s -check-prefix=P7
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-VSX
 
 @vsc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>, align 16
@@ -22,9 +22,12 @@ entry:
   ret void
 ; CHECK-LABEL: @test1
 ; CHECK: lvx [[REG1:[0-9]+]], 0, 3
-; CHECK: lvx [[REG2:[0-9]+]], 0, 4
+; CHECK: lvx [[REG2:[0-9]+]], 0, 3
 ; CHECK: vbpermq {{[0-9]+}}, [[REG1]], [[REG2]]
 ; CHECK-VSX: vbpermq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; P7: lvx [[REG1:[0-9]+]], 0, 3
+; P7: lvx [[REG2:[0-9]+]], 0, 4
+; P7: vbpermq {{[0-9]+}}, [[REG1]], [[REG2]]
 }
 
 ; Function Attrs: nounwind
@@ -37,9 +40,12 @@ entry:
   ret void
 ; CHECK-LABEL: @test2
 ; CHECK: lvx [[REG1:[0-9]+]], 0, 3
-; CHECK: lvx [[REG2:[0-9]+]], 0, 4
+; CHECK: lvx [[REG2:[0-9]+]], 0, 3
 ; CHECK: vbpermq {{[0-9]+}}, [[REG1]], [[REG2]]
 ; CHECK-VSX: vbpermq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; P7: lvx [[REG1:[0-9]+]], 0, 3
+; P7: lvx [[REG2:[0-9]+]], 0, 4
+; P7: vbpermq {{[0-9]+}}, [[REG1]], [[REG2]]
 }
 
 ; Function Attrs: nounwind

diff  --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 070d058dceb0b19..1d42e1b62bcb6bf 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -499,15 +499,15 @@ define dso_local <8 x i16> @testmrglb3(ptr nocapture readonly %a) local_unnamed_
 ;
 ; CHECK-NOVSX-LABEL: testmrglb3:
 ; CHECK-NOVSX:       # %bb.0: # %entry
-; CHECK-NOVSX-NEXT:    vxor v2, v2, v2
 ; CHECK-NOVSX-NEXT:    ld r3, 0(r3)
-; CHECK-NOVSX-NEXT:    addis r4, r2, .LCPI12_0@toc@ha
-; CHECK-NOVSX-NEXT:    addi r4, r4, .LCPI12_0@toc@l
-; CHECK-NOVSX-NEXT:    lvx v3, 0, r4
+; CHECK-NOVSX-NEXT:    vxor v4, v4, v4
 ; CHECK-NOVSX-NEXT:    std r3, -16(r1)
 ; CHECK-NOVSX-NEXT:    addi r3, r1, -16
-; CHECK-NOVSX-NEXT:    lvx v4, 0, r3
-; CHECK-NOVSX-NEXT:    vperm v2, v4, v2, v3
+; CHECK-NOVSX-NEXT:    lvx v2, 0, r3
+; CHECK-NOVSX-NEXT:    addis r3, r2, .LCPI12_0@toc@ha
+; CHECK-NOVSX-NEXT:    addi r3, r3, .LCPI12_0@toc@l
+; CHECK-NOVSX-NEXT:    lvx v3, 0, r3
+; CHECK-NOVSX-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-NOVSX-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: testmrglb3:
@@ -527,10 +527,10 @@ define dso_local <8 x i16> @testmrglb3(ptr nocapture readonly %a) local_unnamed_
 ;
 ; P8-AIX-64-LABEL: testmrglb3:
 ; P8-AIX-64:       # %bb.0: # %entry
-; P8-AIX-64-NEXT:    ld r4, L..C0(r2) # %const.0
 ; P8-AIX-64-NEXT:    lxsdx v2, 0, r3
+; P8-AIX-64-NEXT:    ld r3, L..C0(r2) # %const.0
 ; P8-AIX-64-NEXT:    xxlxor v4, v4, v4
-; P8-AIX-64-NEXT:    lxvw4x v3, 0, r4
+; P8-AIX-64-NEXT:    lxvw4x v3, 0, r3
 ; P8-AIX-64-NEXT:    vperm v2, v4, v2, v3
 ; P8-AIX-64-NEXT:    blr
 ;
@@ -539,12 +539,12 @@ define dso_local <8 x i16> @testmrglb3(ptr nocapture readonly %a) local_unnamed_
 ; P8-AIX-32-NEXT:    lwz r4, 4(r3)
 ; P8-AIX-32-NEXT:    xxlxor v3, v3, v3
 ; P8-AIX-32-NEXT:    stw r4, -16(r1)
-; P8-AIX-32-NEXT:    addi r4, r1, -32
 ; P8-AIX-32-NEXT:    lwz r3, 0(r3)
 ; P8-AIX-32-NEXT:    stw r3, -32(r1)
 ; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x vs0, 0, r3
-; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r4
+; P8-AIX-32-NEXT:    addi r3, r1, -32
+; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r3
 ; P8-AIX-32-NEXT:    xxmrghw v2, vs1, vs0
 ; P8-AIX-32-NEXT:    vmrghb v2, v3, v2
 ; P8-AIX-32-NEXT:    blr
@@ -683,12 +683,12 @@ define dso_local <16 x i8> @no_crash_bitcast(i32 %a) {
 ;
 ; CHECK-NOVSX-LABEL: no_crash_bitcast:
 ; CHECK-NOVSX:       # %bb.0: # %entry
-; CHECK-NOVSX-NEXT:    addis r4, r2, .LCPI14_0@toc@ha
 ; CHECK-NOVSX-NEXT:    stw r3, -16(r1)
+; CHECK-NOVSX-NEXT:    addis r3, r2, .LCPI14_0@toc@ha
+; CHECK-NOVSX-NEXT:    addi r3, r3, .LCPI14_0@toc@l
+; CHECK-NOVSX-NEXT:    lvx v2, 0, r3
 ; CHECK-NOVSX-NEXT:    addi r3, r1, -16
-; CHECK-NOVSX-NEXT:    addi r4, r4, .LCPI14_0@toc@l
 ; CHECK-NOVSX-NEXT:    lvx v3, 0, r3
-; CHECK-NOVSX-NEXT:    lvx v2, 0, r4
 ; CHECK-NOVSX-NEXT:    vperm v2, v3, v3, v2
 ; CHECK-NOVSX-NEXT:    blr
 ;
@@ -713,11 +713,11 @@ define dso_local <16 x i8> @no_crash_bitcast(i32 %a) {
 ;
 ; P8-AIX-32-LABEL: no_crash_bitcast:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r4, L..C0(r2) # %const.0
 ; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    lwz r3, L..C0(r2) # %const.0
+; P8-AIX-32-NEXT:    lxvw4x v2, 0, r3
 ; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    lxvw4x v2, 0, r4
 ; P8-AIX-32-NEXT:    vperm v2, v3, v3, v2
 ; P8-AIX-32-NEXT:    blr
 entry:
@@ -758,10 +758,10 @@ define dso_local <4 x i32> @replace_undefs_in_splat(<4 x i32> %a) local_unnamed_
 ; CHECK-NOVSX-LABEL: replace_undefs_in_splat:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
-; CHECK-NOVSX-NEXT:    addis r4, r2, .LCPI15_1@toc@ha
 ; CHECK-NOVSX-NEXT:    addi r3, r3, .LCPI15_0@toc@l
 ; CHECK-NOVSX-NEXT:    lvx v3, 0, r3
-; CHECK-NOVSX-NEXT:    addi r3, r4, .LCPI15_1@toc@l
+; CHECK-NOVSX-NEXT:    addis r3, r2, .LCPI15_1@toc@ha
+; CHECK-NOVSX-NEXT:    addi r3, r3, .LCPI15_1@toc@l
 ; CHECK-NOVSX-NEXT:    lvx v4, 0, r3
 ; CHECK-NOVSX-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-NOVSX-NEXT:    blr
@@ -782,18 +782,18 @@ define dso_local <4 x i32> @replace_undefs_in_splat(<4 x i32> %a) local_unnamed_
 ; P8-AIX-64-LABEL: replace_undefs_in_splat:
 ; P8-AIX-64:       # %bb.0: # %entry
 ; P8-AIX-64-NEXT:    ld r3, L..C1(r2) # %const.0
-; P8-AIX-64-NEXT:    ld r4, L..C2(r2) # %const.1
 ; P8-AIX-64-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-64-NEXT:    lxvw4x v4, 0, r4
+; P8-AIX-64-NEXT:    ld r3, L..C2(r2) # %const.1
+; P8-AIX-64-NEXT:    lxvw4x v4, 0, r3
 ; P8-AIX-64-NEXT:    vperm v2, v2, v4, v3
 ; P8-AIX-64-NEXT:    blr
 ;
 ; P8-AIX-32-LABEL: replace_undefs_in_splat:
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    lwz r3, L..C1(r2) # %const.0
-; P8-AIX-32-NEXT:    lwz r4, L..C2(r2) # %const.1
 ; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    lxvw4x v4, 0, r4
+; P8-AIX-32-NEXT:    lwz r3, L..C2(r2) # %const.1
+; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
 ; P8-AIX-32-NEXT:    vperm v2, v2, v4, v3
 ; P8-AIX-32-NEXT:    blr
 entry:
@@ -831,12 +831,12 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(ptr nocapture rea
 ; CHECK-NOVSX-LABEL: no_RAUW_in_combine_during_legalize:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    sldi r4, r4, 2
-; CHECK-NOVSX-NEXT:    vxor v2, v2, v2
+; CHECK-NOVSX-NEXT:    vxor v3, v3, v3
 ; CHECK-NOVSX-NEXT:    lwzx r3, r3, r4
 ; CHECK-NOVSX-NEXT:    std r3, -16(r1)
 ; CHECK-NOVSX-NEXT:    addi r3, r1, -16
-; CHECK-NOVSX-NEXT:    lvx v3, 0, r3
-; CHECK-NOVSX-NEXT:    vmrglb v2, v2, v3
+; CHECK-NOVSX-NEXT:    lvx v2, 0, r3
+; CHECK-NOVSX-NEXT:    vmrglb v2, v3, v2
 ; CHECK-NOVSX-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: no_RAUW_in_combine_during_legalize:
@@ -863,11 +863,11 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(ptr nocapture rea
 ; P8-AIX-32-NEXT:    lwzx r3, r3, r4
 ; P8-AIX-32-NEXT:    li r4, 0
 ; P8-AIX-32-NEXT:    stw r4, -32(r1)
-; P8-AIX-32-NEXT:    addi r4, r1, -16
 ; P8-AIX-32-NEXT:    stw r3, -16(r1)
 ; P8-AIX-32-NEXT:    addi r3, r1, -32
 ; P8-AIX-32-NEXT:    lxvw4x vs0, 0, r3
-; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r4
+; P8-AIX-32-NEXT:    addi r3, r1, -16
+; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r3
 ; P8-AIX-32-NEXT:    xxmrghw v2, vs0, vs1
 ; P8-AIX-32-NEXT:    vmrghb v2, v2, v3
 ; P8-AIX-32-NEXT:    blr
@@ -904,9 +904,9 @@ define dso_local <4 x i32> @testSplat4Low(ptr nocapture readonly %ptr) local_unn
 ; CHECK-NOVSX-LABEL: testSplat4Low:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    ld r3, 0(r3)
-; CHECK-NOVSX-NEXT:    addi r4, r1, -16
 ; CHECK-NOVSX-NEXT:    std r3, -16(r1)
-; CHECK-NOVSX-NEXT:    lvx v2, 0, r4
+; CHECK-NOVSX-NEXT:    addi r3, r1, -16
+; CHECK-NOVSX-NEXT:    lvx v2, 0, r3
 ; CHECK-NOVSX-NEXT:    vspltw v2, v2, 2
 ; CHECK-NOVSX-NEXT:    blr
 ;
@@ -960,9 +960,9 @@ define dso_local <4 x i32> @testSplat4hi(ptr nocapture readonly %ptr) local_unna
 ; CHECK-NOVSX-LABEL: testSplat4hi:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    ld r3, 0(r3)
-; CHECK-NOVSX-NEXT:    addi r4, r1, -16
 ; CHECK-NOVSX-NEXT:    std r3, -16(r1)
-; CHECK-NOVSX-NEXT:    lvx v2, 0, r4
+; CHECK-NOVSX-NEXT:    addi r3, r1, -16
+; CHECK-NOVSX-NEXT:    lvx v2, 0, r3
 ; CHECK-NOVSX-NEXT:    vspltw v2, v2, 3
 ; CHECK-NOVSX-NEXT:    blr
 ;
@@ -1014,10 +1014,10 @@ define dso_local <2 x i64> @testSplat8(ptr nocapture readonly %ptr) local_unname
 ; CHECK-NOVSX-LABEL: testSplat8:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    ld r3, 0(r3)
-; CHECK-NOVSX-NEXT:    addi r4, r1, -16
 ; CHECK-NOVSX-NEXT:    std r3, -8(r1)
 ; CHECK-NOVSX-NEXT:    std r3, -16(r1)
-; CHECK-NOVSX-NEXT:    lvx v2, 0, r4
+; CHECK-NOVSX-NEXT:    addi r3, r1, -16
+; CHECK-NOVSX-NEXT:    lvx v2, 0, r3
 ; CHECK-NOVSX-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: testSplat8:
@@ -1034,12 +1034,12 @@ define dso_local <2 x i64> @testSplat8(ptr nocapture readonly %ptr) local_unname
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    lwz r4, 4(r3)
 ; P8-AIX-32-NEXT:    stw r4, -16(r1)
-; P8-AIX-32-NEXT:    addi r4, r1, -32
 ; P8-AIX-32-NEXT:    lwz r3, 0(r3)
 ; P8-AIX-32-NEXT:    stw r3, -32(r1)
 ; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x vs0, 0, r3
-; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r4
+; P8-AIX-32-NEXT:    addi r3, r1, -32
+; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r3
 ; P8-AIX-32-NEXT:    xxmrghw vs0, vs1, vs0
 ; P8-AIX-32-NEXT:    xxmrghd v2, vs0, vs0
 ; P8-AIX-32-NEXT:    blr
@@ -1069,10 +1069,10 @@ define <2 x i64> @testSplati64_0(ptr nocapture readonly %ptr) #0 {
 ; CHECK-NOVSX-LABEL: testSplati64_0:
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    ld r3, 0(r3)
-; CHECK-NOVSX-NEXT:    addi r4, r1, -16
 ; CHECK-NOVSX-NEXT:    std r3, -8(r1)
 ; CHECK-NOVSX-NEXT:    std r3, -16(r1)
-; CHECK-NOVSX-NEXT:    lvx v2, 0, r4
+; CHECK-NOVSX-NEXT:    addi r3, r1, -16
+; CHECK-NOVSX-NEXT:    lvx v2, 0, r3
 ; CHECK-NOVSX-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: testSplati64_0:
@@ -1087,16 +1087,16 @@ define <2 x i64> @testSplati64_0(ptr nocapture readonly %ptr) #0 {
 ;
 ; P8-AIX-32-LABEL: testSplati64_0:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r4, L..C3(r2) # %const.0
-; P8-AIX-32-NEXT:    lwz r5, 4(r3)
-; P8-AIX-32-NEXT:    lwz r3, 0(r3)
-; P8-AIX-32-NEXT:    stw r5, -16(r1)
-; P8-AIX-32-NEXT:    stw r3, -32(r1)
+; P8-AIX-32-NEXT:    lwz r4, 0(r3)
+; P8-AIX-32-NEXT:    lwz r3, 4(r3)
+; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    lwz r3, L..C3(r2) # %const.0
+; P8-AIX-32-NEXT:    stw r4, -32(r1)
+; P8-AIX-32-NEXT:    lxvw4x v2, 0, r3
 ; P8-AIX-32-NEXT:    addi r3, r1, -16
-; P8-AIX-32-NEXT:    lxvw4x v2, 0, r4
-; P8-AIX-32-NEXT:    addi r4, r1, -32
 ; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    lxvw4x v4, 0, r4
+; P8-AIX-32-NEXT:    addi r3, r1, -32
+; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
 ; P8-AIX-32-NEXT:    vperm v2, v4, v3, v2
 ; P8-AIX-32-NEXT:    blr
 entry:
@@ -1128,14 +1128,14 @@ define <2 x i64> @testSplati64_1(ptr nocapture readonly %ptr) #0 {
 ; CHECK-NOVSX:       # %bb.0: # %entry
 ; CHECK-NOVSX-NEXT:    ld r4, 8(r3)
 ; CHECK-NOVSX-NEXT:    std r4, -8(r1)
-; CHECK-NOVSX-NEXT:    addis r4, r2, .LCPI21_0@toc@ha
 ; CHECK-NOVSX-NEXT:    ld r3, 0(r3)
-; CHECK-NOVSX-NEXT:    addi r4, r4, .LCPI21_0@toc@l
-; CHECK-NOVSX-NEXT:    lvx v2, 0, r4
 ; CHECK-NOVSX-NEXT:    std r3, -16(r1)
 ; CHECK-NOVSX-NEXT:    addi r3, r1, -16
+; CHECK-NOVSX-NEXT:    lvx v2, 0, r3
+; CHECK-NOVSX-NEXT:    addis r3, r2, .LCPI21_0@toc@ha
+; CHECK-NOVSX-NEXT:    addi r3, r3, .LCPI21_0@toc@l
 ; CHECK-NOVSX-NEXT:    lvx v3, 0, r3
-; CHECK-NOVSX-NEXT:    vperm v2, v3, v3, v2
+; CHECK-NOVSX-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NOVSX-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: testSplati64_1:

diff  --git a/llvm/test/CodeGen/PowerPC/cfence-float.ll b/llvm/test/CodeGen/PowerPC/cfence-float.ll
index 649c8604b9d1ffd..c3166fc57486d0e 100644
--- a/llvm/test/CodeGen/PowerPC/cfence-float.ll
+++ b/llvm/test/CodeGen/PowerPC/cfence-float.ll
@@ -10,9 +10,9 @@ define float @bar(ptr %fp) {
 ; CHECK-LE-NEXT:    lwz 3, 0(3)
 ; CHECK-LE-NEXT:    mtfprd 0, 3
 ; CHECK-LE-NEXT:    cmpd 7, 3, 3
-; CHECK-LE-NEXT:    xxsldwi 0, 0, 0, 1
 ; CHECK-LE-NEXT:    bne- 7, .+4
 ; CHECK-LE-NEXT:    isync
+; CHECK-LE-NEXT:    xxsldwi 0, 0, 0, 1
 ; CHECK-LE-NEXT:    xscvspdpn 1, 0
 ; CHECK-LE-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/coldcc2.ll b/llvm/test/CodeGen/PowerPC/coldcc2.ll
index 10dee3da522bba6..0da9b8b5673e0d6 100644
--- a/llvm/test/CodeGen/PowerPC/coldcc2.ll
+++ b/llvm/test/CodeGen/PowerPC/coldcc2.ll
@@ -7,8 +7,8 @@
 define signext i32 @caller(i32 signext %a, i32 signext %b, i32 signext %cold) {
 entry:
 ; COLDCC: bl callee
-; COLDCC: ld 4, 40(1)
-; COLDCC: ld 5, 32(1)
+; COLDCC: ld 4, 32(1)
+; COLDCC: ld 3, 40(1)
   %call = tail call coldcc { i64, i64 } @callee(i32 signext %a, i32 signext %b)
   %0 = extractvalue { i64, i64 } %call, 0
   %1 = extractvalue { i64, i64 } %call, 1

diff  --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
index 3015e68e471a7c5..a72abf7007e8d49 100644
--- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
@@ -5,12 +5,12 @@
 define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
 ; CHECK-LABEL: fneg_fdiv_splat:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
 ; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxspltd 0, 1, 0
-; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI0_0@toc@l
-; CHECK-NEXT:    lxvd2x 2, 0, 3
 ; CHECK-NEXT:    xvredp 1, 0
+; CHECK-NEXT:    lxvd2x 2, 0, 3
 ; CHECK-NEXT:    xxlor 3, 2, 2
 ; CHECK-NEXT:    xvmaddadp 3, 0, 1
 ; CHECK-NEXT:    xvnmsubadp 1, 1, 3

diff  --git a/llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll b/llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll
index cec24aed399a23b..f65f189b9568c10 100644
--- a/llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll
+++ b/llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll
@@ -139,8 +139,8 @@ false:
 define dso_local i64 @no_extswsli(ptr %base, i32 %index, i1 %flag) {
 ; CHECK-LABEL: no_extswsli:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    andi. r5, r5, 1
 ; CHECK-NEXT:    extsw r4, r4
+; CHECK-NEXT:    andi. r5, r5, 1
 ; CHECK-NEXT:    bc 4, gt, .LBB2_2
 ; CHECK-NEXT:  # %bb.1: # %true
 ; CHECK-NEXT:    sldi r4, r4, 3
@@ -152,8 +152,8 @@ define dso_local i64 @no_extswsli(ptr %base, i32 %index, i1 %flag) {
 ;
 ; CHECK-BE-LABEL: no_extswsli:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    andi. r5, r5, 1
 ; CHECK-BE-NEXT:    extsw r4, r4
+; CHECK-BE-NEXT:    andi. r5, r5, 1
 ; CHECK-BE-NEXT:    bc 4, gt, .LBB2_2
 ; CHECK-BE-NEXT:  # %bb.1: # %true
 ; CHECK-BE-NEXT:    sldi r4, r4, 3

diff  --git a/llvm/test/CodeGen/PowerPC/combine_ext_trunc.ll b/llvm/test/CodeGen/PowerPC/combine_ext_trunc.ll
index d23d062024d2dce..24a72182c15b333 100644
--- a/llvm/test/CodeGen/PowerPC/combine_ext_trunc.ll
+++ b/llvm/test/CodeGen/PowerPC/combine_ext_trunc.ll
@@ -38,9 +38,9 @@ define i32 @pattern2(i32 %x, i32 %y){
 define i32 @pattern3(i1 %cond, i32 %x) {
 ; CHECK-LABEL: pattern3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li 5, -1
 ; CHECK-NEXT:    andi. 3, 3, 1
-; CHECK-NEXT:    rldic 3, 5, 0, 32
+; CHECK-NEXT:    li 3, -1
+; CHECK-NEXT:    rldic 3, 3, 0, 32
 ; CHECK-NEXT:    iselgt 3, 0, 3
 ; CHECK-NEXT:    and 3, 3, 4
 ; CHECK-NEXT:    blr
@@ -53,10 +53,10 @@ define i32 @pattern3(i1 %cond, i32 %x) {
 define i32 @pattern4(i1 %cond, i32 %x) {
 ; CHECK-LABEL: pattern4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li 5, -1
 ; CHECK-NEXT:    andi. 3, 3, 1
-; CHECK-NEXT:    rldic 3, 5, 0, 32
+; CHECK-NEXT:    li 3, -1
 ; CHECK-NEXT:    li 5, 0
+; CHECK-NEXT:    rldic 3, 3, 0, 32
 ; CHECK-NEXT:    iselgt 3, 3, 5
 ; CHECK-NEXT:    or 3, 4, 3
 ; CHECK-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll b/llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll
index 5d76f5099a6622f..18a61d071cca6c0 100644
--- a/llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll
+++ b/llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll
@@ -51,14 +51,14 @@ define dso_local void @foo1_int_be_reuse4B(ptr nocapture noundef writeonly %a) l
 ; P8-LE-LABEL: foo1_int_be_reuse4B:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI0_0@toc@ha
-; P8-LE-NEXT:    li 5, 2312
 ; P8-LE-NEXT:    addi 4, 4, .LCPI0_0@toc@l
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
 ; P8-LE-NEXT:    lis 4, 1798
 ; P8-LE-NEXT:    ori 4, 4, 1284
 ; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    stw 4, 16(3)
-; P8-LE-NEXT:    sth 5, 20(3)
+; P8-LE-NEXT:    li 4, 2312
+; P8-LE-NEXT:    sth 4, 20(3)
 ; P8-LE-NEXT:    blr
 ;
 ; P9-LE-LABEL: foo1_int_be_reuse4B:
@@ -139,14 +139,14 @@ define dso_local void @foo2_int_le_reuse4B(ptr nocapture noundef writeonly %a) l
 ; P8-LE-LABEL: foo2_int_le_reuse4B:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI1_0@toc@ha
-; P8-LE-NEXT:    li 5, 3340
 ; P8-LE-NEXT:    addi 4, 4, .LCPI1_0@toc@l
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
 ; P8-LE-NEXT:    lis 4, 2826
 ; P8-LE-NEXT:    ori 4, 4, 2312
 ; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    stw 4, 16(3)
-; P8-LE-NEXT:    sth 5, 20(3)
+; P8-LE-NEXT:    li 4, 3340
+; P8-LE-NEXT:    sth 4, 20(3)
 ; P8-LE-NEXT:    blr
 ;
 ; P9-LE-LABEL: foo2_int_le_reuse4B:
@@ -227,14 +227,14 @@ define dso_local void @foo3_int_be_reuse4B(ptr nocapture noundef writeonly %a) l
 ; P8-LE-LABEL: foo3_int_be_reuse4B:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI2_0@toc@ha
-; P8-LE-NEXT:    li 5, 2057
 ; P8-LE-NEXT:    addi 4, 4, .LCPI2_0@toc@l
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
 ; P8-LE-NEXT:    lis 4, 1543
 ; P8-LE-NEXT:    ori 4, 4, 1029
 ; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    stw 4, 16(3)
-; P8-LE-NEXT:    sth 5, 20(3)
+; P8-LE-NEXT:    li 4, 2057
+; P8-LE-NEXT:    sth 4, 20(3)
 ; P8-LE-NEXT:    blr
 ;
 ; P9-LE-LABEL: foo3_int_be_reuse4B:
@@ -309,14 +309,14 @@ define dso_local void @foo4_int_le_reuse4B(ptr nocapture noundef writeonly %a) l
 ; P8-LE-LABEL: foo4_int_le_reuse4B:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI3_0@toc@ha
-; P8-LE-NEXT:    li 5, 3085
 ; P8-LE-NEXT:    addi 4, 4, .LCPI3_0@toc@l
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
 ; P8-LE-NEXT:    lis 4, 2571
 ; P8-LE-NEXT:    ori 4, 4, 2057
 ; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    stw 4, 16(3)
-; P8-LE-NEXT:    sth 5, 20(3)
+; P8-LE-NEXT:    li 4, 3085
+; P8-LE-NEXT:    sth 4, 20(3)
 ; P8-LE-NEXT:    blr
 ;
 ; P9-LE-LABEL: foo4_int_le_reuse4B:
@@ -488,13 +488,13 @@ define dso_local void @foo7_int_be_reuse8B(ptr nocapture noundef writeonly %a) l
 ; P8-BE-LABEL: foo7_int_be_reuse8B:
 ; P8-BE:       # %bb.0: # %entry
 ; P8-BE-NEXT:    ld 4, L..C6(2) # %const.0
-; P8-BE-NEXT:    lis 5, 1
-; P8-BE-NEXT:    ori 5, 5, 515
 ; P8-BE-NEXT:    lxvw4x 0, 0, 4
-; P8-BE-NEXT:    rldic 4, 5, 32, 15
+; P8-BE-NEXT:    lis 4, 1
+; P8-BE-NEXT:    ori 4, 4, 515
+; P8-BE-NEXT:    rldic 4, 4, 32, 15
+; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    oris 4, 4, 1029
 ; P8-BE-NEXT:    ori 4, 4, 1543
-; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    std 4, 16(3)
 ; P8-BE-NEXT:    blr
 ;
@@ -525,14 +525,14 @@ define dso_local void @foo7_int_be_reuse8B(ptr nocapture noundef writeonly %a) l
 ; P8-LE-LABEL: foo7_int_be_reuse8B:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI6_0@toc@ha
-; P8-LE-NEXT:    lis 5, 449
 ; P8-LE-NEXT:    addi 4, 4, .LCPI6_0@toc@l
-; P8-LE-NEXT:    ori 5, 5, 33089
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
-; P8-LE-NEXT:    rldic 4, 5, 34, 5
+; P8-LE-NEXT:    lis 4, 449
+; P8-LE-NEXT:    ori 4, 4, 33089
+; P8-LE-NEXT:    rldic 4, 4, 34, 5
+; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    oris 4, 4, 770
 ; P8-LE-NEXT:    ori 4, 4, 256
-; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    std 4, 16(3)
 ; P8-LE-NEXT:    blr
 ;
@@ -584,13 +584,13 @@ define dso_local void @foo8_int_le_reuse8B(ptr nocapture noundef writeonly %a) l
 ; P8-BE-LABEL: foo8_int_le_reuse8B:
 ; P8-BE:       # %bb.0: # %entry
 ; P8-BE-NEXT:    ld 4, L..C7(2) # %const.0
-; P8-BE-NEXT:    lis 5, 2057
-; P8-BE-NEXT:    ori 5, 5, 2571
 ; P8-BE-NEXT:    lxvw4x 0, 0, 4
-; P8-BE-NEXT:    rldic 4, 5, 32, 4
+; P8-BE-NEXT:    lis 4, 2057
+; P8-BE-NEXT:    ori 4, 4, 2571
+; P8-BE-NEXT:    rldic 4, 4, 32, 4
+; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    oris 4, 4, 3085
 ; P8-BE-NEXT:    ori 4, 4, 3599
-; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    std 4, 16(3)
 ; P8-BE-NEXT:    blr
 ;
@@ -621,14 +621,14 @@ define dso_local void @foo8_int_le_reuse8B(ptr nocapture noundef writeonly %a) l
 ; P8-LE-LABEL: foo8_int_le_reuse8B:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI7_0@toc@ha
-; P8-LE-NEXT:    lis 5, 963
 ; P8-LE-NEXT:    addi 4, 4, .LCPI7_0@toc@l
-; P8-LE-NEXT:    ori 5, 5, 33603
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
-; P8-LE-NEXT:    rldic 4, 5, 34, 4
+; P8-LE-NEXT:    lis 4, 963
+; P8-LE-NEXT:    ori 4, 4, 33603
+; P8-LE-NEXT:    rldic 4, 4, 34, 4
+; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    oris 4, 4, 2826
 ; P8-LE-NEXT:    ori 4, 4, 2312
-; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    std 4, 16(3)
 ; P8-LE-NEXT:    blr
 ;
@@ -680,13 +680,13 @@ define dso_local void @foo9_int_be_reuse8B(ptr nocapture noundef writeonly %a) l
 ; P8-BE-LABEL: foo9_int_be_reuse8B:
 ; P8-BE:       # %bb.0: # %entry
 ; P8-BE-NEXT:    ld 4, L..C8(2) # %const.0
-; P8-BE-NEXT:    lis 5, 1
-; P8-BE-NEXT:    ori 5, 5, 515
 ; P8-BE-NEXT:    lxvw4x 0, 0, 4
-; P8-BE-NEXT:    rldic 4, 5, 32, 15
+; P8-BE-NEXT:    lis 4, 1
+; P8-BE-NEXT:    ori 4, 4, 515
+; P8-BE-NEXT:    rldic 4, 4, 32, 15
+; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    oris 4, 4, 1029
 ; P8-BE-NEXT:    ori 4, 4, 1543
-; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    std 4, 16(3)
 ; P8-BE-NEXT:    blr
 ;
@@ -717,14 +717,14 @@ define dso_local void @foo9_int_be_reuse8B(ptr nocapture noundef writeonly %a) l
 ; P8-LE-LABEL: foo9_int_be_reuse8B:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI8_0@toc@ha
-; P8-LE-NEXT:    lis 5, 1543
 ; P8-LE-NEXT:    addi 4, 4, .LCPI8_0@toc@l
-; P8-LE-NEXT:    ori 5, 5, 1029
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
-; P8-LE-NEXT:    rldic 4, 5, 32, 5
+; P8-LE-NEXT:    lis 4, 1543
+; P8-LE-NEXT:    ori 4, 4, 1029
+; P8-LE-NEXT:    rldic 4, 4, 32, 5
+; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    oris 4, 4, 515
 ; P8-LE-NEXT:    ori 4, 4, 1
-; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    std 4, 16(3)
 ; P8-LE-NEXT:    blr
 ;
@@ -768,13 +768,13 @@ define dso_local void @foo10_int_le_reuse8B(ptr nocapture noundef writeonly %a)
 ; P8-BE-LABEL: foo10_int_le_reuse8B:
 ; P8-BE:       # %bb.0: # %entry
 ; P8-BE-NEXT:    ld 4, L..C9(2) # %const.0
-; P8-BE-NEXT:    lis 5, 2057
-; P8-BE-NEXT:    ori 5, 5, 2571
 ; P8-BE-NEXT:    lxvw4x 0, 0, 4
-; P8-BE-NEXT:    rldic 4, 5, 32, 4
+; P8-BE-NEXT:    lis 4, 2057
+; P8-BE-NEXT:    ori 4, 4, 2571
+; P8-BE-NEXT:    rldic 4, 4, 32, 4
+; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    oris 4, 4, 3085
 ; P8-BE-NEXT:    ori 4, 4, 3599
-; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    std 4, 16(3)
 ; P8-BE-NEXT:    blr
 ;
@@ -805,14 +805,14 @@ define dso_local void @foo10_int_le_reuse8B(ptr nocapture noundef writeonly %a)
 ; P8-LE-LABEL: foo10_int_le_reuse8B:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI9_0@toc@ha
-; P8-LE-NEXT:    lis 5, 3599
 ; P8-LE-NEXT:    addi 4, 4, .LCPI9_0@toc@l
-; P8-LE-NEXT:    ori 5, 5, 3085
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
-; P8-LE-NEXT:    rldic 4, 5, 32, 4
+; P8-LE-NEXT:    lis 4, 3599
+; P8-LE-NEXT:    ori 4, 4, 3085
+; P8-LE-NEXT:    rldic 4, 4, 32, 4
+; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    oris 4, 4, 2571
 ; P8-LE-NEXT:    ori 4, 4, 2057
-; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    std 4, 16(3)
 ; P8-LE-NEXT:    blr
 ;
@@ -856,13 +856,13 @@ define dso_local void @foo11_int_be_reuse8B(ptr nocapture noundef writeonly %a)
 ; P8-BE-LABEL: foo11_int_be_reuse8B:
 ; P8-BE:       # %bb.0: # %entry
 ; P8-BE-NEXT:    ld 4, L..C10(2) # %const.0
-; P8-BE-NEXT:    lis 5, 1
-; P8-BE-NEXT:    ori 5, 5, 515
 ; P8-BE-NEXT:    lxvw4x 0, 0, 4
-; P8-BE-NEXT:    rldic 4, 5, 32, 15
+; P8-BE-NEXT:    lis 4, 1
+; P8-BE-NEXT:    ori 4, 4, 515
+; P8-BE-NEXT:    rldic 4, 4, 32, 15
+; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    oris 4, 4, 1029
 ; P8-BE-NEXT:    ori 4, 4, 1543
-; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    std 4, 16(3)
 ; P8-BE-NEXT:    blr
 ;
@@ -893,14 +893,14 @@ define dso_local void @foo11_int_be_reuse8B(ptr nocapture noundef writeonly %a)
 ; P8-LE-LABEL: foo11_int_be_reuse8B:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI10_0@toc@ha
-; P8-LE-NEXT:    lis 5, 1029
 ; P8-LE-NEXT:    addi 4, 4, .LCPI10_0@toc@l
-; P8-LE-NEXT:    ori 5, 5, 1543
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
-; P8-LE-NEXT:    rldic 4, 5, 32, 5
+; P8-LE-NEXT:    lis 4, 1029
+; P8-LE-NEXT:    ori 4, 4, 1543
+; P8-LE-NEXT:    rldic 4, 4, 32, 5
+; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    oris 4, 4, 1
 ; P8-LE-NEXT:    ori 4, 4, 515
-; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    std 4, 16(3)
 ; P8-LE-NEXT:    blr
 ;
@@ -940,13 +940,13 @@ define dso_local void @foo12_int_le_reuse8B(ptr nocapture noundef writeonly %a)
 ; P8-BE-LABEL: foo12_int_le_reuse8B:
 ; P8-BE:       # %bb.0: # %entry
 ; P8-BE-NEXT:    ld 4, L..C11(2) # %const.0
-; P8-BE-NEXT:    lis 5, 2057
-; P8-BE-NEXT:    ori 5, 5, 2571
 ; P8-BE-NEXT:    lxvw4x 0, 0, 4
-; P8-BE-NEXT:    rldic 4, 5, 32, 4
+; P8-BE-NEXT:    lis 4, 2057
+; P8-BE-NEXT:    ori 4, 4, 2571
+; P8-BE-NEXT:    rldic 4, 4, 32, 4
+; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    oris 4, 4, 3085
 ; P8-BE-NEXT:    ori 4, 4, 3599
-; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    std 4, 16(3)
 ; P8-BE-NEXT:    blr
 ;
@@ -977,14 +977,14 @@ define dso_local void @foo12_int_le_reuse8B(ptr nocapture noundef writeonly %a)
 ; P8-LE-LABEL: foo12_int_le_reuse8B:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI11_0@toc@ha
-; P8-LE-NEXT:    lis 5, 3085
 ; P8-LE-NEXT:    addi 4, 4, .LCPI11_0@toc@l
-; P8-LE-NEXT:    ori 5, 5, 3599
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
-; P8-LE-NEXT:    rldic 4, 5, 32, 4
+; P8-LE-NEXT:    lis 4, 3085
+; P8-LE-NEXT:    ori 4, 4, 3599
+; P8-LE-NEXT:    rldic 4, 4, 32, 4
+; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    oris 4, 4, 2057
 ; P8-LE-NEXT:    ori 4, 4, 2571
-; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    std 4, 16(3)
 ; P8-LE-NEXT:    blr
 ;
@@ -1024,13 +1024,13 @@ define dso_local void @foo13_int_be_reuse8B(ptr nocapture noundef writeonly %a)
 ; P8-BE-LABEL: foo13_int_be_reuse8B:
 ; P8-BE:       # %bb.0: # %entry
 ; P8-BE-NEXT:    ld 4, L..C12(2) # %const.0
-; P8-BE-NEXT:    lis 5, 1
-; P8-BE-NEXT:    ori 5, 5, 515
 ; P8-BE-NEXT:    lxvd2x 0, 0, 4
-; P8-BE-NEXT:    rldic 4, 5, 32, 15
+; P8-BE-NEXT:    lis 4, 1
+; P8-BE-NEXT:    ori 4, 4, 515
+; P8-BE-NEXT:    rldic 4, 4, 32, 15
+; P8-BE-NEXT:    stxvd2x 0, 0, 3
 ; P8-BE-NEXT:    oris 4, 4, 1029
 ; P8-BE-NEXT:    ori 4, 4, 1543
-; P8-BE-NEXT:    stxvd2x 0, 0, 3
 ; P8-BE-NEXT:    std 4, 16(3)
 ; P8-BE-NEXT:    blr
 ;
@@ -1061,14 +1061,14 @@ define dso_local void @foo13_int_be_reuse8B(ptr nocapture noundef writeonly %a)
 ; P8-LE-LABEL: foo13_int_be_reuse8B:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI12_0@toc@ha
-; P8-LE-NEXT:    lis 5, 1
 ; P8-LE-NEXT:    addi 4, 4, .LCPI12_0@toc@l
-; P8-LE-NEXT:    ori 5, 5, 515
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
-; P8-LE-NEXT:    rldic 4, 5, 32, 15
+; P8-LE-NEXT:    lis 4, 1
+; P8-LE-NEXT:    ori 4, 4, 515
+; P8-LE-NEXT:    rldic 4, 4, 32, 15
+; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    oris 4, 4, 1029
 ; P8-LE-NEXT:    ori 4, 4, 1543
-; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    std 4, 16(3)
 ; P8-LE-NEXT:    blr
 ;
@@ -1106,13 +1106,13 @@ define dso_local void @foo14_int_le_reuse8B(ptr nocapture noundef writeonly %a)
 ; P8-BE-LABEL: foo14_int_le_reuse8B:
 ; P8-BE:       # %bb.0: # %entry
 ; P8-BE-NEXT:    ld 4, L..C13(2) # %const.0
-; P8-BE-NEXT:    lis 5, 2057
-; P8-BE-NEXT:    ori 5, 5, 2571
 ; P8-BE-NEXT:    lxvd2x 0, 0, 4
-; P8-BE-NEXT:    rldic 4, 5, 32, 4
+; P8-BE-NEXT:    lis 4, 2057
+; P8-BE-NEXT:    ori 4, 4, 2571
+; P8-BE-NEXT:    rldic 4, 4, 32, 4
+; P8-BE-NEXT:    stxvd2x 0, 0, 3
 ; P8-BE-NEXT:    oris 4, 4, 3085
 ; P8-BE-NEXT:    ori 4, 4, 3599
-; P8-BE-NEXT:    stxvd2x 0, 0, 3
 ; P8-BE-NEXT:    std 4, 16(3)
 ; P8-BE-NEXT:    blr
 ;
@@ -1143,14 +1143,14 @@ define dso_local void @foo14_int_le_reuse8B(ptr nocapture noundef writeonly %a)
 ; P8-LE-LABEL: foo14_int_le_reuse8B:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI13_0@toc@ha
-; P8-LE-NEXT:    lis 5, 2057
 ; P8-LE-NEXT:    addi 4, 4, .LCPI13_0@toc@l
-; P8-LE-NEXT:    ori 5, 5, 2571
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
-; P8-LE-NEXT:    rldic 4, 5, 32, 4
+; P8-LE-NEXT:    lis 4, 2057
+; P8-LE-NEXT:    ori 4, 4, 2571
+; P8-LE-NEXT:    rldic 4, 4, 32, 4
+; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    oris 4, 4, 3085
 ; P8-LE-NEXT:    ori 4, 4, 3599
-; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    std 4, 16(3)
 ; P8-LE-NEXT:    blr
 ;
@@ -1254,13 +1254,13 @@ define dso_local void @foo16_int_noreuse8B(ptr nocapture noundef writeonly %a) l
 ; P8-BE-LABEL: foo16_int_noreuse8B:
 ; P8-BE:       # %bb.0: # %entry
 ; P8-BE-NEXT:    ld 4, L..C15(2) # %const.0
-; P8-BE-NEXT:    lis 5, 1
-; P8-BE-NEXT:    ori 5, 5, 515
 ; P8-BE-NEXT:    lxvw4x 0, 0, 4
-; P8-BE-NEXT:    rldic 4, 5, 32, 15
+; P8-BE-NEXT:    lis 4, 1
+; P8-BE-NEXT:    ori 4, 4, 515
+; P8-BE-NEXT:    rldic 4, 4, 32, 15
+; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    oris 4, 4, 1029
 ; P8-BE-NEXT:    ori 4, 4, 1544
-; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    std 4, 16(3)
 ; P8-BE-NEXT:    blr
 ;
@@ -1291,14 +1291,14 @@ define dso_local void @foo16_int_noreuse8B(ptr nocapture noundef writeonly %a) l
 ; P8-LE-LABEL: foo16_int_noreuse8B:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI15_0@toc@ha
-; P8-LE-NEXT:    lis 5, 128
 ; P8-LE-NEXT:    addi 4, 4, .LCPI15_0@toc@l
-; P8-LE-NEXT:    ori 5, 5, 41153
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
-; P8-LE-NEXT:    rldic 4, 5, 35, 5
+; P8-LE-NEXT:    lis 4, 128
+; P8-LE-NEXT:    ori 4, 4, 41153
+; P8-LE-NEXT:    rldic 4, 4, 35, 5
+; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    oris 4, 4, 1
 ; P8-LE-NEXT:    ori 4, 4, 515
-; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    std 4, 16(3)
 ; P8-LE-NEXT:    blr
 ;
@@ -1538,12 +1538,12 @@ define dso_local void @foo20_fp_le_reuse8B(ptr nocapture noundef writeonly %a) l
 ; P8-BE-LABEL: foo20_fp_le_reuse8B:
 ; P8-BE:       # %bb.0: # %entry
 ; P8-BE-NEXT:    ld 4, L..C19(2) # %const.0
-; P8-BE-NEXT:    lis 5, 16420
 ; P8-BE-NEXT:    lxvd2x 0, 0, 4
-; P8-BE-NEXT:    ori 4, 5, 13107
+; P8-BE-NEXT:    lis 4, 16420
+; P8-BE-NEXT:    ori 4, 4, 13107
 ; P8-BE-NEXT:    rldimi 4, 4, 32, 0
-; P8-BE-NEXT:    rlwimi 4, 4, 16, 0, 15
 ; P8-BE-NEXT:    stxvd2x 0, 0, 3
+; P8-BE-NEXT:    rlwimi 4, 4, 16, 0, 15
 ; P8-BE-NEXT:    std 4, 16(3)
 ; P8-BE-NEXT:    blr
 ;
@@ -1573,13 +1573,13 @@ define dso_local void @foo20_fp_le_reuse8B(ptr nocapture noundef writeonly %a) l
 ; P8-LE-LABEL: foo20_fp_le_reuse8B:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI19_0@toc@ha
-; P8-LE-NEXT:    lis 5, 16420
 ; P8-LE-NEXT:    addi 4, 4, .LCPI19_0@toc@l
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
-; P8-LE-NEXT:    ori 4, 5, 13107
+; P8-LE-NEXT:    lis 4, 16420
+; P8-LE-NEXT:    ori 4, 4, 13107
 ; P8-LE-NEXT:    rldimi 4, 4, 32, 0
-; P8-LE-NEXT:    rlwimi 4, 4, 16, 0, 15
 ; P8-LE-NEXT:    stxvd2x 0, 0, 3
+; P8-LE-NEXT:    rlwimi 4, 4, 16, 0, 15
 ; P8-LE-NEXT:    std 4, 16(3)
 ; P8-LE-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll b/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll
index 042fd3350323097..4139a8fbcbb4f1e 100644
--- a/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll
+++ b/llvm/test/CodeGen/PowerPC/const-splat-array-init.ll
@@ -178,14 +178,14 @@ define dso_local void @foo3(ptr nocapture noundef writeonly %a) local_unnamed_ad
 ; P8-LE-LABEL: foo3:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI2_0@toc@ha
-; P8-LE-NEXT:    li 5, 3333
 ; P8-LE-NEXT:    addi 4, 4, .LCPI2_0@toc@l
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
 ; P8-LE-NEXT:    lis 4, 3333
 ; P8-LE-NEXT:    ori 4, 4, 3333
 ; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    stw 4, 16(3)
-; P8-LE-NEXT:    sth 5, 20(3)
+; P8-LE-NEXT:    li 4, 3333
+; P8-LE-NEXT:    sth 4, 20(3)
 ; P8-LE-NEXT:    blr
 ;
 ; P9-LE-LABEL: foo3:
@@ -572,13 +572,13 @@ define dso_local void @foo9(ptr nocapture noundef writeonly %a) local_unnamed_ad
 ; P8-BE-LABEL: foo9:
 ; P8-BE:       # %bb.0: # %entry
 ; P8-BE-NEXT:    ld 4, L..C8(2) # %const.0
-; P8-BE-NEXT:    lis 5, 16394
-; P8-BE-NEXT:    ori 5, 5, 41943
 ; P8-BE-NEXT:    lxvd2x 0, 0, 4
-; P8-BE-NEXT:    rldic 4, 5, 32, 1
+; P8-BE-NEXT:    lis 4, 16394
+; P8-BE-NEXT:    ori 4, 4, 41943
+; P8-BE-NEXT:    rldic 4, 4, 32, 1
+; P8-BE-NEXT:    stxvd2x 0, 0, 3
 ; P8-BE-NEXT:    oris 4, 4, 2621
 ; P8-BE-NEXT:    ori 4, 4, 28836
-; P8-BE-NEXT:    stxvd2x 0, 0, 3
 ; P8-BE-NEXT:    std 4, 16(3)
 ; P8-BE-NEXT:    blr
 ;
@@ -609,14 +609,14 @@ define dso_local void @foo9(ptr nocapture noundef writeonly %a) local_unnamed_ad
 ; P8-LE-LABEL: foo9:
 ; P8-LE:       # %bb.0: # %entry
 ; P8-LE-NEXT:    addis 4, 2, .LCPI8_0@toc@ha
-; P8-LE-NEXT:    lis 5, 16394
 ; P8-LE-NEXT:    addi 4, 4, .LCPI8_0@toc@l
-; P8-LE-NEXT:    ori 5, 5, 41943
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
-; P8-LE-NEXT:    rldic 4, 5, 32, 1
+; P8-LE-NEXT:    lis 4, 16394
+; P8-LE-NEXT:    ori 4, 4, 41943
+; P8-LE-NEXT:    rldic 4, 4, 32, 1
+; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    oris 4, 4, 2621
 ; P8-LE-NEXT:    ori 4, 4, 28836
-; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    std 4, 16(3)
 ; P8-LE-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/constant-combines.ll b/llvm/test/CodeGen/PowerPC/constant-combines.ll
index 04eb80d70c14fdf..1bd5089d65bc8d2 100644
--- a/llvm/test/CodeGen/PowerPC/constant-combines.ll
+++ b/llvm/test/CodeGen/PowerPC/constant-combines.ll
@@ -15,9 +15,9 @@ define void @fold_constant_stores_loaddr(ptr %i8_ptr) {
 ; LE-LABEL: fold_constant_stores_loaddr:
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    li 4, 0
-; LE-NEXT:    li 5, -86
 ; LE-NEXT:    std 4, 0(3)
-; LE-NEXT:    stb 5, 0(3)
+; LE-NEXT:    li 4, -86
+; LE-NEXT:    stb 4, 0(3)
 ; LE-NEXT:    blr
 entry:
   store i64   0, ptr %i8_ptr, align 8
@@ -38,9 +38,9 @@ define void @fold_constant_stores_hiaddr(ptr %i8_ptr) {
 ; LE-LABEL: fold_constant_stores_hiaddr:
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    li 4, 0
-; LE-NEXT:    li 5, -86
 ; LE-NEXT:    std 4, 0(3)
-; LE-NEXT:    stb 5, 0(3)
+; LE-NEXT:    li 4, -86
+; LE-NEXT:    stb 4, 0(3)
 ; LE-NEXT:    blr
 entry:
   store i64   0, ptr %i8_ptr, align 8

diff  --git a/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll b/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
index e2536d2e8e9ff6c..72d47c6e0fee2f9 100644
--- a/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
+++ b/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll
@@ -10,13 +10,13 @@ define <16 x i8> @test_vpermxorb() local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_vpermxorb:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; CHECK-LE-P8-NEXT:    addis 4, 2, .LCPI0_1@toc@ha
 ; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI0_0@toc@l
-; CHECK-LE-P8-NEXT:    addi 4, 4, .LCPI0_1@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x 0, 0, 3
-; CHECK-LE-P8-NEXT:    lxvd2x 1, 0, 4
+; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
+; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI0_1@toc@l
 ; CHECK-LE-P8-NEXT:    xxswapd 34, 0
-; CHECK-LE-P8-NEXT:    xxswapd 35, 1
+; CHECK-LE-P8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-LE-P8-NEXT:    xxswapd 35, 0
 ; CHECK-LE-P8-NEXT:    vpermxor 2, 3, 2, 2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -34,11 +34,11 @@ define <16 x i8> @test_vpermxorb() local_unnamed_addr {
 ; CHECK-BE-P8-LABEL: test_vpermxorb:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; CHECK-BE-P8-NEXT:    addis 4, 2, .LCPI0_1@toc@ha
 ; CHECK-BE-P8-NEXT:    addi 3, 3, .LCPI0_0@toc@l
-; CHECK-BE-P8-NEXT:    addi 4, 4, .LCPI0_1@toc@l
 ; CHECK-BE-P8-NEXT:    lxvw4x 34, 0, 3
-; CHECK-BE-P8-NEXT:    lxvw4x 35, 0, 4
+; CHECK-BE-P8-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
+; CHECK-BE-P8-NEXT:    addi 3, 3, .LCPI0_1@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x 35, 0, 3
 ; CHECK-BE-P8-NEXT:    vpermxor 2, 3, 2, 2
 ; CHECK-BE-P8-NEXT:    blr
 entry:
@@ -52,13 +52,13 @@ define <8 x i16> @test_vpermxorh() local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_vpermxorh:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
-; CHECK-LE-P8-NEXT:    addis 4, 2, .LCPI1_1@toc@ha
 ; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI1_0@toc@l
-; CHECK-LE-P8-NEXT:    addi 4, 4, .LCPI1_1@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x 0, 0, 3
-; CHECK-LE-P8-NEXT:    lxvd2x 1, 0, 4
+; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
+; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI1_1@toc@l
 ; CHECK-LE-P8-NEXT:    xxswapd 34, 0
-; CHECK-LE-P8-NEXT:    xxswapd 35, 1
+; CHECK-LE-P8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-LE-P8-NEXT:    xxswapd 35, 0
 ; CHECK-LE-P8-NEXT:    vpermxor 2, 3, 2, 2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -76,11 +76,11 @@ define <8 x i16> @test_vpermxorh() local_unnamed_addr {
 ; CHECK-BE-P8-LABEL: test_vpermxorh:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
-; CHECK-BE-P8-NEXT:    addis 4, 2, .LCPI1_1@toc@ha
 ; CHECK-BE-P8-NEXT:    addi 3, 3, .LCPI1_0@toc@l
-; CHECK-BE-P8-NEXT:    addi 4, 4, .LCPI1_1@toc@l
 ; CHECK-BE-P8-NEXT:    lxvw4x 34, 0, 3
-; CHECK-BE-P8-NEXT:    lxvw4x 35, 0, 4
+; CHECK-BE-P8-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
+; CHECK-BE-P8-NEXT:    addi 3, 3, .LCPI1_1@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x 35, 0, 3
 ; CHECK-BE-P8-NEXT:    vpermxor 2, 3, 2, 2
 ; CHECK-BE-P8-NEXT:    blr
 entry:
@@ -93,13 +93,13 @@ define <4 x i32> @test_vpermxorw() local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_vpermxorw:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
-; CHECK-LE-P8-NEXT:    addis 4, 2, .LCPI2_1@toc@ha
 ; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI2_0@toc@l
-; CHECK-LE-P8-NEXT:    addi 4, 4, .LCPI2_1@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x 0, 0, 3
-; CHECK-LE-P8-NEXT:    lxvd2x 1, 0, 4
+; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
+; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI2_1@toc@l
 ; CHECK-LE-P8-NEXT:    xxswapd 34, 0
-; CHECK-LE-P8-NEXT:    xxswapd 35, 1
+; CHECK-LE-P8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-LE-P8-NEXT:    xxswapd 35, 0
 ; CHECK-LE-P8-NEXT:    vpermxor 2, 3, 2, 2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -117,11 +117,11 @@ define <4 x i32> @test_vpermxorw() local_unnamed_addr {
 ; CHECK-BE-P8-LABEL: test_vpermxorw:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
-; CHECK-BE-P8-NEXT:    addis 4, 2, .LCPI2_1@toc@ha
 ; CHECK-BE-P8-NEXT:    addi 3, 3, .LCPI2_0@toc@l
-; CHECK-BE-P8-NEXT:    addi 4, 4, .LCPI2_1@toc@l
 ; CHECK-BE-P8-NEXT:    lxvw4x 34, 0, 3
-; CHECK-BE-P8-NEXT:    lxvw4x 35, 0, 4
+; CHECK-BE-P8-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
+; CHECK-BE-P8-NEXT:    addi 3, 3, .LCPI2_1@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x 35, 0, 3
 ; CHECK-BE-P8-NEXT:    vpermxor 2, 3, 2, 2
 ; CHECK-BE-P8-NEXT:    blr
 entry:
@@ -134,13 +134,13 @@ define <2 x i64> @test_vpermxord() local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_vpermxord:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
-; CHECK-LE-P8-NEXT:    addis 4, 2, .LCPI3_1@toc@ha
 ; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI3_0@toc@l
-; CHECK-LE-P8-NEXT:    addi 4, 4, .LCPI3_1@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x 0, 0, 3
-; CHECK-LE-P8-NEXT:    lxvd2x 1, 0, 4
+; CHECK-LE-P8-NEXT:    addis 3, 2, .LCPI3_1@toc@ha
+; CHECK-LE-P8-NEXT:    addi 3, 3, .LCPI3_1@toc@l
 ; CHECK-LE-P8-NEXT:    xxswapd 34, 0
-; CHECK-LE-P8-NEXT:    xxswapd 35, 1
+; CHECK-LE-P8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-LE-P8-NEXT:    xxswapd 35, 0
 ; CHECK-LE-P8-NEXT:    vpermxor 2, 3, 2, 2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -158,11 +158,11 @@ define <2 x i64> @test_vpermxord() local_unnamed_addr {
 ; CHECK-BE-P8-LABEL: test_vpermxord:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
-; CHECK-BE-P8-NEXT:    addis 4, 2, .LCPI3_1@toc@ha
 ; CHECK-BE-P8-NEXT:    addi 3, 3, .LCPI3_0@toc@l
-; CHECK-BE-P8-NEXT:    addi 4, 4, .LCPI3_1@toc@l
 ; CHECK-BE-P8-NEXT:    lxvw4x 34, 0, 3
-; CHECK-BE-P8-NEXT:    lxvw4x 35, 0, 4
+; CHECK-BE-P8-NEXT:    addis 3, 2, .LCPI3_1@toc@ha
+; CHECK-BE-P8-NEXT:    addi 3, 3, .LCPI3_1@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x 35, 0, 3
 ; CHECK-BE-P8-NEXT:    vpermxor 2, 3, 2, 2
 ; CHECK-BE-P8-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/csr-split.ll b/llvm/test/CodeGen/PowerPC/csr-split.ll
index e3ba42a50fb7bc9..dea07f3c574203e 100644
--- a/llvm/test/CodeGen/PowerPC/csr-split.ll
+++ b/llvm/test/CodeGen/PowerPC/csr-split.ll
@@ -46,11 +46,11 @@ define dso_local signext i32 @test1(ptr %b) local_unnamed_addr  {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset r30, -16
-; CHECK-NEXT:    addis r4, r2, a@toc@ha
 ; CHECK-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    lwa r4, a@toc@l(r4)
-; CHECK-NEXT:    cmpld r4, r3
+; CHECK-NEXT:    addis r3, r2, a@toc@ha
+; CHECK-NEXT:    lwa r3, a@toc@l(r3)
+; CHECK-NEXT:    cmpld r3, r30
 ; CHECK-NEXT:    # implicit-def: $r3
 ; CHECK-NEXT:    bne cr0, .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
@@ -129,8 +129,8 @@ define dso_local signext i32 @test2(ptr %p1) local_unnamed_addr  {
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    cmpldi r3, 0
 ; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    cmpldi r30, 0
 ; CHECK-NEXT:    beq cr0, .LBB1_3
 ; CHECK-NEXT:  # %bb.1: # %if.end
 ; CHECK-NEXT:    addis r4, r2, a@toc@ha

diff  --git a/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
index 26c62261bdf3411..c1d1461664418c8 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
@@ -13,18 +13,18 @@ define void @test(ptr %cast) {
 ; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stdu 1, -64(1)
-; CHECK-NEXT:    li 30, 255
-; CHECK-NEXT:    addi 29, 3, -8
+; CHECK-NEXT:    addi 30, 3, -8
+; CHECK-NEXT:    li 29, 255
 ; CHECK-NEXT:    std 0, 80(1)
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB0_1: # %for.body
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lfdu 1, 8(29)
+; CHECK-NEXT:    lfdu 1, 8(30)
 ; CHECK-NEXT:    bl cos
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    addi 30, 30, -1
-; CHECK-NEXT:    stfd 1, 0(29)
-; CHECK-NEXT:    cmpldi 30, 0
+; CHECK-NEXT:    addi 29, 29, -1
+; CHECK-NEXT:    stfd 1, 0(30)
+; CHECK-NEXT:    cmpldi 29, 0
 ; CHECK-NEXT:    bc 12, 1, .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %exit
 ; CHECK-NEXT:    addi 1, 1, 64

diff  --git a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
index 30d92068cb9d388..23d021a26293417 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
@@ -38,23 +38,23 @@ define void @fmul_ctrloop_fp128() nounwind {
 ; PWR8-NEXT:    stdu 1, -112(1)
 ; PWR8-NEXT:    li 3, 48
 ; PWR8-NEXT:    std 0, 128(1)
-; PWR8-NEXT:    addis 4, 2, x@toc@ha
 ; PWR8-NEXT:    std 28, 80(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    std 29, 88(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    std 30, 96(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    li 30, 4
-; PWR8-NEXT:    addi 4, 4, x@toc@l
 ; PWR8-NEXT:    li 29, 16
+; PWR8-NEXT:    std 26, 64(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PWR8-NEXT:    addis 3, 2, a@toc@ha
-; PWR8-NEXT:    std 26, 64(1) # 8-byte Folded Spill
-; PWR8-NEXT:    std 27, 72(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    addi 3, 3, a@toc@l
+; PWR8-NEXT:    std 27, 72(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    lxvd2x 0, 0, 3
 ; PWR8-NEXT:    addis 3, 2, y@toc@ha
 ; PWR8-NEXT:    addi 3, 3, y@toc@l
 ; PWR8-NEXT:    addi 28, 3, -16
-; PWR8-NEXT:    addi 3, 4, -16
+; PWR8-NEXT:    addis 3, 2, x@toc@ha
+; PWR8-NEXT:    addi 3, 3, x@toc@l
+; PWR8-NEXT:    addi 3, 3, -16
 ; PWR8-NEXT:    xxswapd 63, 0
 ; PWR8-NEXT:    .p2align 4
 ; PWR8-NEXT:  .LBB0_1: # %for.body
@@ -66,8 +66,8 @@ define void @fmul_ctrloop_fp128() nounwind {
 ; PWR8-NEXT:    xxswapd 35, 0
 ; PWR8-NEXT:    bl __mulkf3
 ; PWR8-NEXT:    nop
-; PWR8-NEXT:    xxswapd 0, 34
 ; PWR8-NEXT:    addi 30, 30, -1
+; PWR8-NEXT:    xxswapd 0, 34
 ; PWR8-NEXT:    mr 3, 26
 ; PWR8-NEXT:    cmpldi 30, 0
 ; PWR8-NEXT:    stxvd2x 0, 28, 29
@@ -77,9 +77,9 @@ define void @fmul_ctrloop_fp128() nounwind {
 ; PWR8-NEXT:    li 3, 48
 ; PWR8-NEXT:    ld 30, 96(1) # 8-byte Folded Reload
 ; PWR8-NEXT:    ld 29, 88(1) # 8-byte Folded Reload
+; PWR8-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PWR8-NEXT:    ld 28, 80(1) # 8-byte Folded Reload
 ; PWR8-NEXT:    ld 27, 72(1) # 8-byte Folded Reload
-; PWR8-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PWR8-NEXT:    ld 26, 64(1) # 8-byte Folded Reload
 ; PWR8-NEXT:    addi 1, 1, 112
 ; PWR8-NEXT:    ld 0, 16(1)
@@ -132,12 +132,12 @@ define void @fpext_ctrloop_fp128(ptr %a) nounwind {
 ; PWR8-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    stdu 1, -64(1)
-; PWR8-NEXT:    addis 4, 2, y@toc@ha
 ; PWR8-NEXT:    addi 30, 3, -8
+; PWR8-NEXT:    addis 3, 2, y@toc@ha
 ; PWR8-NEXT:    li 29, 4
 ; PWR8-NEXT:    std 0, 80(1)
-; PWR8-NEXT:    addi 4, 4, y@toc@l
-; PWR8-NEXT:    addi 28, 4, -16
+; PWR8-NEXT:    addi 3, 3, y@toc@l
+; PWR8-NEXT:    addi 28, 3, -16
 ; PWR8-NEXT:    .p2align 4
 ; PWR8-NEXT:  .LBB1_1: # %for.body
 ; PWR8-NEXT:    #
@@ -145,8 +145,8 @@ define void @fpext_ctrloop_fp128(ptr %a) nounwind {
 ; PWR8-NEXT:    addi 28, 28, 16
 ; PWR8-NEXT:    bl __extenddfkf2
 ; PWR8-NEXT:    nop
-; PWR8-NEXT:    xxswapd 0, 34
 ; PWR8-NEXT:    addi 29, 29, -1
+; PWR8-NEXT:    xxswapd 0, 34
 ; PWR8-NEXT:    cmpldi 29, 0
 ; PWR8-NEXT:    stxvd2x 0, 0, 28
 ; PWR8-NEXT:    bc 12, 1, .LBB1_1
@@ -204,12 +204,12 @@ define void @fptrunc_ctrloop_fp128(ptr %a) nounwind {
 ; PWR8-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    stdu 1, -64(1)
-; PWR8-NEXT:    addis 4, 2, x@toc@ha
 ; PWR8-NEXT:    addi 30, 3, -8
+; PWR8-NEXT:    addis 3, 2, x@toc@ha
 ; PWR8-NEXT:    li 29, 4
 ; PWR8-NEXT:    std 0, 80(1)
-; PWR8-NEXT:    addi 4, 4, x@toc@l
-; PWR8-NEXT:    addi 28, 4, -16
+; PWR8-NEXT:    addi 3, 3, x@toc@l
+; PWR8-NEXT:    addi 28, 3, -16
 ; PWR8-NEXT:    .p2align 4
 ; PWR8-NEXT:  .LBB2_1: # %for.body
 ; PWR8-NEXT:    #

diff  --git a/llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll b/llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll
index db459598babc9a2..b96ffba94a91e28 100644
--- a/llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll
+++ b/llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll
@@ -17,8 +17,8 @@ define cxx_fast_tlscc nonnull ptr @_ZTW2sg() nounwind {
 ; CHECK-NEXT:    mflr 0
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stdu 1, -48(1)
-; CHECK-NEXT:    std 0, 64(1)
 ; CHECK-NEXT:    addis 3, 13, __tls_guard@tprel@ha
+; CHECK-NEXT:    std 0, 64(1)
 ; CHECK-NEXT:    lbz 4, __tls_guard@tprel@l(3)
 ; CHECK-NEXT:    andi. 4, 4, 1
 ; CHECK-NEXT:    bc 12, 1, .LBB0_2

diff  --git a/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll b/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll
index 70f57515726209f..cd5ea16d4600b72 100644
--- a/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll
+++ b/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll
@@ -79,10 +79,10 @@ define ppc_fp128 @test_ctr0() {
 ; P8LE-NEXT:    .cfi_offset r30, -16
 ; P8LE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; P8LE-NEXT:    stdu r1, -48(r1)
-; P8LE-NEXT:    xxlxor f1, f1, f1
 ; P8LE-NEXT:    li r3, 1
-; P8LE-NEXT:    std r0, 64(r1)
+; P8LE-NEXT:    xxlxor f1, f1, f1
 ; P8LE-NEXT:    xxlxor f2, f2, f2
+; P8LE-NEXT:    std r0, 64(r1)
 ; P8LE-NEXT:    rldic r30, r3, 62, 1
 ; P8LE-NEXT:    .p2align 5
 ; P8LE-NEXT:  .LBB0_1: # %bb6
@@ -109,9 +109,9 @@ define ppc_fp128 @test_ctr0() {
 ; P8BE-NEXT:    .cfi_def_cfa_offset 128
 ; P8BE-NEXT:    .cfi_offset lr, 16
 ; P8BE-NEXT:    .cfi_offset r30, -16
-; P8BE-NEXT:    xxlxor f1, f1, f1
 ; P8BE-NEXT:    li r3, 1
 ; P8BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; P8BE-NEXT:    xxlxor f1, f1, f1
 ; P8BE-NEXT:    xxlxor f2, f2, f2
 ; P8BE-NEXT:    rldic r30, r3, 62, 1
 ; P8BE-NEXT:    .p2align 5

diff  --git a/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll b/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll
index be004ffb30fa378..fc0bfef11d7a6f8 100644
--- a/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll
+++ b/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll
@@ -32,9 +32,8 @@ entry:
 define zeroext i8 @test_byval_mem1(ptr byval(%struct_S1) align 1 %s) {
 ; CHECK-LABEL: test_byval_mem1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mr 4, 3
+; CHECK-NEXT:    stb 3, -8(1)
 ; CHECK-NEXT:    clrldi 3, 3, 56
-; CHECK-NEXT:    stb 4, -8(1)
 ; CHECK-NEXT:    blr
 entry:
   %0 = load i8, ptr %s, align 1
@@ -56,8 +55,8 @@ define void @call_test_byval_mem1_2() #0 {
 ; CHECK-NEXT:    li 7, 4
 ; CHECK-NEXT:    li 8, 5
 ; CHECK-NEXT:    li 9, 6
-; CHECK-NEXT:    ld 3, .LC0@toc@l(3)
 ; CHECK-NEXT:    li 10, 7
+; CHECK-NEXT:    ld 3, .LC0@toc@l(3)
 ; CHECK-NEXT:    lbz 3, 0(3)
 ; CHECK-NEXT:    stb 3, 96(1)
 ; CHECK-NEXT:    li 3, 0
@@ -130,16 +129,16 @@ define void @call_test_byval_mem1_4() #0 {
 ; CHECK-NEXT:    std 0, 128(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
-; CHECK-NEXT:    li 4, 7
+; CHECK-NEXT:    li 3, 7
+; CHECK-NEXT:    li 4, 1
 ; CHECK-NEXT:    li 5, 2
 ; CHECK-NEXT:    li 7, 3
 ; CHECK-NEXT:    li 8, 4
 ; CHECK-NEXT:    li 9, 5
 ; CHECK-NEXT:    li 10, 6
+; CHECK-NEXT:    std 3, 96(1)
+; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
 ; CHECK-NEXT:    ld 3, .LC0@toc@l(3)
-; CHECK-NEXT:    std 4, 96(1)
-; CHECK-NEXT:    li 4, 1
 ; CHECK-NEXT:    lbz 6, 0(3)
 ; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    bl test_byval_mem1_4
@@ -273,10 +272,9 @@ define zeroext i8 @test_byval_mem3(ptr byval(%struct_S3) align 1 %s) {
 ; CHECK-LABEL: test_byval_mem3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sth 3, -8(1)
-; CHECK-NEXT:    rldicl 5, 3, 48, 16
-; CHECK-NEXT:    lbz 4, -8(1)
-; CHECK-NEXT:    stb 5, -6(1)
-; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    rldicl 3, 3, 48, 16
+; CHECK-NEXT:    stb 3, -6(1)
+; CHECK-NEXT:    lbz 3, -8(1)
 ; CHECK-NEXT:    blr
 entry:
   %0 = load i8, ptr %s, align 1
@@ -347,9 +345,8 @@ entry:
 define zeroext i8 @test_byval_mem8(ptr byval(%struct_S8) align 1 %s) {
 ; CHECK-LABEL: test_byval_mem8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mr 4, 3
+; CHECK-NEXT:    std 3, -8(1)
 ; CHECK-NEXT:    clrldi 3, 3, 56
-; CHECK-NEXT:    std 4, -8(1)
 ; CHECK-NEXT:    blr
 entry:
   %0 = load i8, ptr %s, align 1
@@ -387,12 +384,11 @@ entry:
 define zeroext i8 @test_byval_mem32(ptr byval(%struct_S32) align 1 %s) {
 ; CHECK-LABEL: test_byval_mem32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mr 7, 3
+; CHECK-NEXT:    std 3, -32(1)
 ; CHECK-NEXT:    clrldi 3, 3, 56
 ; CHECK-NEXT:    std 4, -24(1)
 ; CHECK-NEXT:    std 5, -16(1)
 ; CHECK-NEXT:    std 6, -8(1)
-; CHECK-NEXT:    std 7, -32(1)
 ; CHECK-NEXT:    blr
 entry:
   %0 = load i8, ptr %s, align 1
@@ -411,11 +407,11 @@ define void @call_test_byval_mem32_2() #0 {
 ; CHECK-NEXT:    vspltisw 2, 1
 ; CHECK-NEXT:    ld 3, .LC5@toc@l(3)
 ; CHECK-NEXT:    xvcvsxwdp 1, 34
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    ld 7, 24(3)
 ; CHECK-NEXT:    ld 6, 16(3)
 ; CHECK-NEXT:    ld 5, 8(3)
 ; CHECK-NEXT:    ld 4, 0(3)
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl test_byval_mem32_2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addi 1, 1, 32
@@ -457,11 +453,11 @@ define void @call_test_byval_mem32_3() #0 {
 ; CHECK-NEXT:    li 7, 2
 ; CHECK-NEXT:    ld 3, .LC5@toc@l(3)
 ; CHECK-NEXT:    xvcvsxwdp 1, 34
-; CHECK-NEXT:    lxvd2x 0, 3, 4
-; CHECK-NEXT:    li 4, 88
-; CHECK-NEXT:    xvcvsxwdp 2, 35
 ; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-NEXT:    xvcvsxwdp 2, 35
 ; CHECK-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; CHECK-NEXT:    lxvd2x 0, 3, 4
+; CHECK-NEXT:    li 4, 88
 ; CHECK-NEXT:    stxvd2x 0, 1, 4
 ; CHECK-NEXT:    li 4, 72
 ; CHECK-NEXT:    lxvd2x 0, 0, 3

diff  --git a/llvm/test/CodeGen/PowerPC/extra-toc-reg-deps.ll b/llvm/test/CodeGen/PowerPC/extra-toc-reg-deps.ll
index c631cd431682a44..00d170f308c80a0 100644
--- a/llvm/test/CodeGen/PowerPC/extra-toc-reg-deps.ll
+++ b/llvm/test/CodeGen/PowerPC/extra-toc-reg-deps.ll
@@ -70,14 +70,14 @@ entry:
 ; CHECK: addis [[REG1:[0-9]+]], 2, .LC0@toc@ha
 ; CHECK: std 2, 40(1)
 ; CHECK: ld {{[0-9]+}}, .LC0@toc@l([[REG1]])
-; CHECK: {{mr|ld}} 2,
 ; CHECK: mtctr
+; CHECK: {{mr|ld}} 3,
 ; CHECK: bctrl
 ; CHECK: ld 2, 40(1)
 
 ; CHECK: std 2, 40(1)
-; CHECK: {{mr|ld}} 2,
 ; CHECK: mtctr
+; CHECK: {{mr|ld}} 3,
 ; CHECK: bctrl
 ; CHECK: ld 2, 40(1)
 

diff  --git a/llvm/test/CodeGen/PowerPC/extract-and-store.ll b/llvm/test/CodeGen/PowerPC/extract-and-store.ll
index 9840de29b538694..8bf4013160d8e9e 100644
--- a/llvm/test/CodeGen/PowerPC/extract-and-store.ll
+++ b/llvm/test/CodeGen/PowerPC/extract-and-store.ll
@@ -484,8 +484,8 @@ define dso_local void @test_consecutive_i32(<4 x i32> %a, ptr nocapture %b) loca
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 2
 ; CHECK-NEXT:    li r3, 4
-; CHECK-NEXT:    stxsiwx vs34, r5, r3
 ; CHECK-NEXT:    stfiwx f0, 0, r5
+; CHECK-NEXT:    stxsiwx vs34, r5, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test_consecutive_i32:
@@ -571,7 +571,6 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, ptr nocapture %
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
 ; CHECK-NEXT:    xxsldwi vs1, vs34, vs34, 1
-; CHECK-NEXT:    li r4, 20
 ; CHECK-NEXT:    addi r3, r3, .LCPI16_0@toc@l
 ; CHECK-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-NEXT:    li r3, 16
@@ -580,19 +579,20 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, ptr nocapture %
 ; CHECK-NEXT:    xxswapd vs0, vs35
 ; CHECK-NEXT:    stxvd2x vs0, 0, r5
 ; CHECK-NEXT:    stfiwx f1, r5, r3
-; CHECK-NEXT:    stxsiwx vs34, r5, r4
+; CHECK-NEXT:    li r3, 20
+; CHECK-NEXT:    stxsiwx vs34, r5, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test_stores_exceed_vec_size:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 1
-; CHECK-BE-NEXT:    li r4, 20
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI16_0@toc@l
 ; CHECK-BE-NEXT:    lxvw4x vs35, 0, r3
 ; CHECK-BE-NEXT:    li r3, 16
 ; CHECK-BE-NEXT:    stxsiwx vs34, r5, r3
-; CHECK-BE-NEXT:    stfiwx f0, r5, r4
+; CHECK-BE-NEXT:    li r3, 20
+; CHECK-BE-NEXT:    stfiwx f0, r5, r3
 ; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-BE-NEXT:    stxvw4x vs35, 0, r5
 ; CHECK-BE-NEXT:    blr
@@ -649,16 +649,16 @@ define void @test_5_consecutive_stores_of_bytes(<16 x i8> %a, ptr nocapture %b)
 ; CHECK-NEXT:    xxswapd vs0, vs34
 ; CHECK-NEXT:    mfvsrd r3, vs34
 ; CHECK-NEXT:    rldicl r6, r3, 32, 56
-; CHECK-NEXT:    rldicl r3, r3, 56, 56
 ; CHECK-NEXT:    mffprd r4, f0
-; CHECK-NEXT:    stb r6, 1(r5)
+; CHECK-NEXT:    rldicl r3, r3, 56, 56
 ; CHECK-NEXT:    stb r3, 2(r5)
-; CHECK-NEXT:    rldicl r6, r4, 32, 56
 ; CHECK-NEXT:    rldicl r3, r4, 8, 56
-; CHECK-NEXT:    rldicl r4, r4, 16, 56
-; CHECK-NEXT:    stb r6, 0(r5)
+; CHECK-NEXT:    stb r6, 1(r5)
+; CHECK-NEXT:    rldicl r6, r4, 32, 56
 ; CHECK-NEXT:    stb r3, 3(r5)
-; CHECK-NEXT:    stb r4, 4(r5)
+; CHECK-NEXT:    rldicl r3, r4, 16, 56
+; CHECK-NEXT:    stb r6, 0(r5)
+; CHECK-NEXT:    stb r3, 4(r5)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test_5_consecutive_stores_of_bytes:
@@ -670,11 +670,11 @@ define void @test_5_consecutive_stores_of_bytes(<16 x i8> %a, ptr nocapture %b)
 ; CHECK-BE-NEXT:    stb r6, 0(r5)
 ; CHECK-BE-NEXT:    rldicl r6, r4, 40, 56
 ; CHECK-BE-NEXT:    rldicl r4, r4, 16, 56
-; CHECK-BE-NEXT:    stb r6, 1(r5)
-; CHECK-BE-NEXT:    clrldi r6, r3, 56
-; CHECK-BE-NEXT:    rldicl r3, r3, 56, 56
 ; CHECK-BE-NEXT:    stb r4, 2(r5)
-; CHECK-BE-NEXT:    stb r6, 3(r5)
+; CHECK-BE-NEXT:    clrldi r4, r3, 56
+; CHECK-BE-NEXT:    rldicl r3, r3, 56, 56
+; CHECK-BE-NEXT:    stb r6, 1(r5)
+; CHECK-BE-NEXT:    stb r4, 3(r5)
 ; CHECK-BE-NEXT:    stb r3, 4(r5)
 ; CHECK-BE-NEXT:    blr
 ;
@@ -733,33 +733,33 @@ entry:
 define void @test_13_consecutive_stores_of_bytes(<16 x i8> %a, ptr nocapture %b) local_unnamed_addr #0 {
 ; CHECK-LABEL: test_13_consecutive_stores_of_bytes:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxswapd vs0, vs34
 ; CHECK-NEXT:    mfvsrd r3, vs34
-; CHECK-NEXT:    rldicl r4, r3, 32, 56
+; CHECK-NEXT:    xxswapd vs0, vs34
+; CHECK-NEXT:    rldicl r6, r3, 32, 56
+; CHECK-NEXT:    mffprd r4, f0
+; CHECK-NEXT:    stb r6, 1(r5)
 ; CHECK-NEXT:    rldicl r6, r3, 56, 56
-; CHECK-NEXT:    stb r4, 1(r5)
-; CHECK-NEXT:    rldicl r4, r3, 40, 56
-; CHECK-NEXT:    mffprd r7, f0
 ; CHECK-NEXT:    stb r6, 2(r5)
+; CHECK-NEXT:    rldicl r6, r3, 40, 56
+; CHECK-NEXT:    stb r6, 6(r5)
 ; CHECK-NEXT:    rldicl r6, r3, 24, 56
-; CHECK-NEXT:    stb r4, 6(r5)
-; CHECK-NEXT:    rldicl r4, r3, 8, 56
 ; CHECK-NEXT:    stb r6, 7(r5)
+; CHECK-NEXT:    rldicl r6, r3, 8, 56
 ; CHECK-NEXT:    rldicl r3, r3, 16, 56
-; CHECK-NEXT:    stb r4, 9(r5)
-; CHECK-NEXT:    rldicl r4, r7, 32, 56
-; CHECK-NEXT:    rldicl r6, r7, 8, 56
+; CHECK-NEXT:    stb r6, 9(r5)
+; CHECK-NEXT:    rldicl r6, r4, 32, 56
 ; CHECK-NEXT:    stb r3, 12(r5)
-; CHECK-NEXT:    stb r4, 0(r5)
-; CHECK-NEXT:    rldicl r4, r7, 16, 56
+; CHECK-NEXT:    stb r6, 0(r5)
+; CHECK-NEXT:    rldicl r6, r4, 8, 56
 ; CHECK-NEXT:    stb r6, 3(r5)
-; CHECK-NEXT:    clrldi r6, r7, 56
-; CHECK-NEXT:    stb r4, 4(r5)
-; CHECK-NEXT:    rldicl r4, r7, 48, 56
+; CHECK-NEXT:    rldicl r6, r4, 16, 56
+; CHECK-NEXT:    stb r6, 4(r5)
+; CHECK-NEXT:    clrldi r6, r4, 56
 ; CHECK-NEXT:    stb r6, 5(r5)
-; CHECK-NEXT:    rldicl r6, r7, 56, 56
-; CHECK-NEXT:    stb r4, 8(r5)
-; CHECK-NEXT:    rldicl r4, r7, 24, 56
+; CHECK-NEXT:    rldicl r6, r4, 48, 56
+; CHECK-NEXT:    stb r6, 8(r5)
+; CHECK-NEXT:    rldicl r6, r4, 56, 56
+; CHECK-NEXT:    rldicl r4, r4, 24, 56
 ; CHECK-NEXT:    stb r6, 10(r5)
 ; CHECK-NEXT:    stb r4, 11(r5)
 ; CHECK-NEXT:    blr
@@ -768,33 +768,33 @@ define void @test_13_consecutive_stores_of_bytes(<16 x i8> %a, ptr nocapture %b)
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mfvsrd r3, vs34
 ; CHECK-BE-NEXT:    xxswapd vs0, vs34
-; CHECK-BE-NEXT:    rldicl r4, r3, 40, 56
+; CHECK-BE-NEXT:    rldicl r6, r3, 40, 56
+; CHECK-BE-NEXT:    mffprd r4, f0
+; CHECK-BE-NEXT:    stb r6, 0(r5)
 ; CHECK-BE-NEXT:    clrldi r6, r3, 56
-; CHECK-BE-NEXT:    stb r4, 0(r5)
-; CHECK-BE-NEXT:    rldicl r4, r3, 56, 56
-; CHECK-BE-NEXT:    mffprd r7, f0
 ; CHECK-BE-NEXT:    stb r6, 3(r5)
+; CHECK-BE-NEXT:    rldicl r6, r3, 56, 56
+; CHECK-BE-NEXT:    stb r6, 4(r5)
 ; CHECK-BE-NEXT:    rldicl r6, r3, 8, 56
-; CHECK-BE-NEXT:    stb r4, 4(r5)
-; CHECK-BE-NEXT:    rldicl r4, r3, 24, 56
 ; CHECK-BE-NEXT:    stb r6, 5(r5)
+; CHECK-BE-NEXT:    rldicl r6, r3, 24, 56
+; CHECK-BE-NEXT:    stb r6, 8(r5)
 ; CHECK-BE-NEXT:    rldicl r6, r3, 16, 56
-; CHECK-BE-NEXT:    stb r4, 8(r5)
-; CHECK-BE-NEXT:    rldicl r4, r7, 40, 56
+; CHECK-BE-NEXT:    rldicl r3, r3, 48, 56
 ; CHECK-BE-NEXT:    stb r6, 10(r5)
-; CHECK-BE-NEXT:    rldicl r6, r7, 16, 56
-; CHECK-BE-NEXT:    stb r4, 1(r5)
-; CHECK-BE-NEXT:    rldicl r4, r7, 32, 56
+; CHECK-BE-NEXT:    rldicl r6, r4, 40, 56
+; CHECK-BE-NEXT:    stb r3, 11(r5)
+; CHECK-BE-NEXT:    rldicl r3, r4, 56, 56
+; CHECK-BE-NEXT:    stb r6, 1(r5)
+; CHECK-BE-NEXT:    rldicl r6, r4, 16, 56
+; CHECK-BE-NEXT:    stb r3, 12(r5)
 ; CHECK-BE-NEXT:    stb r6, 2(r5)
-; CHECK-BE-NEXT:    rldicl r6, r7, 48, 56
-; CHECK-BE-NEXT:    stb r4, 6(r5)
-; CHECK-BE-NEXT:    clrldi r4, r7, 56
+; CHECK-BE-NEXT:    rldicl r6, r4, 32, 56
+; CHECK-BE-NEXT:    stb r6, 6(r5)
+; CHECK-BE-NEXT:    rldicl r6, r4, 48, 56
 ; CHECK-BE-NEXT:    stb r6, 7(r5)
-; CHECK-BE-NEXT:    rldicl r3, r3, 48, 56
-; CHECK-BE-NEXT:    rldicl r6, r7, 56, 56
-; CHECK-BE-NEXT:    stb r4, 9(r5)
-; CHECK-BE-NEXT:    stb r3, 11(r5)
-; CHECK-BE-NEXT:    stb r6, 12(r5)
+; CHECK-BE-NEXT:    clrldi r6, r4, 56
+; CHECK-BE-NEXT:    stb r6, 9(r5)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test_13_consecutive_stores_of_bytes:
@@ -967,24 +967,24 @@ entry:
 define dso_local void @test_elements_from_three_vec(<4 x float> %a, <4 x float> %b, <4 x float> %c, ptr nocapture %d) local_unnamed_addr #0 {
 ; CHECK-LABEL: test_elements_from_three_vec:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li r3, 4
 ; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 3
 ; CHECK-NEXT:    xxsldwi vs1, vs36, vs36, 1
-; CHECK-NEXT:    li r3, 4
-; CHECK-NEXT:    li r4, 8
 ; CHECK-NEXT:    stxsiwx vs35, r9, r3
+; CHECK-NEXT:    li r3, 8
 ; CHECK-NEXT:    stfiwx f0, 0, r9
-; CHECK-NEXT:    stfiwx f1, r9, r4
+; CHECK-NEXT:    stfiwx f1, r9, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test_elements_from_three_vec:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 2
 ; CHECK-BE-NEXT:    xxsldwi vs1, vs35, vs35, 1
 ; CHECK-BE-NEXT:    li r3, 4
-; CHECK-BE-NEXT:    li r4, 8
-; CHECK-BE-NEXT:    stxsiwx vs36, r9, r4
+; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 2
 ; CHECK-BE-NEXT:    stfiwx f1, r9, r3
+; CHECK-BE-NEXT:    li r3, 8
 ; CHECK-BE-NEXT:    stfiwx f0, 0, r9
+; CHECK-BE-NEXT:    stxsiwx vs36, r9, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test_elements_from_three_vec:

diff  --git a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll
index dca85a6750adf98..b3d2457d31eebc6 100644
--- a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll
@@ -438,13 +438,13 @@ define fp128 @testNestedAggregate(ptr byval(%struct.MixedC) nocapture readonly a
 ;
 ; CHECK-P8-LABEL: testNestedAggregate:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r11, 32
 ; CHECK-P8-NEXT:    std r8, 72(r1)
 ; CHECK-P8-NEXT:    std r7, 64(r1)
+; CHECK-P8-NEXT:    li r7, 32
+; CHECK-P8-NEXT:    addi r8, r1, 32
 ; CHECK-P8-NEXT:    std r9, 80(r1)
 ; CHECK-P8-NEXT:    std r10, 88(r1)
-; CHECK-P8-NEXT:    addi r7, r1, 32
-; CHECK-P8-NEXT:    lxvd2x vs0, r7, r11
+; CHECK-P8-NEXT:    lxvd2x vs0, r8, r7
 ; CHECK-P8-NEXT:    std r3, 32(r1)
 ; CHECK-P8-NEXT:    std r4, 40(r1)
 ; CHECK-P8-NEXT:    std r5, 48(r1)
@@ -472,10 +472,10 @@ define fp128 @testUnion_01([1 x i128] %a.coerce) {
 ;
 ; CHECK-P8-LABEL: testUnion_01:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    addi r5, r1, -16
-; CHECK-P8-NEXT:    std r4, -8(r1)
 ; CHECK-P8-NEXT:    std r3, -16(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    addi r3, r1, -16
+; CHECK-P8-NEXT:    std r4, -8(r1)
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    blr
 
@@ -499,10 +499,10 @@ define fp128 @testUnion_02([1 x i128] %a.coerce) {
 ;
 ; CHECK-P8-LABEL: testUnion_02:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    addi r5, r1, -16
-; CHECK-P8-NEXT:    std r4, -8(r1)
 ; CHECK-P8-NEXT:    std r3, -16(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    addi r3, r1, -16
+; CHECK-P8-NEXT:    std r4, -8(r1)
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    blr
 
@@ -597,16 +597,16 @@ define fp128 @sum_float128(i32 signext %count, ...) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
-; CHECK-P8-NEXT:    addis r11, r2, .LCPI17_0@toc@ha
-; CHECK-P8-NEXT:    cmpwi r3, 0
 ; CHECK-P8-NEXT:    std r0, 80(r1)
 ; CHECK-P8-NEXT:    std r4, 104(r1)
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI17_0@toc@ha
+; CHECK-P8-NEXT:    cmpwi r3, 0
 ; CHECK-P8-NEXT:    std r5, 112(r1)
 ; CHECK-P8-NEXT:    std r6, 120(r1)
-; CHECK-P8-NEXT:    addi r11, r11, .LCPI17_0@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r11
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI17_0@toc@l
 ; CHECK-P8-NEXT:    std r7, 128(r1)
 ; CHECK-P8-NEXT:    std r8, 136(r1)
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    std r9, 144(r1)
 ; CHECK-P8-NEXT:    std r10, 152(r1)
 ; CHECK-P8-NEXT:    xxswapd v3, vs0

diff  --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll b/llvm/test/CodeGen/PowerPC/f128-arith.ll
index bc1c7399bef1308..98e114f101bdf42 100644
--- a/llvm/test/CodeGen/PowerPC/f128-arith.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll
@@ -558,9 +558,9 @@ define fp128 @qp_minnum(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl fminf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -601,9 +601,9 @@ define fp128 @qp_maxnum(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl fmaxf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -644,9 +644,9 @@ define fp128 @qp_pow(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl powf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -771,8 +771,7 @@ define dso_local void @qp_powi(ptr nocapture readonly %a, ptr nocapture readonly
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-P8-NEXT:    mr r5, r3
+; CHECK-P8-NEXT:    lwz r5, 0(r4)
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __powikf2
 ; CHECK-P8-NEXT:    nop
@@ -825,13 +824,13 @@ define fp128 @qp_frem() #0 {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a@toc@ha
-; CHECK-P8-NEXT:    addis r4, r2, b@toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a@toc@l
-; CHECK-P8-NEXT:    addi r4, r4, b@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, b@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl fmodf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -1284,11 +1283,11 @@ define dso_local void @qpFMA(ptr %a, ptr %b, ptr %c, ptr %res) {
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r6
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    xxswapd v4, vs0
 ; CHECK-P8-NEXT:    bl fmaf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2

diff  --git a/llvm/test/CodeGen/PowerPC/f128-bitcast.ll b/llvm/test/CodeGen/PowerPC/f128-bitcast.ll
index 5ea1ace91a394d3..ffbfbd0c64ff3f0 100644
--- a/llvm/test/CodeGen/PowerPC/f128-bitcast.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-bitcast.ll
@@ -75,8 +75,8 @@ define i64 @checkBitcast(fp128 %in, <2 x i64> %in2, ptr %out) local_unnamed_addr
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    vaddudm v2, v2, v3
 ; CHECK-P8-NEXT:    mffprd r3, f0
-; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
+; CHECK-P8-NEXT:    xxswapd vs1, v2
+; CHECK-P8-NEXT:    stxvd2x vs1, 0, r7
 ; CHECK-P8-NEXT:    blr
 entry:
   %0 = bitcast fp128 %in to <2 x i64>

diff  --git a/llvm/test/CodeGen/PowerPC/f128-compare.ll b/llvm/test/CodeGen/PowerPC/f128-compare.ll
index 8eb8ae64c0aa6bf..a03049a9945dc50 100644
--- a/llvm/test/CodeGen/PowerPC/f128-compare.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-compare.ll
@@ -32,13 +32,13 @@ define dso_local signext i32 @greater_qp() {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT:    addis r4, r2, b_qp@toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp@toc@l
-; CHECK-P8-NEXT:    addi r4, r4, b_qp@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, b_qp@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b_qp@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __gtkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    extsw r3, r3
@@ -80,13 +80,13 @@ define dso_local signext i32 @less_qp() {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT:    addis r4, r2, b_qp@toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp@toc@l
-; CHECK-P8-NEXT:    addi r4, r4, b_qp@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, b_qp@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b_qp@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __ltkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -126,13 +126,13 @@ define dso_local signext i32 @greater_eq_qp() {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT:    addis r4, r2, b_qp@toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp@toc@l
-; CHECK-P8-NEXT:    addi r4, r4, b_qp@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, b_qp@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b_qp@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __gekf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -173,13 +173,13 @@ define dso_local signext i32 @less_eq_qp() {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT:    addis r4, r2, b_qp@toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp@toc@l
-; CHECK-P8-NEXT:    addi r4, r4, b_qp@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, b_qp@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b_qp@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __lekf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    extsw r3, r3
@@ -222,13 +222,13 @@ define dso_local signext i32 @equal_qp() {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT:    addis r4, r2, b_qp@toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp@toc@l
-; CHECK-P8-NEXT:    addi r4, r4, b_qp@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, b_qp@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b_qp@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __eqkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    cntlzw r3, r3
@@ -268,13 +268,13 @@ define dso_local signext i32 @not_greater_qp() {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT:    addis r4, r2, b_qp@toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp@toc@l
-; CHECK-P8-NEXT:    addi r4, r4, b_qp@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, b_qp@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b_qp@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __gtkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    extsw r3, r3
@@ -317,13 +317,13 @@ define dso_local signext i32 @not_less_qp() {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT:    addis r4, r2, b_qp@toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp@toc@l
-; CHECK-P8-NEXT:    addi r4, r4, b_qp@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, b_qp@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b_qp@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __ltkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -365,13 +365,13 @@ define dso_local signext i32 @not_greater_eq_qp() {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT:    addis r4, r2, b_qp@toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp@toc@l
-; CHECK-P8-NEXT:    addi r4, r4, b_qp@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, b_qp@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b_qp@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __gekf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -412,13 +412,13 @@ define dso_local signext i32 @not_less_eq_qp() {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT:    addis r4, r2, b_qp@toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp@toc@l
-; CHECK-P8-NEXT:    addi r4, r4, b_qp@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, b_qp@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b_qp@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __lekf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    extsw r3, r3
@@ -460,13 +460,13 @@ define dso_local signext i32 @not_equal_qp() {
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT:    addis r4, r2, b_qp@toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp@toc@l
-; CHECK-P8-NEXT:    addi r4, r4, b_qp@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, b_qp@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b_qp@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __nekf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    cntlzw r3, r3
@@ -510,19 +510,19 @@ define fp128 @greater_sel_qp() {
 ; CHECK-P8-NEXT:    .cfi_offset v30, -32
 ; CHECK-P8-NEXT:    .cfi_offset v31, -16
 ; CHECK-P8-NEXT:    li r3, 48
-; CHECK-P8-NEXT:    addis r4, r2, b_qp@toc@ha
 ; CHECK-P8-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    li r3, 64
-; CHECK-P8-NEXT:    addi r4, r4, b_qp@toc@l
 ; CHECK-P8-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    xxswapd v30, vs1
+; CHECK-P8-NEXT:    addis r3, r2, b_qp@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b_qp@toc@l
 ; CHECK-P8-NEXT:    xxswapd v31, vs0
-; CHECK-P8-NEXT:    vmr v3, v30
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    vmr v2, v31
+; CHECK-P8-NEXT:    xxswapd v30, vs0
+; CHECK-P8-NEXT:    vmr v3, v30
 ; CHECK-P8-NEXT:    bl __gtkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    cmpwi r3, 0
@@ -573,19 +573,19 @@ define fp128 @less_sel_qp() {
 ; CHECK-P8-NEXT:    .cfi_offset v30, -32
 ; CHECK-P8-NEXT:    .cfi_offset v31, -16
 ; CHECK-P8-NEXT:    li r3, 48
-; CHECK-P8-NEXT:    addis r4, r2, b_qp@toc@ha
 ; CHECK-P8-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    li r3, 64
-; CHECK-P8-NEXT:    addi r4, r4, b_qp@toc@l
 ; CHECK-P8-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    xxswapd v30, vs1
+; CHECK-P8-NEXT:    addis r3, r2, b_qp@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b_qp@toc@l
 ; CHECK-P8-NEXT:    xxswapd v31, vs0
-; CHECK-P8-NEXT:    vmr v3, v30
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    vmr v2, v31
+; CHECK-P8-NEXT:    xxswapd v30, vs0
+; CHECK-P8-NEXT:    vmr v3, v30
 ; CHECK-P8-NEXT:    bl __ltkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    cmpwi r3, 0
@@ -637,19 +637,19 @@ define fp128 @greater_eq_sel_qp() {
 ; CHECK-P8-NEXT:    .cfi_offset v30, -32
 ; CHECK-P8-NEXT:    .cfi_offset v31, -16
 ; CHECK-P8-NEXT:    li r3, 48
-; CHECK-P8-NEXT:    addis r4, r2, b_qp@toc@ha
 ; CHECK-P8-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    li r3, 64
-; CHECK-P8-NEXT:    addi r4, r4, b_qp@toc@l
 ; CHECK-P8-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    xxswapd v30, vs1
+; CHECK-P8-NEXT:    addis r3, r2, b_qp@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b_qp@toc@l
 ; CHECK-P8-NEXT:    xxswapd v31, vs0
-; CHECK-P8-NEXT:    vmr v3, v30
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    vmr v2, v31
+; CHECK-P8-NEXT:    xxswapd v30, vs0
+; CHECK-P8-NEXT:    vmr v3, v30
 ; CHECK-P8-NEXT:    bl __gekf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    cmpwi r3, -1
@@ -701,19 +701,19 @@ define fp128 @less_eq_sel_qp() {
 ; CHECK-P8-NEXT:    .cfi_offset v30, -32
 ; CHECK-P8-NEXT:    .cfi_offset v31, -16
 ; CHECK-P8-NEXT:    li r3, 48
-; CHECK-P8-NEXT:    addis r4, r2, b_qp@toc@ha
 ; CHECK-P8-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    li r3, 64
-; CHECK-P8-NEXT:    addi r4, r4, b_qp@toc@l
 ; CHECK-P8-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    xxswapd v30, vs1
+; CHECK-P8-NEXT:    addis r3, r2, b_qp@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b_qp@toc@l
 ; CHECK-P8-NEXT:    xxswapd v31, vs0
-; CHECK-P8-NEXT:    vmr v3, v30
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    vmr v2, v31
+; CHECK-P8-NEXT:    xxswapd v30, vs0
+; CHECK-P8-NEXT:    vmr v3, v30
 ; CHECK-P8-NEXT:    bl __lekf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    cmpwi r3, 1
@@ -764,19 +764,19 @@ define fp128 @equal_sel_qp() {
 ; CHECK-P8-NEXT:    .cfi_offset v30, -32
 ; CHECK-P8-NEXT:    .cfi_offset v31, -16
 ; CHECK-P8-NEXT:    li r3, 48
-; CHECK-P8-NEXT:    addis r4, r2, b_qp@toc@ha
 ; CHECK-P8-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    li r3, 64
-; CHECK-P8-NEXT:    addi r4, r4, b_qp@toc@l
 ; CHECK-P8-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    addi r3, r3, a_qp@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    xxswapd v30, vs1
+; CHECK-P8-NEXT:    addis r3, r2, b_qp@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, b_qp@toc@l
 ; CHECK-P8-NEXT:    xxswapd v31, vs0
-; CHECK-P8-NEXT:    vmr v3, v30
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    vmr v2, v31
+; CHECK-P8-NEXT:    xxswapd v30, vs0
+; CHECK-P8-NEXT:    vmr v3, v30
 ; CHECK-P8-NEXT:    bl __eqkf2
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    cmplwi r3, 0

diff  --git a/llvm/test/CodeGen/PowerPC/f128-conv.ll b/llvm/test/CodeGen/PowerPC/f128-conv.ll
index 9802de15dcde645..d8eed1fb4092cee 100644
--- a/llvm/test/CodeGen/PowerPC/f128-conv.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-conv.ll
@@ -82,8 +82,8 @@ define void @sdwConv2qp_01(ptr nocapture %a, i128 %b) {
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    mr r3, r4
-; CHECK-P8-NEXT:    mr r4, r5
 ; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    mr r4, r5
 ; CHECK-P8-NEXT:    bl __floattikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -119,12 +119,11 @@ define void @sdwConv2qp_02(ptr nocapture %a) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
-; CHECK-P8-NEXT:    ld r4, 16(r4)
-; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    ld r3, 16(r3)
 ; CHECK-P8-NEXT:    bl __floatdikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -162,8 +161,7 @@ define void @sdwConv2qp_03(ptr nocapture %a, ptr nocapture readonly %b) {
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, 0(r4)
-; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    ld r3, 0(r4)
 ; CHECK-P8-NEXT:    bl __floatdikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -204,9 +202,9 @@ define void @sdwConv2qp_04(ptr nocapture %a, i1 %b) {
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    andi. r3, r4, 1
+; CHECK-P8-NEXT:    li r3, 0
 ; CHECK-P8-NEXT:    li r4, -1
 ; CHECK-P8-NEXT:    std r0, 64(r1)
-; CHECK-P8-NEXT:    li r3, 0
 ; CHECK-P8-NEXT:    iselgt r3, r4, r3
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
@@ -293,8 +291,8 @@ define void @udwConv2qp_01(ptr nocapture %a, i128 %b) {
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    mr r3, r4
-; CHECK-P8-NEXT:    mr r4, r5
 ; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    mr r4, r5
 ; CHECK-P8-NEXT:    bl __floatuntikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -330,12 +328,11 @@ define void @udwConv2qp_02(ptr nocapture %a) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC1@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC1@toc@l(r4)
-; CHECK-P8-NEXT:    ld r4, 32(r4)
-; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC1@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC1@toc@l(r3)
+; CHECK-P8-NEXT:    ld r3, 32(r3)
 ; CHECK-P8-NEXT:    bl __floatundikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -373,8 +370,7 @@ define void @udwConv2qp_03(ptr nocapture %a, ptr nocapture readonly %b) {
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, 0(r4)
-; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    ld r3, 0(r4)
 ; CHECK-P8-NEXT:    bl __floatundikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -448,12 +444,11 @@ define ptr @sdwConv2qp_testXForm(ptr returned %sink,
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lis r5, 1
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ori r5, r5, 7797
-; CHECK-P8-NEXT:    ldx r4, r4, r5
-; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    lis r3, 1
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ori r3, r3, 7797
+; CHECK-P8-NEXT:    ldx r3, r4, r3
 ; CHECK-P8-NEXT:    bl __floatdikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -493,12 +488,11 @@ define ptr @udwConv2qp_testXForm(ptr returned %sink,
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lis r5, 1
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ori r5, r5, 7797
-; CHECK-P8-NEXT:    ldx r4, r4, r5
-; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    lis r3, 1
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ori r3, r3, 7797
+; CHECK-P8-NEXT:    ldx r3, r4, r3
 ; CHECK-P8-NEXT:    bl __floatundikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -574,8 +568,7 @@ define void @swConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) {
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    lwa r4, 0(r4)
-; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    lwa r3, 0(r4)
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -613,12 +606,11 @@ define void @swConv2qp_03(ptr nocapture %a) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC2@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC2@toc@l(r4)
-; CHECK-P8-NEXT:    lwa r4, 12(r4)
-; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC2@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC2@toc@l(r3)
+; CHECK-P8-NEXT:    lwa r3, 12(r3)
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -692,8 +684,7 @@ define void @uwConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) {
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    lwz r4, 0(r4)
-; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    lwz r3, 0(r4)
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -731,12 +722,11 @@ define void @uwConv2qp_03(ptr nocapture %a) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC3@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC3@toc@l(r4)
-; CHECK-P8-NEXT:    lwz r4, 12(r4)
-; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC3@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC3@toc@l(r3)
+; CHECK-P8-NEXT:    lwz r3, 12(r3)
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -854,8 +844,7 @@ define void @uhwConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) {
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    lhz r4, 0(r4)
-; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    lhz r3, 0(r4)
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -893,12 +882,11 @@ define void @uhwConv2qp_03(ptr nocapture %a) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC4@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC4@toc@l(r4)
-; CHECK-P8-NEXT:    lhz r4, 6(r4)
-; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC4@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC4@toc@l(r3)
+; CHECK-P8-NEXT:    lhz r3, 6(r3)
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -1017,8 +1005,7 @@ define void @ubConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) {
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    lbz r4, 0(r4)
-; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    lbz r3, 0(r4)
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -1056,12 +1043,11 @@ define void @ubConv2qp_03(ptr nocapture %a) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC5@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC5@toc@l(r4)
-; CHECK-P8-NEXT:    lbz r4, 2(r4)
-; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC5@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC5@toc@l(r3)
+; CHECK-P8-NEXT:    lbz r3, 2(r3)
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -1183,11 +1169,11 @@ define void @qpConv2dp_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC6@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC6@toc@l(r4)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC6@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC6@toc@l(r3)
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __trunckfdf2
 ; CHECK-P8-NEXT:    nop
@@ -1226,12 +1212,12 @@ define void @qpConv2dp_03(ptr nocapture %res, i32 signext %idx) {
 ; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
-; CHECK-P8-NEXT:    mr r30, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LC7@toc@ha
-; CHECK-P8-NEXT:    std r0, 80(r1)
 ; CHECK-P8-NEXT:    mr r29, r3
-; CHECK-P8-NEXT:    ld r4, .LC7@toc@l(r4)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC7@toc@ha
+; CHECK-P8-NEXT:    std r0, 80(r1)
+; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    ld r3, .LC7@toc@l(r3)
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __trunckfdf2
 ; CHECK-P8-NEXT:    nop
@@ -1274,9 +1260,9 @@ define void @qpConv2dp_04(ptr nocapture readonly %a, ptr nocapture readonly %b,
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __trunckfdf2
@@ -1348,11 +1334,11 @@ define void @qpConv2sp_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC6@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC6@toc@l(r4)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC6@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC6@toc@l(r3)
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __trunckfsf2
 ; CHECK-P8-NEXT:    nop
@@ -1392,13 +1378,13 @@ define void @qpConv2sp_03(ptr nocapture %res, i32 signext %idx) {
 ; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
-; CHECK-P8-NEXT:    mr r30, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LC7@toc@ha
-; CHECK-P8-NEXT:    std r0, 80(r1)
 ; CHECK-P8-NEXT:    mr r29, r3
-; CHECK-P8-NEXT:    ld r4, .LC7@toc@l(r4)
-; CHECK-P8-NEXT:    addi r4, r4, 48
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC7@toc@ha
+; CHECK-P8-NEXT:    std r0, 80(r1)
+; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    ld r3, .LC7@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 48
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __trunckfsf2
 ; CHECK-P8-NEXT:    nop
@@ -1442,9 +1428,9 @@ define void @qpConv2sp_04(ptr nocapture readonly %a, ptr nocapture readonly %b,
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __trunckfsf2

diff  --git a/llvm/test/CodeGen/PowerPC/f128-fma.ll b/llvm/test/CodeGen/PowerPC/f128-fma.ll
index 2bfaecf12009f82..d830727e78fbf1e 100644
--- a/llvm/test/CodeGen/PowerPC/f128-fma.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-fma.ll
@@ -25,15 +25,15 @@ define void @qpFmadd(ptr nocapture readonly %a, ptr nocapture %b,
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    li r7, 48
 ; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    mr r30, r6
-; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
 ; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xxswapd v31, vs2
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    xxswapd v31, vs0
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v31
@@ -79,16 +79,16 @@ define void @qpFmadd_02(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r7, 48
 ; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    mr r30, r6
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
-; CHECK-P8-NEXT:    xxswapd v2, vs1
-; CHECK-P8-NEXT:    xxswapd v3, vs2
 ; CHECK-P8-NEXT:    xxswapd v31, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v2
@@ -141,9 +141,9 @@ define void @qpFmadd_03(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    mr r30, r6
 ; CHECK-P8-NEXT:    mr r29, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
@@ -190,16 +190,16 @@ define void @qpFnmadd(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r7, 64
 ; CHECK-P8-NEXT:    std r30, 80(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    mr r30, r6
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
-; CHECK-P8-NEXT:    xxswapd v2, vs1
-; CHECK-P8-NEXT:    xxswapd v3, vs2
 ; CHECK-P8-NEXT:    xxswapd v31, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v2
@@ -259,9 +259,9 @@ define void @qpFnmadd_02(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    mr r30, r6
 ; CHECK-P8-NEXT:    mr r29, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
@@ -315,16 +315,16 @@ define void @qpFmsub(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r7, 48
 ; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    mr r30, r6
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
-; CHECK-P8-NEXT:    xxswapd v2, vs1
-; CHECK-P8-NEXT:    xxswapd v3, vs2
 ; CHECK-P8-NEXT:    xxswapd v31, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v2
@@ -377,9 +377,9 @@ define void @qpFmsub_02(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    mr r30, r6
 ; CHECK-P8-NEXT:    mr r29, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
@@ -427,16 +427,16 @@ define void @qpFnmsub(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r7, 64
 ; CHECK-P8-NEXT:    std r30, 80(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    mr r30, r6
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
-; CHECK-P8-NEXT:    xxswapd v2, vs1
-; CHECK-P8-NEXT:    xxswapd v3, vs2
 ; CHECK-P8-NEXT:    xxswapd v31, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    vmr v3, v2
@@ -496,9 +496,9 @@ define void @qpFnmsub_02(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    mr r30, r6
 ; CHECK-P8-NEXT:    mr r29, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29

diff  --git a/llvm/test/CodeGen/PowerPC/f128-passByValue.ll b/llvm/test/CodeGen/PowerPC/f128-passByValue.ll
index dd0493a12dcfd12..04a7d78d714cc5b 100644
--- a/llvm/test/CodeGen/PowerPC/f128-passByValue.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-passByValue.ll
@@ -137,11 +137,11 @@ define fp128 @fp128Array(ptr nocapture readonly %farray,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    sldi r4, r4, 4
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    add r4, r3, r4
-; CHECK-P8-NEXT:    addi r4, r4, -16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    add r3, r3, r4
+; CHECK-P8-NEXT:    addi r3, r3, -16
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi r1, r1, 32
@@ -436,16 +436,15 @@ define fp128 @mixParam_02(fp128 %p1, double %p2, ptr nocapture %p3,
 ; CHECK-P8-NEXT:    add r4, r7, r9
 ; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    stfd f31, 72(r1) # 8-byte Folded Spill
+; CHECK-P8-NEXT:    fmr f31, f1
 ; CHECK-P8-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-P8-NEXT:    lwz r3, 176(r1)
 ; CHECK-P8-NEXT:    add r4, r4, r10
-; CHECK-P8-NEXT:    fmr f31, f1
 ; CHECK-P8-NEXT:    add r3, r4, r3
 ; CHECK-P8-NEXT:    clrldi r3, r3, 32
 ; CHECK-P8-NEXT:    std r3, 0(r6)
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r8
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxlor v2, vs0, vs0
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    fmr f1, f31
@@ -506,18 +505,17 @@ define fastcc fp128 @mixParam_02f(fp128 %p1, double %p2, ptr nocapture %p3,
 ; CHECK-P8-NEXT:    .cfi_offset f31, -8
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
 ; CHECK-P8-NEXT:    add r4, r4, r6
-; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    li r9, 48
+; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    stfd f31, 72(r1) # 8-byte Folded Spill
+; CHECK-P8-NEXT:    fmr f31, f1
 ; CHECK-P8-NEXT:    add r4, r4, r7
 ; CHECK-P8-NEXT:    stvx v31, r1, r9 # 16-byte Folded Spill
-; CHECK-P8-NEXT:    fmr f31, f1
 ; CHECK-P8-NEXT:    add r4, r4, r8
 ; CHECK-P8-NEXT:    clrldi r4, r4, 32
 ; CHECK-P8-NEXT:    std r4, 0(r3)
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxlor v2, vs0, vs0
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    fmr f1, f31
@@ -577,15 +575,15 @@ define void @mixParam_03(fp128 %f1, ptr nocapture %d1, <4 x i32> %vec1,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
-; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    xxswapd vs1, v3
 ; CHECK-P8-NEXT:    ld r4, 184(r1)
 ; CHECK-P8-NEXT:    li r3, 48
+; CHECK-P8-NEXT:    xxswapd vs0, v2
+; CHECK-P8-NEXT:    xxswapd vs1, v3
 ; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
-; CHECK-P8-NEXT:    mr r3, r10
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r9
+; CHECK-P8-NEXT:    mr r3, r10
 ; CHECK-P8-NEXT:    stxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r9
 ; CHECK-P8-NEXT:    xxswapd v31, vs0
@@ -641,9 +639,9 @@ define fastcc void @mixParam_03f(fp128 %f1, ptr nocapture %d1, <4 x i32> %vec1,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    .cfi_offset v31, -32
+; CHECK-P8-NEXT:    li r6, 48
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    xxswapd vs1, v3
-; CHECK-P8-NEXT:    li r6, 48
 ; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    mr r3, r5

diff  --git a/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll b/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll
index ae6222b1a8a0ba3..ca8911e434e4a6e 100644
--- a/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll
@@ -62,12 +62,12 @@ define void @qpConv2sdw_02(ptr nocapture %res) local_unnamed_addr #1 {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
-; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 32
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfdi
 ; CHECK-P8-NEXT:    nop
@@ -107,13 +107,13 @@ define i64 @qpConv2sdw_03(ptr nocapture readonly %a) {
 ; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 16
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfdi
@@ -155,9 +155,9 @@ define void @qpConv2sdw_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfdi
@@ -201,13 +201,13 @@ define void @qpConv2sdw_testXForm(ptr nocapture %res, i32 signext %idx) {
 ; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
-; CHECK-P8-NEXT:    mr r30, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT:    std r0, 80(r1)
 ; CHECK-P8-NEXT:    mr r29, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
-; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 80(r1)
+; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 32
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfdi
 ; CHECK-P8-NEXT:    nop
@@ -281,12 +281,12 @@ define void @qpConv2udw_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
-; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 32
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixunskfdi
 ; CHECK-P8-NEXT:    nop
@@ -326,13 +326,13 @@ define i64 @qpConv2udw_03(ptr nocapture readonly %a) {
 ; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 16
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixunskfdi
@@ -374,9 +374,9 @@ define void @qpConv2udw_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixunskfdi
@@ -420,12 +420,12 @@ define void @qpConv2udw_testXForm(ptr nocapture %res, i32 signext %idx) {
 ; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
-; CHECK-P8-NEXT:    mr r30, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT:    std r0, 80(r1)
 ; CHECK-P8-NEXT:    mr r29, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 80(r1)
+; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixunskfdi
 ; CHECK-P8-NEXT:    nop
@@ -499,12 +499,12 @@ define void @qpConv2sw_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
-; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 32
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
@@ -545,13 +545,13 @@ define signext i32 @qpConv2sw_03(ptr nocapture readonly %a)  {
 ; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 16
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -594,9 +594,9 @@ define void @qpConv2sw_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -668,12 +668,12 @@ define void @qpConv2uw_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
-; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 32
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixunskfsi
 ; CHECK-P8-NEXT:    nop
@@ -713,13 +713,13 @@ define zeroext i32 @qpConv2uw_03(ptr nocapture readonly %a)  {
 ; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 16
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixunskfsi
@@ -761,9 +761,9 @@ define void @qpConv2uw_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixunskfsi
@@ -837,12 +837,12 @@ define void @qpConv2shw_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
-; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 32
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
@@ -882,13 +882,13 @@ define signext i16 @qpConv2shw_03(ptr nocapture readonly %a) {
 ; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 16
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -930,9 +930,9 @@ define void @qpConv2shw_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -1002,12 +1002,12 @@ define void @qpConv2uhw_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
-; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 32
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
@@ -1046,13 +1046,13 @@ define zeroext i16 @qpConv2uhw_03(ptr nocapture readonly %a) {
 ; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 16
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -1093,9 +1093,9 @@ define void @qpConv2uhw_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -1167,12 +1167,12 @@ define void @qpConv2sb_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
-; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 32
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
@@ -1212,13 +1212,13 @@ define signext i8 @qpConv2sb_03(ptr nocapture readonly %a) {
 ; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 16
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -1260,9 +1260,9 @@ define void @qpConv2sb_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -1332,12 +1332,12 @@ define void @qpConv2ub_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
-; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
-; CHECK-P8-NEXT:    addi r4, r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 32
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __fixkfsi
 ; CHECK-P8-NEXT:    nop
@@ -1376,13 +1376,13 @@ define zeroext i8 @qpConv2ub_03(ptr nocapture readonly %a) {
 ; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
-; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-P8-NEXT:    addi r3, r3, 16
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    addi r4, r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -1423,9 +1423,9 @@ define void @qpConv2ub_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    bl __addkf3
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    bl __fixkfsi

diff  --git a/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll b/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll
index cbae9924f7bf84f..f7ea279d06e9880 100644
--- a/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll
@@ -22,10 +22,10 @@ define float @can_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4
 define float @no_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) {
 ; CHECK-LABEL: no_fma_with_fewer_uses:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xsmulsp 0, 3, 4
-; CHECK-NEXT:    xsmulsp 3, 1, 2
-; CHECK-NEXT:    xsmaddasp 0, 1, 2
-; CHECK-NEXT:    xsdivsp 1, 3, 0
+; CHECK-NEXT:    xsmulsp 3, 3, 4
+; CHECK-NEXT:    xsmulsp 0, 1, 2
+; CHECK-NEXT:    xsmaddasp 3, 1, 2
+; CHECK-NEXT:    xsdivsp 1, 0, 3
 ; CHECK-NEXT:    blr
   %mul1 = fmul contract float %f1, %f2
   %mul2 = fmul float %f3, %f4

diff  --git a/llvm/test/CodeGen/PowerPC/fma-combine.ll b/llvm/test/CodeGen/PowerPC/fma-combine.ll
index 5423ecf79826633..b39c9327bc8ab2c 100644
--- a/llvm/test/CodeGen/PowerPC/fma-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-combine.ll
@@ -60,36 +60,34 @@ define dso_local double @fma_combine_two_uses(double %a, double %b, double %c) {
 ; CHECK-FAST:       # %bb.0: # %entry
 ; CHECK-FAST-NEXT:    xsnegdp 0, 1
 ; CHECK-FAST-NEXT:    addis 3, 2, v@toc@ha
-; CHECK-FAST-NEXT:    addis 4, 2, z@toc@ha
 ; CHECK-FAST-NEXT:    xsnmaddadp 1, 3, 2
-; CHECK-FAST-NEXT:    xsnegdp 2, 3
 ; CHECK-FAST-NEXT:    stfd 0, v@toc@l(3)
-; CHECK-FAST-NEXT:    stfd 2, z@toc@l(4)
+; CHECK-FAST-NEXT:    xsnegdp 0, 3
+; CHECK-FAST-NEXT:    addis 3, 2, z@toc@ha
+; CHECK-FAST-NEXT:    stfd 0, z@toc@l(3)
 ; CHECK-FAST-NEXT:    blr
 ;
 ; CHECK-FAST-NOVSX-LABEL: fma_combine_two_uses:
 ; CHECK-FAST-NOVSX:       # %bb.0: # %entry
-; CHECK-FAST-NOVSX-NEXT:    fnmadd 0, 3, 2, 1
-; CHECK-FAST-NOVSX-NEXT:    fneg 2, 1
+; CHECK-FAST-NOVSX-NEXT:    fneg 0, 1
 ; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, v@toc@ha
-; CHECK-FAST-NOVSX-NEXT:    addis 4, 2, z@toc@ha
-; CHECK-FAST-NOVSX-NEXT:    fneg 3, 3
-; CHECK-FAST-NOVSX-NEXT:    fmr 1, 0
-; CHECK-FAST-NOVSX-NEXT:    stfd 2, v@toc@l(3)
-; CHECK-FAST-NOVSX-NEXT:    stfd 3, z@toc@l(4)
+; CHECK-FAST-NOVSX-NEXT:    fnmadd 1, 3, 2, 1
+; CHECK-FAST-NOVSX-NEXT:    stfd 0, v@toc@l(3)
+; CHECK-FAST-NOVSX-NEXT:    fneg 0, 3
+; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, z@toc@ha
+; CHECK-FAST-NOVSX-NEXT:    stfd 0, z@toc@l(3)
 ; CHECK-FAST-NOVSX-NEXT:    blr
 ;
 ; CHECK-LABEL: fma_combine_two_uses:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xsnegdp 3, 3
+; CHECK-NEXT:    xsnegdp 0, 1
 ; CHECK-NEXT:    addis 3, 2, v@toc@ha
-; CHECK-NEXT:    addis 4, 2, z@toc@ha
-; CHECK-NEXT:    xsmuldp 0, 3, 2
-; CHECK-NEXT:    stfd 3, z@toc@l(4)
-; CHECK-NEXT:    xsnegdp 2, 1
-; CHECK-NEXT:    xssubdp 0, 0, 1
-; CHECK-NEXT:    stfd 2, v@toc@l(3)
-; CHECK-NEXT:    fmr 1, 0
+; CHECK-NEXT:    stfd 0, v@toc@l(3)
+; CHECK-NEXT:    xsnegdp 0, 3
+; CHECK-NEXT:    addis 3, 2, z@toc@ha
+; CHECK-NEXT:    stfd 0, z@toc@l(3)
+; CHECK-NEXT:    xsmuldp 0, 0, 2
+; CHECK-NEXT:    xssubdp 1, 0, 1
 ; CHECK-NEXT:    blr
 entry:
   %fneg = fneg double %a
@@ -105,29 +103,27 @@ define dso_local double @fma_combine_one_use(double %a, double %b, double %c) {
 ; CHECK-FAST-LABEL: fma_combine_one_use:
 ; CHECK-FAST:       # %bb.0: # %entry
 ; CHECK-FAST-NEXT:    xsnegdp 0, 1
-; CHECK-FAST-NEXT:    addis 3, 2, v@toc@ha
 ; CHECK-FAST-NEXT:    xsnmaddadp 1, 3, 2
+; CHECK-FAST-NEXT:    addis 3, 2, v@toc@ha
 ; CHECK-FAST-NEXT:    stfd 0, v@toc@l(3)
 ; CHECK-FAST-NEXT:    blr
 ;
 ; CHECK-FAST-NOVSX-LABEL: fma_combine_one_use:
 ; CHECK-FAST-NOVSX:       # %bb.0: # %entry
-; CHECK-FAST-NOVSX-NEXT:    fnmadd 0, 3, 2, 1
-; CHECK-FAST-NOVSX-NEXT:    fneg 2, 1
+; CHECK-FAST-NOVSX-NEXT:    fneg 0, 1
+; CHECK-FAST-NOVSX-NEXT:    fnmadd 1, 3, 2, 1
 ; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, v@toc@ha
-; CHECK-FAST-NOVSX-NEXT:    fmr 1, 0
-; CHECK-FAST-NOVSX-NEXT:    stfd 2, v@toc@l(3)
+; CHECK-FAST-NOVSX-NEXT:    stfd 0, v@toc@l(3)
 ; CHECK-FAST-NOVSX-NEXT:    blr
 ;
 ; CHECK-LABEL: fma_combine_one_use:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xsnegdp 0, 3
+; CHECK-NEXT:    xsnegdp 0, 1
 ; CHECK-NEXT:    addis 3, 2, v@toc@ha
+; CHECK-NEXT:    stfd 0, v@toc@l(3)
+; CHECK-NEXT:    xsnegdp 0, 3
 ; CHECK-NEXT:    xsmuldp 0, 0, 2
-; CHECK-NEXT:    xsnegdp 2, 1
-; CHECK-NEXT:    xssubdp 0, 0, 1
-; CHECK-NEXT:    stfd 2, v@toc@l(3)
-; CHECK-NEXT:    fmr 1, 0
+; CHECK-NEXT:    xssubdp 1, 0, 1
 ; CHECK-NEXT:    blr
 entry:
   %fneg = fneg double %a
@@ -143,43 +139,43 @@ define dso_local float @fma_combine_no_ice() {
 ; CHECK-FAST:       # %bb.0:
 ; CHECK-FAST-NEXT:    vspltisw 2, 1
 ; CHECK-FAST-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; CHECK-FAST-NEXT:    xvcvsxwdp 3, 34
+; CHECK-FAST-NEXT:    lfs 0, .LCPI4_0@toc@l(3)
 ; CHECK-FAST-NEXT:    lfs 2, 0(3)
-; CHECK-FAST-NEXT:    lfs 1, .LCPI4_0@toc@l(3)
 ; CHECK-FAST-NEXT:    addis 3, 2, .LCPI4_1@toc@ha
-; CHECK-FAST-NEXT:    xvcvsxwdp 0, 34
-; CHECK-FAST-NEXT:    xsmaddasp 0, 2, 1
 ; CHECK-FAST-NEXT:    lfs 1, .LCPI4_1@toc@l(3)
-; CHECK-FAST-NEXT:    xsmaddasp 1, 2, 0
-; CHECK-FAST-NEXT:    xsnmsubasp 1, 0, 2
+; CHECK-FAST-NEXT:    xsmaddasp 3, 2, 0
+; CHECK-FAST-NEXT:    xsmaddasp 1, 2, 3
+; CHECK-FAST-NEXT:    xsnmsubasp 1, 3, 2
 ; CHECK-FAST-NEXT:    blr
 ;
 ; CHECK-FAST-NOVSX-LABEL: fma_combine_no_ice:
 ; CHECK-FAST-NOVSX:       # %bb.0:
+; CHECK-FAST-NOVSX-NEXT:    lfs 0, 0(3)
 ; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
-; CHECK-FAST-NOVSX-NEXT:    lfs 0, .LCPI4_0@toc@l(3)
+; CHECK-FAST-NOVSX-NEXT:    lfs 1, .LCPI4_0@toc@l(3)
 ; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, .LCPI4_1@toc@ha
-; CHECK-FAST-NOVSX-NEXT:    lfs 1, 0(3)
 ; CHECK-FAST-NOVSX-NEXT:    lfs 2, .LCPI4_1@toc@l(3)
 ; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, .LCPI4_2@toc@ha
-; CHECK-FAST-NOVSX-NEXT:    fmadds 0, 1, 2, 0
+; CHECK-FAST-NOVSX-NEXT:    fmadds 1, 0, 2, 1
 ; CHECK-FAST-NOVSX-NEXT:    lfs 2, .LCPI4_2@toc@l(3)
-; CHECK-FAST-NOVSX-NEXT:    fmadds 2, 1, 0, 2
-; CHECK-FAST-NOVSX-NEXT:    fnmsubs 1, 0, 1, 2
+; CHECK-FAST-NOVSX-NEXT:    fmadds 2, 0, 1, 2
+; CHECK-FAST-NOVSX-NEXT:    fnmsubs 1, 1, 0, 2
 ; CHECK-FAST-NOVSX-NEXT:    blr
 ;
 ; CHECK-LABEL: fma_combine_no_ice:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vspltisw 2, 1
 ; CHECK-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; CHECK-NEXT:    xvcvsxwdp 3, 34
+; CHECK-NEXT:    lfs 0, .LCPI4_0@toc@l(3)
 ; CHECK-NEXT:    lfs 2, 0(3)
-; CHECK-NEXT:    lfs 3, .LCPI4_0@toc@l(3)
 ; CHECK-NEXT:    addis 3, 2, .LCPI4_1@toc@ha
 ; CHECK-NEXT:    lfs 1, .LCPI4_1@toc@l(3)
-; CHECK-NEXT:    xvcvsxwdp 0, 34
-; CHECK-NEXT:    fmr 4, 0
-; CHECK-NEXT:    xsmaddasp 0, 2, 3
-; CHECK-NEXT:    xsnmaddasp 4, 2, 3
-; CHECK-NEXT:    xsmaddasp 1, 2, 0
+; CHECK-NEXT:    fmr 4, 3
+; CHECK-NEXT:    xsmaddasp 3, 2, 0
+; CHECK-NEXT:    xsnmaddasp 4, 2, 0
+; CHECK-NEXT:    xsmaddasp 1, 2, 3
 ; CHECK-NEXT:    xsmaddasp 1, 4, 2
 ; CHECK-NEXT:    blr
   %tmp = load float, ptr undef, align 4
@@ -203,21 +199,21 @@ define dso_local double @getNegatedExpression_crash(double %x, double %y) {
 ; CHECK-FAST:       # %bb.0:
 ; CHECK-FAST-NEXT:    vspltisw 2, -1
 ; CHECK-FAST-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
-; CHECK-FAST-NEXT:    lfs 4, .LCPI5_0@toc@l(3)
-; CHECK-FAST-NEXT:    xvcvsxwdp 3, 34
-; CHECK-FAST-NEXT:    xssubdp 0, 1, 3
-; CHECK-FAST-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
-; CHECK-FAST-NEXT:    xsmaddadp 3, 1, 4
-; CHECK-FAST-NEXT:    xsmaddadp 0, 3, 2
+; CHECK-FAST-NEXT:    xvcvsxwdp 4, 34
+; CHECK-FAST-NEXT:    lfs 3, .LCPI5_0@toc@l(3)
+; CHECK-FAST-NEXT:    xssubdp 0, 1, 4
+; CHECK-FAST-NEXT:    # kill: def $f4 killed $f4 killed $vsl4
+; CHECK-FAST-NEXT:    xsmaddadp 4, 1, 3
+; CHECK-FAST-NEXT:    xsmaddadp 0, 4, 2
 ; CHECK-FAST-NEXT:    fmr 1, 0
 ; CHECK-FAST-NEXT:    blr
 ;
 ; CHECK-FAST-NOVSX-LABEL: getNegatedExpression_crash:
 ; CHECK-FAST-NOVSX:       # %bb.0:
 ; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
-; CHECK-FAST-NOVSX-NEXT:    addis 4, 2, .LCPI5_1@toc@ha
 ; CHECK-FAST-NOVSX-NEXT:    lfs 0, .LCPI5_0@toc@l(3)
-; CHECK-FAST-NOVSX-NEXT:    lfs 3, .LCPI5_1@toc@l(4)
+; CHECK-FAST-NOVSX-NEXT:    addis 3, 2, .LCPI5_1@toc@ha
+; CHECK-FAST-NOVSX-NEXT:    lfs 3, .LCPI5_1@toc@l(3)
 ; CHECK-FAST-NOVSX-NEXT:    fmadd 3, 1, 3, 0
 ; CHECK-FAST-NOVSX-NEXT:    fsub 0, 1, 0
 ; CHECK-FAST-NOVSX-NEXT:    fmadd 1, 3, 2, 0
@@ -227,12 +223,12 @@ define dso_local double @getNegatedExpression_crash(double %x, double %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vspltisw 2, -1
 ; CHECK-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
-; CHECK-NEXT:    lfs 4, .LCPI5_0@toc@l(3)
-; CHECK-NEXT:    xvcvsxwdp 3, 34
-; CHECK-NEXT:    xssubdp 0, 1, 3
-; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
-; CHECK-NEXT:    xsmaddadp 3, 1, 4
-; CHECK-NEXT:    xsmaddadp 0, 3, 2
+; CHECK-NEXT:    xvcvsxwdp 4, 34
+; CHECK-NEXT:    lfs 3, .LCPI5_0@toc@l(3)
+; CHECK-NEXT:    xssubdp 0, 1, 4
+; CHECK-NEXT:    # kill: def $f4 killed $f4 killed $vsl4
+; CHECK-NEXT:    xsmaddadp 4, 1, 3
+; CHECK-NEXT:    xsmaddadp 0, 4, 2
 ; CHECK-NEXT:    fmr 1, 0
 ; CHECK-NEXT:    blr
   %neg = fneg reassoc double %x
@@ -334,10 +330,10 @@ define dso_local double @fma_combine_const(double %a, double %b) {
 ; CHECK-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
 ; CHECK-NEXT:    lfd 0, .LCPI9_0@toc@l(3)
 ; CHECK-NEXT:    addis 3, 2, .LCPI9_1@toc@ha
-; CHECK-NEXT:    lfd 3, .LCPI9_1@toc@l(3)
 ; CHECK-NEXT:    xsmuldp 0, 1, 0
+; CHECK-NEXT:    lfd 1, .LCPI9_1@toc@l(3)
+; CHECK-NEXT:    xsmaddadp 2, 0, 1
 ; CHECK-NEXT:    fmr 1, 2
-; CHECK-NEXT:    xsmaddadp 1, 0, 3
 ; CHECK-NEXT:    blr
 entry:
   %0 = fmul double %a, 1.1

diff  --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index d90103cc7b9d053..58b3ee485ea4b1c 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -302,31 +302,31 @@ define float @fmul_fma_fast2(float %x) {
 define float @sqrt_afn_ieee(float %x) #0 {
 ; FMF-LABEL: sqrt_afn_ieee:
 ; FMF:       # %bb.0:
-; FMF-NEXT:    xsabsdp 0, 1
 ; FMF-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
+; FMF-NEXT:    xsabsdp 0, 1
 ; FMF-NEXT:    lfs 2, .LCPI11_1@toc@l(3)
 ; FMF-NEXT:    fcmpu 0, 0, 2
 ; FMF-NEXT:    xxlxor 0, 0, 0
 ; FMF-NEXT:    blt 0, .LBB11_2
 ; FMF-NEXT:  # %bb.1:
-; FMF-NEXT:    xsrsqrtesp 0, 1
-; FMF-NEXT:    vspltisw 2, -3
+; FMF-NEXT:    xsrsqrtesp 2, 1
 ; FMF-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
-; FMF-NEXT:    lfs 3, .LCPI11_0@toc@l(3)
-; FMF-NEXT:    xvcvsxwdp 2, 34
-; FMF-NEXT:    xsmulsp 1, 1, 0
-; FMF-NEXT:    xsmulsp 0, 1, 0
-; FMF-NEXT:    xsmulsp 1, 1, 3
-; FMF-NEXT:    xsaddsp 0, 0, 2
+; FMF-NEXT:    vspltisw 2, -3
+; FMF-NEXT:    lfs 0, .LCPI11_0@toc@l(3)
+; FMF-NEXT:    xsmulsp 1, 1, 2
 ; FMF-NEXT:    xsmulsp 0, 1, 0
+; FMF-NEXT:    xsmulsp 1, 1, 2
+; FMF-NEXT:    xvcvsxwdp 2, 34
+; FMF-NEXT:    xsaddsp 1, 1, 2
+; FMF-NEXT:    xsmulsp 0, 0, 1
 ; FMF-NEXT:  .LBB11_2:
 ; FMF-NEXT:    fmr 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: sqrt_afn_ieee:
 ; GLOBAL:       # %bb.0:
-; GLOBAL-NEXT:    xsabsdp 0, 1
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
+; GLOBAL-NEXT:    xsabsdp 0, 1
 ; GLOBAL-NEXT:    lfs 2, .LCPI11_1@toc@l(3)
 ; GLOBAL-NEXT:    fcmpu 0, 0, 2
 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
@@ -335,11 +335,11 @@ define float @sqrt_afn_ieee(float %x) #0 {
 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
 ; GLOBAL-NEXT:    vspltisw 2, -3
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
-; GLOBAL-NEXT:    lfs 3, .LCPI11_0@toc@l(3)
 ; GLOBAL-NEXT:    xvcvsxwdp 2, 34
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
-; GLOBAL-NEXT:    xsmulsp 0, 1, 3
+; GLOBAL-NEXT:    lfs 0, .LCPI11_0@toc@l(3)
+; GLOBAL-NEXT:    xsmulsp 0, 1, 0
 ; GLOBAL-NEXT:    xsmulsp 0, 0, 2
 ; GLOBAL-NEXT:  .LBB11_2:
 ; GLOBAL-NEXT:    fmr 1, 0
@@ -378,15 +378,15 @@ define float @sqrt_afn_preserve_sign(float %x) #1 {
 ; FMF-NEXT:    beq 0, .LBB13_2
 ; FMF-NEXT:  # %bb.1:
 ; FMF-NEXT:    xsrsqrtesp 0, 1
-; FMF-NEXT:    vspltisw 2, -3
 ; FMF-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
-; FMF-NEXT:    lfs 3, .LCPI13_0@toc@l(3)
-; FMF-NEXT:    xvcvsxwdp 2, 34
+; FMF-NEXT:    vspltisw 2, -3
+; FMF-NEXT:    lfs 2, .LCPI13_0@toc@l(3)
 ; FMF-NEXT:    xsmulsp 1, 1, 0
+; FMF-NEXT:    xsmulsp 2, 1, 2
 ; FMF-NEXT:    xsmulsp 0, 1, 0
-; FMF-NEXT:    xsmulsp 1, 1, 3
-; FMF-NEXT:    xsaddsp 0, 0, 2
-; FMF-NEXT:    xsmulsp 0, 1, 0
+; FMF-NEXT:    xvcvsxwdp 1, 34
+; FMF-NEXT:    xsaddsp 0, 0, 1
+; FMF-NEXT:    xsmulsp 0, 2, 0
 ; FMF-NEXT:  .LBB13_2:
 ; FMF-NEXT:    fmr 1, 0
 ; FMF-NEXT:    blr
@@ -400,11 +400,11 @@ define float @sqrt_afn_preserve_sign(float %x) #1 {
 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
 ; GLOBAL-NEXT:    vspltisw 2, -3
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
-; GLOBAL-NEXT:    lfs 3, .LCPI13_0@toc@l(3)
 ; GLOBAL-NEXT:    xvcvsxwdp 2, 34
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
-; GLOBAL-NEXT:    xsmulsp 0, 1, 3
+; GLOBAL-NEXT:    lfs 0, .LCPI13_0@toc@l(3)
+; GLOBAL-NEXT:    xsmulsp 0, 1, 0
 ; GLOBAL-NEXT:    xsmulsp 0, 0, 2
 ; GLOBAL-NEXT:  .LBB13_2:
 ; GLOBAL-NEXT:    fmr 1, 0
@@ -440,8 +440,8 @@ define float @sqrt_afn_preserve_sign_inf(float %x) #1 {
 define float @sqrt_fast_ieee(float %x) #0 {
 ; FMF-LABEL: sqrt_fast_ieee:
 ; FMF:       # %bb.0:
-; FMF-NEXT:    xsabsdp 0, 1
 ; FMF-NEXT:    addis 3, 2, .LCPI15_1@toc@ha
+; FMF-NEXT:    xsabsdp 0, 1
 ; FMF-NEXT:    lfs 2, .LCPI15_1@toc@l(3)
 ; FMF-NEXT:    fcmpu 0, 0, 2
 ; FMF-NEXT:    xxlxor 0, 0, 0
@@ -450,11 +450,11 @@ define float @sqrt_fast_ieee(float %x) #0 {
 ; FMF-NEXT:    xsrsqrtesp 0, 1
 ; FMF-NEXT:    vspltisw 2, -3
 ; FMF-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
-; FMF-NEXT:    lfs 3, .LCPI15_0@toc@l(3)
 ; FMF-NEXT:    xvcvsxwdp 2, 34
 ; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    xsmaddasp 2, 1, 0
-; FMF-NEXT:    xsmulsp 0, 1, 3
+; FMF-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
+; FMF-NEXT:    xsmulsp 0, 1, 0
 ; FMF-NEXT:    xsmulsp 0, 0, 2
 ; FMF-NEXT:  .LBB15_2:
 ; FMF-NEXT:    fmr 1, 0
@@ -462,8 +462,8 @@ define float @sqrt_fast_ieee(float %x) #0 {
 ;
 ; GLOBAL-LABEL: sqrt_fast_ieee:
 ; GLOBAL:       # %bb.0:
-; GLOBAL-NEXT:    xsabsdp 0, 1
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI15_1@toc@ha
+; GLOBAL-NEXT:    xsabsdp 0, 1
 ; GLOBAL-NEXT:    lfs 2, .LCPI15_1@toc@l(3)
 ; GLOBAL-NEXT:    fcmpu 0, 0, 2
 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
@@ -472,11 +472,11 @@ define float @sqrt_fast_ieee(float %x) #0 {
 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
 ; GLOBAL-NEXT:    vspltisw 2, -3
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
-; GLOBAL-NEXT:    lfs 3, .LCPI15_0@toc@l(3)
 ; GLOBAL-NEXT:    xvcvsxwdp 2, 34
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
-; GLOBAL-NEXT:    xsmulsp 0, 1, 3
+; GLOBAL-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
+; GLOBAL-NEXT:    xsmulsp 0, 1, 0
 ; GLOBAL-NEXT:    xsmulsp 0, 0, 2
 ; GLOBAL-NEXT:  .LBB15_2:
 ; GLOBAL-NEXT:    fmr 1, 0
@@ -505,11 +505,11 @@ define float @sqrt_fast_preserve_sign(float %x) #1 {
 ; FMF-NEXT:    xsrsqrtesp 0, 1
 ; FMF-NEXT:    vspltisw 2, -3
 ; FMF-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
-; FMF-NEXT:    lfs 3, .LCPI16_0@toc@l(3)
 ; FMF-NEXT:    xvcvsxwdp 2, 34
 ; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    xsmaddasp 2, 1, 0
-; FMF-NEXT:    xsmulsp 0, 1, 3
+; FMF-NEXT:    lfs 0, .LCPI16_0@toc@l(3)
+; FMF-NEXT:    xsmulsp 0, 1, 0
 ; FMF-NEXT:    xsmulsp 0, 0, 2
 ; FMF-NEXT:  .LBB16_2:
 ; FMF-NEXT:    fmr 1, 0
@@ -524,11 +524,11 @@ define float @sqrt_fast_preserve_sign(float %x) #1 {
 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
 ; GLOBAL-NEXT:    vspltisw 2, -3
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
-; GLOBAL-NEXT:    lfs 3, .LCPI16_0@toc@l(3)
 ; GLOBAL-NEXT:    xvcvsxwdp 2, 34
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
-; GLOBAL-NEXT:    xsmulsp 0, 1, 3
+; GLOBAL-NEXT:    lfs 0, .LCPI16_0@toc@l(3)
+; GLOBAL-NEXT:    xsmulsp 0, 1, 0
 ; GLOBAL-NEXT:    xsmulsp 0, 0, 2
 ; GLOBAL-NEXT:  .LBB16_2:
 ; GLOBAL-NEXT:    fmr 1, 0

diff  --git a/llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll b/llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll
index 9cb70bad3e6f26a..306212ec64fae3c 100644
--- a/llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll
+++ b/llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll
@@ -26,13 +26,13 @@ define void @foo_multiple_use(i32 signext %var1) {
 ; CHECK-LABEL: foo_multiple_use:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xori r3, r3, 1
-; CHECK-NEXT:    addis r4, r2, res2@toc@ha
-; CHECK-NEXT:    addis r6, r2, res@toc@ha
+; CHECK-NEXT:    addis r5, r2, res2@toc@ha
 ; CHECK-NEXT:    cntlzw r3, r3
-; CHECK-NEXT:    srwi r5, r3, 5
+; CHECK-NEXT:    srwi r4, r3, 5
 ; CHECK-NEXT:    rlwinm r3, r3, 14, 0, 12
-; CHECK-NEXT:    stw r5, res2@toc@l(r4)
-; CHECK-NEXT:    stw r3, res@toc@l(r6)
+; CHECK-NEXT:    stw r4, res2@toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, res@toc@ha
+; CHECK-NEXT:    stw r3, res@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp eq i32 %var1, 1

diff  --git a/llvm/test/CodeGen/PowerPC/fp-classify.ll b/llvm/test/CodeGen/PowerPC/fp-classify.ll
index 796a9be06bf9ccd..2079ca992629450 100644
--- a/llvm/test/CodeGen/PowerPC/fp-classify.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-classify.ll
@@ -7,8 +7,8 @@
 define zeroext i1 @abs_isinff(float %x) {
 ; P8-LABEL: abs_isinff:
 ; P8:       # %bb.0: # %entry
-; P8-NEXT:    xsabsdp 0, 1
 ; P8-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; P8-NEXT:    xsabsdp 0, 1
 ; P8-NEXT:    li 4, 1
 ; P8-NEXT:    lfs 1, .LCPI0_0@toc@l(3)
 ; P8-NEXT:    li 3, 0
@@ -35,8 +35,8 @@ entry:
 define zeroext i1 @abs_isinf(double %x) {
 ; P8-LABEL: abs_isinf:
 ; P8:       # %bb.0: # %entry
-; P8-NEXT:    xsabsdp 0, 1
 ; P8-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; P8-NEXT:    xsabsdp 0, 1
 ; P8-NEXT:    li 4, 1
 ; P8-NEXT:    lfs 1, .LCPI1_0@toc@l(3)
 ; P8-NEXT:    li 3, 0
@@ -109,8 +109,8 @@ entry:
 define <4 x i1> @abs_isinfv4f32(<4 x float> %x) {
 ; P8-LABEL: abs_isinfv4f32:
 ; P8:       # %bb.0: # %entry
-; P8-NEXT:    xvabssp 0, 34
 ; P8-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
+; P8-NEXT:    xvabssp 0, 34
 ; P8-NEXT:    addi 3, 3, .LCPI3_0@toc@l
 ; P8-NEXT:    lxvd2x 1, 0, 3
 ; P8-NEXT:    xvcmpeqsp 34, 0, 1
@@ -133,8 +133,8 @@ entry:
 define <2 x i1> @abs_isinfv2f64(<2 x double> %x) {
 ; P8-LABEL: abs_isinfv2f64:
 ; P8:       # %bb.0: # %entry
-; P8-NEXT:    xvabsdp 0, 34
 ; P8-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; P8-NEXT:    xvabsdp 0, 34
 ; P8-NEXT:    addi 3, 3, .LCPI4_0@toc@l
 ; P8-NEXT:    lxvd2x 1, 0, 3
 ; P8-NEXT:    xvcmpeqdp 34, 0, 1

diff  --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
index 839022750826842..988ec6d8cc72bf2 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
@@ -496,8 +496,8 @@ define signext i32 @ppcq_to_i32(ppc_fp128 %m) #0 {
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mffs f0
 ; NOVSX-NEXT:    mtfsb1 31
-; NOVSX-NEXT:    addi r3, r1, -4
 ; NOVSX-NEXT:    mtfsb0 30
+; NOVSX-NEXT:    addi r3, r1, -4
 ; NOVSX-NEXT:    fadd f1, f2, f1
 ; NOVSX-NEXT:    mtfsf 1, f0
 ; NOVSX-NEXT:    fctiwz f0, f1
@@ -616,11 +616,11 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 {
 ; P8-NEXT:    xxlxor f3, f3, f3
 ; P8-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
 ; P8-NEXT:    lfs f0, .LCPI13_0@toc@l(r3)
+; P8-NEXT:    fcmpo cr1, f2, f3
 ; P8-NEXT:    lis r3, -32768
-; P8-NEXT:    fcmpo cr0, f2, f3
-; P8-NEXT:    fcmpo cr1, f1, f0
-; P8-NEXT:    crand 4*cr5+lt, 4*cr1+eq, lt
-; P8-NEXT:    crandc 4*cr5+gt, 4*cr1+lt, 4*cr1+eq
+; P8-NEXT:    fcmpo cr0, f1, f0
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
+; P8-NEXT:    crandc 4*cr5+gt, lt, eq
 ; P8-NEXT:    cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r30, 0, r3, 4*cr5+lt
 ; P8-NEXT:    bc 12, 4*cr5+lt, .LBB13_2
@@ -689,22 +689,22 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 {
 ; NOVSX-LABEL: ppcq_to_u32:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mfocrf r12, 32
-; NOVSX-NEXT:    mflr r0
 ; NOVSX-NEXT:    stw r12, 8(r1)
+; NOVSX-NEXT:    mflr r0
 ; NOVSX-NEXT:    stdu r1, -48(r1)
 ; NOVSX-NEXT:    std r0, 64(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 48
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    .cfi_offset cr2, 8
 ; NOVSX-NEXT:    addis r3, r2, .LCPI13_0@toc@ha
-; NOVSX-NEXT:    addis r4, r2, .LCPI13_1@toc@ha
 ; NOVSX-NEXT:    lfs f0, .LCPI13_0@toc@l(r3)
-; NOVSX-NEXT:    lfs f4, .LCPI13_1@toc@l(r4)
+; NOVSX-NEXT:    addis r3, r2, .LCPI13_1@toc@ha
+; NOVSX-NEXT:    lfs f4, .LCPI13_1@toc@l(r3)
 ; NOVSX-NEXT:    fcmpo cr0, f1, f0
 ; NOVSX-NEXT:    fcmpo cr1, f2, f4
 ; NOVSX-NEXT:    fmr f3, f4
-; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
 ; NOVSX-NEXT:    crandc 4*cr5+gt, lt, eq
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
 ; NOVSX-NEXT:    cror 4*cr2+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    bc 12, 4*cr2+lt, .LBB13_2
 ; NOVSX-NEXT:  # %bb.1: # %entry
@@ -714,8 +714,8 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 {
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    mffs f0
 ; NOVSX-NEXT:    mtfsb1 31
-; NOVSX-NEXT:    addi r3, r1, 44
 ; NOVSX-NEXT:    mtfsb0 30
+; NOVSX-NEXT:    addi r3, r1, 44
 ; NOVSX-NEXT:    fadd f1, f2, f1
 ; NOVSX-NEXT:    mtfsf 1, f0
 ; NOVSX-NEXT:    fctiwz f0, f1
@@ -728,8 +728,8 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 {
 ; NOVSX-NEXT:    addi r1, r1, 48
 ; NOVSX-NEXT:    ld r0, 16(r1)
 ; NOVSX-NEXT:    lwz r12, 8(r1)
-; NOVSX-NEXT:    mtocrf 32, r12
 ; NOVSX-NEXT:    mtlr r0
+; NOVSX-NEXT:    mtocrf 32, r12
 ; NOVSX-NEXT:    blr
 entry:
   %conv = tail call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0
@@ -831,10 +831,10 @@ define ppc_fp128 @i1_to_ppcq(i1 signext %m) #0 {
 ;
 ; NOVSX-LABEL: i1_to_ppcq:
 ; NOVSX:       # %bb.0: # %entry
-; NOVSX-NEXT:    addi r4, r1, -4
 ; NOVSX-NEXT:    stw r3, -4(r1)
+; NOVSX-NEXT:    addi r3, r1, -4
+; NOVSX-NEXT:    lfiwax f0, 0, r3
 ; NOVSX-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
-; NOVSX-NEXT:    lfiwax f0, 0, r4
 ; NOVSX-NEXT:    lfs f2, .LCPI16_0@toc@l(r3)
 ; NOVSX-NEXT:    fcfid f1, f0
 ; NOVSX-NEXT:    blr
@@ -860,10 +860,10 @@ define ppc_fp128 @u1_to_ppcq(i1 zeroext %m) #0 {
 ;
 ; NOVSX-LABEL: u1_to_ppcq:
 ; NOVSX:       # %bb.0: # %entry
-; NOVSX-NEXT:    addi r4, r1, -4
 ; NOVSX-NEXT:    stw r3, -4(r1)
+; NOVSX-NEXT:    addi r3, r1, -4
+; NOVSX-NEXT:    lfiwax f0, 0, r3
 ; NOVSX-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
-; NOVSX-NEXT:    lfiwax f0, 0, r4
 ; NOVSX-NEXT:    lfs f2, .LCPI17_0@toc@l(r3)
 ; NOVSX-NEXT:    fcfid f1, f0
 ; NOVSX-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll
index 8defbeaf1bcb022..f92460d8668e32e 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll
@@ -189,9 +189,9 @@ define double @i32_to_d(i32 signext %m) #0 {
 ;
 ; NOVSX-LABEL: i32_to_d:
 ; NOVSX:       # %bb.0: # %entry
-; NOVSX-NEXT:    addi r4, r1, -4
 ; NOVSX-NEXT:    stw r3, -4(r1)
-; NOVSX-NEXT:    lfiwax f0, 0, r4
+; NOVSX-NEXT:    addi r3, r1, -4
+; NOVSX-NEXT:    lfiwax f0, 0, r3
 ; NOVSX-NEXT:    fcfid f1, f0
 ; NOVSX-NEXT:    blr
 entry:
@@ -226,9 +226,9 @@ define double @u32_to_d(i32 zeroext %m) #0 {
 ;
 ; NOVSX-LABEL: u32_to_d:
 ; NOVSX:       # %bb.0: # %entry
-; NOVSX-NEXT:    addi r4, r1, -4
 ; NOVSX-NEXT:    stw r3, -4(r1)
-; NOVSX-NEXT:    lfiwzx f0, 0, r4
+; NOVSX-NEXT:    addi r3, r1, -4
+; NOVSX-NEXT:    lfiwzx f0, 0, r3
 ; NOVSX-NEXT:    fcfidu f1, f0
 ; NOVSX-NEXT:    blr
 entry:
@@ -263,9 +263,9 @@ define float @i32_to_f(i32 signext %m) #0 {
 ;
 ; NOVSX-LABEL: i32_to_f:
 ; NOVSX:       # %bb.0: # %entry
-; NOVSX-NEXT:    addi r4, r1, -4
 ; NOVSX-NEXT:    stw r3, -4(r1)
-; NOVSX-NEXT:    lfiwax f0, 0, r4
+; NOVSX-NEXT:    addi r3, r1, -4
+; NOVSX-NEXT:    lfiwax f0, 0, r3
 ; NOVSX-NEXT:    fcfids f1, f0
 ; NOVSX-NEXT:    blr
 entry:
@@ -300,9 +300,9 @@ define float @u32_to_f(i32 zeroext %m) #0 {
 ;
 ; NOVSX-LABEL: u32_to_f:
 ; NOVSX:       # %bb.0: # %entry
-; NOVSX-NEXT:    addi r4, r1, -4
 ; NOVSX-NEXT:    stw r3, -4(r1)
-; NOVSX-NEXT:    lfiwzx f0, 0, r4
+; NOVSX-NEXT:    addi r3, r1, -4
+; NOVSX-NEXT:    lfiwzx f0, 0, r3
 ; NOVSX-NEXT:    fcfidus f1, f0
 ; NOVSX-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll
index 9a44e195a25e36b..af4c051d553eeed 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll
@@ -1705,19 +1705,19 @@ define i32 @fcmp_one_f128(fp128 %a, fp128 %b) #0 {
 ; P8-NEXT:    vmr v31, v3
 ; P8-NEXT:    bl __unordkf2
 ; P8-NEXT:    nop
-; P8-NEXT:    vmr v2, v30
 ; P8-NEXT:    cntlzw r3, r3
+; P8-NEXT:    vmr v2, v30
 ; P8-NEXT:    vmr v3, v31
 ; P8-NEXT:    srwi r30, r3, 5
 ; P8-NEXT:    bl __eqkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    cntlzw r3, r3
 ; P8-NEXT:    li r4, 144
-; P8-NEXT:    srwi r3, r3, 5
 ; P8-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
 ; P8-NEXT:    li r4, 128
-; P8-NEXT:    xori r3, r3, 1
+; P8-NEXT:    srwi r3, r3, 5
 ; P8-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
+; P8-NEXT:    xori r3, r3, 1
 ; P8-NEXT:    and r3, r30, r3
 ; P8-NEXT:    ld r30, 160(r1) # 8-byte Folded Reload
 ; P8-NEXT:    addi r1, r1, 176
@@ -1956,19 +1956,19 @@ define i32 @fcmp_ueq_f128(fp128 %a, fp128 %b) #0 {
 ; P8-NEXT:    vmr v31, v3
 ; P8-NEXT:    bl __eqkf2
 ; P8-NEXT:    nop
-; P8-NEXT:    vmr v2, v30
 ; P8-NEXT:    cntlzw r3, r3
+; P8-NEXT:    vmr v2, v30
 ; P8-NEXT:    vmr v3, v31
 ; P8-NEXT:    srwi r30, r3, 5
 ; P8-NEXT:    bl __unordkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    cntlzw r3, r3
 ; P8-NEXT:    li r4, 144
-; P8-NEXT:    srwi r3, r3, 5
 ; P8-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
 ; P8-NEXT:    li r4, 128
-; P8-NEXT:    xori r3, r3, 1
+; P8-NEXT:    srwi r3, r3, 5
 ; P8-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
+; P8-NEXT:    xori r3, r3, 1
 ; P8-NEXT:    or r3, r3, r30
 ; P8-NEXT:    ld r30, 160(r1) # 8-byte Folded Reload
 ; P8-NEXT:    addi r1, r1, 176
@@ -2292,19 +2292,19 @@ define i32 @fcmps_one_f128(fp128 %a, fp128 %b) #0 {
 ; P8-NEXT:    vmr v31, v3
 ; P8-NEXT:    bl __unordkf2
 ; P8-NEXT:    nop
-; P8-NEXT:    vmr v2, v30
 ; P8-NEXT:    cntlzw r3, r3
+; P8-NEXT:    vmr v2, v30
 ; P8-NEXT:    vmr v3, v31
 ; P8-NEXT:    srwi r30, r3, 5
 ; P8-NEXT:    bl __eqkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    cntlzw r3, r3
 ; P8-NEXT:    li r4, 144
-; P8-NEXT:    srwi r3, r3, 5
 ; P8-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
 ; P8-NEXT:    li r4, 128
-; P8-NEXT:    xori r3, r3, 1
+; P8-NEXT:    srwi r3, r3, 5
 ; P8-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
+; P8-NEXT:    xori r3, r3, 1
 ; P8-NEXT:    and r3, r30, r3
 ; P8-NEXT:    ld r30, 160(r1) # 8-byte Folded Reload
 ; P8-NEXT:    addi r1, r1, 176
@@ -2543,19 +2543,19 @@ define i32 @fcmps_ueq_f128(fp128 %a, fp128 %b) #0 {
 ; P8-NEXT:    vmr v31, v3
 ; P8-NEXT:    bl __eqkf2
 ; P8-NEXT:    nop
-; P8-NEXT:    vmr v2, v30
 ; P8-NEXT:    cntlzw r3, r3
+; P8-NEXT:    vmr v2, v30
 ; P8-NEXT:    vmr v3, v31
 ; P8-NEXT:    srwi r30, r3, 5
 ; P8-NEXT:    bl __unordkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    cntlzw r3, r3
 ; P8-NEXT:    li r4, 144
-; P8-NEXT:    srwi r3, r3, 5
 ; P8-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
 ; P8-NEXT:    li r4, 128
-; P8-NEXT:    xori r3, r3, 1
+; P8-NEXT:    srwi r3, r3, 5
 ; P8-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
+; P8-NEXT:    xori r3, r3, 1
 ; P8-NEXT:    or r3, r3, r30
 ; P8-NEXT:    ld r30, 160(r1) # 8-byte Folded Reload
 ; P8-NEXT:    addi r1, r1, 176
@@ -2659,10 +2659,10 @@ define i32 @fcmp_olt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmp_olt_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpu cr0, f1, f3
-; P8-NEXT:    fcmpu cr1, f2, f4
 ; P8-NEXT:    li r3, 1
-; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
 ; P8-NEXT:    crandc 4*cr5+gt, lt, eq
+; P8-NEXT:    fcmpu cr1, f2, f4
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -2681,10 +2681,10 @@ define i32 @fcmp_olt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_olt_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpu cr0, f1, f3
-; NOVSX-NEXT:    fcmpu cr1, f2, f4
 ; NOVSX-NEXT:    li r3, 1
-; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
 ; NOVSX-NEXT:    crandc 4*cr5+gt, lt, eq
+; NOVSX-NEXT:    fcmpu cr1, f2, f4
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -2697,12 +2697,12 @@ define i32 @fcmp_ole_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmp_ole_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpu cr0, f2, f4
-; P8-NEXT:    fcmpu cr1, f1, f3
 ; P8-NEXT:    li r3, 1
 ; P8-NEXT:    crnor 4*cr5+lt, un, gt
-; P8-NEXT:    crnor 4*cr5+gt, 4*cr1+un, 4*cr1+gt
-; P8-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; P8-NEXT:    fcmpu cr0, f1, f3
+; P8-NEXT:    crnor 4*cr5+gt, un, gt
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -2723,12 +2723,12 @@ define i32 @fcmp_ole_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_ole_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpu cr0, f2, f4
-; NOVSX-NEXT:    fcmpu cr1, f1, f3
 ; NOVSX-NEXT:    li r3, 1
 ; NOVSX-NEXT:    crnor 4*cr5+lt, un, gt
-; NOVSX-NEXT:    crnor 4*cr5+gt, 4*cr1+un, 4*cr1+gt
-; NOVSX-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; NOVSX-NEXT:    fcmpu cr0, f1, f3
+; NOVSX-NEXT:    crnor 4*cr5+gt, un, gt
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -2741,10 +2741,10 @@ define i32 @fcmp_ogt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmp_ogt_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpu cr0, f1, f3
-; P8-NEXT:    fcmpu cr1, f2, f4
 ; P8-NEXT:    li r3, 1
-; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr1+gt
 ; P8-NEXT:    crandc 4*cr5+gt, gt, eq
+; P8-NEXT:    fcmpu cr1, f2, f4
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr1+gt
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -2763,10 +2763,10 @@ define i32 @fcmp_ogt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_ogt_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpu cr0, f1, f3
-; NOVSX-NEXT:    fcmpu cr1, f2, f4
 ; NOVSX-NEXT:    li r3, 1
-; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr1+gt
 ; NOVSX-NEXT:    crandc 4*cr5+gt, gt, eq
+; NOVSX-NEXT:    fcmpu cr1, f2, f4
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr1+gt
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -2779,12 +2779,12 @@ define i32 @fcmp_oge_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmp_oge_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpu cr0, f2, f4
-; P8-NEXT:    fcmpu cr1, f1, f3
 ; P8-NEXT:    li r3, 1
 ; P8-NEXT:    crnor 4*cr5+lt, un, lt
-; P8-NEXT:    crnor 4*cr5+gt, 4*cr1+un, 4*cr1+lt
-; P8-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; P8-NEXT:    fcmpu cr0, f1, f3
+; P8-NEXT:    crnor 4*cr5+gt, un, lt
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -2805,12 +2805,12 @@ define i32 @fcmp_oge_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_oge_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpu cr0, f2, f4
-; NOVSX-NEXT:    fcmpu cr1, f1, f3
 ; NOVSX-NEXT:    li r3, 1
 ; NOVSX-NEXT:    crnor 4*cr5+lt, un, lt
-; NOVSX-NEXT:    crnor 4*cr5+gt, 4*cr1+un, 4*cr1+lt
-; NOVSX-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; NOVSX-NEXT:    fcmpu cr0, f1, f3
+; NOVSX-NEXT:    crnor 4*cr5+gt, un, lt
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -2823,10 +2823,10 @@ define i32 @fcmp_oeq_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmp_oeq_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpu cr0, f1, f3
-; P8-NEXT:    fcmpu cr1, f2, f4
 ; P8-NEXT:    li r3, 1
-; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr1+eq
 ; P8-NEXT:    crandc 4*cr5+gt, eq, eq
+; P8-NEXT:    fcmpu cr1, f2, f4
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr1+eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -2845,10 +2845,10 @@ define i32 @fcmp_oeq_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_oeq_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpu cr0, f1, f3
-; NOVSX-NEXT:    fcmpu cr1, f2, f4
 ; NOVSX-NEXT:    li r3, 1
-; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr1+eq
 ; NOVSX-NEXT:    crandc 4*cr5+gt, eq, eq
+; NOVSX-NEXT:    fcmpu cr1, f2, f4
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr1+eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -2861,12 +2861,12 @@ define i32 @fcmp_one_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmp_one_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpu cr0, f2, f4
-; P8-NEXT:    fcmpu cr1, f1, f3
 ; P8-NEXT:    li r3, 1
 ; P8-NEXT:    crnor 4*cr5+lt, un, eq
-; P8-NEXT:    crnor 4*cr5+gt, 4*cr1+un, 4*cr1+eq
-; P8-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; P8-NEXT:    fcmpu cr0, f1, f3
+; P8-NEXT:    crnor 4*cr5+gt, un, eq
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -2887,12 +2887,12 @@ define i32 @fcmp_one_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_one_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpu cr0, f2, f4
-; NOVSX-NEXT:    fcmpu cr1, f1, f3
 ; NOVSX-NEXT:    li r3, 1
 ; NOVSX-NEXT:    crnor 4*cr5+lt, un, eq
-; NOVSX-NEXT:    crnor 4*cr5+gt, 4*cr1+un, 4*cr1+eq
-; NOVSX-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; NOVSX-NEXT:    fcmpu cr0, f1, f3
+; NOVSX-NEXT:    crnor 4*cr5+gt, un, eq
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -2904,13 +2904,13 @@ define i32 @fcmp_one_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 define i32 @fcmp_ult_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmp_ult_ppcf128:
 ; P8:       # %bb.0:
-; P8-NEXT:    fcmpu cr0, f2, f4
-; P8-NEXT:    fcmpu cr1, f1, f3
+; P8-NEXT:    fcmpu cr0, f1, f3
 ; P8-NEXT:    li r3, 1
-; P8-NEXT:    cror 4*cr5+lt, lt, un
-; P8-NEXT:    cror 4*cr5+gt, 4*cr1+lt, 4*cr1+un
-; P8-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; P8-NEXT:    cror 4*cr5+gt, lt, un
+; P8-NEXT:    fcmpu cr1, f2, f4
+; P8-NEXT:    cror 4*cr5+lt, 4*cr1+lt, 4*cr1+un
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -2930,13 +2930,13 @@ define i32 @fcmp_ult_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ;
 ; NOVSX-LABEL: fcmp_ult_ppcf128:
 ; NOVSX:       # %bb.0:
-; NOVSX-NEXT:    fcmpu cr0, f2, f4
-; NOVSX-NEXT:    fcmpu cr1, f1, f3
+; NOVSX-NEXT:    fcmpu cr0, f1, f3
 ; NOVSX-NEXT:    li r3, 1
-; NOVSX-NEXT:    cror 4*cr5+lt, lt, un
-; NOVSX-NEXT:    cror 4*cr5+gt, 4*cr1+lt, 4*cr1+un
-; NOVSX-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; NOVSX-NEXT:    cror 4*cr5+gt, lt, un
+; NOVSX-NEXT:    fcmpu cr1, f2, f4
+; NOVSX-NEXT:    cror 4*cr5+lt, 4*cr1+lt, 4*cr1+un
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -2949,8 +2949,8 @@ define i32 @fcmp_ule_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmp_ule_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpu cr0, f2, f4
-; P8-NEXT:    fcmpu cr1, f1, f3
 ; P8-NEXT:    li r3, 1
+; P8-NEXT:    fcmpu cr1, f1, f3
 ; P8-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, gt
 ; P8-NEXT:    crnor 4*cr5+gt, 4*cr1+gt, 4*cr1+eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
@@ -2971,8 +2971,8 @@ define i32 @fcmp_ule_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_ule_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpu cr0, f2, f4
-; NOVSX-NEXT:    fcmpu cr1, f1, f3
 ; NOVSX-NEXT:    li r3, 1
+; NOVSX-NEXT:    fcmpu cr1, f1, f3
 ; NOVSX-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, gt
 ; NOVSX-NEXT:    crnor 4*cr5+gt, 4*cr1+gt, 4*cr1+eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
@@ -2986,13 +2986,13 @@ define i32 @fcmp_ule_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 define i32 @fcmp_ugt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmp_ugt_ppcf128:
 ; P8:       # %bb.0:
-; P8-NEXT:    fcmpu cr0, f2, f4
-; P8-NEXT:    fcmpu cr1, f1, f3
+; P8-NEXT:    fcmpu cr0, f1, f3
 ; P8-NEXT:    li r3, 1
-; P8-NEXT:    cror 4*cr5+lt, gt, un
-; P8-NEXT:    cror 4*cr5+gt, 4*cr1+gt, 4*cr1+un
-; P8-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; P8-NEXT:    cror 4*cr5+gt, gt, un
+; P8-NEXT:    fcmpu cr1, f2, f4
+; P8-NEXT:    cror 4*cr5+lt, 4*cr1+gt, 4*cr1+un
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -3012,13 +3012,13 @@ define i32 @fcmp_ugt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ;
 ; NOVSX-LABEL: fcmp_ugt_ppcf128:
 ; NOVSX:       # %bb.0:
-; NOVSX-NEXT:    fcmpu cr0, f2, f4
-; NOVSX-NEXT:    fcmpu cr1, f1, f3
+; NOVSX-NEXT:    fcmpu cr0, f1, f3
 ; NOVSX-NEXT:    li r3, 1
-; NOVSX-NEXT:    cror 4*cr5+lt, gt, un
-; NOVSX-NEXT:    cror 4*cr5+gt, 4*cr1+gt, 4*cr1+un
-; NOVSX-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; NOVSX-NEXT:    cror 4*cr5+gt, gt, un
+; NOVSX-NEXT:    fcmpu cr1, f2, f4
+; NOVSX-NEXT:    cror 4*cr5+lt, 4*cr1+gt, 4*cr1+un
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -3031,8 +3031,8 @@ define i32 @fcmp_uge_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmp_uge_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpu cr0, f2, f4
-; P8-NEXT:    fcmpu cr1, f1, f3
 ; P8-NEXT:    li r3, 1
+; P8-NEXT:    fcmpu cr1, f1, f3
 ; P8-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, lt
 ; P8-NEXT:    crnor 4*cr5+gt, 4*cr1+lt, 4*cr1+eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
@@ -3053,8 +3053,8 @@ define i32 @fcmp_uge_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_uge_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpu cr0, f2, f4
-; NOVSX-NEXT:    fcmpu cr1, f1, f3
 ; NOVSX-NEXT:    li r3, 1
+; NOVSX-NEXT:    fcmpu cr1, f1, f3
 ; NOVSX-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, lt
 ; NOVSX-NEXT:    crnor 4*cr5+gt, 4*cr1+lt, 4*cr1+eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
@@ -3068,13 +3068,13 @@ define i32 @fcmp_uge_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 define i32 @fcmp_ueq_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmp_ueq_ppcf128:
 ; P8:       # %bb.0:
-; P8-NEXT:    fcmpu cr0, f2, f4
-; P8-NEXT:    fcmpu cr1, f1, f3
+; P8-NEXT:    fcmpu cr0, f1, f3
 ; P8-NEXT:    li r3, 1
-; P8-NEXT:    cror 4*cr5+lt, eq, un
-; P8-NEXT:    cror 4*cr5+gt, 4*cr1+eq, 4*cr1+un
-; P8-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; P8-NEXT:    cror 4*cr5+gt, eq, un
+; P8-NEXT:    fcmpu cr1, f2, f4
+; P8-NEXT:    cror 4*cr5+lt, 4*cr1+eq, 4*cr1+un
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -3094,13 +3094,13 @@ define i32 @fcmp_ueq_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ;
 ; NOVSX-LABEL: fcmp_ueq_ppcf128:
 ; NOVSX:       # %bb.0:
-; NOVSX-NEXT:    fcmpu cr0, f2, f4
-; NOVSX-NEXT:    fcmpu cr1, f1, f3
+; NOVSX-NEXT:    fcmpu cr0, f1, f3
 ; NOVSX-NEXT:    li r3, 1
-; NOVSX-NEXT:    cror 4*cr5+lt, eq, un
-; NOVSX-NEXT:    cror 4*cr5+gt, 4*cr1+eq, 4*cr1+un
-; NOVSX-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; NOVSX-NEXT:    cror 4*cr5+gt, eq, un
+; NOVSX-NEXT:    fcmpu cr1, f2, f4
+; NOVSX-NEXT:    cror 4*cr5+lt, 4*cr1+eq, 4*cr1+un
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -3113,8 +3113,8 @@ define i32 @fcmp_une_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmp_une_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpu cr0, f2, f4
-; P8-NEXT:    fcmpu cr1, f1, f3
 ; P8-NEXT:    li r3, 1
+; P8-NEXT:    fcmpu cr1, f1, f3
 ; P8-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, eq
 ; P8-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
@@ -3133,8 +3133,8 @@ define i32 @fcmp_une_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_une_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpu cr0, f2, f4
-; NOVSX-NEXT:    fcmpu cr1, f1, f3
 ; NOVSX-NEXT:    li r3, 1
+; NOVSX-NEXT:    fcmpu cr1, f1, f3
 ; NOVSX-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, eq
 ; NOVSX-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
@@ -3148,10 +3148,10 @@ define i32 @fcmps_olt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmps_olt_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpo cr0, f1, f3
-; P8-NEXT:    fcmpo cr1, f2, f4
 ; P8-NEXT:    li r3, 1
-; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
 ; P8-NEXT:    crandc 4*cr5+gt, lt, eq
+; P8-NEXT:    fcmpo cr1, f2, f4
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -3170,10 +3170,10 @@ define i32 @fcmps_olt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_olt_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpo cr0, f1, f3
-; NOVSX-NEXT:    fcmpo cr1, f2, f4
 ; NOVSX-NEXT:    li r3, 1
-; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
 ; NOVSX-NEXT:    crandc 4*cr5+gt, lt, eq
+; NOVSX-NEXT:    fcmpo cr1, f2, f4
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -3186,12 +3186,12 @@ define i32 @fcmps_ole_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmps_ole_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpo cr0, f2, f4
-; P8-NEXT:    fcmpo cr1, f1, f3
 ; P8-NEXT:    li r3, 1
 ; P8-NEXT:    crnor 4*cr5+lt, un, gt
-; P8-NEXT:    crnor 4*cr5+gt, 4*cr1+un, 4*cr1+gt
-; P8-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; P8-NEXT:    fcmpo cr0, f1, f3
+; P8-NEXT:    crnor 4*cr5+gt, un, gt
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -3212,12 +3212,12 @@ define i32 @fcmps_ole_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_ole_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpo cr0, f2, f4
-; NOVSX-NEXT:    fcmpo cr1, f1, f3
 ; NOVSX-NEXT:    li r3, 1
 ; NOVSX-NEXT:    crnor 4*cr5+lt, un, gt
-; NOVSX-NEXT:    crnor 4*cr5+gt, 4*cr1+un, 4*cr1+gt
-; NOVSX-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; NOVSX-NEXT:    fcmpo cr0, f1, f3
+; NOVSX-NEXT:    crnor 4*cr5+gt, un, gt
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -3230,10 +3230,10 @@ define i32 @fcmps_ogt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmps_ogt_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpo cr0, f1, f3
-; P8-NEXT:    fcmpo cr1, f2, f4
 ; P8-NEXT:    li r3, 1
-; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr1+gt
 ; P8-NEXT:    crandc 4*cr5+gt, gt, eq
+; P8-NEXT:    fcmpo cr1, f2, f4
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr1+gt
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -3252,10 +3252,10 @@ define i32 @fcmps_ogt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_ogt_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpo cr0, f1, f3
-; NOVSX-NEXT:    fcmpo cr1, f2, f4
 ; NOVSX-NEXT:    li r3, 1
-; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr1+gt
 ; NOVSX-NEXT:    crandc 4*cr5+gt, gt, eq
+; NOVSX-NEXT:    fcmpo cr1, f2, f4
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr1+gt
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -3268,12 +3268,12 @@ define i32 @fcmps_oge_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmps_oge_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpo cr0, f2, f4
-; P8-NEXT:    fcmpo cr1, f1, f3
 ; P8-NEXT:    li r3, 1
 ; P8-NEXT:    crnor 4*cr5+lt, un, lt
-; P8-NEXT:    crnor 4*cr5+gt, 4*cr1+un, 4*cr1+lt
-; P8-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; P8-NEXT:    fcmpo cr0, f1, f3
+; P8-NEXT:    crnor 4*cr5+gt, un, lt
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -3294,12 +3294,12 @@ define i32 @fcmps_oge_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_oge_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpo cr0, f2, f4
-; NOVSX-NEXT:    fcmpo cr1, f1, f3
 ; NOVSX-NEXT:    li r3, 1
 ; NOVSX-NEXT:    crnor 4*cr5+lt, un, lt
-; NOVSX-NEXT:    crnor 4*cr5+gt, 4*cr1+un, 4*cr1+lt
-; NOVSX-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; NOVSX-NEXT:    fcmpo cr0, f1, f3
+; NOVSX-NEXT:    crnor 4*cr5+gt, un, lt
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -3312,10 +3312,10 @@ define i32 @fcmps_oeq_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmps_oeq_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpo cr0, f1, f3
-; P8-NEXT:    fcmpo cr1, f2, f4
 ; P8-NEXT:    li r3, 1
-; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr1+eq
 ; P8-NEXT:    crandc 4*cr5+gt, eq, eq
+; P8-NEXT:    fcmpo cr1, f2, f4
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr1+eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -3334,10 +3334,10 @@ define i32 @fcmps_oeq_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_oeq_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpo cr0, f1, f3
-; NOVSX-NEXT:    fcmpo cr1, f2, f4
 ; NOVSX-NEXT:    li r3, 1
-; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr1+eq
 ; NOVSX-NEXT:    crandc 4*cr5+gt, eq, eq
+; NOVSX-NEXT:    fcmpo cr1, f2, f4
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr1+eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -3350,12 +3350,12 @@ define i32 @fcmps_one_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmps_one_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpo cr0, f2, f4
-; P8-NEXT:    fcmpo cr1, f1, f3
 ; P8-NEXT:    li r3, 1
 ; P8-NEXT:    crnor 4*cr5+lt, un, eq
-; P8-NEXT:    crnor 4*cr5+gt, 4*cr1+un, 4*cr1+eq
-; P8-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; P8-NEXT:    fcmpo cr0, f1, f3
+; P8-NEXT:    crnor 4*cr5+gt, un, eq
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -3376,12 +3376,12 @@ define i32 @fcmps_one_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_one_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpo cr0, f2, f4
-; NOVSX-NEXT:    fcmpo cr1, f1, f3
 ; NOVSX-NEXT:    li r3, 1
 ; NOVSX-NEXT:    crnor 4*cr5+lt, un, eq
-; NOVSX-NEXT:    crnor 4*cr5+gt, 4*cr1+un, 4*cr1+eq
-; NOVSX-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; NOVSX-NEXT:    fcmpo cr0, f1, f3
+; NOVSX-NEXT:    crnor 4*cr5+gt, un, eq
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -3393,13 +3393,13 @@ define i32 @fcmps_one_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 define i32 @fcmps_ult_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmps_ult_ppcf128:
 ; P8:       # %bb.0:
-; P8-NEXT:    fcmpo cr0, f2, f4
-; P8-NEXT:    fcmpo cr1, f1, f3
+; P8-NEXT:    fcmpo cr0, f1, f3
 ; P8-NEXT:    li r3, 1
-; P8-NEXT:    cror 4*cr5+lt, lt, un
-; P8-NEXT:    cror 4*cr5+gt, 4*cr1+lt, 4*cr1+un
-; P8-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; P8-NEXT:    cror 4*cr5+gt, lt, un
+; P8-NEXT:    fcmpo cr1, f2, f4
+; P8-NEXT:    cror 4*cr5+lt, 4*cr1+lt, 4*cr1+un
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -3419,13 +3419,13 @@ define i32 @fcmps_ult_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ;
 ; NOVSX-LABEL: fcmps_ult_ppcf128:
 ; NOVSX:       # %bb.0:
-; NOVSX-NEXT:    fcmpo cr0, f2, f4
-; NOVSX-NEXT:    fcmpo cr1, f1, f3
+; NOVSX-NEXT:    fcmpo cr0, f1, f3
 ; NOVSX-NEXT:    li r3, 1
-; NOVSX-NEXT:    cror 4*cr5+lt, lt, un
-; NOVSX-NEXT:    cror 4*cr5+gt, 4*cr1+lt, 4*cr1+un
-; NOVSX-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; NOVSX-NEXT:    cror 4*cr5+gt, lt, un
+; NOVSX-NEXT:    fcmpo cr1, f2, f4
+; NOVSX-NEXT:    cror 4*cr5+lt, 4*cr1+lt, 4*cr1+un
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -3438,8 +3438,8 @@ define i32 @fcmps_ule_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmps_ule_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpo cr0, f2, f4
-; P8-NEXT:    fcmpo cr1, f1, f3
 ; P8-NEXT:    li r3, 1
+; P8-NEXT:    fcmpo cr1, f1, f3
 ; P8-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, gt
 ; P8-NEXT:    crnor 4*cr5+gt, 4*cr1+gt, 4*cr1+eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
@@ -3460,8 +3460,8 @@ define i32 @fcmps_ule_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_ule_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpo cr0, f2, f4
-; NOVSX-NEXT:    fcmpo cr1, f1, f3
 ; NOVSX-NEXT:    li r3, 1
+; NOVSX-NEXT:    fcmpo cr1, f1, f3
 ; NOVSX-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, gt
 ; NOVSX-NEXT:    crnor 4*cr5+gt, 4*cr1+gt, 4*cr1+eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
@@ -3475,13 +3475,13 @@ define i32 @fcmps_ule_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 define i32 @fcmps_ugt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmps_ugt_ppcf128:
 ; P8:       # %bb.0:
-; P8-NEXT:    fcmpo cr0, f2, f4
-; P8-NEXT:    fcmpo cr1, f1, f3
+; P8-NEXT:    fcmpo cr0, f1, f3
 ; P8-NEXT:    li r3, 1
-; P8-NEXT:    cror 4*cr5+lt, gt, un
-; P8-NEXT:    cror 4*cr5+gt, 4*cr1+gt, 4*cr1+un
-; P8-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; P8-NEXT:    cror 4*cr5+gt, gt, un
+; P8-NEXT:    fcmpo cr1, f2, f4
+; P8-NEXT:    cror 4*cr5+lt, 4*cr1+gt, 4*cr1+un
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -3501,13 +3501,13 @@ define i32 @fcmps_ugt_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ;
 ; NOVSX-LABEL: fcmps_ugt_ppcf128:
 ; NOVSX:       # %bb.0:
-; NOVSX-NEXT:    fcmpo cr0, f2, f4
-; NOVSX-NEXT:    fcmpo cr1, f1, f3
+; NOVSX-NEXT:    fcmpo cr0, f1, f3
 ; NOVSX-NEXT:    li r3, 1
-; NOVSX-NEXT:    cror 4*cr5+lt, gt, un
-; NOVSX-NEXT:    cror 4*cr5+gt, 4*cr1+gt, 4*cr1+un
-; NOVSX-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; NOVSX-NEXT:    cror 4*cr5+gt, gt, un
+; NOVSX-NEXT:    fcmpo cr1, f2, f4
+; NOVSX-NEXT:    cror 4*cr5+lt, 4*cr1+gt, 4*cr1+un
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -3520,8 +3520,8 @@ define i32 @fcmps_uge_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmps_uge_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpo cr0, f2, f4
-; P8-NEXT:    fcmpo cr1, f1, f3
 ; P8-NEXT:    li r3, 1
+; P8-NEXT:    fcmpo cr1, f1, f3
 ; P8-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, lt
 ; P8-NEXT:    crnor 4*cr5+gt, 4*cr1+lt, 4*cr1+eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
@@ -3542,8 +3542,8 @@ define i32 @fcmps_uge_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_uge_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpo cr0, f2, f4
-; NOVSX-NEXT:    fcmpo cr1, f1, f3
 ; NOVSX-NEXT:    li r3, 1
+; NOVSX-NEXT:    fcmpo cr1, f1, f3
 ; NOVSX-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, lt
 ; NOVSX-NEXT:    crnor 4*cr5+gt, 4*cr1+lt, 4*cr1+eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
@@ -3557,13 +3557,13 @@ define i32 @fcmps_uge_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 define i32 @fcmps_ueq_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmps_ueq_ppcf128:
 ; P8:       # %bb.0:
-; P8-NEXT:    fcmpo cr0, f2, f4
-; P8-NEXT:    fcmpo cr1, f1, f3
+; P8-NEXT:    fcmpo cr0, f1, f3
 ; P8-NEXT:    li r3, 1
-; P8-NEXT:    cror 4*cr5+lt, eq, un
-; P8-NEXT:    cror 4*cr5+gt, 4*cr1+eq, 4*cr1+un
-; P8-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; P8-NEXT:    cror 4*cr5+gt, eq, un
+; P8-NEXT:    fcmpo cr1, f2, f4
+; P8-NEXT:    cror 4*cr5+lt, 4*cr1+eq, 4*cr1+un
+; P8-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; P8-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; P8-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; P8-NEXT:    blr
@@ -3583,13 +3583,13 @@ define i32 @fcmps_ueq_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ;
 ; NOVSX-LABEL: fcmps_ueq_ppcf128:
 ; NOVSX:       # %bb.0:
-; NOVSX-NEXT:    fcmpo cr0, f2, f4
-; NOVSX-NEXT:    fcmpo cr1, f1, f3
+; NOVSX-NEXT:    fcmpo cr0, f1, f3
 ; NOVSX-NEXT:    li r3, 1
-; NOVSX-NEXT:    cror 4*cr5+lt, eq, un
-; NOVSX-NEXT:    cror 4*cr5+gt, 4*cr1+eq, 4*cr1+un
-; NOVSX-NEXT:    crand 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
-; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, 4*cr1+eq
+; NOVSX-NEXT:    cror 4*cr5+gt, eq, un
+; NOVSX-NEXT:    fcmpo cr1, f2, f4
+; NOVSX-NEXT:    cror 4*cr5+lt, 4*cr1+eq, 4*cr1+un
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr5+lt
+; NOVSX-NEXT:    crandc 4*cr5+gt, 4*cr5+gt, eq
 ; NOVSX-NEXT:    crnor 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; NOVSX-NEXT:    blr
@@ -3602,8 +3602,8 @@ define i32 @fcmps_une_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; P8-LABEL: fcmps_une_ppcf128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    fcmpo cr0, f2, f4
-; P8-NEXT:    fcmpo cr1, f1, f3
 ; P8-NEXT:    li r3, 1
+; P8-NEXT:    fcmpo cr1, f1, f3
 ; P8-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, eq
 ; P8-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
 ; P8-NEXT:    isel r3, 0, r3, 4*cr5+lt
@@ -3622,8 +3622,8 @@ define i32 @fcmps_une_ppcf128(ppc_fp128 %a, ppc_fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_une_ppcf128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    fcmpo cr0, f2, f4
-; NOVSX-NEXT:    fcmpo cr1, f1, f3
 ; NOVSX-NEXT:    li r3, 1
+; NOVSX-NEXT:    fcmpo cr1, f1, f3
 ; NOVSX-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, eq
 ; NOVSX-NEXT:    crandc 4*cr5+lt, 4*cr1+eq, 4*cr5+lt
 ; NOVSX-NEXT:    isel r3, 0, r3, 4*cr5+lt

diff  --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
index 6c2a718e65b3f88..4c8729b9f43a533 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
@@ -215,11 +215,11 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
 ; P8-NEXT:    .cfi_offset v31, -16
 ; P8-NEXT:    xxsldwi vs0, v2, v2, 3
 ; P8-NEXT:    li r3, 128
+; P8-NEXT:    xscvspdpn f1, vs0
 ; P8-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    li r3, 144
 ; P8-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    li r3, 160
-; P8-NEXT:    xscvspdpn f1, vs0
 ; P8-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    vmr v31, v2
 ; P8-NEXT:    bl nearbyintf
@@ -243,11 +243,11 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
 ; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8-NEXT:    xxmrghd vs0, v30, vs1
 ; P8-NEXT:    li r3, 160
+; P8-NEXT:    xvcvdpsp v2, vs0
 ; P8-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    li r3, 144
 ; P8-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    li r3, 128
-; P8-NEXT:    xvcvdpsp v2, vs0
 ; P8-NEXT:    vmrgew v2, v2, v29
 ; P8-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    addi r1, r1, 176
@@ -380,14 +380,14 @@ define <4 x double> @fpext_v4f64_v4f32(<4 x float> %vf1) strictfp {
 ; P8-LABEL: fpext_v4f64_v4f32:
 ; P8:       # %bb.0:
 ; P8-NEXT:    xxsldwi vs0, v2, v2, 1
+; P8-NEXT:    xscvspdpn f3, v2
 ; P8-NEXT:    xxsldwi vs1, v2, v2, 3
-; P8-NEXT:    xxswapd vs3, v2
-; P8-NEXT:    xscvspdpn f2, v2
+; P8-NEXT:    xxswapd vs2, v2
 ; P8-NEXT:    xscvspdpn f0, vs0
-; P8-NEXT:    xscvspdpn f1, vs1
-; P8-NEXT:    xscvspdpn f3, vs3
-; P8-NEXT:    xxmrghd v2, vs2, vs0
-; P8-NEXT:    xxmrghd v3, vs3, vs1
+; P8-NEXT:    xxmrghd v2, vs3, vs0
+; P8-NEXT:    xscvspdpn f0, vs1
+; P8-NEXT:    xscvspdpn f1, vs2
+; P8-NEXT:    xxmrghd v3, vs1, vs0
 ; P8-NEXT:    blr
 ;
 ; P9-LABEL: fpext_v4f64_v4f32:

diff  --git a/llvm/test/CodeGen/PowerPC/fp-strict.ll b/llvm/test/CodeGen/PowerPC/fp-strict.ll
index 9f852b02e0d1ad4..124c588ba242c18 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict.ll
@@ -90,9 +90,9 @@ define <4 x float> @fadd_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
 ; NOVSX-LABEL: fadd_v4f32:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    addi r3, r1, -32
-; NOVSX-NEXT:    addi r4, r1, -48
 ; NOVSX-NEXT:    stvx v3, 0, r3
-; NOVSX-NEXT:    stvx v2, 0, r4
+; NOVSX-NEXT:    addi r3, r1, -48
+; NOVSX-NEXT:    stvx v2, 0, r3
 ; NOVSX-NEXT:    addi r3, r1, -16
 ; NOVSX-NEXT:    lfs f0, -20(r1)
 ; NOVSX-NEXT:    lfs f1, -36(r1)
@@ -216,9 +216,9 @@ define <4 x float> @fsub_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
 ; NOVSX-LABEL: fsub_v4f32:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    addi r3, r1, -32
-; NOVSX-NEXT:    addi r4, r1, -48
 ; NOVSX-NEXT:    stvx v3, 0, r3
-; NOVSX-NEXT:    stvx v2, 0, r4
+; NOVSX-NEXT:    addi r3, r1, -48
+; NOVSX-NEXT:    stvx v2, 0, r3
 ; NOVSX-NEXT:    addi r3, r1, -16
 ; NOVSX-NEXT:    lfs f0, -20(r1)
 ; NOVSX-NEXT:    lfs f1, -36(r1)
@@ -342,9 +342,9 @@ define <4 x float> @fmul_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
 ; NOVSX-LABEL: fmul_v4f32:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    addi r3, r1, -32
-; NOVSX-NEXT:    addi r4, r1, -48
 ; NOVSX-NEXT:    stvx v3, 0, r3
-; NOVSX-NEXT:    stvx v2, 0, r4
+; NOVSX-NEXT:    addi r3, r1, -48
+; NOVSX-NEXT:    stvx v2, 0, r3
 ; NOVSX-NEXT:    addi r3, r1, -16
 ; NOVSX-NEXT:    lfs f0, -20(r1)
 ; NOVSX-NEXT:    lfs f1, -36(r1)
@@ -468,9 +468,9 @@ define <4 x float> @fdiv_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
 ; NOVSX-LABEL: fdiv_v4f32:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    addi r3, r1, -32
-; NOVSX-NEXT:    addi r4, r1, -48
 ; NOVSX-NEXT:    stvx v3, 0, r3
-; NOVSX-NEXT:    stvx v2, 0, r4
+; NOVSX-NEXT:    addi r3, r1, -48
+; NOVSX-NEXT:    stvx v2, 0, r3
 ; NOVSX-NEXT:    addi r3, r1, -16
 ; NOVSX-NEXT:    lfs f0, -20(r1)
 ; NOVSX-NEXT:    lfs f1, -36(r1)
@@ -649,10 +649,10 @@ define <4 x float> @fmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
 ; NOVSX-LABEL: fmadd_v4f32:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    addi r3, r1, -32
-; NOVSX-NEXT:    addi r4, r1, -48
 ; NOVSX-NEXT:    stvx v4, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -48
+; NOVSX-NEXT:    stvx v3, 0, r3
 ; NOVSX-NEXT:    addi r3, r1, -64
-; NOVSX-NEXT:    stvx v3, 0, r4
 ; NOVSX-NEXT:    stvx v2, 0, r3
 ; NOVSX-NEXT:    addi r3, r1, -16
 ; NOVSX-NEXT:    lfs f0, -20(r1)
@@ -912,12 +912,12 @@ define <4 x float> @fmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    vspltisb v5, -1
 ; NOVSX-NEXT:    addi r3, r1, -48
-; NOVSX-NEXT:    addi r4, r1, -64
-; NOVSX-NEXT:    stvx v3, 0, r3
-; NOVSX-NEXT:    addi r3, r1, -32
-; NOVSX-NEXT:    stvx v2, 0, r4
 ; NOVSX-NEXT:    vslw v5, v5, v5
+; NOVSX-NEXT:    stvx v3, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -64
 ; NOVSX-NEXT:    vsubfp v4, v5, v4
+; NOVSX-NEXT:    stvx v2, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -32
 ; NOVSX-NEXT:    stvx v4, 0, r3
 ; NOVSX-NEXT:    addi r3, r1, -16
 ; NOVSX-NEXT:    lfs f0, -36(r1)
@@ -1184,17 +1184,17 @@ define <4 x float> @fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
 ; NOVSX-LABEL: fnmadd_v4f32:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    addi r3, r1, -32
-; NOVSX-NEXT:    addi r4, r1, -48
+; NOVSX-NEXT:    vspltisb v5, -1
 ; NOVSX-NEXT:    stvx v4, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -48
+; NOVSX-NEXT:    stvx v3, 0, r3
 ; NOVSX-NEXT:    addi r3, r1, -64
-; NOVSX-NEXT:    stvx v3, 0, r4
+; NOVSX-NEXT:    vslw v3, v5, v5
 ; NOVSX-NEXT:    stvx v2, 0, r3
-; NOVSX-NEXT:    vspltisb v2, -1
 ; NOVSX-NEXT:    addi r3, r1, -16
 ; NOVSX-NEXT:    lfs f0, -20(r1)
 ; NOVSX-NEXT:    lfs f1, -36(r1)
 ; NOVSX-NEXT:    lfs f2, -52(r1)
-; NOVSX-NEXT:    vslw v2, v2, v2
 ; NOVSX-NEXT:    fmadds f0, f2, f1, f0
 ; NOVSX-NEXT:    lfs f1, -40(r1)
 ; NOVSX-NEXT:    lfs f2, -56(r1)
@@ -1212,8 +1212,8 @@ define <4 x float> @fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
 ; NOVSX-NEXT:    lfs f0, -32(r1)
 ; NOVSX-NEXT:    fmadds f0, f2, f1, f0
 ; NOVSX-NEXT:    stfs f0, -16(r1)
-; NOVSX-NEXT:    lvx v3, 0, r3
-; NOVSX-NEXT:    vsubfp v2, v2, v3
+; NOVSX-NEXT:    lvx v2, 0, r3
+; NOVSX-NEXT:    vsubfp v2, v3, v2
 ; NOVSX-NEXT:    blr
 ;
 ; SPE-LABEL: fnmadd_v4f32:
@@ -1459,12 +1459,12 @@ define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    vspltisb v5, -1
 ; NOVSX-NEXT:    addi r3, r1, -48
-; NOVSX-NEXT:    addi r4, r1, -64
-; NOVSX-NEXT:    stvx v3, 0, r3
-; NOVSX-NEXT:    addi r3, r1, -32
-; NOVSX-NEXT:    stvx v2, 0, r4
 ; NOVSX-NEXT:    vslw v5, v5, v5
+; NOVSX-NEXT:    stvx v3, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -64
 ; NOVSX-NEXT:    vsubfp v4, v5, v4
+; NOVSX-NEXT:    stvx v2, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -32
 ; NOVSX-NEXT:    stvx v4, 0, r3
 ; NOVSX-NEXT:    addi r3, r1, -16
 ; NOVSX-NEXT:    lfs f0, -36(r1)

diff  --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
index 27021b649f8730d..e3df5479f7fc348 100644
--- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
+++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
@@ -49,12 +49,12 @@ entry:
 define i128 @test_neg(ppc_fp128 %x) nounwind  {
 ; PPC64-P8-LABEL: test_neg:
 ; PPC64-P8:       # %bb.0: # %entry
-; PPC64-P8-NEXT:    li 3, 1
 ; PPC64-P8-NEXT:    mffprd 4, 2
-; PPC64-P8-NEXT:    mffprd 5, 1
-; PPC64-P8-NEXT:    rldic 6, 3, 63, 0
-; PPC64-P8-NEXT:    xor 4, 4, 6
-; PPC64-P8-NEXT:    xor 3, 5, 6
+; PPC64-P8-NEXT:    mffprd 3, 1
+; PPC64-P8-NEXT:    li 5, 1
+; PPC64-P8-NEXT:    rldic 5, 5, 63, 0
+; PPC64-P8-NEXT:    xor 3, 3, 5
+; PPC64-P8-NEXT:    xor 4, 4, 5
 ; PPC64-P8-NEXT:    blr
 ;
 ; PPC64-LABEL: test_neg:
@@ -199,13 +199,13 @@ define i128 @test_copysign_const(ppc_fp128 %x) nounwind  {
 ; PPC64-P8-LABEL: test_copysign_const:
 ; PPC64-P8:       # %bb.0: # %entry
 ; PPC64-P8-NEXT:    mffprd 3, 1
-; PPC64-P8-NEXT:    li 4, 16399
 ; PPC64-P8-NEXT:    li 5, 3019
-; PPC64-P8-NEXT:    rldicr 6, 3, 0, 0
-; PPC64-P8-NEXT:    rldic 3, 4, 48, 1
-; PPC64-P8-NEXT:    rldic 4, 5, 52, 0
-; PPC64-P8-NEXT:    or 3, 6, 3
-; PPC64-P8-NEXT:    xor 4, 6, 4
+; PPC64-P8-NEXT:    rldic 5, 5, 52, 0
+; PPC64-P8-NEXT:    rldicr 4, 3, 0, 0
+; PPC64-P8-NEXT:    li 3, 16399
+; PPC64-P8-NEXT:    rldic 3, 3, 48, 1
+; PPC64-P8-NEXT:    or 3, 4, 3
+; PPC64-P8-NEXT:    xor 4, 4, 5
 ; PPC64-P8-NEXT:    blr
 ;
 ; PPC64-LABEL: test_copysign_const:

diff  --git a/llvm/test/CodeGen/PowerPC/fpscr-intrinsics.ll b/llvm/test/CodeGen/PowerPC/fpscr-intrinsics.ll
index c326ec5bd66eb4a..a2d5590268548fc 100644
--- a/llvm/test/CodeGen/PowerPC/fpscr-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/fpscr-intrinsics.ll
@@ -65,9 +65,9 @@ define dso_local void @callmtfsf(i32 zeroext %a) local_unnamed_addr {
 ;
 ; CHECK-AIX32-LABEL: callmtfsf:
 ; CHECK-AIX32:       # %bb.0: # %entry
-; CHECK-AIX32-NEXT:    addi 4, 1, -4
 ; CHECK-AIX32-NEXT:    stw 3, -4(1)
-; CHECK-AIX32-NEXT:    lfiwzx 0, 0, 4
+; CHECK-AIX32-NEXT:    addi 3, 1, -4
+; CHECK-AIX32-NEXT:    lfiwzx 0, 0, 3
 ; CHECK-AIX32-NEXT:    xscvuxddp 0, 0
 ; CHECK-AIX32-NEXT:    mtfsf 7, 0
 ; CHECK-AIX32-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/frounds.ll b/llvm/test/CodeGen/PowerPC/frounds.ll
index 2bbdbf37e54e9ef..c1f7181b30f3f6e 100644
--- a/llvm/test/CodeGen/PowerPC/frounds.ll
+++ b/llvm/test/CodeGen/PowerPC/frounds.ll
@@ -43,10 +43,10 @@ define i32 @foo() {
 ; PPC64LE-NEXT:    mffs 0
 ; PPC64LE-NEXT:    stfd 0, -16(1)
 ; PPC64LE-NEXT:    lwz 3, -16(1)
-; PPC64LE-NEXT:    not 4, 3
-; PPC64LE-NEXT:    clrlwi 3, 3, 30
-; PPC64LE-NEXT:    rlwinm 4, 4, 31, 31, 31
-; PPC64LE-NEXT:    xor 3, 3, 4
+; PPC64LE-NEXT:    clrlwi 4, 3, 30
+; PPC64LE-NEXT:    not 3, 3
+; PPC64LE-NEXT:    rlwinm 3, 3, 31, 31, 31
+; PPC64LE-NEXT:    xor 3, 4, 3
 ; PPC64LE-NEXT:    stw 3, -8(1)
 ; PPC64LE-NEXT:    stw 3, -4(1)
 ; PPC64LE-NEXT:    blr
@@ -55,10 +55,10 @@ define i32 @foo() {
 ; DM:       # %bb.0: # %entry
 ; DM-NEXT:    mffs 0
 ; DM-NEXT:    mffprd 3, 0
-; DM-NEXT:    not 4, 3
-; DM-NEXT:    clrlwi 3, 3, 30
-; DM-NEXT:    rlwinm 4, 4, 31, 31, 31
-; DM-NEXT:    xor 3, 3, 4
+; DM-NEXT:    clrlwi 4, 3, 30
+; DM-NEXT:    not 3, 3
+; DM-NEXT:    rlwinm 3, 3, 31, 31, 31
+; DM-NEXT:    xor 3, 4, 3
 ; DM-NEXT:    stw 3, -8(1)
 ; DM-NEXT:    stw 3, -4(1)
 ; DM-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
index 0a622fd68d6b3f6..72f8af9dfed5d4f 100644
--- a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
@@ -50,27 +50,16 @@ define i64 @rotl_i64_const_shift(i64 %x) {
 ; When first 2 operands match, it's a rotate (by variable amount).
 
 define i16 @rotl_i16(i16 %x, i16 %z) {
-; CHECK32-LABEL: rotl_i16:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    clrlwi 6, 4, 28
-; CHECK32-NEXT:    neg 4, 4
-; CHECK32-NEXT:    clrlwi 5, 3, 16
-; CHECK32-NEXT:    clrlwi 4, 4, 28
-; CHECK32-NEXT:    slw 3, 3, 6
-; CHECK32-NEXT:    srw 4, 5, 4
-; CHECK32-NEXT:    or 3, 3, 4
-; CHECK32-NEXT:    blr
-;
-; CHECK64-LABEL: rotl_i16:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    neg 5, 4
-; CHECK64-NEXT:    clrlwi 6, 3, 16
-; CHECK64-NEXT:    clrlwi 4, 4, 28
-; CHECK64-NEXT:    clrlwi 5, 5, 28
-; CHECK64-NEXT:    slw 3, 3, 4
-; CHECK64-NEXT:    srw 4, 6, 5
-; CHECK64-NEXT:    or 3, 3, 4
-; CHECK64-NEXT:    blr
+; CHECK-LABEL: rotl_i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    clrlwi 6, 4, 28
+; CHECK-NEXT:    neg 4, 4
+; CHECK-NEXT:    clrlwi 5, 3, 16
+; CHECK-NEXT:    clrlwi 4, 4, 28
+; CHECK-NEXT:    slw 3, 3, 6
+; CHECK-NEXT:    srw 4, 5, 4
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    blr
   %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
   ret i16 %f
 }
@@ -210,27 +199,16 @@ define i32 @rotr_i32_const_shift(i32 %x) {
 ; When first 2 operands match, it's a rotate (by variable amount).
 
 define i16 @rotr_i16(i16 %x, i16 %z) {
-; CHECK32-LABEL: rotr_i16:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    clrlwi 6, 4, 28
-; CHECK32-NEXT:    neg 4, 4
-; CHECK32-NEXT:    clrlwi 5, 3, 16
-; CHECK32-NEXT:    clrlwi 4, 4, 28
-; CHECK32-NEXT:    srw 5, 5, 6
-; CHECK32-NEXT:    slw 3, 3, 4
-; CHECK32-NEXT:    or 3, 5, 3
-; CHECK32-NEXT:    blr
-;
-; CHECK64-LABEL: rotr_i16:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    neg 5, 4
-; CHECK64-NEXT:    clrlwi 6, 3, 16
-; CHECK64-NEXT:    clrlwi 4, 4, 28
-; CHECK64-NEXT:    clrlwi 5, 5, 28
-; CHECK64-NEXT:    srw 4, 6, 4
-; CHECK64-NEXT:    slw 3, 3, 5
-; CHECK64-NEXT:    or 3, 4, 3
-; CHECK64-NEXT:    blr
+; CHECK-LABEL: rotr_i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    clrlwi 6, 4, 28
+; CHECK-NEXT:    neg 4, 4
+; CHECK-NEXT:    clrlwi 5, 3, 16
+; CHECK-NEXT:    clrlwi 4, 4, 28
+; CHECK-NEXT:    srw 5, 5, 6
+; CHECK-NEXT:    slw 3, 3, 4
+; CHECK-NEXT:    or 3, 5, 3
+; CHECK-NEXT:    blr
   %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
   ret i16 %f
 }

diff  --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
index 24fe892a5a5e4b1..c766c950f0a551c 100644
--- a/llvm/test/CodeGen/PowerPC/funnel-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
@@ -19,23 +19,14 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 ; General case - all operands can be variables.
 
 define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
-; CHECK32-LABEL: fshl_i32:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    clrlwi 5, 5, 27
-; CHECK32-NEXT:    slw 3, 3, 5
-; CHECK32-NEXT:    subfic 5, 5, 32
-; CHECK32-NEXT:    srw 4, 4, 5
-; CHECK32-NEXT:    or 3, 3, 4
-; CHECK32-NEXT:    blr
-;
-; CHECK64-LABEL: fshl_i32:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    clrlwi 5, 5, 27
-; CHECK64-NEXT:    subfic 6, 5, 32
-; CHECK64-NEXT:    slw 3, 3, 5
-; CHECK64-NEXT:    srw 4, 4, 6
-; CHECK64-NEXT:    or 3, 3, 4
-; CHECK64-NEXT:    blr
+; CHECK-LABEL: fshl_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    clrlwi 5, 5, 27
+; CHECK-NEXT:    slw 3, 3, 5
+; CHECK-NEXT:    subfic 5, 5, 32
+; CHECK-NEXT:    srw 4, 4, 5
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    blr
   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
   ret i32 %f
 }
@@ -89,9 +80,9 @@ define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
 ; CHECK64-LABEL: fshl_i64:
 ; CHECK64:       # %bb.0:
 ; CHECK64-NEXT:    clrlwi 5, 5, 26
-; CHECK64-NEXT:    subfic 6, 5, 64
 ; CHECK64-NEXT:    sld 3, 3, 5
-; CHECK64-NEXT:    srd 4, 4, 6
+; CHECK64-NEXT:    subfic 5, 5, 64
+; CHECK64-NEXT:    srd 4, 4, 5
 ; CHECK64-NEXT:    or 3, 3, 4
 ; CHECK64-NEXT:    blr
   %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
@@ -213,16 +204,17 @@ define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
 ; CHECK64:       # %bb.0:
 ; CHECK64-NEXT:    andi. 8, 7, 64
 ; CHECK64-NEXT:    clrlwi 7, 7, 26
-; CHECK64-NEXT:    iseleq 5, 6, 5
 ; CHECK64-NEXT:    subfic 8, 7, 64
+; CHECK64-NEXT:    iseleq 5, 6, 5
 ; CHECK64-NEXT:    iseleq 6, 3, 6
 ; CHECK64-NEXT:    iseleq 3, 4, 3
-; CHECK64-NEXT:    srd 4, 5, 8
-; CHECK64-NEXT:    sld 5, 6, 7
+; CHECK64-NEXT:    srd 5, 5, 8
+; CHECK64-NEXT:    sld 9, 6, 7
 ; CHECK64-NEXT:    srd 6, 6, 8
-; CHECK64-NEXT:    sld 7, 3, 7
-; CHECK64-NEXT:    or 3, 5, 4
-; CHECK64-NEXT:    or 4, 7, 6
+; CHECK64-NEXT:    sld 3, 3, 7
+; CHECK64-NEXT:    or 5, 9, 5
+; CHECK64-NEXT:    or 4, 3, 6
+; CHECK64-NEXT:    mr 3, 5
 ; CHECK64-NEXT:    blr
   %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
   ret i128 %f
@@ -352,20 +344,20 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
 ;
 ; CHECK64-LABEL: fshl_i37:
 ; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    lis 6, 1771
-; CHECK64-NEXT:    clrldi 7, 5, 27
-; CHECK64-NEXT:    ori 6, 6, 15941
+; CHECK64-NEXT:    lis 7, 1771
+; CHECK64-NEXT:    clrldi 6, 5, 27
 ; CHECK64-NEXT:    sldi 4, 4, 27
-; CHECK64-NEXT:    rldic 6, 6, 32, 5
-; CHECK64-NEXT:    oris 6, 6, 12398
-; CHECK64-NEXT:    ori 6, 6, 46053
-; CHECK64-NEXT:    mulhdu 6, 7, 6
+; CHECK64-NEXT:    ori 7, 7, 15941
+; CHECK64-NEXT:    rldic 7, 7, 32, 5
+; CHECK64-NEXT:    oris 7, 7, 12398
+; CHECK64-NEXT:    ori 7, 7, 46053
+; CHECK64-NEXT:    mulhdu 6, 6, 7
 ; CHECK64-NEXT:    mulli 6, 6, 37
 ; CHECK64-NEXT:    sub 5, 5, 6
 ; CHECK64-NEXT:    clrlwi 5, 5, 26
-; CHECK64-NEXT:    subfic 6, 5, 64
 ; CHECK64-NEXT:    sld 3, 3, 5
-; CHECK64-NEXT:    srd 4, 4, 6
+; CHECK64-NEXT:    subfic 5, 5, 64
+; CHECK64-NEXT:    srd 4, 4, 5
 ; CHECK64-NEXT:    or 3, 3, 4
 ; CHECK64-NEXT:    blr
   %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
@@ -448,23 +440,14 @@ define i8 @fshl_i8_const_fold() {
 ; General case - all operands can be variables.
 
 define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
-; CHECK32-LABEL: fshr_i32:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    clrlwi 5, 5, 27
-; CHECK32-NEXT:    srw 4, 4, 5
-; CHECK32-NEXT:    subfic 5, 5, 32
-; CHECK32-NEXT:    slw 3, 3, 5
-; CHECK32-NEXT:    or 3, 3, 4
-; CHECK32-NEXT:    blr
-;
-; CHECK64-LABEL: fshr_i32:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    clrlwi 5, 5, 27
-; CHECK64-NEXT:    subfic 6, 5, 32
-; CHECK64-NEXT:    srw 4, 4, 5
-; CHECK64-NEXT:    slw 3, 3, 6
-; CHECK64-NEXT:    or 3, 3, 4
-; CHECK64-NEXT:    blr
+; CHECK-LABEL: fshr_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    clrlwi 5, 5, 27
+; CHECK-NEXT:    srw 4, 4, 5
+; CHECK-NEXT:    subfic 5, 5, 32
+; CHECK-NEXT:    slw 3, 3, 5
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    blr
   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
   ret i32 %f
 }
@@ -518,9 +501,9 @@ define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
 ; CHECK64-LABEL: fshr_i64:
 ; CHECK64:       # %bb.0:
 ; CHECK64-NEXT:    clrlwi 5, 5, 26
-; CHECK64-NEXT:    subfic 6, 5, 64
 ; CHECK64-NEXT:    srd 4, 4, 5
-; CHECK64-NEXT:    sld 3, 3, 6
+; CHECK64-NEXT:    subfic 5, 5, 64
+; CHECK64-NEXT:    sld 3, 3, 5
 ; CHECK64-NEXT:    or 3, 3, 4
 ; CHECK64-NEXT:    blr
   %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
@@ -648,21 +631,21 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
 ;
 ; CHECK64-LABEL: fshr_i37:
 ; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    lis 6, 1771
-; CHECK64-NEXT:    clrldi 7, 5, 27
-; CHECK64-NEXT:    ori 6, 6, 15941
+; CHECK64-NEXT:    lis 7, 1771
+; CHECK64-NEXT:    clrldi 6, 5, 27
 ; CHECK64-NEXT:    sldi 4, 4, 27
-; CHECK64-NEXT:    rldic 6, 6, 32, 5
-; CHECK64-NEXT:    oris 6, 6, 12398
-; CHECK64-NEXT:    ori 6, 6, 46053
-; CHECK64-NEXT:    mulhdu 6, 7, 6
+; CHECK64-NEXT:    ori 7, 7, 15941
+; CHECK64-NEXT:    rldic 7, 7, 32, 5
+; CHECK64-NEXT:    oris 7, 7, 12398
+; CHECK64-NEXT:    ori 7, 7, 46053
+; CHECK64-NEXT:    mulhdu 6, 6, 7
 ; CHECK64-NEXT:    mulli 6, 6, 37
 ; CHECK64-NEXT:    sub 5, 5, 6
 ; CHECK64-NEXT:    addi 5, 5, 27
 ; CHECK64-NEXT:    clrlwi 5, 5, 26
-; CHECK64-NEXT:    subfic 6, 5, 64
 ; CHECK64-NEXT:    srd 4, 4, 5
-; CHECK64-NEXT:    sld 3, 3, 6
+; CHECK64-NEXT:    subfic 5, 5, 64
+; CHECK64-NEXT:    sld 3, 3, 5
 ; CHECK64-NEXT:    or 3, 3, 4
 ; CHECK64-NEXT:    blr
   %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)

diff  --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
index b838f8e935184f5..13f70f420400bf1 100644
--- a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
+++ b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
@@ -641,8 +641,8 @@ define <4 x float> @test_extend32_vec4(ptr %p) #0 {
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
 ; P8-NEXT:    stdu r1, -112(r1)
-; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    li r4, 48
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
 ; P8-NEXT:    mr r30, r3
 ; P8-NEXT:    lhz r3, 6(r3)
@@ -665,19 +665,19 @@ define <4 x float> @test_extend32_vec4(ptr %p) #0 {
 ; P8-NEXT:    xxlor vs61, f1, f1
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P8-NEXT:    xxmrghd vs0, vs63, vs62
 ; P8-NEXT:    li r3, 80
+; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8-NEXT:    xxmrghd vs0, vs61, vs1
+; P8-NEXT:    xxmrghd vs1, vs63, vs62
 ; P8-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
-; P8-NEXT:    xxmrghd vs1, vs61, vs1
 ; P8-NEXT:    lxvd2x vs63, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    li r3, 64
+; P8-NEXT:    xvcvdpsp vs34, vs0
+; P8-NEXT:    xvcvdpsp vs35, vs1
 ; P8-NEXT:    lxvd2x vs62, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    li r3, 48
-; P8-NEXT:    xvcvdpsp vs34, vs0
 ; P8-NEXT:    lxvd2x vs61, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    xvcvdpsp vs35, vs1
-; P8-NEXT:    vmrgew v2, v2, v3
+; P8-NEXT:    vmrgew v2, v3, v2
 ; P8-NEXT:    addi r1, r1, 112
 ; P8-NEXT:    ld r0, 16(r1)
 ; P8-NEXT:    mtlr r0
@@ -750,8 +750,8 @@ define <4 x double> @test_extend64_vec4(ptr %p) #0 {
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
 ; P8-NEXT:    stdu r1, -112(r1)
-; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    li r4, 48
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
 ; P8-NEXT:    mr r30, r3
 ; P8-NEXT:    lhz r3, 6(r3)
@@ -1005,22 +1005,22 @@ define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 {
 ; P8-NEXT:    stdu r1, -128(r1)
 ; P8-NEXT:    li r3, 48
 ; P8-NEXT:    std r0, 144(r1)
-; P8-NEXT:    xxswapd vs1, vs34
 ; P8-NEXT:    std r27, 88(r1) # 8-byte Folded Spill
+; P8-NEXT:    xxswapd vs1, vs34
 ; P8-NEXT:    std r28, 96(r1) # 8-byte Folded Spill
 ; P8-NEXT:    std r29, 104(r1) # 8-byte Folded Spill
 ; P8-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; P8-NEXT:    stxvd2x vs62, r1, r3 # 16-byte Folded Spill
-; P8-NEXT:    li r3, 64
 ; P8-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
 ; P8-NEXT:    mr r30, r7
+; P8-NEXT:    stxvd2x vs62, r1, r3 # 16-byte Folded Spill
+; P8-NEXT:    li r3, 64
 ; P8-NEXT:    vmr v30, v2
 ; P8-NEXT:    stxvd2x vs63, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    vmr v31, v3
 ; P8-NEXT:    bl __truncdfhf2
 ; P8-NEXT:    nop
-; P8-NEXT:    xxswapd vs1, vs63
 ; P8-NEXT:    mr r29, r3
+; P8-NEXT:    xxswapd vs1, vs63
 ; P8-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; P8-NEXT:    bl __truncdfhf2
 ; P8-NEXT:    nop
@@ -1230,14 +1230,15 @@ define half @PR40273(half) #0 {
 ; P8-NEXT:    clrldi r3, r3, 48
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
-; P8-NEXT:    xxlxor f0, f0, f0
-; P8-NEXT:    fcmpu cr0, f1, f0
+; P8-NEXT:    fmr f0, f1
+; P8-NEXT:    xxlxor f1, f1, f1
+; P8-NEXT:    fcmpu cr0, f0, f1
 ; P8-NEXT:    beq cr0, .LBB20_2
 ; P8-NEXT:  # %bb.1:
 ; P8-NEXT:    vspltisw v2, 1
-; P8-NEXT:    xvcvsxwdp vs0, vs34
+; P8-NEXT:    xvcvsxwdp vs1, vs34
 ; P8-NEXT:  .LBB20_2:
-; P8-NEXT:    fmr f1, f0
+; P8-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; P8-NEXT:    addi r1, r1, 32
 ; P8-NEXT:    ld r0, 16(r1)
 ; P8-NEXT:    mtlr r0

diff  --git a/llvm/test/CodeGen/PowerPC/huge-frame-call.ll b/llvm/test/CodeGen/PowerPC/huge-frame-call.ll
index 1ca4e1f6586a841..acbd81eecd8b1ff 100644
--- a/llvm/test/CodeGen/PowerPC/huge-frame-call.ll
+++ b/llvm/test/CodeGen/PowerPC/huge-frame-call.ll
@@ -28,14 +28,14 @@ define dso_local signext i32 @main() nounwind {
 ; CHECK-LE-NEXT:    bl pluto
 ; CHECK-LE-NEXT:    nop
 ; CHECK-LE-NEXT:    addis 3, 2, global.1@toc@ha
-; CHECK-LE-NEXT:    li 4, 0
+; CHECK-LE-NEXT:    li 4, 257
 ; CHECK-LE-NEXT:    li 7, 0
 ; CHECK-LE-NEXT:    li 8, 0
 ; CHECK-LE-NEXT:    li 9, 0
 ; CHECK-LE-NEXT:    addi 5, 3, global.1@toc@l
-; CHECK-LE-NEXT:    ori 6, 4, 32768
+; CHECK-LE-NEXT:    li 3, 0
+; CHECK-LE-NEXT:    ori 6, 3, 32768
 ; CHECK-LE-NEXT:    li 3, 6
-; CHECK-LE-NEXT:    li 4, 257
 ; CHECK-LE-NEXT:    bl snork
 ; CHECK-LE-NEXT:    nop
 ; CHECK-LE-NEXT:    mr 30, 3

diff  --git a/llvm/test/CodeGen/PowerPC/huge-frame-size.ll b/llvm/test/CodeGen/PowerPC/huge-frame-size.ll
index a3490ca9f48e734..f1039df6f549ae6 100644
--- a/llvm/test/CodeGen/PowerPC/huge-frame-size.ll
+++ b/llvm/test/CodeGen/PowerPC/huge-frame-size.ll
@@ -20,13 +20,13 @@ define void @foo(i8 %x) {
 ; CHECK-LE-NEXT:    stdux 1, 1, 0
 ; CHECK-LE-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LE-NEXT:    li 4, 1
-; CHECK-LE-NEXT:    li 5, -1
-; CHECK-LE-NEXT:    addi 6, 1, 32
+; CHECK-LE-NEXT:    addi 5, 1, 32
 ; CHECK-LE-NEXT:    stb 3, 32(1)
 ; CHECK-LE-NEXT:    rldic 4, 4, 31, 32
-; CHECK-LE-NEXT:    rldic 5, 5, 0, 32
-; CHECK-LE-NEXT:    stbx 3, 6, 4
-; CHECK-LE-NEXT:    stbx 3, 6, 5
+; CHECK-LE-NEXT:    stbx 3, 5, 4
+; CHECK-LE-NEXT:    li 4, -1
+; CHECK-LE-NEXT:    rldic 4, 4, 0, 32
+; CHECK-LE-NEXT:    stbx 3, 5, 4
 ; CHECK-LE-NEXT:    ld 1, 0(1)
 ; CHECK-LE-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/int128_ldst.ll b/llvm/test/CodeGen/PowerPC/int128_ldst.ll
index ef6a82334e28bbe..7f5f6a181c1b01c 100644
--- a/llvm/test/CodeGen/PowerPC/int128_ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/int128_ldst.ll
@@ -7,19 +7,19 @@
 ; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr9  \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P9
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr9  \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P9
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8  \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P8
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8  \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P8
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
 ; RUN:   -mcpu=pwr8  \
-; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10,CHECK-P8
+; RUN:   < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PREP10
 
 ; Function Attrs: norecurse nounwind readonly uwtable willreturn
 define dso_local i128 @ld_0___int128___int128(i64 %ptr) {
@@ -44,23 +44,14 @@ define dso_local i128 @ld_unalign16___int128___int128(ptr nocapture readonly %pt
 ; CHECK-P10-NEXT:    mr 3, 5
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_unalign16___int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li 4, 1
-; CHECK-P9-NEXT:    ldx 5, 3, 4
-; CHECK-P9-NEXT:    li 4, 9
-; CHECK-P9-NEXT:    ldx 4, 3, 4
-; CHECK-P9-NEXT:    mr 3, 5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_unalign16___int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li 4, 1
-; CHECK-P8-NEXT:    li 6, 9
-; CHECK-P8-NEXT:    ldx 5, 3, 4
-; CHECK-P8-NEXT:    ldx 4, 3, 6
-; CHECK-P8-NEXT:    mr 3, 5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_unalign16___int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    li 4, 1
+; CHECK-PREP10-NEXT:    ldx 5, 3, 4
+; CHECK-PREP10-NEXT:    li 4, 9
+; CHECK-PREP10-NEXT:    ldx 4, 3, 4
+; CHECK-PREP10-NEXT:    mr 3, 5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i8, ptr %ptr, i64 1
   %0 = load i128, ptr %add.ptr, align 16
@@ -256,23 +247,14 @@ define dso_local i128 @ld_disjoint_unalign16___int128___int128(i64 %ptr) {
 ; CHECK-P10-NEXT:    pld 4, 14(4), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_unalign16___int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    rldicr 4, 3, 0, 51
-; CHECK-P9-NEXT:    li 3, 6
-; CHECK-P9-NEXT:    li 5, 14
-; CHECK-P9-NEXT:    ldx 3, 4, 3
-; CHECK-P9-NEXT:    ldx 4, 4, 5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_unalign16___int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li 4, 6
-; CHECK-P8-NEXT:    rldicr 5, 3, 0, 51
-; CHECK-P8-NEXT:    li 6, 14
-; CHECK-P8-NEXT:    ldx 3, 5, 4
-; CHECK-P8-NEXT:    ldx 4, 5, 6
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_unalign16___int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    rldicr 4, 3, 0, 51
+; CHECK-PREP10-NEXT:    li 3, 6
+; CHECK-PREP10-NEXT:    li 5, 14
+; CHECK-PREP10-NEXT:    ldx 3, 4, 3
+; CHECK-PREP10-NEXT:    ldx 4, 4, 5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -4096
   %or = or i64 %and, 6
@@ -322,25 +304,15 @@ define dso_local i128 @ld_disjoint_unalign32___int128___int128(i64 %ptr) {
 ; CHECK-P10-NEXT:    pld 4, 100007(4), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_unalign32___int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis 5, 1
-; CHECK-P9-NEXT:    rldicr 4, 3, 0, 43
-; CHECK-P9-NEXT:    ori 3, 5, 34463
-; CHECK-P9-NEXT:    ori 5, 5, 34471
-; CHECK-P9-NEXT:    ldx 3, 4, 3
-; CHECK-P9-NEXT:    ldx 4, 4, 5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_unalign32___int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis 4, 1
-; CHECK-P8-NEXT:    rldicr 5, 3, 0, 43
-; CHECK-P8-NEXT:    ori 3, 4, 34463
-; CHECK-P8-NEXT:    ori 4, 4, 34471
-; CHECK-P8-NEXT:    ldx 3, 5, 3
-; CHECK-P8-NEXT:    ldx 4, 5, 4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_unalign32___int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis 5, 1
+; CHECK-PREP10-NEXT:    rldicr 4, 3, 0, 43
+; CHECK-PREP10-NEXT:    ori 3, 5, 34463
+; CHECK-PREP10-NEXT:    ori 5, 5, 34471
+; CHECK-PREP10-NEXT:    ldx 3, 4, 3
+; CHECK-PREP10-NEXT:    ldx 4, 4, 5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1048576
   %or = or i64 %and, 99999
@@ -420,29 +392,17 @@ define dso_local i128 @ld_disjoint_unalign64___int128___int128(i64 %ptr) {
 ; CHECK-P10-NEXT:    ldx 4, 4, 6
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_unalign64___int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    rldicr 4, 3, 0, 23
-; CHECK-P9-NEXT:    li 3, 29
-; CHECK-P9-NEXT:    rldic 3, 3, 35, 24
-; CHECK-P9-NEXT:    oris 5, 3, 54437
-; CHECK-P9-NEXT:    ori 3, 5, 4097
-; CHECK-P9-NEXT:    ori 5, 5, 4105
-; CHECK-P9-NEXT:    ldx 3, 4, 3
-; CHECK-P9-NEXT:    ldx 4, 4, 5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_unalign64___int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li 4, 29
-; CHECK-P8-NEXT:    rldicr 5, 3, 0, 23
-; CHECK-P8-NEXT:    rldic 4, 4, 35, 24
-; CHECK-P8-NEXT:    oris 3, 4, 54437
-; CHECK-P8-NEXT:    ori 4, 3, 4097
-; CHECK-P8-NEXT:    ori 6, 3, 4105
-; CHECK-P8-NEXT:    ldx 3, 5, 4
-; CHECK-P8-NEXT:    ldx 4, 5, 6
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_unalign64___int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    rldicr 4, 3, 0, 23
+; CHECK-PREP10-NEXT:    li 3, 29
+; CHECK-PREP10-NEXT:    rldic 3, 3, 35, 24
+; CHECK-PREP10-NEXT:    oris 5, 3, 54437
+; CHECK-PREP10-NEXT:    ori 3, 5, 4097
+; CHECK-PREP10-NEXT:    ori 5, 5, 4105
+; CHECK-PREP10-NEXT:    ldx 3, 4, 3
+; CHECK-PREP10-NEXT:    ldx 4, 4, 5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1099511627776
   %or = or i64 %and, 1000000000001
@@ -465,33 +425,19 @@ define dso_local i128 @ld_disjoint_align64___int128___int128(i64 %ptr) {
 ; CHECK-P10-NEXT:    ldx 4, 4, 6
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align64___int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li 5, 29
-; CHECK-P9-NEXT:    rldicr 4, 3, 0, 23
-; CHECK-P9-NEXT:    lis 3, 3725
-; CHECK-P9-NEXT:    rldic 5, 5, 35, 24
-; CHECK-P9-NEXT:    ori 3, 3, 19025
-; CHECK-P9-NEXT:    oris 5, 5, 54437
-; CHECK-P9-NEXT:    rldic 3, 3, 12, 24
-; CHECK-P9-NEXT:    ori 5, 5, 4104
-; CHECK-P9-NEXT:    ldx 3, 4, 3
-; CHECK-P9-NEXT:    ldx 4, 4, 5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align64___int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li 4, 29
-; CHECK-P8-NEXT:    rldicr 5, 3, 0, 23
-; CHECK-P8-NEXT:    lis 3, 3725
-; CHECK-P8-NEXT:    rldic 4, 4, 35, 24
-; CHECK-P8-NEXT:    ori 3, 3, 19025
-; CHECK-P8-NEXT:    oris 4, 4, 54437
-; CHECK-P8-NEXT:    rldic 3, 3, 12, 24
-; CHECK-P8-NEXT:    ori 4, 4, 4104
-; CHECK-P8-NEXT:    ldx 3, 5, 3
-; CHECK-P8-NEXT:    ldx 4, 5, 4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align64___int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    li 5, 29
+; CHECK-PREP10-NEXT:    rldicr 4, 3, 0, 23
+; CHECK-PREP10-NEXT:    lis 3, 3725
+; CHECK-PREP10-NEXT:    rldic 5, 5, 35, 24
+; CHECK-PREP10-NEXT:    ori 3, 3, 19025
+; CHECK-PREP10-NEXT:    oris 5, 5, 54437
+; CHECK-PREP10-NEXT:    rldic 3, 3, 12, 24
+; CHECK-PREP10-NEXT:    ori 5, 5, 4104
+; CHECK-PREP10-NEXT:    ldx 3, 4, 3
+; CHECK-PREP10-NEXT:    ldx 4, 4, 5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1099511627776
   %or = or i64 %and, 1000000000000
@@ -536,23 +482,14 @@ define dso_local i128 @ld_cst_unalign32___int128___int128() {
 ; CHECK-P10-NEXT:    ld 4, 0(4)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_cst_unalign32___int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis 4, 1
-; CHECK-P9-NEXT:    ori 3, 4, 34463
-; CHECK-P9-NEXT:    ori 4, 4, 34471
-; CHECK-P9-NEXT:    ld 3, 0(3)
-; CHECK-P9-NEXT:    ld 4, 0(4)
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_cst_unalign32___int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis 3, 1
-; CHECK-P8-NEXT:    ori 4, 3, 34463
-; CHECK-P8-NEXT:    ori 5, 3, 34471
-; CHECK-P8-NEXT:    ld 3, 0(4)
-; CHECK-P8-NEXT:    ld 4, 0(5)
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_cst_unalign32___int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis 4, 1
+; CHECK-PREP10-NEXT:    ori 3, 4, 34463
+; CHECK-PREP10-NEXT:    ori 4, 4, 34471
+; CHECK-PREP10-NEXT:    ld 3, 0(3)
+; CHECK-PREP10-NEXT:    ld 4, 0(4)
+; CHECK-PREP10-NEXT:    blr
 entry:
   %0 = load i128, ptr inttoptr (i64 99999 to ptr), align 16
   ret i128 %0
@@ -584,27 +521,16 @@ define dso_local i128 @ld_cst_unalign64___int128___int128() {
 ; CHECK-P10-NEXT:    ld 4, 0(5)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_cst_unalign64___int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li 3, 29
-; CHECK-P9-NEXT:    rldic 3, 3, 35, 24
-; CHECK-P9-NEXT:    oris 4, 3, 54437
-; CHECK-P9-NEXT:    ori 3, 4, 4097
-; CHECK-P9-NEXT:    ori 4, 4, 4105
-; CHECK-P9-NEXT:    ld 3, 0(3)
-; CHECK-P9-NEXT:    ld 4, 0(4)
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_cst_unalign64___int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li 3, 29
-; CHECK-P8-NEXT:    rldic 3, 3, 35, 24
-; CHECK-P8-NEXT:    oris 3, 3, 54437
-; CHECK-P8-NEXT:    ori 4, 3, 4097
-; CHECK-P8-NEXT:    ori 5, 3, 4105
-; CHECK-P8-NEXT:    ld 3, 0(4)
-; CHECK-P8-NEXT:    ld 4, 0(5)
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_cst_unalign64___int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    li 3, 29
+; CHECK-PREP10-NEXT:    rldic 3, 3, 35, 24
+; CHECK-PREP10-NEXT:    oris 4, 3, 54437
+; CHECK-PREP10-NEXT:    ori 3, 4, 4097
+; CHECK-PREP10-NEXT:    ori 4, 4, 4105
+; CHECK-PREP10-NEXT:    ld 3, 0(3)
+; CHECK-PREP10-NEXT:    ld 4, 0(4)
+; CHECK-PREP10-NEXT:    blr
 entry:
   %0 = load i128, ptr inttoptr (i64 1000000000001 to ptr), align 16
   ret i128 %0
@@ -623,31 +549,18 @@ define dso_local i128 @ld_cst_align64___int128___int128() {
 ; CHECK-P10-NEXT:    ld 3, 0(3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_cst_align64___int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li 4, 29
-; CHECK-P9-NEXT:    lis 3, 3725
-; CHECK-P9-NEXT:    rldic 4, 4, 35, 24
-; CHECK-P9-NEXT:    ori 3, 3, 19025
-; CHECK-P9-NEXT:    oris 4, 4, 54437
-; CHECK-P9-NEXT:    rldic 3, 3, 12, 24
-; CHECK-P9-NEXT:    ori 4, 4, 4104
-; CHECK-P9-NEXT:    ld 3, 0(3)
-; CHECK-P9-NEXT:    ld 4, 0(4)
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_cst_align64___int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li 3, 29
-; CHECK-P8-NEXT:    lis 4, 3725
-; CHECK-P8-NEXT:    rldic 3, 3, 35, 24
-; CHECK-P8-NEXT:    ori 4, 4, 19025
-; CHECK-P8-NEXT:    oris 3, 3, 54437
-; CHECK-P8-NEXT:    rldic 4, 4, 12, 24
-; CHECK-P8-NEXT:    ori 5, 3, 4104
-; CHECK-P8-NEXT:    ld 3, 0(4)
-; CHECK-P8-NEXT:    ld 4, 0(5)
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_cst_align64___int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    li 4, 29
+; CHECK-PREP10-NEXT:    lis 3, 3725
+; CHECK-PREP10-NEXT:    rldic 4, 4, 35, 24
+; CHECK-PREP10-NEXT:    ori 3, 3, 19025
+; CHECK-PREP10-NEXT:    oris 4, 4, 54437
+; CHECK-PREP10-NEXT:    rldic 3, 3, 12, 24
+; CHECK-PREP10-NEXT:    ori 4, 4, 4104
+; CHECK-PREP10-NEXT:    ld 3, 0(3)
+; CHECK-PREP10-NEXT:    ld 4, 0(4)
+; CHECK-PREP10-NEXT:    blr
 entry:
   %0 = load i128, ptr inttoptr (i64 1000000000000 to ptr), align 4096
   ret i128 %0
@@ -674,21 +587,13 @@ define dso_local void @st_unalign16__int128___int128(ptr nocapture %ptr, i128 %s
 ; CHECK-P10-NEXT:    pstd 4, 1(3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_unalign16__int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li 6, 9
-; CHECK-P9-NEXT:    stdx 5, 3, 6
-; CHECK-P9-NEXT:    li 5, 1
-; CHECK-P9-NEXT:    stdx 4, 3, 5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_unalign16__int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li 6, 9
-; CHECK-P8-NEXT:    li 7, 1
-; CHECK-P8-NEXT:    stdx 5, 3, 6
-; CHECK-P8-NEXT:    stdx 4, 3, 7
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_unalign16__int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    li 6, 9
+; CHECK-PREP10-NEXT:    stdx 5, 3, 6
+; CHECK-PREP10-NEXT:    li 5, 1
+; CHECK-PREP10-NEXT:    stdx 4, 3, 5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i8, ptr %ptr, i64 1
   store i128 %str, ptr %add.ptr, align 16
@@ -874,23 +779,14 @@ define dso_local void @st_disjoint_unalign16__int128___int128(i64 %ptr, i128 %st
 ; CHECK-P10-NEXT:    pstd 4, 6(3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_unalign16__int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    rldicr 3, 3, 0, 51
-; CHECK-P9-NEXT:    li 6, 14
-; CHECK-P9-NEXT:    stdx 5, 3, 6
-; CHECK-P9-NEXT:    li 5, 6
-; CHECK-P9-NEXT:    stdx 4, 3, 5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_unalign16__int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li 6, 14
-; CHECK-P8-NEXT:    rldicr 3, 3, 0, 51
-; CHECK-P8-NEXT:    li 7, 6
-; CHECK-P8-NEXT:    stdx 5, 3, 6
-; CHECK-P8-NEXT:    stdx 4, 3, 7
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_unalign16__int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    rldicr 3, 3, 0, 51
+; CHECK-PREP10-NEXT:    li 6, 14
+; CHECK-PREP10-NEXT:    stdx 5, 3, 6
+; CHECK-PREP10-NEXT:    li 5, 6
+; CHECK-PREP10-NEXT:    stdx 4, 3, 5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -4096
   %or = or i64 %and, 6
@@ -940,25 +836,15 @@ define dso_local void @st_disjoint_unalign32__int128___int128(i64 %ptr, i128 %st
 ; CHECK-P10-NEXT:    pstd 4, 99999(3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_unalign32__int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis 6, 1
-; CHECK-P9-NEXT:    rldicr 3, 3, 0, 43
-; CHECK-P9-NEXT:    ori 7, 6, 34471
-; CHECK-P9-NEXT:    stdx 5, 3, 7
-; CHECK-P9-NEXT:    ori 5, 6, 34463
-; CHECK-P9-NEXT:    stdx 4, 3, 5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_unalign32__int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis 6, 1
-; CHECK-P8-NEXT:    rldicr 3, 3, 0, 43
-; CHECK-P8-NEXT:    ori 7, 6, 34471
-; CHECK-P8-NEXT:    ori 6, 6, 34463
-; CHECK-P8-NEXT:    stdx 5, 3, 7
-; CHECK-P8-NEXT:    stdx 4, 3, 6
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_unalign32__int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis 6, 1
+; CHECK-PREP10-NEXT:    rldicr 3, 3, 0, 43
+; CHECK-PREP10-NEXT:    ori 7, 6, 34471
+; CHECK-PREP10-NEXT:    stdx 5, 3, 7
+; CHECK-PREP10-NEXT:    ori 5, 6, 34463
+; CHECK-PREP10-NEXT:    stdx 4, 3, 5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1048576
   %or = or i64 %and, 99999
@@ -977,27 +863,16 @@ define dso_local void @st_disjoint_align32__int128___int128(i64 %ptr, i128 %str)
 ; CHECK-P10-NEXT:    pstd 4, 999990000(3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32__int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis 6, -15264
-; CHECK-P9-NEXT:    and 3, 3, 6
-; CHECK-P9-NEXT:    lis 6, 15258
-; CHECK-P9-NEXT:    ori 7, 6, 41720
-; CHECK-P9-NEXT:    stdx 5, 3, 7
-; CHECK-P9-NEXT:    ori 5, 6, 41712
-; CHECK-P9-NEXT:    stdx 4, 3, 5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32__int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis 6, -15264
-; CHECK-P8-NEXT:    lis 7, 15258
-; CHECK-P8-NEXT:    and 3, 3, 6
-; CHECK-P8-NEXT:    ori 6, 7, 41720
-; CHECK-P8-NEXT:    ori 7, 7, 41712
-; CHECK-P8-NEXT:    stdx 5, 3, 6
-; CHECK-P8-NEXT:    stdx 4, 3, 7
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32__int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis 6, -15264
+; CHECK-PREP10-NEXT:    and 3, 3, 6
+; CHECK-PREP10-NEXT:    lis 6, 15258
+; CHECK-PREP10-NEXT:    ori 7, 6, 41720
+; CHECK-PREP10-NEXT:    stdx 5, 3, 7
+; CHECK-PREP10-NEXT:    ori 5, 6, 41712
+; CHECK-PREP10-NEXT:    stdx 4, 3, 5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1049,29 +924,17 @@ define dso_local void @st_disjoint_unalign64__int128___int128(i64 %ptr, i128 %st
 ; CHECK-P10-NEXT:    stdx 4, 3, 5
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_unalign64__int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li 6, 29
-; CHECK-P9-NEXT:    rldicr 3, 3, 0, 23
-; CHECK-P9-NEXT:    rldic 6, 6, 35, 24
-; CHECK-P9-NEXT:    oris 6, 6, 54437
-; CHECK-P9-NEXT:    ori 7, 6, 4105
-; CHECK-P9-NEXT:    stdx 5, 3, 7
-; CHECK-P9-NEXT:    ori 5, 6, 4097
-; CHECK-P9-NEXT:    stdx 4, 3, 5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_unalign64__int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li 6, 29
-; CHECK-P8-NEXT:    rldicr 3, 3, 0, 23
-; CHECK-P8-NEXT:    rldic 6, 6, 35, 24
-; CHECK-P8-NEXT:    oris 6, 6, 54437
-; CHECK-P8-NEXT:    ori 7, 6, 4105
-; CHECK-P8-NEXT:    ori 6, 6, 4097
-; CHECK-P8-NEXT:    stdx 5, 3, 7
-; CHECK-P8-NEXT:    stdx 4, 3, 6
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_unalign64__int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    li 6, 29
+; CHECK-PREP10-NEXT:    rldicr 3, 3, 0, 23
+; CHECK-PREP10-NEXT:    rldic 6, 6, 35, 24
+; CHECK-PREP10-NEXT:    oris 6, 6, 54437
+; CHECK-PREP10-NEXT:    ori 7, 6, 4105
+; CHECK-PREP10-NEXT:    stdx 5, 3, 7
+; CHECK-PREP10-NEXT:    ori 5, 6, 4097
+; CHECK-PREP10-NEXT:    stdx 4, 3, 5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1099511627776
   %or = or i64 %and, 1000000000001
@@ -1094,33 +957,19 @@ define dso_local void @st_disjoint_align64__int128___int128(i64 %ptr, i128 %str)
 ; CHECK-P10-NEXT:    stdx 4, 3, 5
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align64__int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis 6, 3725
-; CHECK-P9-NEXT:    rldicr 3, 3, 0, 23
-; CHECK-P9-NEXT:    ori 6, 6, 19025
-; CHECK-P9-NEXT:    rldic 6, 6, 12, 24
-; CHECK-P9-NEXT:    stdx 4, 3, 6
-; CHECK-P9-NEXT:    li 4, 29
-; CHECK-P9-NEXT:    rldic 4, 4, 35, 24
-; CHECK-P9-NEXT:    oris 4, 4, 54437
-; CHECK-P9-NEXT:    ori 4, 4, 4104
-; CHECK-P9-NEXT:    stdx 5, 3, 4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align64__int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li 6, 29
-; CHECK-P8-NEXT:    lis 7, 3725
-; CHECK-P8-NEXT:    rldic 6, 6, 35, 24
-; CHECK-P8-NEXT:    ori 7, 7, 19025
-; CHECK-P8-NEXT:    oris 6, 6, 54437
-; CHECK-P8-NEXT:    rldicr 3, 3, 0, 23
-; CHECK-P8-NEXT:    rldic 7, 7, 12, 24
-; CHECK-P8-NEXT:    ori 6, 6, 4104
-; CHECK-P8-NEXT:    stdx 4, 3, 7
-; CHECK-P8-NEXT:    stdx 5, 3, 6
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align64__int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis 6, 3725
+; CHECK-PREP10-NEXT:    rldicr 3, 3, 0, 23
+; CHECK-PREP10-NEXT:    ori 6, 6, 19025
+; CHECK-PREP10-NEXT:    rldic 6, 6, 12, 24
+; CHECK-PREP10-NEXT:    stdx 4, 3, 6
+; CHECK-PREP10-NEXT:    li 4, 29
+; CHECK-PREP10-NEXT:    rldic 4, 4, 35, 24
+; CHECK-PREP10-NEXT:    oris 4, 4, 54437
+; CHECK-PREP10-NEXT:    ori 4, 4, 4104
+; CHECK-PREP10-NEXT:    stdx 5, 3, 4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1099511627776
   %or = or i64 %and, 1000000000000
@@ -1131,29 +980,13 @@ entry:
 
 ; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly
 define dso_local void @st_cst_unalign16__int128___int128(i128 %str) {
-; CHECK-P10-LABEL: st_cst_unalign16__int128___int128:
-; CHECK-P10:       # %bb.0: # %entry
-; CHECK-P10-NEXT:    li 5, 263
-; CHECK-P10-NEXT:    std 4, 0(5)
-; CHECK-P10-NEXT:    li 4, 255
-; CHECK-P10-NEXT:    std 3, 0(4)
-; CHECK-P10-NEXT:    blr
-;
-; CHECK-P9-LABEL: st_cst_unalign16__int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li 5, 263
-; CHECK-P9-NEXT:    std 4, 0(5)
-; CHECK-P9-NEXT:    li 4, 255
-; CHECK-P9-NEXT:    std 3, 0(4)
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_cst_unalign16__int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li 5, 263
-; CHECK-P8-NEXT:    li 6, 255
-; CHECK-P8-NEXT:    std 4, 0(5)
-; CHECK-P8-NEXT:    std 3, 0(6)
-; CHECK-P8-NEXT:    blr
+; CHECK-LABEL: st_cst_unalign16__int128___int128:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 5, 263
+; CHECK-NEXT:    std 4, 0(5)
+; CHECK-NEXT:    li 4, 255
+; CHECK-NEXT:    std 3, 0(4)
+; CHECK-NEXT:    blr
 entry:
   store i128 %str, ptr inttoptr (i64 255 to ptr), align 16
   ret void
@@ -1181,23 +1014,14 @@ define dso_local void @st_cst_unalign32__int128___int128(i128 %str) {
 ; CHECK-P10-NEXT:    std 3, 0(4)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_cst_unalign32__int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis 5, 1
-; CHECK-P9-NEXT:    ori 6, 5, 34471
-; CHECK-P9-NEXT:    std 4, 0(6)
-; CHECK-P9-NEXT:    ori 4, 5, 34463
-; CHECK-P9-NEXT:    std 3, 0(4)
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_cst_unalign32__int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis 5, 1
-; CHECK-P8-NEXT:    ori 6, 5, 34471
-; CHECK-P8-NEXT:    ori 5, 5, 34463
-; CHECK-P8-NEXT:    std 4, 0(6)
-; CHECK-P8-NEXT:    std 3, 0(5)
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_cst_unalign32__int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis 5, 1
+; CHECK-PREP10-NEXT:    ori 6, 5, 34471
+; CHECK-PREP10-NEXT:    std 4, 0(6)
+; CHECK-PREP10-NEXT:    ori 4, 5, 34463
+; CHECK-PREP10-NEXT:    std 3, 0(4)
+; CHECK-PREP10-NEXT:    blr
 entry:
   store i128 %str, ptr inttoptr (i64 99999 to ptr), align 16
   ret void
@@ -1229,27 +1053,16 @@ define dso_local void @st_cst_unalign64__int128___int128(i128 %str) {
 ; CHECK-P10-NEXT:    std 3, 0(4)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_cst_unalign64__int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li 5, 29
-; CHECK-P9-NEXT:    rldic 5, 5, 35, 24
-; CHECK-P9-NEXT:    oris 5, 5, 54437
-; CHECK-P9-NEXT:    ori 6, 5, 4105
-; CHECK-P9-NEXT:    std 4, 0(6)
-; CHECK-P9-NEXT:    ori 4, 5, 4097
-; CHECK-P9-NEXT:    std 3, 0(4)
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_cst_unalign64__int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li 5, 29
-; CHECK-P8-NEXT:    rldic 5, 5, 35, 24
-; CHECK-P8-NEXT:    oris 5, 5, 54437
-; CHECK-P8-NEXT:    ori 6, 5, 4105
-; CHECK-P8-NEXT:    ori 5, 5, 4097
-; CHECK-P8-NEXT:    std 4, 0(6)
-; CHECK-P8-NEXT:    std 3, 0(5)
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_cst_unalign64__int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    li 5, 29
+; CHECK-PREP10-NEXT:    rldic 5, 5, 35, 24
+; CHECK-PREP10-NEXT:    oris 5, 5, 54437
+; CHECK-PREP10-NEXT:    ori 6, 5, 4105
+; CHECK-PREP10-NEXT:    std 4, 0(6)
+; CHECK-PREP10-NEXT:    ori 4, 5, 4097
+; CHECK-PREP10-NEXT:    std 3, 0(4)
+; CHECK-PREP10-NEXT:    blr
 entry:
   store i128 %str, ptr inttoptr (i64 1000000000001 to ptr), align 16
   ret void
@@ -1268,31 +1081,18 @@ define dso_local void @st_cst_align64__int128___int128(i128 %str) {
 ; CHECK-P10-NEXT:    std 3, 0(4)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_cst_align64__int128___int128:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis 5, 3725
-; CHECK-P9-NEXT:    ori 5, 5, 19025
-; CHECK-P9-NEXT:    rldic 5, 5, 12, 24
-; CHECK-P9-NEXT:    std 3, 0(5)
-; CHECK-P9-NEXT:    li 3, 29
-; CHECK-P9-NEXT:    rldic 3, 3, 35, 24
-; CHECK-P9-NEXT:    oris 3, 3, 54437
-; CHECK-P9-NEXT:    ori 3, 3, 4104
-; CHECK-P9-NEXT:    std 4, 0(3)
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_cst_align64__int128___int128:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li 5, 29
-; CHECK-P8-NEXT:    lis 6, 3725
-; CHECK-P8-NEXT:    rldic 5, 5, 35, 24
-; CHECK-P8-NEXT:    ori 6, 6, 19025
-; CHECK-P8-NEXT:    oris 5, 5, 54437
-; CHECK-P8-NEXT:    rldic 6, 6, 12, 24
-; CHECK-P8-NEXT:    ori 5, 5, 4104
-; CHECK-P8-NEXT:    std 3, 0(6)
-; CHECK-P8-NEXT:    std 4, 0(5)
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_cst_align64__int128___int128:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis 5, 3725
+; CHECK-PREP10-NEXT:    ori 5, 5, 19025
+; CHECK-PREP10-NEXT:    rldic 5, 5, 12, 24
+; CHECK-PREP10-NEXT:    std 3, 0(5)
+; CHECK-PREP10-NEXT:    li 3, 29
+; CHECK-PREP10-NEXT:    rldic 3, 3, 35, 24
+; CHECK-PREP10-NEXT:    oris 3, 3, 54437
+; CHECK-PREP10-NEXT:    ori 3, 3, 4104
+; CHECK-PREP10-NEXT:    std 4, 0(3)
+; CHECK-PREP10-NEXT:    blr
 entry:
   store i128 %str, ptr inttoptr (i64 1000000000000 to ptr), align 4096
   ret void

diff  --git a/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll b/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll
index 9182df9a115e35b..b7f8b8af2472aa3 100644
--- a/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll
+++ b/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll
@@ -25,15 +25,15 @@ define <8 x i32> @test_large_vec_vaarg(i32 %n, ...) {
 ; LE-NEXT:    rldicr 3, 3, 0, 59
 ; LE-NEXT:    addi 4, 3, 16
 ; LE-NEXT:    std 4, -8(1)
-; LE-NEXT:    ld 4, -8(1)
 ; LE-NEXT:    lxvd2x 0, 0, 3
-; LE-NEXT:    addi 4, 4, 15
-; LE-NEXT:    rldicr 4, 4, 0, 59
+; LE-NEXT:    ld 3, -8(1)
+; LE-NEXT:    addi 3, 3, 15
+; LE-NEXT:    rldicr 3, 3, 0, 59
+; LE-NEXT:    addi 4, 3, 16
+; LE-NEXT:    std 4, -8(1)
 ; LE-NEXT:    xxswapd 34, 0
-; LE-NEXT:    addi 5, 4, 16
-; LE-NEXT:    std 5, -8(1)
-; LE-NEXT:    lxvd2x 1, 0, 4
-; LE-NEXT:    xxswapd 35, 1
+; LE-NEXT:    lxvd2x 0, 0, 3
+; LE-NEXT:    xxswapd 35, 0
 ; LE-NEXT:    blr
   %args = alloca ptr, align 4
   %x = va_arg ptr %args, <8 x i32>

diff  --git a/llvm/test/CodeGen/PowerPC/licm-remat.ll b/llvm/test/CodeGen/PowerPC/licm-remat.ll
index b1944a7107c1bff..ffdaf5d2481e3b6 100644
--- a/llvm/test/CodeGen/PowerPC/licm-remat.ll
+++ b/llvm/test/CodeGen/PowerPC/licm-remat.ll
@@ -20,8 +20,8 @@ declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture reado
 define linkonce_odr void @ZN6snappyDecompressor_(ptr %this, ptr %writer) {
 ; CHECK-LABEL: ZN6snappyDecompressor_:
 ; CHECK:       # %bb.0: # %entry
-; CHECK:       addis 3, 2, .L__ModuleStringPool@toc@ha
-; CHECK:       addi 25, 3, .L__ModuleStringPool@toc@l
+; CHECK:       addis 4, 2, .L__ModuleStringPool@toc@ha
+; CHECK:       addi 25, 4, .L__ModuleStringPool@toc@l
 ; CHECK:       .LBB0_2: # %for.cond
 ; CHECK-NOT:   addis {{[0-9]+}}, 2, .L__ModuleStringPool@toc@ha
 ; CHECK:       bctrl

diff  --git a/llvm/test/CodeGen/PowerPC/licm-tocReg.ll b/llvm/test/CodeGen/PowerPC/licm-tocReg.ll
index 14f6ba0402139df..7b531087501923a 100644
--- a/llvm/test/CodeGen/PowerPC/licm-tocReg.ll
+++ b/llvm/test/CodeGen/PowerPC/licm-tocReg.ll
@@ -67,35 +67,34 @@
 define signext i32 @test(ptr nocapture %FP) local_unnamed_addr #0 {
 ; CHECKLX-LABEL: test:
 ; CHECKLX:       # %bb.0: # %entry
-; CHECKLX-NEXT:    addis 4, 2, .LC0@toc@ha
-; CHECKLX-NEXT:    addis 5, 2, .LC1@toc@ha
 ; CHECKLX-NEXT:    mr 12, 3
-; CHECKLX-NEXT:    ld 4, .LC0@toc@l(4)
-; CHECKLX-NEXT:    ld 5, .LC1@toc@l(5)
-; CHECKLX-NEXT:    lwz 6, 0(4)
-; CHECKLX-NEXT:    lwz 7, 0(5)
-; CHECKLX-NEXT:    cmpw 6, 7
-; CHECKLX-NEXT:    lwz 6, 0(4)
-; CHECKLX-NEXT:    bgt 0, .LBB0_2
+; CHECKLX-NEXT:    addis 3, 2, .LC0@toc@ha
+; CHECKLX-NEXT:    addis 4, 2, .LC1@toc@ha
+; CHECKLX-NEXT:    ld 3, .LC0@toc@l(3)
+; CHECKLX-NEXT:    ld 5, .LC1@toc@l(4)
+; CHECKLX-NEXT:    lwz 6, 0(3)
 ; CHECKLX-NEXT:    .p2align 5
 ; CHECKLX-NEXT:  .LBB0_1: # %if.end
 ; CHECKLX-NEXT:    #
-; CHECKLX-NEXT:    addi 3, 6, 1
-; CHECKLX-NEXT:    stw 3, 0(4)
-; CHECKLX-NEXT:    lwz 3, 0(4)
-; CHECKLX-NEXT:    lwz 6, 0(5)
-; CHECKLX-NEXT:    cmpw 3, 6
-; CHECKLX-NEXT:    lwz 6, 0(4)
-; CHECKLX-NEXT:    ble 0, .LBB0_1
-; CHECKLX-NEXT:  .LBB0_2: # %if.then
+; CHECKLX-NEXT:    lwz 7, 0(5)
+; CHECKLX-NEXT:    lwz 4, 0(3)
+; CHECKLX-NEXT:    cmpw 6, 7
+; CHECKLX-NEXT:    bgt 0, .LBB0_3
+; CHECKLX-NEXT:  # %bb.2: # %if.end
+; CHECKLX-NEXT:    #
+; CHECKLX-NEXT:    addi 4, 4, 1
+; CHECKLX-NEXT:    stw 4, 0(3)
+; CHECKLX-NEXT:    lwz 6, 0(3)
+; CHECKLX-NEXT:    b .LBB0_1
+; CHECKLX-NEXT:  .LBB0_3: # %if.then
 ; CHECKLX-NEXT:    mflr 0
 ; CHECKLX-NEXT:    stdu 1, -32(1)
 ; CHECKLX-NEXT:    std 2, 24(1)
 ; CHECKLX-NEXT:    std 0, 48(1)
 ; CHECKLX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECKLX-NEXT:    .cfi_offset lr, 16
-; CHECKLX-NEXT:    extsw 3, 6
 ; CHECKLX-NEXT:    mtctr 12
+; CHECKLX-NEXT:    extsw 3, 4
 ; CHECKLX-NEXT:    bctrl
 ; CHECKLX-NEXT:    ld 2, 24(1)
 ; CHECKLX-NEXT:    addi 1, 1, 32

diff  --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index 3d316777148240d..56edb8983b39527 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -222,16 +222,16 @@ define dso_local void @test4(ptr nocapture %c, ptr nocapture readonly %a) local_
 ;
 ; P8-AIX32-LABEL: test4:
 ; P8-AIX32:       # %bb.0: # %entry
-; P8-AIX32-NEXT:    lwz r5, L..C0(r2) # %const.0
-; P8-AIX32-NEXT:    lwz r6, 28(r4)
-; P8-AIX32-NEXT:    lwz r4, 24(r4)
-; P8-AIX32-NEXT:    stw r6, -16(r1)
-; P8-AIX32-NEXT:    stw r4, -32(r1)
+; P8-AIX32-NEXT:    lwz r5, 24(r4)
+; P8-AIX32-NEXT:    lwz r4, 28(r4)
+; P8-AIX32-NEXT:    stw r4, -16(r1)
+; P8-AIX32-NEXT:    lwz r4, L..C0(r2) # %const.0
+; P8-AIX32-NEXT:    stw r5, -32(r1)
+; P8-AIX32-NEXT:    lxvw4x v2, 0, r4
 ; P8-AIX32-NEXT:    addi r4, r1, -16
-; P8-AIX32-NEXT:    lxvw4x v2, 0, r5
-; P8-AIX32-NEXT:    addi r5, r1, -32
 ; P8-AIX32-NEXT:    lxvw4x v3, 0, r4
-; P8-AIX32-NEXT:    lxvw4x v4, 0, r5
+; P8-AIX32-NEXT:    addi r4, r1, -32
+; P8-AIX32-NEXT:    lxvw4x v4, 0, r4
 ; P8-AIX32-NEXT:    vperm v2, v4, v3, v2
 ; P8-AIX32-NEXT:    stxvw4x v2, 0, r3
 ; P8-AIX32-NEXT:    blr
@@ -299,16 +299,16 @@ define void @test5(ptr %a, ptr %in) {
 ;
 ; P8-AIX32-LABEL: test5:
 ; P8-AIX32:       # %bb.0: # %entry
-; P8-AIX32-NEXT:    lwz r5, L..C1(r2) # %const.0
 ; P8-AIX32-NEXT:    lwz r4, 0(r4)
+; P8-AIX32-NEXT:    srawi r5, r4, 31
 ; P8-AIX32-NEXT:    stw r4, -16(r1)
-; P8-AIX32-NEXT:    srawi r4, r4, 31
-; P8-AIX32-NEXT:    stw r4, -32(r1)
-; P8-AIX32-NEXT:    lxvw4x v2, 0, r5
+; P8-AIX32-NEXT:    lwz r4, L..C1(r2) # %const.0
+; P8-AIX32-NEXT:    stw r5, -32(r1)
+; P8-AIX32-NEXT:    lxvw4x v2, 0, r4
 ; P8-AIX32-NEXT:    addi r4, r1, -16
-; P8-AIX32-NEXT:    addi r5, r1, -32
 ; P8-AIX32-NEXT:    lxvw4x v3, 0, r4
-; P8-AIX32-NEXT:    lxvw4x v4, 0, r5
+; P8-AIX32-NEXT:    addi r4, r1, -32
+; P8-AIX32-NEXT:    lxvw4x v4, 0, r4
 ; P8-AIX32-NEXT:    vperm v2, v4, v3, v2
 ; P8-AIX32-NEXT:    stxvw4x v2, 0, r3
 ; P8-AIX32-NEXT:    blr
@@ -376,16 +376,16 @@ define void @test6(ptr %a, ptr %in) {
 ;
 ; P8-AIX32-LABEL: test6:
 ; P8-AIX32:       # %bb.0: # %entry
-; P8-AIX32-NEXT:    lwz r6, L..C2(r2) # %const.0
 ; P8-AIX32-NEXT:    lwz r4, 0(r4)
 ; P8-AIX32-NEXT:    li r5, 0
 ; P8-AIX32-NEXT:    stw r5, -32(r1)
-; P8-AIX32-NEXT:    addi r5, r1, -16
 ; P8-AIX32-NEXT:    stw r4, -16(r1)
+; P8-AIX32-NEXT:    lwz r4, L..C2(r2) # %const.0
+; P8-AIX32-NEXT:    lxvw4x v2, 0, r4
 ; P8-AIX32-NEXT:    addi r4, r1, -32
-; P8-AIX32-NEXT:    lxvw4x v2, 0, r6
 ; P8-AIX32-NEXT:    lxvw4x v3, 0, r4
-; P8-AIX32-NEXT:    lxvw4x v4, 0, r5
+; P8-AIX32-NEXT:    addi r4, r1, -16
+; P8-AIX32-NEXT:    lxvw4x v4, 0, r4
 ; P8-AIX32-NEXT:    vperm v2, v3, v4, v2
 ; P8-AIX32-NEXT:    stxvw4x v2, 0, r3
 ; P8-AIX32-NEXT:    blr
@@ -823,12 +823,12 @@ define <16 x i8> @unadjusted_lxvdsx(ptr %s, ptr %t) {
 ; P8-AIX32:       # %bb.0: # %entry
 ; P8-AIX32-NEXT:    lwz r4, 4(r3)
 ; P8-AIX32-NEXT:    stw r4, -16(r1)
-; P8-AIX32-NEXT:    addi r4, r1, -32
 ; P8-AIX32-NEXT:    lwz r3, 0(r3)
 ; P8-AIX32-NEXT:    stw r3, -32(r1)
 ; P8-AIX32-NEXT:    addi r3, r1, -16
 ; P8-AIX32-NEXT:    lxvw4x vs0, 0, r3
-; P8-AIX32-NEXT:    lxvw4x vs1, 0, r4
+; P8-AIX32-NEXT:    addi r3, r1, -32
+; P8-AIX32-NEXT:    lxvw4x vs1, 0, r3
 ; P8-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
 ; P8-AIX32-NEXT:    xxmrghd v2, vs0, vs0
 ; P8-AIX32-NEXT:    blr
@@ -1250,11 +1250,11 @@ define <2 x double> @test_v2f64_multiple_use(ptr nocapture readonly %a, ptr noca
 ; P8-NEXT:    lfs f0, 0(r3)
 ; P8-NEXT:    lfd f1, 0(r4)
 ; P8-NEXT:    xsadddp f1, f1, f0
-; P8-NEXT:    xxspltd v2, vs0, 0
 ; P8-NEXT:    stfd f1, 0(r4)
 ; P8-NEXT:    lfd f1, 0(r5)
-; P8-NEXT:    xsadddp f1, f1, f0
-; P8-NEXT:    stfd f1, 0(r5)
+; P8-NEXT:    xxspltd v2, vs0, 0
+; P8-NEXT:    xsadddp f0, f1, f0
+; P8-NEXT:    stfd f0, 0(r5)
 ; P8-NEXT:    blr
 ;
 ; P7-LABEL: test_v2f64_multiple_use:
@@ -1286,11 +1286,11 @@ define <2 x double> @test_v2f64_multiple_use(ptr nocapture readonly %a, ptr noca
 ; P8-AIX32-NEXT:    lfs f0, 0(r3)
 ; P8-AIX32-NEXT:    lfd f1, 0(r4)
 ; P8-AIX32-NEXT:    xsadddp f1, f1, f0
-; P8-AIX32-NEXT:    xxmrghd v2, vs0, vs0
 ; P8-AIX32-NEXT:    stfd f1, 0(r4)
 ; P8-AIX32-NEXT:    lfd f1, 0(r5)
-; P8-AIX32-NEXT:    xsadddp f1, f1, f0
-; P8-AIX32-NEXT:    stfd f1, 0(r5)
+; P8-AIX32-NEXT:    xxmrghd v2, vs0, vs0
+; P8-AIX32-NEXT:    xsadddp f0, f1, f0
+; P8-AIX32-NEXT:    stfd f0, 0(r5)
 ; P8-AIX32-NEXT:    blr
 ;
 ; P7-AIX32-LABEL: test_v2f64_multiple_use:

diff  --git a/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll b/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll
index 8da3a33278f6717..299bd80b51469d3 100644
--- a/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll
+++ b/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll
@@ -70,12 +70,12 @@ define <2 x i64> @load_swap01(ptr %vp1, ptr %vp2) {
 define <4 x i32> @load_swap10(ptr %vp1, ptr %vp2) {
 ; CHECK-P8-LABEL: load_swap10:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_0@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, .LCPI2_0@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -86,10 +86,10 @@ define <4 x i32> @load_swap10(ptr %vp1, ptr %vp2) {
 ;
 ; CHECK-P8-BE-LABEL: load_swap10:
 ; CHECK-P8-BE:       # %bb.0:
-; CHECK-P8-BE-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
 ; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
-; CHECK-P8-BE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
-; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r4
+; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-BE-NEXT:    blr
 ;
@@ -110,12 +110,12 @@ define <4 x i32> @load_swap10(ptr %vp1, ptr %vp2) {
 define <4 x i32> @load_swap11(ptr %vp1, ptr %vp2) {
 ; CHECK-P8-LABEL: load_swap11:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -150,12 +150,12 @@ define <4 x i32> @load_swap11(ptr %vp1, ptr %vp2) {
 define <8 x i16> @load_swap20(ptr %vp1, ptr %vp2){
 ; CHECK-P8-LABEL: load_swap20:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI4_0@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, .LCPI4_0@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -166,10 +166,10 @@ define <8 x i16> @load_swap20(ptr %vp1, ptr %vp2){
 ;
 ; CHECK-P8-BE-LABEL: load_swap20:
 ; CHECK-P8-BE:       # %bb.0:
-; CHECK-P8-BE-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
 ; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
-; CHECK-P8-BE-NEXT:    addi r4, r4, .LCPI4_0@toc@l
-; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r4
+; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
+; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-BE-NEXT:    blr
 ;
@@ -190,12 +190,12 @@ define <8 x i16> @load_swap20(ptr %vp1, ptr %vp2){
 define <8 x i16> @load_swap21(ptr %vp1, ptr %vp2){
 ; CHECK-P8-LABEL: load_swap21:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI5_0@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -230,12 +230,12 @@ define <8 x i16> @load_swap21(ptr %vp1, ptr %vp2){
 define <16 x i8> @load_swap30(ptr %vp1, ptr %vp2){
 ; CHECK-P8-LABEL: load_swap30:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_0@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, .LCPI6_0@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -246,10 +246,10 @@ define <16 x i8> @load_swap30(ptr %vp1, ptr %vp2){
 ;
 ; CHECK-P8-BE-LABEL: load_swap30:
 ; CHECK-P8-BE:       # %bb.0:
-; CHECK-P8-BE-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
 ; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
-; CHECK-P8-BE-NEXT:    addi r4, r4, .LCPI6_0@toc@l
-; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r4
+; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
+; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
+; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-BE-NEXT:    blr
 ;
@@ -267,12 +267,12 @@ define <16 x i8> @load_swap30(ptr %vp1, ptr %vp2){
 define <16 x i8> @load_swap31(ptr %vp1, ptr %vp2){
 ; CHECK-P8-LABEL: load_swap31:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI7_0@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -332,12 +332,12 @@ define <2 x double> @load_swap40(ptr %vp1, ptr %vp2) {
 define <4 x float> @load_swap50(ptr %vp1, ptr %vp2) {
 ; CHECK-P8-LABEL: load_swap50:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI9_0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI9_0@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, .LCPI9_0@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -348,10 +348,10 @@ define <4 x float> @load_swap50(ptr %vp1, ptr %vp2) {
 ;
 ; CHECK-P8-BE-LABEL: load_swap50:
 ; CHECK-P8-BE:       # %bb.0:
-; CHECK-P8-BE-NEXT:    addis r4, r2, .LCPI9_0@toc@ha
 ; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
-; CHECK-P8-BE-NEXT:    addi r4, r4, .LCPI9_0@toc@l
-; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r4
+; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
+; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI9_0@toc@l
+; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-BE-NEXT:    blr
 ;
@@ -372,12 +372,12 @@ define <4 x float> @load_swap50(ptr %vp1, ptr %vp2) {
 define <4 x float> @load_swap51(ptr %vp1, ptr %vp2) {
 ; CHECK-P8-LABEL: load_swap51:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
 ; CHECK-P8-NEXT:    addi r3, r3, .LCPI10_0@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
index d301f0172866e19..1da40d46aa77300 100644
--- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
+++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
@@ -40,10 +40,10 @@ define signext i32 @zeroEqualityTest01(ptr %x, ptr %y) {
 ; CHECK-NEXT:    cmpld 5, 6
 ; CHECK-NEXT:    bne 0, .LBB1_2
 ; CHECK-NEXT:  # %bb.1: # %loadbb1
-; CHECK-NEXT:    ld 3, 8(3)
+; CHECK-NEXT:    ld 5, 8(3)
 ; CHECK-NEXT:    ld 4, 8(4)
-; CHECK-NEXT:    cmpld 3, 4
 ; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    cmpld 5, 4
 ; CHECK-NEXT:    beqlr 0
 ; CHECK-NEXT:  .LBB1_2: # %res_block
 ; CHECK-NEXT:    li 3, 1
@@ -68,10 +68,10 @@ define signext i32 @zeroEqualityTest03(ptr %x, ptr %y) {
 ; CHECK-NEXT:    cmplw 5, 6
 ; CHECK-NEXT:    bne 0, .LBB2_3
 ; CHECK-NEXT:  # %bb.2: # %loadbb2
-; CHECK-NEXT:    lbz 3, 6(3)
+; CHECK-NEXT:    lbz 5, 6(3)
 ; CHECK-NEXT:    lbz 4, 6(4)
-; CHECK-NEXT:    cmplw 3, 4
 ; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    cmplw 5, 4
 ; CHECK-NEXT:    beqlr 0
 ; CHECK-NEXT:  .LBB2_3: # %res_block
 ; CHECK-NEXT:    li 3, 1
@@ -121,18 +121,18 @@ define signext i32 @equalityFoldTwoConstants() {
 define signext i32 @equalityFoldOneConstant(ptr %X) {
 ; CHECK-LABEL: equalityFoldOneConstant:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ld 4, 0(3)
 ; CHECK-NEXT:    li 5, 1
+; CHECK-NEXT:    ld 4, 0(3)
 ; CHECK-NEXT:    rldic 5, 5, 32, 31
 ; CHECK-NEXT:    cmpld 4, 5
 ; CHECK-NEXT:    bne 0, .LBB6_2
 ; CHECK-NEXT:  # %bb.1: # %loadbb1
-; CHECK-NEXT:    lis 4, -32768
-; CHECK-NEXT:    ld 3, 8(3)
-; CHECK-NEXT:    ori 4, 4, 1
-; CHECK-NEXT:    rldic 4, 4, 1, 30
-; CHECK-NEXT:    cmpld 3, 4
+; CHECK-NEXT:    lis 5, -32768
+; CHECK-NEXT:    ld 4, 8(3)
 ; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    ori 5, 5, 1
+; CHECK-NEXT:    rldic 5, 5, 1, 30
+; CHECK-NEXT:    cmpld 4, 5
 ; CHECK-NEXT:    beq 0, .LBB6_3
 ; CHECK-NEXT:  .LBB6_2: # %res_block
 ; CHECK-NEXT:    li 3, 1

diff  --git a/llvm/test/CodeGen/PowerPC/memcmp.ll b/llvm/test/CodeGen/PowerPC/memcmp.ll
index 9e984ec99a5734c..0634534b9c9df11 100644
--- a/llvm/test/CodeGen/PowerPC/memcmp.ll
+++ b/llvm/test/CodeGen/PowerPC/memcmp.ll
@@ -10,9 +10,9 @@ define signext i32 @memcmp8(ptr nocapture readonly %buffer1, ptr nocapture reado
 ; CHECK-NEXT:    subfe 5, 4, 4
 ; CHECK-NEXT:    subc 4, 3, 4
 ; CHECK-NEXT:    subfe 3, 3, 3
-; CHECK-NEXT:    neg 4, 5
+; CHECK-NEXT:    neg 5, 5
 ; CHECK-NEXT:    neg 3, 3
-; CHECK-NEXT:    sub 3, 4, 3
+; CHECK-NEXT:    sub 3, 5, 3
 ; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    blr
   %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 8)
@@ -26,9 +26,9 @@ define signext i32 @memcmp4(ptr nocapture readonly %buffer1, ptr nocapture reado
 ; CHECK-NEXT:    lwbrx 4, 0, 4
 ; CHECK-NEXT:    sub 5, 4, 3
 ; CHECK-NEXT:    sub 3, 3, 4
-; CHECK-NEXT:    rldicl 4, 5, 1, 63
+; CHECK-NEXT:    rldicl 5, 5, 1, 63
 ; CHECK-NEXT:    rldicl 3, 3, 1, 63
-; CHECK-NEXT:    sub 3, 4, 3
+; CHECK-NEXT:    sub 3, 5, 3
 ; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    blr
   %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4)

diff  --git a/llvm/test/CodeGen/PowerPC/memset-tail.ll b/llvm/test/CodeGen/PowerPC/memset-tail.ll
index ae14768219a9287..31c136d009ba5dc 100644
--- a/llvm/test/CodeGen/PowerPC/memset-tail.ll
+++ b/llvm/test/CodeGen/PowerPC/memset-tail.ll
@@ -852,10 +852,10 @@ entry:
 define dso_local void @memset2TailV1B2(ptr nocapture noundef writeonly %p) local_unnamed_addr {
 ; P8-BE-LABEL: memset2TailV1B2:
 ; P8-BE:       # %bb.0: # %entry
-; P8-BE-NEXT:    ld 4, L..C7(2) # %const.0
-; P8-BE-NEXT:    lxvw4x 0, 0, 4
 ; P8-BE-NEXT:    li 4, -23131
 ; P8-BE-NEXT:    sth 4, 16(3)
+; P8-BE-NEXT:    ld 4, L..C7(2) # %const.0
+; P8-BE-NEXT:    lxvw4x 0, 0, 4
 ; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    blr
 ;
@@ -877,11 +877,11 @@ define dso_local void @memset2TailV1B2(ptr nocapture noundef writeonly %p) local
 ;
 ; P8-LE-LABEL: memset2TailV1B2:
 ; P8-LE:       # %bb.0: # %entry
+; P8-LE-NEXT:    li 4, -23131
+; P8-LE-NEXT:    sth 4, 16(3)
 ; P8-LE-NEXT:    addis 4, 2, .LCPI16_0@toc@ha
 ; P8-LE-NEXT:    addi 4, 4, .LCPI16_0@toc@l
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
-; P8-LE-NEXT:    li 4, -23131
-; P8-LE-NEXT:    sth 4, 16(3)
 ; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    blr
 ;
@@ -908,10 +908,10 @@ entry:
 define dso_local void @memset2TailV1B1(ptr nocapture noundef writeonly %p) local_unnamed_addr {
 ; P8-BE-LABEL: memset2TailV1B1:
 ; P8-BE:       # %bb.0: # %entry
-; P8-BE-NEXT:    ld 4, L..C8(2) # %const.0
-; P8-BE-NEXT:    lxvw4x 0, 0, 4
 ; P8-BE-NEXT:    li 4, -91
 ; P8-BE-NEXT:    stb 4, 16(3)
+; P8-BE-NEXT:    ld 4, L..C8(2) # %const.0
+; P8-BE-NEXT:    lxvw4x 0, 0, 4
 ; P8-BE-NEXT:    stxvw4x 0, 0, 3
 ; P8-BE-NEXT:    blr
 ;
@@ -933,11 +933,11 @@ define dso_local void @memset2TailV1B1(ptr nocapture noundef writeonly %p) local
 ;
 ; P8-LE-LABEL: memset2TailV1B1:
 ; P8-LE:       # %bb.0: # %entry
+; P8-LE-NEXT:    li 4, -91
+; P8-LE-NEXT:    stb 4, 16(3)
 ; P8-LE-NEXT:    addis 4, 2, .LCPI17_0@toc@ha
 ; P8-LE-NEXT:    addi 4, 4, .LCPI17_0@toc@l
 ; P8-LE-NEXT:    lxvd2x 0, 0, 4
-; P8-LE-NEXT:    li 4, -91
-; P8-LE-NEXT:    stb 4, 16(3)
 ; P8-LE-NEXT:    stxvd2x 0, 0, 3
 ; P8-LE-NEXT:    blr
 ;
@@ -1082,10 +1082,10 @@ entry:
 define dso_local void @memsetTailV0B9(ptr nocapture noundef writeonly %p) local_unnamed_addr {
 ; P8-BE-LABEL: memsetTailV0B9:
 ; P8-BE:       # %bb.0: # %entry
+; P8-BE-NEXT:    li 4, 15
+; P8-BE-NEXT:    stb 4, 8(3)
 ; P8-BE-NEXT:    lis 4, 3855
-; P8-BE-NEXT:    li 5, 15
 ; P8-BE-NEXT:    ori 4, 4, 3855
-; P8-BE-NEXT:    stb 5, 8(3)
 ; P8-BE-NEXT:    rldimi 4, 4, 32, 0
 ; P8-BE-NEXT:    std 4, 0(3)
 ; P8-BE-NEXT:    blr
@@ -1111,10 +1111,10 @@ define dso_local void @memsetTailV0B9(ptr nocapture noundef writeonly %p) local_
 ;
 ; P8-LE-LABEL: memsetTailV0B9:
 ; P8-LE:       # %bb.0: # %entry
+; P8-LE-NEXT:    li 4, 15
+; P8-LE-NEXT:    stb 4, 8(3)
 ; P8-LE-NEXT:    lis 4, 3855
-; P8-LE-NEXT:    li 5, 15
 ; P8-LE-NEXT:    ori 4, 4, 3855
-; P8-LE-NEXT:    stb 5, 8(3)
 ; P8-LE-NEXT:    rldimi 4, 4, 32, 0
 ; P8-LE-NEXT:    std 4, 0(3)
 ; P8-LE-NEXT:    blr
@@ -1247,10 +1247,10 @@ entry:
 define dso_local void @memsetTailV0B5(ptr nocapture noundef writeonly %p) local_unnamed_addr {
 ; P8-BE-LABEL: memsetTailV0B5:
 ; P8-BE:       # %bb.0: # %entry
+; P8-BE-NEXT:    li 4, 15
+; P8-BE-NEXT:    stb 4, 4(3)
 ; P8-BE-NEXT:    lis 4, 3855
-; P8-BE-NEXT:    li 5, 15
 ; P8-BE-NEXT:    ori 4, 4, 3855
-; P8-BE-NEXT:    stb 5, 4(3)
 ; P8-BE-NEXT:    stw 4, 0(3)
 ; P8-BE-NEXT:    blr
 ;
@@ -1273,10 +1273,10 @@ define dso_local void @memsetTailV0B5(ptr nocapture noundef writeonly %p) local_
 ;
 ; P8-LE-LABEL: memsetTailV0B5:
 ; P8-LE:       # %bb.0: # %entry
+; P8-LE-NEXT:    li 4, 15
+; P8-LE-NEXT:    stb 4, 4(3)
 ; P8-LE-NEXT:    lis 4, 3855
-; P8-LE-NEXT:    li 5, 15
 ; P8-LE-NEXT:    ori 4, 4, 3855
-; P8-LE-NEXT:    stb 5, 4(3)
 ; P8-LE-NEXT:    stw 4, 0(3)
 ; P8-LE-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/mergeable-string-pool-large.ll b/llvm/test/CodeGen/PowerPC/mergeable-string-pool-large.ll
index 1d42d27f37f6ed2..b13b01b416e64e0 100644
--- a/llvm/test/CodeGen/PowerPC/mergeable-string-pool-large.ll
+++ b/llvm/test/CodeGen/PowerPC/mergeable-string-pool-large.ll
@@ -257,8 +257,8 @@ define dso_local signext i32 @str1() local_unnamed_addr #0 {
 ; AIX64:       # %bb.0: # %entry
 ; AIX64-NEXT:    mflr r0
 ; AIX64-NEXT:    stdu r1, -112(r1)
-; AIX64-NEXT:    ld r3, L..C0(r2) # @__ModuleStringPool
 ; AIX64-NEXT:    li r4, 0
+; AIX64-NEXT:    ld r3, L..C0(r2) # @__ModuleStringPool
 ; AIX64-NEXT:    std r0, 128(r1)
 ; AIX64-NEXT:    ori r4, r4, 35612
 ; AIX64-NEXT:    add r3, r3, r4
@@ -318,18 +318,18 @@ define dso_local signext i32 @array0() local_unnamed_addr #0 {
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    mflr r0
 ; AIX32-NEXT:    stwu r1, -96(r1)
-; AIX32-NEXT:    lwz r3, L..C0(r2) # @__ModuleStringPool
-; AIX32-NEXT:    lis r4, 0
-; AIX32-NEXT:    stw r0, 104(r1)
-; AIX32-NEXT:    ori r5, r4, 35596
-; AIX32-NEXT:    ori r4, r4, 35584
-; AIX32-NEXT:    lxvw4x vs0, r3, r5
-; AIX32-NEXT:    lxvw4x vs1, r3, r4
-; AIX32-NEXT:    li r4, 12
+; AIX32-NEXT:    lis r6, 0
+; AIX32-NEXT:    lwz r4, L..C0(r2) # @__ModuleStringPool
+; AIX32-NEXT:    li r5, 12
 ; AIX32-NEXT:    addi r3, r1, 64
-; AIX32-NEXT:    rlwimi r4, r3, 0, 30, 27
-; AIX32-NEXT:    stxvw4x vs0, 0, r4
-; AIX32-NEXT:    stxvw4x vs1, 0, r3
+; AIX32-NEXT:    stw r0, 104(r1)
+; AIX32-NEXT:    ori r7, r6, 35596
+; AIX32-NEXT:    rlwimi r5, r3, 0, 30, 27
+; AIX32-NEXT:    lxvw4x vs0, r4, r7
+; AIX32-NEXT:    stxvw4x vs0, 0, r5
+; AIX32-NEXT:    ori r5, r6, 35584
+; AIX32-NEXT:    lxvw4x vs0, r4, r5
+; AIX32-NEXT:    stxvw4x vs0, 0, r3
 ; AIX32-NEXT:    bl .calleeInt[PR]
 ; AIX32-NEXT:    nop
 ; AIX32-NEXT:    addi r1, r1, 96
@@ -341,17 +341,17 @@ define dso_local signext i32 @array0() local_unnamed_addr #0 {
 ; AIX64:       # %bb.0: # %entry
 ; AIX64-NEXT:    mflr r0
 ; AIX64-NEXT:    stdu r1, -144(r1)
-; AIX64-NEXT:    ld r3, L..C0(r2) # @__ModuleStringPool
-; AIX64-NEXT:    li r4, 0
+; AIX64-NEXT:    li r3, 0
+; AIX64-NEXT:    ld r4, L..C0(r2) # @__ModuleStringPool
 ; AIX64-NEXT:    std r0, 160(r1)
-; AIX64-NEXT:    ori r5, r4, 35596
-; AIX64-NEXT:    ori r4, r4, 35584
-; AIX64-NEXT:    lxvw4x vs0, r3, r5
-; AIX64-NEXT:    lxvw4x vs1, r3, r4
-; AIX64-NEXT:    addi r4, r1, 124
+; AIX64-NEXT:    ori r5, r3, 35596
+; AIX64-NEXT:    ori r3, r3, 35584
+; AIX64-NEXT:    lxvw4x vs0, r4, r5
+; AIX64-NEXT:    addi r5, r1, 124
+; AIX64-NEXT:    stxvw4x vs0, 0, r5
+; AIX64-NEXT:    lxvw4x vs0, r4, r3
 ; AIX64-NEXT:    addi r3, r1, 112
-; AIX64-NEXT:    stxvw4x vs0, 0, r4
-; AIX64-NEXT:    stxvw4x vs1, 0, r3
+; AIX64-NEXT:    stxvw4x vs0, 0, r3
 ; AIX64-NEXT:    bl .calleeInt[PR]
 ; AIX64-NEXT:    nop
 ; AIX64-NEXT:    addi r1, r1, 144
@@ -370,11 +370,11 @@ define dso_local signext i32 @array0() local_unnamed_addr #0 {
 ; LINUX64BE-NEXT:    ori r5, r4, 35596
 ; LINUX64BE-NEXT:    ori r4, r4, 35584
 ; LINUX64BE-NEXT:    lxvw4x vs0, r3, r5
-; LINUX64BE-NEXT:    lxvw4x vs1, r3, r4
-; LINUX64BE-NEXT:    addi r4, r1, 124
+; LINUX64BE-NEXT:    addi r5, r1, 124
+; LINUX64BE-NEXT:    stxvw4x vs0, 0, r5
+; LINUX64BE-NEXT:    lxvw4x vs0, r3, r4
 ; LINUX64BE-NEXT:    addi r3, r1, 112
-; LINUX64BE-NEXT:    stxvw4x vs0, 0, r4
-; LINUX64BE-NEXT:    stxvw4x vs1, 0, r3
+; LINUX64BE-NEXT:    stxvw4x vs0, 0, r3
 ; LINUX64BE-NEXT:    bl calleeInt
 ; LINUX64BE-NEXT:    nop
 ; LINUX64BE-NEXT:    addi r1, r1, 144
@@ -393,11 +393,11 @@ define dso_local signext i32 @array0() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    ori r5, r4, 35596
 ; LINUX64LE-NEXT:    ori r4, r4, 35584
 ; LINUX64LE-NEXT:    lxvd2x vs0, r3, r5
-; LINUX64LE-NEXT:    lxvd2x vs1, r3, r4
-; LINUX64LE-NEXT:    addi r4, r1, 44
+; LINUX64LE-NEXT:    addi r5, r1, 44
+; LINUX64LE-NEXT:    stxvd2x vs0, 0, r5
+; LINUX64LE-NEXT:    lxvd2x vs0, r3, r4
 ; LINUX64LE-NEXT:    addi r3, r1, 32
-; LINUX64LE-NEXT:    stxvd2x vs0, 0, r4
-; LINUX64LE-NEXT:    stxvd2x vs1, 0, r3
+; LINUX64LE-NEXT:    stxvd2x vs0, 0, r3
 ; LINUX64LE-NEXT:    bl calleeInt
 ; LINUX64LE-NEXT:    nop
 ; LINUX64LE-NEXT:    addi r1, r1, 64
@@ -420,27 +420,27 @@ define dso_local signext i32 @array1() local_unnamed_addr #0 {
 ; AIX32-NEXT:    stwu r1, -176(r1)
 ; AIX32-NEXT:    lwz r4, L..C0(r2) # @__ModuleStringPool
 ; AIX32-NEXT:    li r5, 96
-; AIX32-NEXT:    li r6, 80
-; AIX32-NEXT:    li r7, 64
-; AIX32-NEXT:    li r8, 48
-; AIX32-NEXT:    li r9, 32
-; AIX32-NEXT:    li r10, 16
 ; AIX32-NEXT:    addi r3, r1, 64
 ; AIX32-NEXT:    stw r0, 184(r1)
 ; AIX32-NEXT:    lxvw4x vs0, r4, r5
-; AIX32-NEXT:    lxvw4x vs1, r4, r6
-; AIX32-NEXT:    lxvw4x vs2, r4, r7
-; AIX32-NEXT:    lxvw4x vs3, r4, r8
-; AIX32-NEXT:    lxvw4x vs4, r4, r9
-; AIX32-NEXT:    lxvw4x vs5, r4, r10
-; AIX32-NEXT:    lxvw4x vs6, 0, r4
 ; AIX32-NEXT:    stxvw4x vs0, r3, r5
-; AIX32-NEXT:    stxvw4x vs1, r3, r6
-; AIX32-NEXT:    stxvw4x vs2, r3, r7
-; AIX32-NEXT:    stxvw4x vs3, r3, r8
-; AIX32-NEXT:    stxvw4x vs4, r3, r9
-; AIX32-NEXT:    stxvw4x vs5, r3, r10
-; AIX32-NEXT:    stxvw4x vs6, 0, r3
+; AIX32-NEXT:    li r5, 80
+; AIX32-NEXT:    lxvw4x vs0, r4, r5
+; AIX32-NEXT:    stxvw4x vs0, r3, r5
+; AIX32-NEXT:    li r5, 64
+; AIX32-NEXT:    lxvw4x vs0, r4, r5
+; AIX32-NEXT:    stxvw4x vs0, r3, r5
+; AIX32-NEXT:    li r5, 48
+; AIX32-NEXT:    lxvw4x vs0, r4, r5
+; AIX32-NEXT:    stxvw4x vs0, r3, r5
+; AIX32-NEXT:    li r5, 32
+; AIX32-NEXT:    lxvw4x vs0, r4, r5
+; AIX32-NEXT:    stxvw4x vs0, r3, r5
+; AIX32-NEXT:    li r5, 16
+; AIX32-NEXT:    lxvw4x vs0, r4, r5
+; AIX32-NEXT:    stxvw4x vs0, r3, r5
+; AIX32-NEXT:    lxvw4x vs0, 0, r4
+; AIX32-NEXT:    stxvw4x vs0, 0, r3
 ; AIX32-NEXT:    bl .calleeInt[PR]
 ; AIX32-NEXT:    nop
 ; AIX32-NEXT:    addi r1, r1, 176
@@ -454,27 +454,27 @@ define dso_local signext i32 @array1() local_unnamed_addr #0 {
 ; AIX64-NEXT:    stdu r1, -224(r1)
 ; AIX64-NEXT:    ld r4, L..C0(r2) # @__ModuleStringPool
 ; AIX64-NEXT:    li r5, 96
-; AIX64-NEXT:    li r6, 80
-; AIX64-NEXT:    li r7, 64
-; AIX64-NEXT:    li r8, 48
-; AIX64-NEXT:    li r9, 32
-; AIX64-NEXT:    li r10, 16
 ; AIX64-NEXT:    addi r3, r1, 112
 ; AIX64-NEXT:    std r0, 240(r1)
 ; AIX64-NEXT:    lxvw4x vs0, r4, r5
-; AIX64-NEXT:    lxvw4x vs1, r4, r6
-; AIX64-NEXT:    lxvw4x vs2, r4, r7
-; AIX64-NEXT:    lxvw4x vs3, r4, r8
-; AIX64-NEXT:    lxvw4x vs4, r4, r9
-; AIX64-NEXT:    lxvw4x vs5, r4, r10
-; AIX64-NEXT:    lxvw4x vs6, 0, r4
 ; AIX64-NEXT:    stxvw4x vs0, r3, r5
-; AIX64-NEXT:    stxvw4x vs1, r3, r6
-; AIX64-NEXT:    stxvw4x vs2, r3, r7
-; AIX64-NEXT:    stxvw4x vs3, r3, r8
-; AIX64-NEXT:    stxvw4x vs4, r3, r9
-; AIX64-NEXT:    stxvw4x vs5, r3, r10
-; AIX64-NEXT:    stxvw4x vs6, 0, r3
+; AIX64-NEXT:    li r5, 80
+; AIX64-NEXT:    lxvw4x vs0, r4, r5
+; AIX64-NEXT:    stxvw4x vs0, r3, r5
+; AIX64-NEXT:    li r5, 64
+; AIX64-NEXT:    lxvw4x vs0, r4, r5
+; AIX64-NEXT:    stxvw4x vs0, r3, r5
+; AIX64-NEXT:    li r5, 48
+; AIX64-NEXT:    lxvw4x vs0, r4, r5
+; AIX64-NEXT:    stxvw4x vs0, r3, r5
+; AIX64-NEXT:    li r5, 32
+; AIX64-NEXT:    lxvw4x vs0, r4, r5
+; AIX64-NEXT:    stxvw4x vs0, r3, r5
+; AIX64-NEXT:    li r5, 16
+; AIX64-NEXT:    lxvw4x vs0, r4, r5
+; AIX64-NEXT:    stxvw4x vs0, r3, r5
+; AIX64-NEXT:    lxvw4x vs0, 0, r4
+; AIX64-NEXT:    stxvw4x vs0, 0, r3
 ; AIX64-NEXT:    bl .calleeInt[PR]
 ; AIX64-NEXT:    nop
 ; AIX64-NEXT:    addi r1, r1, 224
@@ -486,30 +486,30 @@ define dso_local signext i32 @array1() local_unnamed_addr #0 {
 ; LINUX64BE:       # %bb.0: # %entry
 ; LINUX64BE-NEXT:    mflr r0
 ; LINUX64BE-NEXT:    stdu r1, -224(r1)
-; LINUX64BE-NEXT:    addis r3, r2, .L__ModuleStringPool@toc@ha
-; LINUX64BE-NEXT:    li r4, 96
-; LINUX64BE-NEXT:    li r6, 80
-; LINUX64BE-NEXT:    li r7, 64
-; LINUX64BE-NEXT:    li r8, 48
-; LINUX64BE-NEXT:    li r9, 32
-; LINUX64BE-NEXT:    li r10, 16
-; LINUX64BE-NEXT:    addi r5, r3, .L__ModuleStringPool@toc@l
+; LINUX64BE-NEXT:    addis r4, r2, .L__ModuleStringPool@toc@ha
+; LINUX64BE-NEXT:    li r5, 96
 ; LINUX64BE-NEXT:    addi r3, r1, 112
 ; LINUX64BE-NEXT:    std r0, 240(r1)
-; LINUX64BE-NEXT:    lxvw4x vs0, r5, r4
-; LINUX64BE-NEXT:    lxvw4x vs1, r5, r6
-; LINUX64BE-NEXT:    lxvw4x vs2, r5, r7
-; LINUX64BE-NEXT:    lxvw4x vs3, r5, r8
-; LINUX64BE-NEXT:    lxvw4x vs4, r5, r9
-; LINUX64BE-NEXT:    lxvw4x vs5, r5, r10
-; LINUX64BE-NEXT:    lxvw4x vs6, 0, r5
-; LINUX64BE-NEXT:    stxvw4x vs0, r3, r4
-; LINUX64BE-NEXT:    stxvw4x vs1, r3, r6
-; LINUX64BE-NEXT:    stxvw4x vs2, r3, r7
-; LINUX64BE-NEXT:    stxvw4x vs3, r3, r8
-; LINUX64BE-NEXT:    stxvw4x vs4, r3, r9
-; LINUX64BE-NEXT:    stxvw4x vs5, r3, r10
-; LINUX64BE-NEXT:    stxvw4x vs6, 0, r3
+; LINUX64BE-NEXT:    addi r4, r4, .L__ModuleStringPool@toc@l
+; LINUX64BE-NEXT:    lxvw4x vs0, r4, r5
+; LINUX64BE-NEXT:    stxvw4x vs0, r3, r5
+; LINUX64BE-NEXT:    li r5, 80
+; LINUX64BE-NEXT:    lxvw4x vs0, r4, r5
+; LINUX64BE-NEXT:    stxvw4x vs0, r3, r5
+; LINUX64BE-NEXT:    li r5, 64
+; LINUX64BE-NEXT:    lxvw4x vs0, r4, r5
+; LINUX64BE-NEXT:    stxvw4x vs0, r3, r5
+; LINUX64BE-NEXT:    li r5, 48
+; LINUX64BE-NEXT:    lxvw4x vs0, r4, r5
+; LINUX64BE-NEXT:    stxvw4x vs0, r3, r5
+; LINUX64BE-NEXT:    li r5, 32
+; LINUX64BE-NEXT:    lxvw4x vs0, r4, r5
+; LINUX64BE-NEXT:    stxvw4x vs0, r3, r5
+; LINUX64BE-NEXT:    li r5, 16
+; LINUX64BE-NEXT:    lxvw4x vs0, r4, r5
+; LINUX64BE-NEXT:    stxvw4x vs0, r3, r5
+; LINUX64BE-NEXT:    lxvw4x vs0, 0, r4
+; LINUX64BE-NEXT:    stxvw4x vs0, 0, r3
 ; LINUX64BE-NEXT:    bl calleeInt
 ; LINUX64BE-NEXT:    nop
 ; LINUX64BE-NEXT:    addi r1, r1, 224
@@ -521,30 +521,30 @@ define dso_local signext i32 @array1() local_unnamed_addr #0 {
 ; LINUX64LE:       # %bb.0: # %entry
 ; LINUX64LE-NEXT:    mflr r0
 ; LINUX64LE-NEXT:    stdu r1, -144(r1)
-; LINUX64LE-NEXT:    addis r3, r2, .L__ModuleStringPool@toc@ha
-; LINUX64LE-NEXT:    li r4, 96
-; LINUX64LE-NEXT:    li r6, 80
-; LINUX64LE-NEXT:    li r7, 64
-; LINUX64LE-NEXT:    li r8, 48
-; LINUX64LE-NEXT:    li r9, 32
-; LINUX64LE-NEXT:    li r10, 16
-; LINUX64LE-NEXT:    addi r5, r3, .L__ModuleStringPool@toc@l
+; LINUX64LE-NEXT:    addis r4, r2, .L__ModuleStringPool@toc@ha
+; LINUX64LE-NEXT:    li r5, 96
 ; LINUX64LE-NEXT:    addi r3, r1, 32
 ; LINUX64LE-NEXT:    std r0, 160(r1)
-; LINUX64LE-NEXT:    lxvd2x vs0, r5, r4
-; LINUX64LE-NEXT:    lxvd2x vs1, r5, r6
-; LINUX64LE-NEXT:    lxvd2x vs2, r5, r7
-; LINUX64LE-NEXT:    lxvd2x vs3, r5, r8
-; LINUX64LE-NEXT:    lxvd2x vs4, r5, r9
-; LINUX64LE-NEXT:    lxvd2x vs5, r5, r10
-; LINUX64LE-NEXT:    lxvd2x vs6, 0, r5
-; LINUX64LE-NEXT:    stxvd2x vs0, r3, r4
-; LINUX64LE-NEXT:    stxvd2x vs1, r3, r6
-; LINUX64LE-NEXT:    stxvd2x vs2, r3, r7
-; LINUX64LE-NEXT:    stxvd2x vs3, r3, r8
-; LINUX64LE-NEXT:    stxvd2x vs4, r3, r9
-; LINUX64LE-NEXT:    stxvd2x vs5, r3, r10
-; LINUX64LE-NEXT:    stxvd2x vs6, 0, r3
+; LINUX64LE-NEXT:    addi r4, r4, .L__ModuleStringPool@toc@l
+; LINUX64LE-NEXT:    lxvd2x vs0, r4, r5
+; LINUX64LE-NEXT:    stxvd2x vs0, r3, r5
+; LINUX64LE-NEXT:    li r5, 80
+; LINUX64LE-NEXT:    lxvd2x vs0, r4, r5
+; LINUX64LE-NEXT:    stxvd2x vs0, r3, r5
+; LINUX64LE-NEXT:    li r5, 64
+; LINUX64LE-NEXT:    lxvd2x vs0, r4, r5
+; LINUX64LE-NEXT:    stxvd2x vs0, r3, r5
+; LINUX64LE-NEXT:    li r5, 48
+; LINUX64LE-NEXT:    lxvd2x vs0, r4, r5
+; LINUX64LE-NEXT:    stxvd2x vs0, r3, r5
+; LINUX64LE-NEXT:    li r5, 32
+; LINUX64LE-NEXT:    lxvd2x vs0, r4, r5
+; LINUX64LE-NEXT:    stxvd2x vs0, r3, r5
+; LINUX64LE-NEXT:    li r5, 16
+; LINUX64LE-NEXT:    lxvd2x vs0, r4, r5
+; LINUX64LE-NEXT:    stxvd2x vs0, r3, r5
+; LINUX64LE-NEXT:    lxvd2x vs0, 0, r4
+; LINUX64LE-NEXT:    stxvd2x vs0, 0, r3
 ; LINUX64LE-NEXT:    bl calleeInt
 ; LINUX64LE-NEXT:    nop
 ; LINUX64LE-NEXT:    addi r1, r1, 144
@@ -567,33 +567,33 @@ define dso_local signext i32 @array2() local_unnamed_addr #0 {
 ; AIX32-NEXT:    stwu r1, -176(r1)
 ; AIX32-NEXT:    lwz r4, L..C0(r2) # @__ModuleStringPool
 ; AIX32-NEXT:    li r3, 208
-; AIX32-NEXT:    li r5, 192
-; AIX32-NEXT:    li r6, 176
-; AIX32-NEXT:    li r7, 160
-; AIX32-NEXT:    li r8, 144
+; AIX32-NEXT:    li r5, 96
 ; AIX32-NEXT:    stw r0, 184(r1)
 ; AIX32-NEXT:    lxvw4x vs0, r4, r3
-; AIX32-NEXT:    lxvw4x vs1, r4, r5
-; AIX32-NEXT:    li r5, 96
 ; AIX32-NEXT:    addi r3, r1, 64
-; AIX32-NEXT:    lxvw4x vs2, r4, r6
-; AIX32-NEXT:    lxvw4x vs3, r4, r7
-; AIX32-NEXT:    li r6, 128
-; AIX32-NEXT:    li r7, 112
-; AIX32-NEXT:    lxvw4x vs4, r4, r8
-; AIX32-NEXT:    lxvw4x vs5, r4, r6
 ; AIX32-NEXT:    stxvw4x vs0, r3, r5
-; AIX32-NEXT:    lxvw4x vs0, r4, r7
-; AIX32-NEXT:    li r4, 80
+; AIX32-NEXT:    li r5, 192
+; AIX32-NEXT:    lxvw4x vs0, r4, r5
+; AIX32-NEXT:    li r5, 80
+; AIX32-NEXT:    stxvw4x vs0, r3, r5
+; AIX32-NEXT:    li r5, 176
+; AIX32-NEXT:    lxvw4x vs0, r4, r5
 ; AIX32-NEXT:    li r5, 64
-; AIX32-NEXT:    stxvw4x vs1, r3, r4
-; AIX32-NEXT:    li r4, 48
-; AIX32-NEXT:    stxvw4x vs2, r3, r5
+; AIX32-NEXT:    stxvw4x vs0, r3, r5
+; AIX32-NEXT:    li r5, 160
+; AIX32-NEXT:    lxvw4x vs0, r4, r5
+; AIX32-NEXT:    li r5, 48
+; AIX32-NEXT:    stxvw4x vs0, r3, r5
+; AIX32-NEXT:    li r5, 144
+; AIX32-NEXT:    lxvw4x vs0, r4, r5
 ; AIX32-NEXT:    li r5, 32
-; AIX32-NEXT:    stxvw4x vs3, r3, r4
-; AIX32-NEXT:    li r4, 16
-; AIX32-NEXT:    stxvw4x vs4, r3, r5
-; AIX32-NEXT:    stxvw4x vs5, r3, r4
+; AIX32-NEXT:    stxvw4x vs0, r3, r5
+; AIX32-NEXT:    li r5, 128
+; AIX32-NEXT:    lxvw4x vs0, r4, r5
+; AIX32-NEXT:    li r5, 16
+; AIX32-NEXT:    stxvw4x vs0, r3, r5
+; AIX32-NEXT:    li r5, 112
+; AIX32-NEXT:    lxvw4x vs0, r4, r5
 ; AIX32-NEXT:    stxvw4x vs0, 0, r3
 ; AIX32-NEXT:    bl .calleeInt[PR]
 ; AIX32-NEXT:    nop
@@ -608,33 +608,33 @@ define dso_local signext i32 @array2() local_unnamed_addr #0 {
 ; AIX64-NEXT:    stdu r1, -224(r1)
 ; AIX64-NEXT:    ld r4, L..C0(r2) # @__ModuleStringPool
 ; AIX64-NEXT:    li r3, 208
-; AIX64-NEXT:    li r5, 192
-; AIX64-NEXT:    li r6, 176
-; AIX64-NEXT:    li r7, 160
-; AIX64-NEXT:    li r8, 144
+; AIX64-NEXT:    li r5, 96
 ; AIX64-NEXT:    std r0, 240(r1)
 ; AIX64-NEXT:    lxvw4x vs0, r4, r3
-; AIX64-NEXT:    lxvw4x vs1, r4, r5
-; AIX64-NEXT:    li r5, 96
 ; AIX64-NEXT:    addi r3, r1, 112
-; AIX64-NEXT:    lxvw4x vs2, r4, r6
-; AIX64-NEXT:    lxvw4x vs3, r4, r7
-; AIX64-NEXT:    li r6, 128
-; AIX64-NEXT:    li r7, 112
-; AIX64-NEXT:    lxvw4x vs4, r4, r8
-; AIX64-NEXT:    lxvw4x vs5, r4, r6
 ; AIX64-NEXT:    stxvw4x vs0, r3, r5
-; AIX64-NEXT:    lxvw4x vs0, r4, r7
-; AIX64-NEXT:    li r4, 80
+; AIX64-NEXT:    li r5, 192
+; AIX64-NEXT:    lxvw4x vs0, r4, r5
+; AIX64-NEXT:    li r5, 80
+; AIX64-NEXT:    stxvw4x vs0, r3, r5
+; AIX64-NEXT:    li r5, 176
+; AIX64-NEXT:    lxvw4x vs0, r4, r5
 ; AIX64-NEXT:    li r5, 64
-; AIX64-NEXT:    stxvw4x vs1, r3, r4
-; AIX64-NEXT:    li r4, 48
-; AIX64-NEXT:    stxvw4x vs2, r3, r5
+; AIX64-NEXT:    stxvw4x vs0, r3, r5
+; AIX64-NEXT:    li r5, 160
+; AIX64-NEXT:    lxvw4x vs0, r4, r5
+; AIX64-NEXT:    li r5, 48
+; AIX64-NEXT:    stxvw4x vs0, r3, r5
+; AIX64-NEXT:    li r5, 144
+; AIX64-NEXT:    lxvw4x vs0, r4, r5
 ; AIX64-NEXT:    li r5, 32
-; AIX64-NEXT:    stxvw4x vs3, r3, r4
-; AIX64-NEXT:    li r4, 16
-; AIX64-NEXT:    stxvw4x vs4, r3, r5
-; AIX64-NEXT:    stxvw4x vs5, r3, r4
+; AIX64-NEXT:    stxvw4x vs0, r3, r5
+; AIX64-NEXT:    li r5, 128
+; AIX64-NEXT:    lxvw4x vs0, r4, r5
+; AIX64-NEXT:    li r5, 16
+; AIX64-NEXT:    stxvw4x vs0, r3, r5
+; AIX64-NEXT:    li r5, 112
+; AIX64-NEXT:    lxvw4x vs0, r4, r5
 ; AIX64-NEXT:    stxvw4x vs0, 0, r3
 ; AIX64-NEXT:    bl .calleeInt[PR]
 ; AIX64-NEXT:    nop
@@ -649,34 +649,34 @@ define dso_local signext i32 @array2() local_unnamed_addr #0 {
 ; LINUX64BE-NEXT:    stdu r1, -224(r1)
 ; LINUX64BE-NEXT:    addis r3, r2, .L__ModuleStringPool@toc@ha
 ; LINUX64BE-NEXT:    li r4, 208
-; LINUX64BE-NEXT:    li r6, 176
-; LINUX64BE-NEXT:    li r7, 144
+; LINUX64BE-NEXT:    li r5, 96
 ; LINUX64BE-NEXT:    std r0, 240(r1)
-; LINUX64BE-NEXT:    addi r5, r3, .L__ModuleStringPool@toc@l
-; LINUX64BE-NEXT:    li r3, 192
-; LINUX64BE-NEXT:    lxvw4x vs0, r5, r4
-; LINUX64BE-NEXT:    li r4, 160
-; LINUX64BE-NEXT:    lxvw4x vs1, r5, r3
+; LINUX64BE-NEXT:    addi r6, r3, .L__ModuleStringPool@toc@l
 ; LINUX64BE-NEXT:    addi r3, r1, 112
-; LINUX64BE-NEXT:    lxvw4x vs2, r5, r6
-; LINUX64BE-NEXT:    li r6, 96
-; LINUX64BE-NEXT:    lxvw4x vs3, r5, r4
-; LINUX64BE-NEXT:    li r4, 128
-; LINUX64BE-NEXT:    lxvw4x vs4, r5, r7
-; LINUX64BE-NEXT:    li r7, 112
-; LINUX64BE-NEXT:    lxvw4x vs5, r5, r4
+; LINUX64BE-NEXT:    lxvw4x vs0, r6, r4
+; LINUX64BE-NEXT:    li r4, 192
+; LINUX64BE-NEXT:    stxvw4x vs0, r3, r5
+; LINUX64BE-NEXT:    lxvw4x vs0, r6, r4
 ; LINUX64BE-NEXT:    li r4, 80
-; LINUX64BE-NEXT:    stxvw4x vs0, r3, r6
-; LINUX64BE-NEXT:    lxvw4x vs0, r5, r7
-; LINUX64BE-NEXT:    li r5, 64
-; LINUX64BE-NEXT:    stxvw4x vs1, r3, r4
+; LINUX64BE-NEXT:    stxvw4x vs0, r3, r4
+; LINUX64BE-NEXT:    li r4, 176
+; LINUX64BE-NEXT:    lxvw4x vs0, r6, r4
+; LINUX64BE-NEXT:    li r4, 64
+; LINUX64BE-NEXT:    stxvw4x vs0, r3, r4
+; LINUX64BE-NEXT:    li r4, 160
+; LINUX64BE-NEXT:    lxvw4x vs0, r6, r4
 ; LINUX64BE-NEXT:    li r4, 48
-; LINUX64BE-NEXT:    stxvw4x vs2, r3, r5
-; LINUX64BE-NEXT:    li r5, 32
-; LINUX64BE-NEXT:    stxvw4x vs3, r3, r4
+; LINUX64BE-NEXT:    stxvw4x vs0, r3, r4
+; LINUX64BE-NEXT:    li r4, 144
+; LINUX64BE-NEXT:    lxvw4x vs0, r6, r4
+; LINUX64BE-NEXT:    li r4, 32
+; LINUX64BE-NEXT:    stxvw4x vs0, r3, r4
+; LINUX64BE-NEXT:    li r4, 128
+; LINUX64BE-NEXT:    lxvw4x vs0, r6, r4
 ; LINUX64BE-NEXT:    li r4, 16
-; LINUX64BE-NEXT:    stxvw4x vs4, r3, r5
-; LINUX64BE-NEXT:    stxvw4x vs5, r3, r4
+; LINUX64BE-NEXT:    stxvw4x vs0, r3, r4
+; LINUX64BE-NEXT:    li r4, 112
+; LINUX64BE-NEXT:    lxvw4x vs0, r6, r4
 ; LINUX64BE-NEXT:    stxvw4x vs0, 0, r3
 ; LINUX64BE-NEXT:    bl calleeInt
 ; LINUX64BE-NEXT:    nop
@@ -691,34 +691,34 @@ define dso_local signext i32 @array2() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    stdu r1, -144(r1)
 ; LINUX64LE-NEXT:    addis r3, r2, .L__ModuleStringPool@toc@ha
 ; LINUX64LE-NEXT:    li r4, 208
-; LINUX64LE-NEXT:    li r6, 176
-; LINUX64LE-NEXT:    li r7, 144
+; LINUX64LE-NEXT:    li r5, 96
 ; LINUX64LE-NEXT:    std r0, 160(r1)
-; LINUX64LE-NEXT:    addi r5, r3, .L__ModuleStringPool@toc@l
-; LINUX64LE-NEXT:    li r3, 192
-; LINUX64LE-NEXT:    lxvd2x vs0, r5, r4
-; LINUX64LE-NEXT:    li r4, 160
-; LINUX64LE-NEXT:    lxvd2x vs1, r5, r3
+; LINUX64LE-NEXT:    addi r6, r3, .L__ModuleStringPool@toc@l
 ; LINUX64LE-NEXT:    addi r3, r1, 32
-; LINUX64LE-NEXT:    lxvd2x vs2, r5, r6
-; LINUX64LE-NEXT:    li r6, 96
-; LINUX64LE-NEXT:    lxvd2x vs3, r5, r4
-; LINUX64LE-NEXT:    li r4, 128
-; LINUX64LE-NEXT:    lxvd2x vs4, r5, r7
-; LINUX64LE-NEXT:    li r7, 112
-; LINUX64LE-NEXT:    lxvd2x vs5, r5, r4
+; LINUX64LE-NEXT:    lxvd2x vs0, r6, r4
+; LINUX64LE-NEXT:    li r4, 192
+; LINUX64LE-NEXT:    stxvd2x vs0, r3, r5
+; LINUX64LE-NEXT:    lxvd2x vs0, r6, r4
 ; LINUX64LE-NEXT:    li r4, 80
-; LINUX64LE-NEXT:    stxvd2x vs0, r3, r6
-; LINUX64LE-NEXT:    lxvd2x vs0, r5, r7
-; LINUX64LE-NEXT:    li r5, 64
-; LINUX64LE-NEXT:    stxvd2x vs1, r3, r4
+; LINUX64LE-NEXT:    stxvd2x vs0, r3, r4
+; LINUX64LE-NEXT:    li r4, 176
+; LINUX64LE-NEXT:    lxvd2x vs0, r6, r4
+; LINUX64LE-NEXT:    li r4, 64
+; LINUX64LE-NEXT:    stxvd2x vs0, r3, r4
+; LINUX64LE-NEXT:    li r4, 160
+; LINUX64LE-NEXT:    lxvd2x vs0, r6, r4
 ; LINUX64LE-NEXT:    li r4, 48
-; LINUX64LE-NEXT:    stxvd2x vs2, r3, r5
-; LINUX64LE-NEXT:    li r5, 32
-; LINUX64LE-NEXT:    stxvd2x vs3, r3, r4
+; LINUX64LE-NEXT:    stxvd2x vs0, r3, r4
+; LINUX64LE-NEXT:    li r4, 144
+; LINUX64LE-NEXT:    lxvd2x vs0, r6, r4
+; LINUX64LE-NEXT:    li r4, 32
+; LINUX64LE-NEXT:    stxvd2x vs0, r3, r4
+; LINUX64LE-NEXT:    li r4, 128
+; LINUX64LE-NEXT:    lxvd2x vs0, r6, r4
 ; LINUX64LE-NEXT:    li r4, 16
-; LINUX64LE-NEXT:    stxvd2x vs4, r3, r5
-; LINUX64LE-NEXT:    stxvd2x vs5, r3, r4
+; LINUX64LE-NEXT:    stxvd2x vs0, r3, r4
+; LINUX64LE-NEXT:    li r4, 112
+; LINUX64LE-NEXT:    lxvd2x vs0, r6, r4
 ; LINUX64LE-NEXT:    stxvd2x vs0, 0, r3
 ; LINUX64LE-NEXT:    bl calleeInt
 ; LINUX64LE-NEXT:    nop

diff  --git a/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll b/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll
index 441ec41e0d054d8..a6c5057dde57d3f 100644
--- a/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll
+++ b/llvm/test/CodeGen/PowerPC/mergeable-string-pool.ll
@@ -8,7 +8,7 @@
 ; RUN:   -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefixes=LINUX64LE,LINUXDATA
 
 
-;; This @GLOBALSTRING is a user of @.str which causes @.str to not get pooled. 
+;; This @GLOBALSTRING is a user of @.str which causes @.str to not get pooled.
 @.str = private unnamed_addr constant [47 x i8] c"This is the global string that is at the top.\0A\00", align 1
 @GLOBALSTRING = dso_local local_unnamed_addr global ptr @.str, align 8
 
@@ -169,18 +169,19 @@ define dso_local signext i32 @str3() local_unnamed_addr #0 {
 ; AIX32-NEXT:    mflr r0
 ; AIX32-NEXT:    stwu r1, -64(r1)
 ; AIX32-NEXT:    stw r0, 72(r1)
+; AIX32-NEXT:    stw r30, 56(r1) # 4-byte Folded Spill
+; AIX32-NEXT:    lwz r30, L..C0(r2) # @__ModuleStringPool
+; AIX32-NEXT:    addi r3, r30, 434
 ; AIX32-NEXT:    stw r31, 60(r1) # 4-byte Folded Spill
-; AIX32-NEXT:    lwz r31, L..C0(r2) # @__ModuleStringPool
-; AIX32-NEXT:    addi r3, r31, 434
 ; AIX32-NEXT:    bl .callee[PR]
 ; AIX32-NEXT:    nop
-; AIX32-NEXT:    addi r4, r31, 388
 ; AIX32-NEXT:    mr r31, r3
-; AIX32-NEXT:    mr r3, r4
+; AIX32-NEXT:    addi r3, r30, 388
 ; AIX32-NEXT:    bl .callee[PR]
 ; AIX32-NEXT:    nop
 ; AIX32-NEXT:    add r3, r3, r31
 ; AIX32-NEXT:    lwz r31, 60(r1) # 4-byte Folded Reload
+; AIX32-NEXT:    lwz r30, 56(r1) # 4-byte Folded Reload
 ; AIX32-NEXT:    addi r1, r1, 64
 ; AIX32-NEXT:    lwz r0, 8(r1)
 ; AIX32-NEXT:    mtlr r0
@@ -193,8 +194,8 @@ define dso_local signext i32 @str3() local_unnamed_addr #0 {
 ; AIX64-NEXT:    std r0, 144(r1)
 ; AIX64-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
 ; AIX64-NEXT:    ld r30, L..C0(r2) # @__ModuleStringPool
-; AIX64-NEXT:    std r31, 120(r1) # 8-byte Folded Spill
 ; AIX64-NEXT:    addi r3, r30, 434
+; AIX64-NEXT:    std r31, 120(r1) # 8-byte Folded Spill
 ; AIX64-NEXT:    bl .callee[PR]
 ; AIX64-NEXT:    nop
 ; AIX64-NEXT:    mr r31, r3
@@ -217,8 +218,8 @@ define dso_local signext i32 @str3() local_unnamed_addr #0 {
 ; LINUX64BE-NEXT:    addis r3, r2, .L__ModuleStringPool@toc@ha
 ; LINUX64BE-NEXT:    std r0, 160(r1)
 ; LINUX64BE-NEXT:    std r29, 120(r1) # 8-byte Folded Spill
-; LINUX64BE-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; LINUX64BE-NEXT:    addi r29, r3, .L__ModuleStringPool@toc@l
+; LINUX64BE-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; LINUX64BE-NEXT:    addi r3, r29, 434
 ; LINUX64BE-NEXT:    bl callee
 ; LINUX64BE-NEXT:    nop
@@ -397,17 +398,17 @@ define dso_local signext i32 @array1() local_unnamed_addr #0 {
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    mflr r0
 ; AIX32-NEXT:    stwu r1, -96(r1)
-; AIX32-NEXT:    lwz r3, L..C0(r2) # @__ModuleStringPool
-; AIX32-NEXT:    li r4, 372
-; AIX32-NEXT:    li r5, 360
-; AIX32-NEXT:    stw r0, 104(r1)
-; AIX32-NEXT:    lxvw4x vs0, r3, r4
-; AIX32-NEXT:    lxvw4x vs1, r3, r5
-; AIX32-NEXT:    li r4, 12
+; AIX32-NEXT:    lwz r4, L..C0(r2) # @__ModuleStringPool
+; AIX32-NEXT:    li r6, 372
+; AIX32-NEXT:    li r5, 12
 ; AIX32-NEXT:    addi r3, r1, 64
-; AIX32-NEXT:    rlwimi r4, r3, 0, 30, 27
-; AIX32-NEXT:    stxvw4x vs0, 0, r4
-; AIX32-NEXT:    stxvw4x vs1, 0, r3
+; AIX32-NEXT:    stw r0, 104(r1)
+; AIX32-NEXT:    rlwimi r5, r3, 0, 30, 27
+; AIX32-NEXT:    lxvw4x vs0, r4, r6
+; AIX32-NEXT:    stxvw4x vs0, 0, r5
+; AIX32-NEXT:    li r5, 360
+; AIX32-NEXT:    lxvw4x vs0, r4, r5
+; AIX32-NEXT:    stxvw4x vs0, 0, r3
 ; AIX32-NEXT:    bl .calleeInt[PR]
 ; AIX32-NEXT:    nop
 ; AIX32-NEXT:    addi r1, r1, 96
@@ -421,14 +422,14 @@ define dso_local signext i32 @array1() local_unnamed_addr #0 {
 ; AIX64-NEXT:    stdu r1, -144(r1)
 ; AIX64-NEXT:    ld r3, L..C0(r2) # @__ModuleStringPool
 ; AIX64-NEXT:    li r4, 372
-; AIX64-NEXT:    li r5, 360
 ; AIX64-NEXT:    std r0, 160(r1)
 ; AIX64-NEXT:    lxvw4x vs0, r3, r4
-; AIX64-NEXT:    lxvw4x vs1, r3, r5
 ; AIX64-NEXT:    addi r4, r1, 124
-; AIX64-NEXT:    addi r3, r1, 112
 ; AIX64-NEXT:    stxvw4x vs0, 0, r4
-; AIX64-NEXT:    stxvw4x vs1, 0, r3
+; AIX64-NEXT:    li r4, 360
+; AIX64-NEXT:    lxvw4x vs0, r3, r4
+; AIX64-NEXT:    addi r3, r1, 112
+; AIX64-NEXT:    stxvw4x vs0, 0, r3
 ; AIX64-NEXT:    bl .calleeInt[PR]
 ; AIX64-NEXT:    nop
 ; AIX64-NEXT:    addi r1, r1, 144
@@ -442,15 +443,15 @@ define dso_local signext i32 @array1() local_unnamed_addr #0 {
 ; LINUX64BE-NEXT:    stdu r1, -144(r1)
 ; LINUX64BE-NEXT:    addis r3, r2, .L__ModuleStringPool@toc@ha
 ; LINUX64BE-NEXT:    li r4, 372
-; LINUX64BE-NEXT:    li r5, 360
 ; LINUX64BE-NEXT:    std r0, 160(r1)
 ; LINUX64BE-NEXT:    addi r3, r3, .L__ModuleStringPool@toc@l
 ; LINUX64BE-NEXT:    lxvw4x vs0, r3, r4
-; LINUX64BE-NEXT:    lxvw4x vs1, r3, r5
 ; LINUX64BE-NEXT:    addi r4, r1, 124
-; LINUX64BE-NEXT:    addi r3, r1, 112
 ; LINUX64BE-NEXT:    stxvw4x vs0, 0, r4
-; LINUX64BE-NEXT:    stxvw4x vs1, 0, r3
+; LINUX64BE-NEXT:    li r4, 360
+; LINUX64BE-NEXT:    lxvw4x vs0, r3, r4
+; LINUX64BE-NEXT:    addi r3, r1, 112
+; LINUX64BE-NEXT:    stxvw4x vs0, 0, r3
 ; LINUX64BE-NEXT:    bl calleeInt
 ; LINUX64BE-NEXT:    nop
 ; LINUX64BE-NEXT:    addi r1, r1, 144
@@ -464,15 +465,15 @@ define dso_local signext i32 @array1() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    stdu r1, -64(r1)
 ; LINUX64LE-NEXT:    addis r3, r2, .L__ModuleStringPool@toc@ha
 ; LINUX64LE-NEXT:    li r4, 372
-; LINUX64LE-NEXT:    li r5, 360
 ; LINUX64LE-NEXT:    std r0, 80(r1)
 ; LINUX64LE-NEXT:    addi r3, r3, .L__ModuleStringPool@toc@l
 ; LINUX64LE-NEXT:    lxvd2x vs0, r3, r4
-; LINUX64LE-NEXT:    lxvd2x vs1, r3, r5
 ; LINUX64LE-NEXT:    addi r4, r1, 44
-; LINUX64LE-NEXT:    addi r3, r1, 32
 ; LINUX64LE-NEXT:    stxvd2x vs0, 0, r4
-; LINUX64LE-NEXT:    stxvd2x vs1, 0, r3
+; LINUX64LE-NEXT:    li r4, 360
+; LINUX64LE-NEXT:    lxvd2x vs0, r3, r4
+; LINUX64LE-NEXT:    addi r3, r1, 32
+; LINUX64LE-NEXT:    stxvd2x vs0, 0, r3
 ; LINUX64LE-NEXT:    bl calleeInt
 ; LINUX64LE-NEXT:    nop
 ; LINUX64LE-NEXT:    addi r1, r1, 64
@@ -532,21 +533,20 @@ define dso_local signext i32 @str6() local_unnamed_addr #0 {
 ; AIX32-NEXT:    mflr r0
 ; AIX32-NEXT:    stwu r1, -80(r1)
 ; AIX32-NEXT:    li r3, 17152
-; AIX32-NEXT:    lis r4, 16963
 ; AIX32-NEXT:    stw r0, 88(r1)
-; AIX32-NEXT:    lis r5, 16706
-; AIX32-NEXT:    sth r3, 72(r1)
-; AIX32-NEXT:    ori r3, r4, 16706
-; AIX32-NEXT:    ori r4, r5, 17217
 ; AIX32-NEXT:    stw r31, 76(r1) # 4-byte Folded Spill
+; AIX32-NEXT:    sth r3, 72(r1)
+; AIX32-NEXT:    lis r3, 16963
+; AIX32-NEXT:    ori r3, r3, 16706
 ; AIX32-NEXT:    stw r3, 68(r1)
+; AIX32-NEXT:    lis r3, 16706
+; AIX32-NEXT:    ori r3, r3, 17217
+; AIX32-NEXT:    stw r3, 64(r1)
 ; AIX32-NEXT:    addi r3, r1, 64
-; AIX32-NEXT:    stw r4, 64(r1)
 ; AIX32-NEXT:    bl .callee[PR]
 ; AIX32-NEXT:    nop
-; AIX32-NEXT:    addi r4, r1, 69
 ; AIX32-NEXT:    mr r31, r3
-; AIX32-NEXT:    mr r3, r4
+; AIX32-NEXT:    addi r3, r1, 69
 ; AIX32-NEXT:    bl .callee[PR]
 ; AIX32-NEXT:    nop
 ; AIX32-NEXT:    add r3, r3, r31
@@ -560,14 +560,14 @@ define dso_local signext i32 @str6() local_unnamed_addr #0 {
 ; AIX64:       # %bb.0: # %entry
 ; AIX64-NEXT:    mflr r0
 ; AIX64-NEXT:    stdu r1, -144(r1)
-; AIX64-NEXT:    lis r3, 16706
+; AIX64-NEXT:    li r3, 17152
 ; AIX64-NEXT:    std r0, 160(r1)
-; AIX64-NEXT:    li r4, 17152
 ; AIX64-NEXT:    std r31, 136(r1) # 8-byte Folded Spill
+; AIX64-NEXT:    sth r3, 128(r1)
+; AIX64-NEXT:    lis r3, 16706
 ; AIX64-NEXT:    ori r3, r3, 17217
 ; AIX64-NEXT:    rldic r3, r3, 32, 1
 ; AIX64-NEXT:    oris r3, r3, 16963
-; AIX64-NEXT:    sth r4, 128(r1)
 ; AIX64-NEXT:    ori r3, r3, 16706
 ; AIX64-NEXT:    std r3, 120(r1)
 ; AIX64-NEXT:    addi r3, r1, 120
@@ -589,14 +589,14 @@ define dso_local signext i32 @str6() local_unnamed_addr #0 {
 ; LINUX64BE:       # %bb.0: # %entry
 ; LINUX64BE-NEXT:    mflr r0
 ; LINUX64BE-NEXT:    stdu r1, -144(r1)
-; LINUX64BE-NEXT:    lis r3, 16706
+; LINUX64BE-NEXT:    li r3, 17152
 ; LINUX64BE-NEXT:    std r0, 160(r1)
-; LINUX64BE-NEXT:    li r4, 17152
 ; LINUX64BE-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
+; LINUX64BE-NEXT:    sth r3, 120(r1)
+; LINUX64BE-NEXT:    lis r3, 16706
 ; LINUX64BE-NEXT:    ori r3, r3, 17217
 ; LINUX64BE-NEXT:    rldic r3, r3, 32, 1
 ; LINUX64BE-NEXT:    oris r3, r3, 16963
-; LINUX64BE-NEXT:    sth r4, 120(r1)
 ; LINUX64BE-NEXT:    ori r3, r3, 16706
 ; LINUX64BE-NEXT:    std r3, 112(r1)
 ; LINUX64BE-NEXT:    addi r3, r1, 112
@@ -619,11 +619,11 @@ define dso_local signext i32 @str6() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    mflr r0
 ; LINUX64LE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; LINUX64LE-NEXT:    stdu r1, -64(r1)
-; LINUX64LE-NEXT:    lis r3, 8480
+; LINUX64LE-NEXT:    li r3, 67
 ; LINUX64LE-NEXT:    std r0, 80(r1)
-; LINUX64LE-NEXT:    li r4, 67
+; LINUX64LE-NEXT:    sth r3, 40(r1)
+; LINUX64LE-NEXT:    lis r3, 8480
 ; LINUX64LE-NEXT:    ori r3, r3, 41377
-; LINUX64LE-NEXT:    sth r4, 40(r1)
 ; LINUX64LE-NEXT:    rldic r3, r3, 33, 1
 ; LINUX64LE-NEXT:    oris r3, r3, 16707
 ; LINUX64LE-NEXT:    ori r3, r3, 16961
@@ -665,10 +665,9 @@ define dso_local signext i32 @str7() local_unnamed_addr #0 {
 ; AIX32-NEXT:    lwz r3, 0(r3)
 ; AIX32-NEXT:    bl .callee[PR]
 ; AIX32-NEXT:    nop
-; AIX32-NEXT:    lwz r4, L..C0(r2) # @__ModuleStringPool
 ; AIX32-NEXT:    mr r31, r3
-; AIX32-NEXT:    addi r4, r4, 458
-; AIX32-NEXT:    mr r3, r4
+; AIX32-NEXT:    lwz r3, L..C0(r2) # @__ModuleStringPool
+; AIX32-NEXT:    addi r3, r3, 458
 ; AIX32-NEXT:    bl .callee[PR]
 ; AIX32-NEXT:    nop
 ; AIX32-NEXT:    add r3, r3, r31
@@ -707,8 +706,8 @@ define dso_local signext i32 @str7() local_unnamed_addr #0 {
 ; LINUX64BE-NEXT:    stdu r1, -128(r1)
 ; LINUX64BE-NEXT:    std r0, 144(r1)
 ; LINUX64BE-NEXT:    addis r3, r2, GLOBALSTRING@toc@ha
-; LINUX64BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
 ; LINUX64BE-NEXT:    ld r3, GLOBALSTRING@toc@l(r3)
+; LINUX64BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
 ; LINUX64BE-NEXT:    bl callee
 ; LINUX64BE-NEXT:    nop
 ; LINUX64BE-NEXT:    mr r30, r3
@@ -766,10 +765,9 @@ define dso_local signext i32 @mixed1() local_unnamed_addr #0 {
 ; AIX32-NEXT:    stw r31, 60(r1) # 4-byte Folded Spill
 ; AIX32-NEXT:    bl .calleeInt[PR]
 ; AIX32-NEXT:    nop
-; AIX32-NEXT:    lwz r4, L..C0(r2) # @__ModuleStringPool
 ; AIX32-NEXT:    mr r31, r3
-; AIX32-NEXT:    addi r4, r4, 400
-; AIX32-NEXT:    mr r3, r4
+; AIX32-NEXT:    lwz r3, L..C0(r2) # @__ModuleStringPool
+; AIX32-NEXT:    addi r3, r3, 400
 ; AIX32-NEXT:    bl .callee[PR]
 ; AIX32-NEXT:    nop
 ; AIX32-NEXT:    add r3, r3, r31
@@ -863,33 +861,28 @@ define dso_local signext i32 @mixed2() local_unnamed_addr #0 {
 ; AIX32-NEXT:    stw r0, 120(r1)
 ; AIX32-NEXT:    stw r30, 104(r1) # 4-byte Folded Spill
 ; AIX32-NEXT:    lwz r30, L..C0(r2) # @__ModuleStringPool
-; AIX32-NEXT:    li r3, 372
-; AIX32-NEXT:    li r4, 360
-; AIX32-NEXT:    stw r31, 108(r1) # 4-byte Folded Spill
-; AIX32-NEXT:    lxvw4x vs0, r30, r3
-; AIX32-NEXT:    lxvw4x vs1, r30, r4
+; AIX32-NEXT:    li r5, 372
 ; AIX32-NEXT:    li r4, 12
 ; AIX32-NEXT:    addi r3, r1, 64
+; AIX32-NEXT:    stw r31, 108(r1) # 4-byte Folded Spill
 ; AIX32-NEXT:    rlwimi r4, r3, 0, 30, 27
+; AIX32-NEXT:    lxvw4x vs0, r30, r5
 ; AIX32-NEXT:    stxvw4x vs0, 0, r4
-; AIX32-NEXT:    stxvw4x vs1, 0, r3
+; AIX32-NEXT:    li r4, 360
+; AIX32-NEXT:    lxvw4x vs0, r30, r4
+; AIX32-NEXT:    stxvw4x vs0, 0, r3
 ; AIX32-NEXT:    bl .calleeInt[PR]
 ; AIX32-NEXT:    nop
-; AIX32-NEXT:    lwz r4, L..C3(r2) # @IntArray2
 ; AIX32-NEXT:    mr r31, r3
-; AIX32-NEXT:    mr r3, r4
+; AIX32-NEXT:    lwz r3, L..C3(r2) # @IntArray2
 ; AIX32-NEXT:    bl .calleeInt[PR]
 ; AIX32-NEXT:    nop
-; AIX32-NEXT:    addi r4, r30, 400
-; AIX32-NEXT:    mr r5, r3
-; AIX32-NEXT:    mr r3, r4
-; AIX32-NEXT:    add r31, r5, r31
+; AIX32-NEXT:    add r31, r3, r31
+; AIX32-NEXT:    addi r3, r30, 400
 ; AIX32-NEXT:    bl .callee[PR]
 ; AIX32-NEXT:    nop
-; AIX32-NEXT:    addi r4, r30, 473
-; AIX32-NEXT:    mr r5, r3
-; AIX32-NEXT:    mr r3, r4
-; AIX32-NEXT:    add r31, r31, r5
+; AIX32-NEXT:    add r31, r31, r3
+; AIX32-NEXT:    addi r3, r30, 473
 ; AIX32-NEXT:    bl .callee[PR]
 ; AIX32-NEXT:    nop
 ; AIX32-NEXT:    add r3, r31, r3
@@ -908,28 +901,26 @@ define dso_local signext i32 @mixed2() local_unnamed_addr #0 {
 ; AIX64-NEXT:    std r30, 144(r1) # 8-byte Folded Spill
 ; AIX64-NEXT:    ld r30, L..C0(r2) # @__ModuleStringPool
 ; AIX64-NEXT:    li r3, 372
-; AIX64-NEXT:    li r4, 360
 ; AIX64-NEXT:    std r31, 152(r1) # 8-byte Folded Spill
 ; AIX64-NEXT:    lxvw4x vs0, r30, r3
-; AIX64-NEXT:    lxvw4x vs1, r30, r4
-; AIX64-NEXT:    addi r4, r1, 124
+; AIX64-NEXT:    addi r3, r1, 124
+; AIX64-NEXT:    stxvw4x vs0, 0, r3
+; AIX64-NEXT:    li r3, 360
+; AIX64-NEXT:    lxvw4x vs0, r30, r3
 ; AIX64-NEXT:    addi r3, r1, 112
-; AIX64-NEXT:    stxvw4x vs0, 0, r4
-; AIX64-NEXT:    stxvw4x vs1, 0, r3
+; AIX64-NEXT:    stxvw4x vs0, 0, r3
 ; AIX64-NEXT:    bl .calleeInt[PR]
 ; AIX64-NEXT:    nop
 ; AIX64-NEXT:    mr r31, r3
 ; AIX64-NEXT:    ld r3, L..C3(r2) # @IntArray2
 ; AIX64-NEXT:    bl .calleeInt[PR]
 ; AIX64-NEXT:    nop
-; AIX64-NEXT:    addi r4, r30, 400
 ; AIX64-NEXT:    add r31, r3, r31
-; AIX64-NEXT:    mr r3, r4
+; AIX64-NEXT:    addi r3, r30, 400
 ; AIX64-NEXT:    bl .callee[PR]
 ; AIX64-NEXT:    nop
-; AIX64-NEXT:    addi r4, r30, 473
 ; AIX64-NEXT:    add r31, r31, r3
-; AIX64-NEXT:    mr r3, r4
+; AIX64-NEXT:    addi r3, r30, 473
 ; AIX64-NEXT:    bl .callee[PR]
 ; AIX64-NEXT:    nop
 ; AIX64-NEXT:    add r3, r31, r3
@@ -948,16 +939,16 @@ define dso_local signext i32 @mixed2() local_unnamed_addr #0 {
 ; LINUX64BE-NEXT:    addis r3, r2, .L__ModuleStringPool@toc@ha
 ; LINUX64BE-NEXT:    std r0, 192(r1)
 ; LINUX64BE-NEXT:    std r29, 152(r1) # 8-byte Folded Spill
-; LINUX64BE-NEXT:    li r4, 360
+; LINUX64BE-NEXT:    li r4, 372
 ; LINUX64BE-NEXT:    std r30, 160(r1) # 8-byte Folded Spill
 ; LINUX64BE-NEXT:    addi r29, r3, .L__ModuleStringPool@toc@l
-; LINUX64BE-NEXT:    li r3, 372
+; LINUX64BE-NEXT:    addi r3, r1, 124
+; LINUX64BE-NEXT:    lxvw4x vs0, r29, r4
+; LINUX64BE-NEXT:    stxvw4x vs0, 0, r3
+; LINUX64BE-NEXT:    li r3, 360
 ; LINUX64BE-NEXT:    lxvw4x vs0, r29, r3
 ; LINUX64BE-NEXT:    addi r3, r1, 112
-; LINUX64BE-NEXT:    lxvw4x vs1, r29, r4
-; LINUX64BE-NEXT:    addi r4, r1, 124
-; LINUX64BE-NEXT:    stxvw4x vs0, 0, r4
-; LINUX64BE-NEXT:    stxvw4x vs1, 0, r3
+; LINUX64BE-NEXT:    stxvw4x vs0, 0, r3
 ; LINUX64BE-NEXT:    bl calleeInt
 ; LINUX64BE-NEXT:    nop
 ; LINUX64BE-NEXT:    mr r30, r3
@@ -965,14 +956,12 @@ define dso_local signext i32 @mixed2() local_unnamed_addr #0 {
 ; LINUX64BE-NEXT:    addi r3, r3, IntArray2@toc@l
 ; LINUX64BE-NEXT:    bl calleeInt
 ; LINUX64BE-NEXT:    nop
-; LINUX64BE-NEXT:    addi r4, r29, 400
 ; LINUX64BE-NEXT:    add r30, r3, r30
-; LINUX64BE-NEXT:    mr r3, r4
+; LINUX64BE-NEXT:    addi r3, r29, 400
 ; LINUX64BE-NEXT:    bl callee
 ; LINUX64BE-NEXT:    nop
-; LINUX64BE-NEXT:    addi r4, r29, 473
 ; LINUX64BE-NEXT:    add r30, r30, r3
-; LINUX64BE-NEXT:    mr r3, r4
+; LINUX64BE-NEXT:    addi r3, r29, 473
 ; LINUX64BE-NEXT:    bl callee
 ; LINUX64BE-NEXT:    nop
 ; LINUX64BE-NEXT:    add r3, r30, r3
@@ -991,16 +980,16 @@ define dso_local signext i32 @mixed2() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; LINUX64LE-NEXT:    stdu r1, -96(r1)
 ; LINUX64LE-NEXT:    addis r3, r2, .L__ModuleStringPool@toc@ha
-; LINUX64LE-NEXT:    li r4, 360
+; LINUX64LE-NEXT:    li r4, 372
 ; LINUX64LE-NEXT:    std r0, 112(r1)
 ; LINUX64LE-NEXT:    addi r29, r3, .L__ModuleStringPool@toc@l
-; LINUX64LE-NEXT:    li r3, 372
+; LINUX64LE-NEXT:    addi r3, r1, 44
+; LINUX64LE-NEXT:    lxvd2x vs0, r29, r4
+; LINUX64LE-NEXT:    stxvd2x vs0, 0, r3
+; LINUX64LE-NEXT:    li r3, 360
 ; LINUX64LE-NEXT:    lxvd2x vs0, r29, r3
-; LINUX64LE-NEXT:    lxvd2x vs1, r29, r4
-; LINUX64LE-NEXT:    addi r4, r1, 44
 ; LINUX64LE-NEXT:    addi r3, r1, 32
-; LINUX64LE-NEXT:    stxvd2x vs0, 0, r4
-; LINUX64LE-NEXT:    stxvd2x vs1, 0, r3
+; LINUX64LE-NEXT:    stxvd2x vs0, 0, r3
 ; LINUX64LE-NEXT:    bl calleeInt
 ; LINUX64LE-NEXT:    nop
 ; LINUX64LE-NEXT:    mr r30, r3
@@ -1008,14 +997,12 @@ define dso_local signext i32 @mixed2() local_unnamed_addr #0 {
 ; LINUX64LE-NEXT:    addi r3, r3, IntArray2@toc@l
 ; LINUX64LE-NEXT:    bl calleeInt
 ; LINUX64LE-NEXT:    nop
-; LINUX64LE-NEXT:    addi r4, r29, 400
 ; LINUX64LE-NEXT:    add r30, r3, r30
-; LINUX64LE-NEXT:    mr r3, r4
+; LINUX64LE-NEXT:    addi r3, r29, 400
 ; LINUX64LE-NEXT:    bl callee
 ; LINUX64LE-NEXT:    nop
-; LINUX64LE-NEXT:    addi r4, r29, 473
 ; LINUX64LE-NEXT:    add r30, r30, r3
-; LINUX64LE-NEXT:    mr r3, r4
+; LINUX64LE-NEXT:    addi r3, r29, 473
 ; LINUX64LE-NEXT:    bl callee
 ; LINUX64LE-NEXT:    nop
 ; LINUX64LE-NEXT:    add r3, r30, r3

diff  --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
index f21e1d4f296fa08..7c197449201faf1 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
@@ -67,23 +67,23 @@ define dso_local void @testLdSt(i64 %SrcIdx, i64 %DstIdx) {
 ; LE-PWR8-LABEL: testLdSt:
 ; LE-PWR8:       # %bb.0: # %entry
 ; LE-PWR8-NEXT:    addis r3, r2, f@toc@ha
-; LE-PWR8-NEXT:    li r4, 96
-; LE-PWR8-NEXT:    li r5, 112
+; LE-PWR8-NEXT:    li r4, 64
 ; LE-PWR8-NEXT:    addi r3, r3, f@toc@l
 ; LE-PWR8-NEXT:    lxvd2x vs0, r3, r4
-; LE-PWR8-NEXT:    li r4, 64
-; LE-PWR8-NEXT:    lxvd2x vs1, r3, r5
-; LE-PWR8-NEXT:    li r5, 80
+; LE-PWR8-NEXT:    li r4, 80
+; LE-PWR8-NEXT:    lxvd2x vs1, r3, r4
+; LE-PWR8-NEXT:    li r4, 96
 ; LE-PWR8-NEXT:    lxvd2x vs2, r3, r4
-; LE-PWR8-NEXT:    lxvd2x vs3, r3, r5
+; LE-PWR8-NEXT:    li r4, 112
+; LE-PWR8-NEXT:    lxvd2x vs3, r3, r4
 ; LE-PWR8-NEXT:    li r4, 176
-; LE-PWR8-NEXT:    li r5, 160
-; LE-PWR8-NEXT:    stxvd2x vs1, r3, r4
-; LE-PWR8-NEXT:    li r4, 144
-; LE-PWR8-NEXT:    stxvd2x vs0, r3, r5
-; LE-PWR8-NEXT:    li r5, 128
 ; LE-PWR8-NEXT:    stxvd2x vs3, r3, r4
-; LE-PWR8-NEXT:    stxvd2x vs2, r3, r5
+; LE-PWR8-NEXT:    li r4, 160
+; LE-PWR8-NEXT:    stxvd2x vs2, r3, r4
+; LE-PWR8-NEXT:    li r4, 144
+; LE-PWR8-NEXT:    stxvd2x vs1, r3, r4
+; LE-PWR8-NEXT:    li r4, 128
+; LE-PWR8-NEXT:    stxvd2x vs0, r3, r4
 ; LE-PWR8-NEXT:    blr
 ;
 ; BE-PWR9-LABEL: testLdSt:
@@ -103,23 +103,23 @@ define dso_local void @testLdSt(i64 %SrcIdx, i64 %DstIdx) {
 ; BE-PWR8-LABEL: testLdSt:
 ; BE-PWR8:       # %bb.0: # %entry
 ; BE-PWR8-NEXT:    addis r3, r2, f@toc@ha
-; BE-PWR8-NEXT:    li r4, 96
-; BE-PWR8-NEXT:    li r5, 112
+; BE-PWR8-NEXT:    li r4, 64
 ; BE-PWR8-NEXT:    addi r3, r3, f@toc@l
 ; BE-PWR8-NEXT:    lxvd2x vs0, r3, r4
-; BE-PWR8-NEXT:    li r4, 64
-; BE-PWR8-NEXT:    lxvd2x vs1, r3, r5
-; BE-PWR8-NEXT:    li r5, 80
+; BE-PWR8-NEXT:    li r4, 80
+; BE-PWR8-NEXT:    lxvd2x vs1, r3, r4
+; BE-PWR8-NEXT:    li r4, 96
 ; BE-PWR8-NEXT:    lxvd2x vs2, r3, r4
-; BE-PWR8-NEXT:    lxvd2x vs3, r3, r5
+; BE-PWR8-NEXT:    li r4, 112
+; BE-PWR8-NEXT:    lxvd2x vs3, r3, r4
 ; BE-PWR8-NEXT:    li r4, 176
-; BE-PWR8-NEXT:    li r5, 160
-; BE-PWR8-NEXT:    stxvd2x vs1, r3, r4
-; BE-PWR8-NEXT:    li r4, 144
-; BE-PWR8-NEXT:    stxvd2x vs0, r3, r5
-; BE-PWR8-NEXT:    li r5, 128
 ; BE-PWR8-NEXT:    stxvd2x vs3, r3, r4
-; BE-PWR8-NEXT:    stxvd2x vs2, r3, r5
+; BE-PWR8-NEXT:    li r4, 160
+; BE-PWR8-NEXT:    stxvd2x vs2, r3, r4
+; BE-PWR8-NEXT:    li r4, 144
+; BE-PWR8-NEXT:    stxvd2x vs1, r3, r4
+; BE-PWR8-NEXT:    li r4, 128
+; BE-PWR8-NEXT:    stxvd2x vs0, r3, r4
 ; BE-PWR8-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds <512 x i1>, ptr @f, i64 1
@@ -187,21 +187,21 @@ define dso_local void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) {
 ; LE-PWR8:       # %bb.0: # %entry
 ; LE-PWR8-NEXT:    addis r5, r2, f@toc@ha
 ; LE-PWR8-NEXT:    sldi r3, r3, 6
-; LE-PWR8-NEXT:    li r6, 48
-; LE-PWR8-NEXT:    li r8, 16
-; LE-PWR8-NEXT:    li r9, 32
+; LE-PWR8-NEXT:    li r7, 16
+; LE-PWR8-NEXT:    li r8, 32
+; LE-PWR8-NEXT:    li r9, 48
 ; LE-PWR8-NEXT:    addi r5, r5, f@toc@l
-; LE-PWR8-NEXT:    add r7, r5, r3
-; LE-PWR8-NEXT:    lxvd2x vs0, r5, r3
+; LE-PWR8-NEXT:    add r6, r5, r3
+; LE-PWR8-NEXT:    lxvd2x vs3, r5, r3
 ; LE-PWR8-NEXT:    sldi r3, r4, 6
-; LE-PWR8-NEXT:    lxvd2x vs1, r7, r6
-; LE-PWR8-NEXT:    lxvd2x vs2, r7, r8
-; LE-PWR8-NEXT:    add r4, r5, r3
-; LE-PWR8-NEXT:    lxvd2x vs3, r7, r9
-; LE-PWR8-NEXT:    stxvd2x vs0, r5, r3
-; LE-PWR8-NEXT:    stxvd2x vs1, r4, r6
-; LE-PWR8-NEXT:    stxvd2x vs3, r4, r9
-; LE-PWR8-NEXT:    stxvd2x vs2, r4, r8
+; LE-PWR8-NEXT:    lxvd2x vs0, r6, r7
+; LE-PWR8-NEXT:    lxvd2x vs1, r6, r8
+; LE-PWR8-NEXT:    lxvd2x vs2, r6, r9
+; LE-PWR8-NEXT:    stxvd2x vs3, r5, r3
+; LE-PWR8-NEXT:    add r3, r5, r3
+; LE-PWR8-NEXT:    stxvd2x vs2, r3, r9
+; LE-PWR8-NEXT:    stxvd2x vs1, r3, r8
+; LE-PWR8-NEXT:    stxvd2x vs0, r3, r7
 ; LE-PWR8-NEXT:    blr
 ;
 ; BE-PWR9-LABEL: testXLdSt:
@@ -226,21 +226,21 @@ define dso_local void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) {
 ; BE-PWR8:       # %bb.0: # %entry
 ; BE-PWR8-NEXT:    addis r5, r2, f@toc@ha
 ; BE-PWR8-NEXT:    sldi r3, r3, 6
-; BE-PWR8-NEXT:    li r6, 32
-; BE-PWR8-NEXT:    li r7, 48
-; BE-PWR8-NEXT:    li r9, 16
+; BE-PWR8-NEXT:    li r7, 32
+; BE-PWR8-NEXT:    li r8, 48
+; BE-PWR8-NEXT:    sldi r4, r4, 6
 ; BE-PWR8-NEXT:    addi r5, r5, f@toc@l
-; BE-PWR8-NEXT:    add r8, r5, r3
-; BE-PWR8-NEXT:    lxvd2x vs2, r5, r3
-; BE-PWR8-NEXT:    sldi r3, r4, 6
-; BE-PWR8-NEXT:    lxvd2x vs0, r8, r6
-; BE-PWR8-NEXT:    lxvd2x vs1, r8, r7
-; BE-PWR8-NEXT:    add r4, r5, r3
-; BE-PWR8-NEXT:    lxvd2x vs3, r8, r9
-; BE-PWR8-NEXT:    stxvd2x vs2, r5, r3
-; BE-PWR8-NEXT:    stxvd2x vs1, r4, r7
-; BE-PWR8-NEXT:    stxvd2x vs0, r4, r6
-; BE-PWR8-NEXT:    stxvd2x vs3, r4, r9
+; BE-PWR8-NEXT:    add r6, r5, r3
+; BE-PWR8-NEXT:    lxvd2x vs0, r5, r3
+; BE-PWR8-NEXT:    li r3, 16
+; BE-PWR8-NEXT:    lxvd2x vs1, r6, r3
+; BE-PWR8-NEXT:    lxvd2x vs2, r6, r7
+; BE-PWR8-NEXT:    lxvd2x vs3, r6, r8
+; BE-PWR8-NEXT:    add r6, r5, r4
+; BE-PWR8-NEXT:    stxvd2x vs0, r5, r4
+; BE-PWR8-NEXT:    stxvd2x vs3, r6, r8
+; BE-PWR8-NEXT:    stxvd2x vs2, r6, r7
+; BE-PWR8-NEXT:    stxvd2x vs1, r6, r3
 ; BE-PWR8-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds <512 x i1>, ptr @f, i64 %SrcIdx
@@ -302,23 +302,23 @@ define dso_local void @testUnalignedLdSt() {
 ; LE-PWR8-LABEL: testUnalignedLdSt:
 ; LE-PWR8:       # %bb.0: # %entry
 ; LE-PWR8-NEXT:    addis r3, r2, f@toc@ha
-; LE-PWR8-NEXT:    li r4, 43
-; LE-PWR8-NEXT:    li r5, 59
+; LE-PWR8-NEXT:    li r4, 11
 ; LE-PWR8-NEXT:    addi r3, r3, f@toc@l
 ; LE-PWR8-NEXT:    lxvd2x vs0, r3, r4
-; LE-PWR8-NEXT:    li r4, 11
-; LE-PWR8-NEXT:    lxvd2x vs1, r3, r5
-; LE-PWR8-NEXT:    li r5, 27
+; LE-PWR8-NEXT:    li r4, 27
+; LE-PWR8-NEXT:    lxvd2x vs1, r3, r4
+; LE-PWR8-NEXT:    li r4, 43
 ; LE-PWR8-NEXT:    lxvd2x vs2, r3, r4
-; LE-PWR8-NEXT:    lxvd2x vs3, r3, r5
+; LE-PWR8-NEXT:    li r4, 59
+; LE-PWR8-NEXT:    lxvd2x vs3, r3, r4
 ; LE-PWR8-NEXT:    li r4, 67
-; LE-PWR8-NEXT:    li r5, 51
-; LE-PWR8-NEXT:    stxvd2x vs1, r3, r4
-; LE-PWR8-NEXT:    li r4, 35
-; LE-PWR8-NEXT:    stxvd2x vs0, r3, r5
-; LE-PWR8-NEXT:    li r5, 19
 ; LE-PWR8-NEXT:    stxvd2x vs3, r3, r4
-; LE-PWR8-NEXT:    stxvd2x vs2, r3, r5
+; LE-PWR8-NEXT:    li r4, 51
+; LE-PWR8-NEXT:    stxvd2x vs2, r3, r4
+; LE-PWR8-NEXT:    li r4, 35
+; LE-PWR8-NEXT:    stxvd2x vs1, r3, r4
+; LE-PWR8-NEXT:    li r4, 19
+; LE-PWR8-NEXT:    stxvd2x vs0, r3, r4
 ; LE-PWR8-NEXT:    blr
 ;
 ; BE-PWR9-LABEL: testUnalignedLdSt:
@@ -346,23 +346,23 @@ define dso_local void @testUnalignedLdSt() {
 ; BE-PWR8-LABEL: testUnalignedLdSt:
 ; BE-PWR8:       # %bb.0: # %entry
 ; BE-PWR8-NEXT:    addis r3, r2, f@toc@ha
-; BE-PWR8-NEXT:    li r4, 43
-; BE-PWR8-NEXT:    li r5, 59
+; BE-PWR8-NEXT:    li r4, 11
 ; BE-PWR8-NEXT:    addi r3, r3, f@toc@l
 ; BE-PWR8-NEXT:    lxvd2x vs0, r3, r4
-; BE-PWR8-NEXT:    li r4, 11
-; BE-PWR8-NEXT:    lxvd2x vs1, r3, r5
-; BE-PWR8-NEXT:    li r5, 27
+; BE-PWR8-NEXT:    li r4, 27
+; BE-PWR8-NEXT:    lxvd2x vs1, r3, r4
+; BE-PWR8-NEXT:    li r4, 43
 ; BE-PWR8-NEXT:    lxvd2x vs2, r3, r4
-; BE-PWR8-NEXT:    lxvd2x vs3, r3, r5
+; BE-PWR8-NEXT:    li r4, 59
+; BE-PWR8-NEXT:    lxvd2x vs3, r3, r4
 ; BE-PWR8-NEXT:    li r4, 67
-; BE-PWR8-NEXT:    li r5, 51
-; BE-PWR8-NEXT:    stxvd2x vs1, r3, r4
-; BE-PWR8-NEXT:    li r4, 35
-; BE-PWR8-NEXT:    stxvd2x vs0, r3, r5
-; BE-PWR8-NEXT:    li r5, 19
 ; BE-PWR8-NEXT:    stxvd2x vs3, r3, r4
-; BE-PWR8-NEXT:    stxvd2x vs2, r3, r5
+; BE-PWR8-NEXT:    li r4, 51
+; BE-PWR8-NEXT:    stxvd2x vs2, r3, r4
+; BE-PWR8-NEXT:    li r4, 35
+; BE-PWR8-NEXT:    stxvd2x vs1, r3, r4
+; BE-PWR8-NEXT:    li r4, 19
+; BE-PWR8-NEXT:    stxvd2x vs0, r3, r4
 ; BE-PWR8-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i8, ptr @f, i64 11
@@ -405,14 +405,14 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) {
 ; LE-PWR8:       # %bb.0: # %entry
 ; LE-PWR8-NEXT:    addis r3, r2, g@toc@ha
 ; LE-PWR8-NEXT:    li r4, 32
-; LE-PWR8-NEXT:    li r5, 48
 ; LE-PWR8-NEXT:    addi r3, r3, g@toc@l
 ; LE-PWR8-NEXT:    lxvd2x vs0, r3, r4
-; LE-PWR8-NEXT:    lxvd2x vs1, r3, r5
+; LE-PWR8-NEXT:    li r4, 48
+; LE-PWR8-NEXT:    lxvd2x vs1, r3, r4
 ; LE-PWR8-NEXT:    li r4, 80
-; LE-PWR8-NEXT:    li r5, 64
 ; LE-PWR8-NEXT:    stxvd2x vs1, r3, r4
-; LE-PWR8-NEXT:    stxvd2x vs0, r3, r5
+; LE-PWR8-NEXT:    li r4, 64
+; LE-PWR8-NEXT:    stxvd2x vs0, r3, r4
 ; LE-PWR8-NEXT:    blr
 ;
 ; BE-PWR9-LABEL: testLdStPair:
@@ -429,14 +429,14 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) {
 ; BE-PWR8:       # %bb.0: # %entry
 ; BE-PWR8-NEXT:    addis r3, r2, g@toc@ha
 ; BE-PWR8-NEXT:    li r4, 32
-; BE-PWR8-NEXT:    li r5, 48
 ; BE-PWR8-NEXT:    addi r3, r3, g@toc@l
 ; BE-PWR8-NEXT:    lxvd2x vs0, r3, r4
-; BE-PWR8-NEXT:    lxvd2x vs1, r3, r5
+; BE-PWR8-NEXT:    li r4, 48
+; BE-PWR8-NEXT:    lxvd2x vs1, r3, r4
 ; BE-PWR8-NEXT:    li r4, 80
-; BE-PWR8-NEXT:    li r5, 64
 ; BE-PWR8-NEXT:    stxvd2x vs1, r3, r4
-; BE-PWR8-NEXT:    stxvd2x vs0, r3, r5
+; BE-PWR8-NEXT:    li r4, 64
+; BE-PWR8-NEXT:    stxvd2x vs0, r3, r4
 ; BE-PWR8-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds <256 x i1>, ptr @g, i64 1
@@ -521,15 +521,15 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) {
 ; BE-PWR8:       # %bb.0: # %entry
 ; BE-PWR8-NEXT:    addis r5, r2, g@toc@ha
 ; BE-PWR8-NEXT:    sldi r3, r3, 5
-; BE-PWR8-NEXT:    li r7, 16
+; BE-PWR8-NEXT:    sldi r4, r4, 5
 ; BE-PWR8-NEXT:    addi r5, r5, g@toc@l
 ; BE-PWR8-NEXT:    add r6, r5, r3
 ; BE-PWR8-NEXT:    lxvd2x vs0, r5, r3
-; BE-PWR8-NEXT:    sldi r3, r4, 5
-; BE-PWR8-NEXT:    lxvd2x vs1, r6, r7
-; BE-PWR8-NEXT:    add r4, r5, r3
-; BE-PWR8-NEXT:    stxvd2x vs0, r5, r3
-; BE-PWR8-NEXT:    stxvd2x vs1, r4, r7
+; BE-PWR8-NEXT:    li r3, 16
+; BE-PWR8-NEXT:    lxvd2x vs1, r6, r3
+; BE-PWR8-NEXT:    add r6, r5, r4
+; BE-PWR8-NEXT:    stxvd2x vs0, r5, r4
+; BE-PWR8-NEXT:    stxvd2x vs1, r6, r3
 ; BE-PWR8-NEXT:    blr
 entry:
   %arrayidx = getelementptr inbounds <256 x i1>, ptr @g, i64 %SrcIdx
@@ -576,14 +576,14 @@ define dso_local void @testUnalignedLdStPair() {
 ; LE-PWR8:       # %bb.0: # %entry
 ; LE-PWR8-NEXT:    addis r3, r2, g@toc@ha
 ; LE-PWR8-NEXT:    li r4, 11
-; LE-PWR8-NEXT:    li r5, 27
 ; LE-PWR8-NEXT:    addi r3, r3, g@toc@l
 ; LE-PWR8-NEXT:    lxvd2x vs0, r3, r4
-; LE-PWR8-NEXT:    lxvd2x vs1, r3, r5
+; LE-PWR8-NEXT:    li r4, 27
+; LE-PWR8-NEXT:    lxvd2x vs1, r3, r4
 ; LE-PWR8-NEXT:    li r4, 35
-; LE-PWR8-NEXT:    li r5, 19
 ; LE-PWR8-NEXT:    stxvd2x vs1, r3, r4
-; LE-PWR8-NEXT:    stxvd2x vs0, r3, r5
+; LE-PWR8-NEXT:    li r4, 19
+; LE-PWR8-NEXT:    stxvd2x vs0, r3, r4
 ; LE-PWR8-NEXT:    blr
 ;
 ; BE-PWR9-LABEL: testUnalignedLdStPair:
@@ -604,14 +604,14 @@ define dso_local void @testUnalignedLdStPair() {
 ; BE-PWR8:       # %bb.0: # %entry
 ; BE-PWR8-NEXT:    addis r3, r2, g@toc@ha
 ; BE-PWR8-NEXT:    li r4, 11
-; BE-PWR8-NEXT:    li r5, 27
 ; BE-PWR8-NEXT:    addi r3, r3, g@toc@l
 ; BE-PWR8-NEXT:    lxvd2x vs0, r3, r4
-; BE-PWR8-NEXT:    lxvd2x vs1, r3, r5
+; BE-PWR8-NEXT:    li r4, 27
+; BE-PWR8-NEXT:    lxvd2x vs1, r3, r4
 ; BE-PWR8-NEXT:    li r4, 35
-; BE-PWR8-NEXT:    li r5, 19
 ; BE-PWR8-NEXT:    stxvd2x vs1, r3, r4
-; BE-PWR8-NEXT:    stxvd2x vs0, r3, r5
+; BE-PWR8-NEXT:    li r4, 19
+; BE-PWR8-NEXT:    stxvd2x vs0, r3, r4
 ; BE-PWR8-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i8, ptr @g, i64 11

diff  --git a/llvm/test/CodeGen/PowerPC/mulld.ll b/llvm/test/CodeGen/PowerPC/mulld.ll
index 95b4125c28c517d..2a75e88d4d8bd15 100644
--- a/llvm/test/CodeGen/PowerPC/mulld.ll
+++ b/llvm/test/CodeGen/PowerPC/mulld.ll
@@ -20,7 +20,7 @@ define void @bn_mul_comba8(ptr nocapture %r, ptr nocapture readonly %a, ptr noca
 ; CHECK-ITIN:    mulhdu
 ; CHECK-ITIN-NEXT:    mulld
 ; CHECK-ITIN-NEXT:    mulhdu
-; CHECK-ITIN-NEXT:    mulld
+; CHECK-ITIN:    mulld
 ; CHECK-ITIN-NEXT:    mulhdu
 
   %1 = load i64, ptr %a, align 8

diff  --git a/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll b/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll
index 18896c24f07150a..799ba63a4df2748 100644
--- a/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll
+++ b/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll
@@ -4,36 +4,35 @@
 define signext i32 @test(ptr noalias %PtrA, ptr noalias %PtrB, i32 signext %LenA, i32 signext %LenB) #0 {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    li 6, 0
-; CHECK-NEXT:    addi 7, 3, 4
+; CHECK-NEXT:    addi 6, 3, 4
 ; CHECK-NEXT:    addi 4, 4, -4
 ; CHECK-NEXT:    li 8, 0
+; CHECK-NEXT:    li 7, 0
 ; CHECK-NEXT:  .LBB0_1: # %block3
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
 ; CHECK-NEXT:    # Child Loop BB0_2 Depth 2
-; CHECK-NEXT:    mr 9, 6
-; CHECK-NEXT:    addi 6, 6, 1
-; CHECK-NEXT:    extsw 8, 8
-; CHECK-NEXT:    cmpw 6, 5
-; CHECK-NEXT:    extsw 9, 9
-; CHECK-NEXT:    crnot 20, 0
-; CHECK-NEXT:    sldi 10, 8, 2
-; CHECK-NEXT:    sldi 9, 9, 2
+; CHECK-NEXT:    extsw 9, 8
 ; CHECK-NEXT:    addi 8, 8, 1
+; CHECK-NEXT:    extsw 7, 7
+; CHECK-NEXT:    cmpw 8, 5
+; CHECK-NEXT:    sldi 10, 7, 2
+; CHECK-NEXT:    sldi 9, 9, 2
+; CHECK-NEXT:    addi 7, 7, 1
 ; CHECK-NEXT:    add 10, 4, 10
+; CHECK-NEXT:    crnot 20, 0
 ; CHECK-NEXT:    bc 12, 20, .LBB0_5
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB0_2: # %if.end
 ; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    lwz 11, 4(10)
-; CHECK-NEXT:    cmplwi 11, 0
+; CHECK-NEXT:    lwz 12, 4(10)
 ; CHECK-NEXT:    addi 11, 10, 4
+; CHECK-NEXT:    cmplwi 12, 0
 ; CHECK-NEXT:    beq 0, .LBB0_4
 ; CHECK-NEXT:  # %bb.3: # %if.then4
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lwzx 12, 7, 9
-; CHECK-NEXT:    addi 8, 8, 1
+; CHECK-NEXT:    lwzx 12, 6, 9
+; CHECK-NEXT:    addi 7, 7, 1
 ; CHECK-NEXT:    stw 12, 8(10)
 ; CHECK-NEXT:    mr 10, 11
 ; CHECK-NEXT:    bc 4, 20, .LBB0_2
@@ -41,9 +40,9 @@ define signext i32 @test(ptr noalias %PtrA, ptr noalias %PtrB, i32 signext %LenA
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_4: # %if.end9
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lwzx 10, 7, 9
+; CHECK-NEXT:    lwzx 10, 6, 9
 ; CHECK-NEXT:    addi 10, 10, 1
-; CHECK-NEXT:    stwx 10, 7, 9
+; CHECK-NEXT:    stwx 10, 6, 9
 ; CHECK-NEXT:    b .LBB0_1
 ; CHECK-NEXT:  .LBB0_5: # %if.then
 ; CHECK-NEXT:    lwax 3, 9, 3

diff  --git a/llvm/test/CodeGen/PowerPC/non-debug-mi-search-frspxsrsp.ll b/llvm/test/CodeGen/PowerPC/non-debug-mi-search-frspxsrsp.ll
index 8c3b747b0349578..1bad824913368be 100644
--- a/llvm/test/CodeGen/PowerPC/non-debug-mi-search-frspxsrsp.ll
+++ b/llvm/test/CodeGen/PowerPC/non-debug-mi-search-frspxsrsp.ll
@@ -18,25 +18,25 @@ define dso_local void @test(ptr nocapture readonly %Fptr, ptr nocapture %Vptr) l
 ; CHECK-NEXT:    vspltisw 2, 1
 ; CHECK-NEXT:  .Ltmp0:
 ; CHECK-NEXT:    .loc 1 2 38 prologue_end # test.c:2:38
-; CHECK-NEXT:    lfs 1, 0(3)
+; CHECK-NEXT:    lfs 0, 0(3)
 ; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
 ; CHECK-NEXT:  .Ltmp1:
 ; CHECK-NEXT:    .loc 1 0 38 is_stmt 0 # test.c:0:38
+; CHECK-NEXT:    xvcvsxwdp 1, 34
 ; CHECK-NEXT:    lfd 2, .LCPI0_0@toc@l(3)
-; CHECK-NEXT:    xvcvsxwdp 0, 34
 ; CHECK-NEXT:    .loc 1 2 27 # test.c:2:27
-; CHECK-NEXT:    xssubdp 0, 0, 1
+; CHECK-NEXT:    xssubdp 1, 1, 0
 ; CHECK-NEXT:    .loc 1 2 45 # test.c:2:45
-; CHECK-NEXT:    xsadddp 0, 0, 2
+; CHECK-NEXT:    xsadddp 1, 1, 2
 ; CHECK-NEXT:  .Ltmp2:
 ; CHECK-NEXT:    #DEBUG_VALUE: test:Val <- undef
 ; CHECK-NEXT:    .loc 1 0 45 # test.c:0:45
 ; CHECK-NEXT:    xxlxor 2, 2, 2
 ; CHECK-NEXT:    .loc 1 3 26 is_stmt 1 # test.c:3:26
-; CHECK-NEXT:    xxmrghd 1, 1, 2
-; CHECK-NEXT:    xxmrghd 0, 2, 0
-; CHECK-NEXT:    xvcvdpsp 34, 1
-; CHECK-NEXT:    xvcvdpsp 35, 0
+; CHECK-NEXT:    xxmrghd 0, 0, 2
+; CHECK-NEXT:    xvcvdpsp 34, 0
+; CHECK-NEXT:    xxmrghd 1, 2, 1
+; CHECK-NEXT:    xvcvdpsp 35, 1
 ; CHECK-NEXT:    vmrgew 2, 2, 3
 ; CHECK-NEXT:    .loc 1 3 9 is_stmt 0 # test.c:3:9
 ; CHECK-NEXT:    xxswapd 0, 34

diff  --git a/llvm/test/CodeGen/PowerPC/p8-isel-sched.ll b/llvm/test/CodeGen/PowerPC/p8-isel-sched.ll
index b7452bd385fa422..7e2515ff70938f9 100644
--- a/llvm/test/CodeGen/PowerPC/p8-isel-sched.ll
+++ b/llvm/test/CodeGen/PowerPC/p8-isel-sched.ll
@@ -30,11 +30,11 @@ entry:
 ; CHECK-NO-ISEL: bc 12, 2, [[TRUE:.LBB[0-9]+]]
 ; CHECK-NO-ISEL: b [[SUCCESSOR:.LBB[0-9]+]]
 ; CHECK-NO-ISEL: [[TRUE]]
-; CHECK-NO-ISEL-NEXT: addi {{[0-9]+}}, {{[0-9]+}}, 0
+; CHECK-NO-ISEL: addi {{[0-9]+}}, {{[0-9]+}}, -2
 ; CHECK: addi
 ; CHECK: isel
 ; CHECK-NO-ISEL: bc 12, 2, [[TRUE:.LBB[0-9]+]]
-; CHECK-NO-ISEL: ori 10, 11, 0
+; CHECK-NO-ISEL: ori 3, 7, 0
 ; CHECK-NO-ISEL-NEXT: b [[SUCCESSOR:.LBB[0-9]+]]
 ; CHECK-NO-ISEL: [[TRUE]]
 ; CHECK: blr

diff  --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
index 194807f1d3aa454..ba25d1584f2822b 100644
--- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -1112,11 +1112,11 @@ entry:
 define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) {
 ; CHECK-LABEL: getvelsc:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    clrldi r4, r5, 32
-; CHECK-NEXT:    li r3, 7
-; CHECK-NEXT:    andi. r5, r4, 8
-; CHECK-NEXT:    andc r3, r3, r4
-; CHECK-NEXT:    lvsl v3, 0, r5
+; CHECK-NEXT:    clrldi r3, r5, 32
+; CHECK-NEXT:    andi. r4, r3, 8
+; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    li r4, 7
+; CHECK-NEXT:    andc r3, r4, r3
 ; CHECK-NEXT:    sldi r3, r3, 3
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    mfvsrd r4, v2
@@ -1126,14 +1126,14 @@ define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) {
 ;
 ; CHECK-LE-LABEL: getvelsc:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    li r3, 8
-; CHECK-LE-NEXT:    clrldi r4, r5, 32
-; CHECK-LE-NEXT:    andc r3, r3, r4
-; CHECK-LE-NEXT:    lvsl v3, 0, r3
-; CHECK-LE-NEXT:    li r3, 7
-; CHECK-LE-NEXT:    and r3, r3, r4
-; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    clrldi r3, r5, 32
+; CHECK-LE-NEXT:    li r4, 8
+; CHECK-LE-NEXT:    andc r4, r4, r3
+; CHECK-LE-NEXT:    lvsl v3, 0, r4
+; CHECK-LE-NEXT:    li r4, 7
+; CHECK-LE-NEXT:    and r3, r4, r3
 ; CHECK-LE-NEXT:    sldi r3, r3, 3
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-LE-NEXT:    mfvsrd r4, v2
 ; CHECK-LE-NEXT:    srd r3, r4, r3
 ; CHECK-LE-NEXT:    extsb r3, r3
@@ -1142,10 +1142,10 @@ define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) {
 ; CHECK-AIX-LABEL: getvelsc:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    clrldi 3, 3, 32
+; CHECK-AIX-NEXT:    andi. 4, 3, 8
+; CHECK-AIX-NEXT:    lvsl 3, 0, 4
 ; CHECK-AIX-NEXT:    li 4, 7
-; CHECK-AIX-NEXT:    andi. 5, 3, 8
 ; CHECK-AIX-NEXT:    andc 3, 4, 3
-; CHECK-AIX-NEXT:    lvsl 3, 0, 5
 ; CHECK-AIX-NEXT:    sldi 3, 3, 3
 ; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-AIX-NEXT:    mfvsrd 4, 34
@@ -1163,11 +1163,11 @@ entry:
 define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) {
 ; CHECK-LABEL: getveluc:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    clrldi r4, r5, 32
-; CHECK-NEXT:    li r3, 7
-; CHECK-NEXT:    andi. r5, r4, 8
-; CHECK-NEXT:    andc r3, r3, r4
-; CHECK-NEXT:    lvsl v3, 0, r5
+; CHECK-NEXT:    clrldi r3, r5, 32
+; CHECK-NEXT:    andi. r4, r3, 8
+; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    li r4, 7
+; CHECK-NEXT:    andc r3, r4, r3
 ; CHECK-NEXT:    sldi r3, r3, 3
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    mfvsrd r4, v2
@@ -1177,14 +1177,14 @@ define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) {
 ;
 ; CHECK-LE-LABEL: getveluc:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    li r3, 8
-; CHECK-LE-NEXT:    clrldi r4, r5, 32
-; CHECK-LE-NEXT:    andc r3, r3, r4
-; CHECK-LE-NEXT:    lvsl v3, 0, r3
-; CHECK-LE-NEXT:    li r3, 7
-; CHECK-LE-NEXT:    and r3, r3, r4
-; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    clrldi r3, r5, 32
+; CHECK-LE-NEXT:    li r4, 8
+; CHECK-LE-NEXT:    andc r4, r4, r3
+; CHECK-LE-NEXT:    lvsl v3, 0, r4
+; CHECK-LE-NEXT:    li r4, 7
+; CHECK-LE-NEXT:    and r3, r4, r3
 ; CHECK-LE-NEXT:    sldi r3, r3, 3
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-LE-NEXT:    mfvsrd r4, v2
 ; CHECK-LE-NEXT:    srd r3, r4, r3
 ; CHECK-LE-NEXT:    clrldi r3, r3, 56
@@ -1193,10 +1193,10 @@ define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) {
 ; CHECK-AIX-LABEL: getveluc:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    clrldi 3, 3, 32
+; CHECK-AIX-NEXT:    andi. 4, 3, 8
+; CHECK-AIX-NEXT:    lvsl 3, 0, 4
 ; CHECK-AIX-NEXT:    li 4, 7
-; CHECK-AIX-NEXT:    andi. 5, 3, 8
 ; CHECK-AIX-NEXT:    andc 3, 4, 3
-; CHECK-AIX-NEXT:    lvsl 3, 0, 5
 ; CHECK-AIX-NEXT:    sldi 3, 3, 3
 ; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-AIX-NEXT:    mfvsrd 4, 34
@@ -1678,13 +1678,13 @@ entry:
 define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) {
 ; CHECK-LABEL: getvelss:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    clrldi r4, r5, 32
-; CHECK-NEXT:    li r3, 3
-; CHECK-NEXT:    andi. r5, r4, 4
-; CHECK-NEXT:    andc r3, r3, r4
-; CHECK-NEXT:    sldi r5, r5, 1
+; CHECK-NEXT:    clrldi r3, r5, 32
+; CHECK-NEXT:    andi. r4, r3, 4
+; CHECK-NEXT:    sldi r4, r4, 1
+; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    li r4, 3
+; CHECK-NEXT:    andc r3, r4, r3
 ; CHECK-NEXT:    sldi r3, r3, 4
-; CHECK-NEXT:    lvsl v3, 0, r5
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    mfvsrd r4, v2
 ; CHECK-NEXT:    srd r3, r4, r3
@@ -1693,15 +1693,15 @@ define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) {
 ;
 ; CHECK-LE-LABEL: getvelss:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    li r3, 4
-; CHECK-LE-NEXT:    clrldi r4, r5, 32
-; CHECK-LE-NEXT:    andc r3, r3, r4
-; CHECK-LE-NEXT:    sldi r3, r3, 1
-; CHECK-LE-NEXT:    lvsl v3, 0, r3
-; CHECK-LE-NEXT:    li r3, 3
-; CHECK-LE-NEXT:    and r3, r3, r4
-; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    clrldi r3, r5, 32
+; CHECK-LE-NEXT:    li r4, 4
+; CHECK-LE-NEXT:    andc r4, r4, r3
+; CHECK-LE-NEXT:    sldi r4, r4, 1
+; CHECK-LE-NEXT:    lvsl v3, 0, r4
+; CHECK-LE-NEXT:    li r4, 3
+; CHECK-LE-NEXT:    and r3, r4, r3
 ; CHECK-LE-NEXT:    sldi r3, r3, 4
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-LE-NEXT:    mfvsrd r4, v2
 ; CHECK-LE-NEXT:    srd r3, r4, r3
 ; CHECK-LE-NEXT:    extsh r3, r3
@@ -1710,12 +1710,12 @@ define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) {
 ; CHECK-AIX-LABEL: getvelss:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    clrldi 3, 3, 32
+; CHECK-AIX-NEXT:    andi. 4, 3, 4
+; CHECK-AIX-NEXT:    sldi 4, 4, 1
+; CHECK-AIX-NEXT:    lvsl 3, 0, 4
 ; CHECK-AIX-NEXT:    li 4, 3
-; CHECK-AIX-NEXT:    andi. 5, 3, 4
 ; CHECK-AIX-NEXT:    andc 3, 4, 3
-; CHECK-AIX-NEXT:    sldi 5, 5, 1
 ; CHECK-AIX-NEXT:    sldi 3, 3, 4
-; CHECK-AIX-NEXT:    lvsl 3, 0, 5
 ; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-AIX-NEXT:    mfvsrd 4, 34
 ; CHECK-AIX-NEXT:    srd 3, 4, 3
@@ -1732,13 +1732,13 @@ entry:
 define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) {
 ; CHECK-LABEL: getvelus:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    clrldi r4, r5, 32
-; CHECK-NEXT:    li r3, 3
-; CHECK-NEXT:    andi. r5, r4, 4
-; CHECK-NEXT:    andc r3, r3, r4
-; CHECK-NEXT:    sldi r5, r5, 1
+; CHECK-NEXT:    clrldi r3, r5, 32
+; CHECK-NEXT:    andi. r4, r3, 4
+; CHECK-NEXT:    sldi r4, r4, 1
+; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    li r4, 3
+; CHECK-NEXT:    andc r3, r4, r3
 ; CHECK-NEXT:    sldi r3, r3, 4
-; CHECK-NEXT:    lvsl v3, 0, r5
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    mfvsrd r4, v2
 ; CHECK-NEXT:    srd r3, r4, r3
@@ -1747,15 +1747,15 @@ define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) {
 ;
 ; CHECK-LE-LABEL: getvelus:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    li r3, 4
-; CHECK-LE-NEXT:    clrldi r4, r5, 32
-; CHECK-LE-NEXT:    andc r3, r3, r4
-; CHECK-LE-NEXT:    sldi r3, r3, 1
-; CHECK-LE-NEXT:    lvsl v3, 0, r3
-; CHECK-LE-NEXT:    li r3, 3
-; CHECK-LE-NEXT:    and r3, r3, r4
-; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    clrldi r3, r5, 32
+; CHECK-LE-NEXT:    li r4, 4
+; CHECK-LE-NEXT:    andc r4, r4, r3
+; CHECK-LE-NEXT:    sldi r4, r4, 1
+; CHECK-LE-NEXT:    lvsl v3, 0, r4
+; CHECK-LE-NEXT:    li r4, 3
+; CHECK-LE-NEXT:    and r3, r4, r3
 ; CHECK-LE-NEXT:    sldi r3, r3, 4
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-LE-NEXT:    mfvsrd r4, v2
 ; CHECK-LE-NEXT:    srd r3, r4, r3
 ; CHECK-LE-NEXT:    clrldi r3, r3, 48
@@ -1764,12 +1764,12 @@ define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) {
 ; CHECK-AIX-LABEL: getvelus:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    clrldi 3, 3, 32
+; CHECK-AIX-NEXT:    andi. 4, 3, 4
+; CHECK-AIX-NEXT:    sldi 4, 4, 1
+; CHECK-AIX-NEXT:    lvsl 3, 0, 4
 ; CHECK-AIX-NEXT:    li 4, 3
-; CHECK-AIX-NEXT:    andi. 5, 3, 4
 ; CHECK-AIX-NEXT:    andc 3, 4, 3
-; CHECK-AIX-NEXT:    sldi 5, 5, 1
 ; CHECK-AIX-NEXT:    sldi 3, 3, 4
-; CHECK-AIX-NEXT:    lvsl 3, 0, 5
 ; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-AIX-NEXT:    mfvsrd 4, 34
 ; CHECK-AIX-NEXT:    srd 3, 4, 3
@@ -2000,13 +2000,13 @@ entry:
 define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) {
 ; CHECK-LABEL: getvelsi:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    clrldi r4, r5, 32
-; CHECK-NEXT:    li r3, 1
-; CHECK-NEXT:    andi. r5, r4, 2
-; CHECK-NEXT:    andc r3, r3, r4
-; CHECK-NEXT:    sldi r5, r5, 2
+; CHECK-NEXT:    clrldi r3, r5, 32
+; CHECK-NEXT:    andi. r4, r3, 2
+; CHECK-NEXT:    sldi r4, r4, 2
+; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    li r4, 1
+; CHECK-NEXT:    andc r3, r4, r3
 ; CHECK-NEXT:    sldi r3, r3, 5
-; CHECK-NEXT:    lvsl v3, 0, r5
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    mfvsrd r4, v2
 ; CHECK-NEXT:    srd r3, r4, r3
@@ -2015,15 +2015,15 @@ define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) {
 ;
 ; CHECK-LE-LABEL: getvelsi:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    li r3, 2
-; CHECK-LE-NEXT:    clrldi r4, r5, 32
-; CHECK-LE-NEXT:    andc r3, r3, r4
-; CHECK-LE-NEXT:    sldi r3, r3, 2
-; CHECK-LE-NEXT:    lvsl v3, 0, r3
-; CHECK-LE-NEXT:    li r3, 1
-; CHECK-LE-NEXT:    and r3, r3, r4
-; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    clrldi r3, r5, 32
+; CHECK-LE-NEXT:    li r4, 2
+; CHECK-LE-NEXT:    andc r4, r4, r3
+; CHECK-LE-NEXT:    sldi r4, r4, 2
+; CHECK-LE-NEXT:    lvsl v3, 0, r4
+; CHECK-LE-NEXT:    li r4, 1
+; CHECK-LE-NEXT:    and r3, r4, r3
 ; CHECK-LE-NEXT:    sldi r3, r3, 5
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-LE-NEXT:    mfvsrd r4, v2
 ; CHECK-LE-NEXT:    srd r3, r4, r3
 ; CHECK-LE-NEXT:    extsw r3, r3
@@ -2032,12 +2032,12 @@ define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) {
 ; CHECK-AIX-LABEL: getvelsi:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    clrldi 3, 3, 32
+; CHECK-AIX-NEXT:    andi. 4, 3, 2
+; CHECK-AIX-NEXT:    sldi 4, 4, 2
+; CHECK-AIX-NEXT:    lvsl 3, 0, 4
 ; CHECK-AIX-NEXT:    li 4, 1
-; CHECK-AIX-NEXT:    andi. 5, 3, 2
 ; CHECK-AIX-NEXT:    andc 3, 4, 3
-; CHECK-AIX-NEXT:    sldi 5, 5, 2
 ; CHECK-AIX-NEXT:    sldi 3, 3, 5
-; CHECK-AIX-NEXT:    lvsl 3, 0, 5
 ; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-AIX-NEXT:    mfvsrd 4, 34
 ; CHECK-AIX-NEXT:    srd 3, 4, 3
@@ -2053,13 +2053,13 @@ entry:
 define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) {
 ; CHECK-LABEL: getvelui:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    clrldi r4, r5, 32
-; CHECK-NEXT:    li r3, 1
-; CHECK-NEXT:    andi. r5, r4, 2
-; CHECK-NEXT:    andc r3, r3, r4
-; CHECK-NEXT:    sldi r5, r5, 2
+; CHECK-NEXT:    clrldi r3, r5, 32
+; CHECK-NEXT:    andi. r4, r3, 2
+; CHECK-NEXT:    sldi r4, r4, 2
+; CHECK-NEXT:    lvsl v3, 0, r4
+; CHECK-NEXT:    li r4, 1
+; CHECK-NEXT:    andc r3, r4, r3
 ; CHECK-NEXT:    sldi r3, r3, 5
-; CHECK-NEXT:    lvsl v3, 0, r5
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    mfvsrd r4, v2
 ; CHECK-NEXT:    srd r3, r4, r3
@@ -2068,15 +2068,15 @@ define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) {
 ;
 ; CHECK-LE-LABEL: getvelui:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    li r3, 2
-; CHECK-LE-NEXT:    clrldi r4, r5, 32
-; CHECK-LE-NEXT:    andc r3, r3, r4
-; CHECK-LE-NEXT:    sldi r3, r3, 2
-; CHECK-LE-NEXT:    lvsl v3, 0, r3
-; CHECK-LE-NEXT:    li r3, 1
-; CHECK-LE-NEXT:    and r3, r3, r4
-; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
+; CHECK-LE-NEXT:    clrldi r3, r5, 32
+; CHECK-LE-NEXT:    li r4, 2
+; CHECK-LE-NEXT:    andc r4, r4, r3
+; CHECK-LE-NEXT:    sldi r4, r4, 2
+; CHECK-LE-NEXT:    lvsl v3, 0, r4
+; CHECK-LE-NEXT:    li r4, 1
+; CHECK-LE-NEXT:    and r3, r4, r3
 ; CHECK-LE-NEXT:    sldi r3, r3, 5
+; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-LE-NEXT:    mfvsrd r4, v2
 ; CHECK-LE-NEXT:    srd r3, r4, r3
 ; CHECK-LE-NEXT:    clrldi r3, r3, 32
@@ -2085,12 +2085,12 @@ define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) {
 ; CHECK-AIX-LABEL: getvelui:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    clrldi 3, 3, 32
+; CHECK-AIX-NEXT:    andi. 4, 3, 2
+; CHECK-AIX-NEXT:    sldi 4, 4, 2
+; CHECK-AIX-NEXT:    lvsl 3, 0, 4
 ; CHECK-AIX-NEXT:    li 4, 1
-; CHECK-AIX-NEXT:    andi. 5, 3, 2
 ; CHECK-AIX-NEXT:    andc 3, 4, 3
-; CHECK-AIX-NEXT:    sldi 5, 5, 2
 ; CHECK-AIX-NEXT:    sldi 3, 3, 5
-; CHECK-AIX-NEXT:    lvsl 3, 0, 5
 ; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-AIX-NEXT:    mfvsrd 4, 34
 ; CHECK-AIX-NEXT:    srd 3, 4, 3
@@ -2214,9 +2214,9 @@ define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) {
 ;
 ; CHECK-LE-LABEL: getvelsl:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    li r3, 1
-; CHECK-LE-NEXT:    clrldi r4, r5, 32
-; CHECK-LE-NEXT:    andc r3, r3, r4
+; CHECK-LE-NEXT:    clrldi r3, r5, 32
+; CHECK-LE-NEXT:    li r4, 1
+; CHECK-LE-NEXT:    andc r3, r4, r3
 ; CHECK-LE-NEXT:    sldi r3, r3, 3
 ; CHECK-LE-NEXT:    lvsl v3, 0, r3
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
@@ -2252,9 +2252,9 @@ define i64 @getvelul(<2 x i64> %vul, i32 signext %i) {
 ;
 ; CHECK-LE-LABEL: getvelul:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    li r3, 1
-; CHECK-LE-NEXT:    clrldi r4, r5, 32
-; CHECK-LE-NEXT:    andc r3, r3, r4
+; CHECK-LE-NEXT:    clrldi r3, r5, 32
+; CHECK-LE-NEXT:    li r4, 1
+; CHECK-LE-NEXT:    andc r3, r4, r3
 ; CHECK-LE-NEXT:    sldi r3, r3, 3
 ; CHECK-LE-NEXT:    lvsl v3, 0, r3
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
@@ -2472,9 +2472,9 @@ define double @getveld(<2 x double> %vd, i32 signext %i) {
 ;
 ; CHECK-LE-LABEL: getveld:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    li r3, 1
-; CHECK-LE-NEXT:    clrldi r4, r5, 32
-; CHECK-LE-NEXT:    andc r3, r3, r4
+; CHECK-LE-NEXT:    clrldi r3, r5, 32
+; CHECK-LE-NEXT:    li r4, 1
+; CHECK-LE-NEXT:    andc r3, r4, r3
 ; CHECK-LE-NEXT:    sldi r3, r3, 3
 ; CHECK-LE-NEXT:    lvsl v3, 0, r3
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3

diff  --git a/llvm/test/CodeGen/PowerPC/peephole-align.ll b/llvm/test/CodeGen/PowerPC/peephole-align.ll
index 18a8da8281a020f..da7ada7211ac731 100644
--- a/llvm/test/CodeGen/PowerPC/peephole-align.ll
+++ b/llvm/test/CodeGen/PowerPC/peephole-align.ll
@@ -1,4 +1,4 @@
-; RUN: llc -relocation-model=static -verify-machineinstrs -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
+; RUN: llc -relocation-model=static -verify-machineinstrs -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s -check-prefix=P7
 ; RUN: llc -relocation-model=static -verify-machineinstrs -mcpu=pwr8 -O1 -code-model=medium <%s | FileCheck %s
 
 ; Test peephole optimization for medium code model (32-bit TOC offsets)
@@ -208,13 +208,21 @@ entry:
 
 ; CHECK-LABEL: test_d2:
 ; CHECK: addis [[REGSTRUCT:[0-9]+]], 2, d2v@toc@ha
+; CHECK: ld [[REG0_0:[0-9]+]], d2v@toc@l([[REGSTRUCT]])
 ; CHECK: addi [[BASEV:[0-9]+]], [[REGSTRUCT]], d2v@toc@l
-; CHECK-DAG: ld [[REG0_0:[0-9]+]], d2v@toc@l([[REGSTRUCT]])
-; CHECK-DAG: ld [[REG1_0:[0-9]+]], 8([[BASEV]])
 ; CHECK-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; CHECK-DAG: ld [[REG1_0:[0-9]+]], 8([[BASEV]])
 ; CHECK-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
 ; CHECK-DAG: std [[REG0_1]], d2v@toc@l([[REGSTRUCT]])
 ; CHECK-DAG: std [[REG1_1]], 8([[BASEV]])
+; P7: addis [[REGSTRUCT:[0-9]+]], 2, d2v@toc@ha
+; P7: addi [[BASEV:[0-9]+]], [[REGSTRUCT]], d2v@toc@l
+; P7: ld [[REG0_0:[0-9]+]], d2v@toc@l([[REGSTRUCT]])
+; P7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; P7-DAG: ld [[REG1_0:[0-9]+]], 8([[BASEV]])
+; P7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
+; P7-DAG: std [[REG0_1]], d2v@toc@l([[REGSTRUCT]])
+; P7-DAG: std [[REG1_1]], 8([[BASEV]])
 
 define dso_local void @test_d2() nounwind {
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/pow-025-075-intrinsic-scalar-mass-fast.ll b/llvm/test/CodeGen/PowerPC/pow-025-075-intrinsic-scalar-mass-fast.ll
index b3efd523ebfe3f3..84efc1f1fa3a7b8 100644
--- a/llvm/test/CodeGen/PowerPC/pow-025-075-intrinsic-scalar-mass-fast.ll
+++ b/llvm/test/CodeGen/PowerPC/pow-025-075-intrinsic-scalar-mass-fast.ll
@@ -10,31 +10,31 @@ declare double @llvm.pow.f64 (double, double);
 define float @llvmintr_powf_f32_fast025(float %a) #1 {
 ; CHECK-LNX-LABEL: llvmintr_powf_f32_fast025:
 ; CHECK-LNX:       # %bb.0: # %entry
-; CHECK-LNX-NEXT:    xsrsqrtesp 0, 1
-; CHECK-LNX-NEXT:    vspltisw 2, -3
+; CHECK-LNX-NEXT:    xsrsqrtesp 2, 1
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; CHECK-LNX-NEXT:    lfs 4, .LCPI0_0@toc@l(3)
+; CHECK-LNX-NEXT:    vspltisw 2, -3
+; CHECK-LNX-NEXT:    lfs 0, .LCPI0_0@toc@l(3)
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
-; CHECK-LNX-NEXT:    lfs 5, .LCPI0_1@toc@l(3)
-; CHECK-LNX-NEXT:    xvcvsxwdp 2, 34
-; CHECK-LNX-NEXT:    xsmulsp 3, 1, 0
+; CHECK-LNX-NEXT:    xxlxor 5, 5, 5
+; CHECK-LNX-NEXT:    xsmulsp 3, 1, 2
+; CHECK-LNX-NEXT:    xsabsdp 1, 1
+; CHECK-LNX-NEXT:    xsmulsp 4, 3, 0
+; CHECK-LNX-NEXT:    xsmulsp 2, 3, 2
+; CHECK-LNX-NEXT:    xvcvsxwdp 3, 34
+; CHECK-LNX-NEXT:    xsaddsp 2, 2, 3
+; CHECK-LNX-NEXT:    xsmulsp 2, 4, 2
+; CHECK-LNX-NEXT:    lfs 4, .LCPI0_1@toc@l(3)
+; CHECK-LNX-NEXT:    xssubsp 1, 1, 4
+; CHECK-LNX-NEXT:    fsel 1, 1, 2, 5
+; CHECK-LNX-NEXT:    xsrsqrtesp 2, 1
+; CHECK-LNX-NEXT:    xsmulsp 6, 1, 2
 ; CHECK-LNX-NEXT:    xsabsdp 1, 1
-; CHECK-LNX-NEXT:    xsmulsp 0, 3, 0
-; CHECK-LNX-NEXT:    xsmulsp 3, 3, 4
-; CHECK-LNX-NEXT:    xssubsp 1, 1, 5
-; CHECK-LNX-NEXT:    xsaddsp 0, 0, 2
-; CHECK-LNX-NEXT:    xsmulsp 0, 3, 0
-; CHECK-LNX-NEXT:    xxlxor 3, 3, 3
-; CHECK-LNX-NEXT:    fsel 0, 1, 0, 3
-; CHECK-LNX-NEXT:    xsrsqrtesp 1, 0
-; CHECK-LNX-NEXT:    xsmulsp 6, 0, 1
-; CHECK-LNX-NEXT:    xsabsdp 0, 0
-; CHECK-LNX-NEXT:    xsmulsp 1, 6, 1
-; CHECK-LNX-NEXT:    xsmulsp 4, 6, 4
-; CHECK-LNX-NEXT:    xssubsp 0, 0, 5
-; CHECK-LNX-NEXT:    xsaddsp 1, 1, 2
-; CHECK-LNX-NEXT:    xsmulsp 1, 4, 1
-; CHECK-LNX-NEXT:    fsel 1, 0, 1, 3
+; CHECK-LNX-NEXT:    xsmulsp 2, 6, 2
+; CHECK-LNX-NEXT:    xsmulsp 0, 6, 0
+; CHECK-LNX-NEXT:    xssubsp 1, 1, 4
+; CHECK-LNX-NEXT:    xsaddsp 2, 2, 3
+; CHECK-LNX-NEXT:    xsmulsp 0, 0, 2
+; CHECK-LNX-NEXT:    fsel 1, 1, 0, 5
 ; CHECK-LNX-NEXT:    blr
 ;
 ; CHECK-AIX-LABEL: llvmintr_powf_f32_fast025:
@@ -60,19 +60,19 @@ entry:
 define double @llvmintr_pow_f64_fast025(double %a) #1 {
 ; CHECK-LNX-LABEL: llvmintr_pow_f64_fast025:
 ; CHECK-LNX:       # %bb.0: # %entry
-; CHECK-LNX-NEXT:    vspltisw 2, -3
 ; CHECK-LNX-NEXT:    xstsqrtdp 0, 1
+; CHECK-LNX-NEXT:    vspltisw 2, -3
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
-; CHECK-LNX-NEXT:    lfs 0, .LCPI1_0@toc@l(3)
 ; CHECK-LNX-NEXT:    xvcvsxwdp 2, 34
+; CHECK-LNX-NEXT:    lfs 0, .LCPI1_0@toc@l(3)
 ; CHECK-LNX-NEXT:    bc 12, 2, .LBB1_3
 ; CHECK-LNX-NEXT:  # %bb.1: # %entry
 ; CHECK-LNX-NEXT:    xsrsqrtedp 3, 1
-; CHECK-LNX-NEXT:    xsmuldp 4, 1, 3
-; CHECK-LNX-NEXT:    xsmuldp 4, 4, 3
-; CHECK-LNX-NEXT:    xsmuldp 3, 3, 0
-; CHECK-LNX-NEXT:    xsadddp 4, 4, 2
-; CHECK-LNX-NEXT:    xsmuldp 3, 3, 4
+; CHECK-LNX-NEXT:    xsmuldp 5, 1, 3
+; CHECK-LNX-NEXT:    xsmuldp 4, 3, 0
+; CHECK-LNX-NEXT:    xsmuldp 3, 5, 3
+; CHECK-LNX-NEXT:    xsadddp 3, 3, 2
+; CHECK-LNX-NEXT:    xsmuldp 3, 4, 3
 ; CHECK-LNX-NEXT:    xsmuldp 1, 1, 3
 ; CHECK-LNX-NEXT:    xsmuldp 3, 1, 3
 ; CHECK-LNX-NEXT:    xsmuldp 1, 1, 0
@@ -124,32 +124,32 @@ entry:
 define float @llvmintr_powf_f32_fast075(float %a) #1 {
 ; CHECK-LNX-LABEL: llvmintr_powf_f32_fast075:
 ; CHECK-LNX:       # %bb.0: # %entry
-; CHECK-LNX-NEXT:    xsrsqrtesp 0, 1
-; CHECK-LNX-NEXT:    vspltisw 2, -3
+; CHECK-LNX-NEXT:    xsrsqrtesp 2, 1
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
-; CHECK-LNX-NEXT:    lfs 4, .LCPI2_0@toc@l(3)
+; CHECK-LNX-NEXT:    vspltisw 2, -3
+; CHECK-LNX-NEXT:    lfs 0, .LCPI2_0@toc@l(3)
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
-; CHECK-LNX-NEXT:    lfs 5, .LCPI2_1@toc@l(3)
-; CHECK-LNX-NEXT:    xvcvsxwdp 2, 34
-; CHECK-LNX-NEXT:    xsmulsp 3, 1, 0
+; CHECK-LNX-NEXT:    xxlxor 5, 5, 5
+; CHECK-LNX-NEXT:    xsmulsp 3, 1, 2
 ; CHECK-LNX-NEXT:    xsabsdp 1, 1
-; CHECK-LNX-NEXT:    xsmulsp 0, 3, 0
-; CHECK-LNX-NEXT:    xsmulsp 3, 3, 4
-; CHECK-LNX-NEXT:    xssubsp 1, 1, 5
-; CHECK-LNX-NEXT:    xsaddsp 0, 0, 2
-; CHECK-LNX-NEXT:    xsmulsp 0, 3, 0
-; CHECK-LNX-NEXT:    xxlxor 3, 3, 3
-; CHECK-LNX-NEXT:    fsel 0, 1, 0, 3
-; CHECK-LNX-NEXT:    xsrsqrtesp 1, 0
-; CHECK-LNX-NEXT:    xsmulsp 6, 0, 1
-; CHECK-LNX-NEXT:    xsmulsp 1, 6, 1
-; CHECK-LNX-NEXT:    xsmulsp 4, 6, 4
-; CHECK-LNX-NEXT:    xsaddsp 1, 1, 2
-; CHECK-LNX-NEXT:    xsabsdp 2, 0
-; CHECK-LNX-NEXT:    xsmulsp 1, 4, 1
-; CHECK-LNX-NEXT:    xssubsp 2, 2, 5
-; CHECK-LNX-NEXT:    fsel 1, 2, 1, 3
-; CHECK-LNX-NEXT:    xsmulsp 1, 0, 1
+; CHECK-LNX-NEXT:    xsmulsp 4, 3, 0
+; CHECK-LNX-NEXT:    xsmulsp 2, 3, 2
+; CHECK-LNX-NEXT:    xvcvsxwdp 3, 34
+; CHECK-LNX-NEXT:    xsaddsp 2, 2, 3
+; CHECK-LNX-NEXT:    xsmulsp 2, 4, 2
+; CHECK-LNX-NEXT:    lfs 4, .LCPI2_1@toc@l(3)
+; CHECK-LNX-NEXT:    xssubsp 1, 1, 4
+; CHECK-LNX-NEXT:    fsel 1, 1, 2, 5
+; CHECK-LNX-NEXT:    xsrsqrtesp 2, 1
+; CHECK-LNX-NEXT:    xsmulsp 6, 1, 2
+; CHECK-LNX-NEXT:    xsmulsp 2, 6, 2
+; CHECK-LNX-NEXT:    xsmulsp 0, 6, 0
+; CHECK-LNX-NEXT:    xsaddsp 2, 2, 3
+; CHECK-LNX-NEXT:    xsmulsp 0, 0, 2
+; CHECK-LNX-NEXT:    xsabsdp 2, 1
+; CHECK-LNX-NEXT:    xssubsp 2, 2, 4
+; CHECK-LNX-NEXT:    fsel 0, 2, 0, 5
+; CHECK-LNX-NEXT:    xsmulsp 1, 1, 0
 ; CHECK-LNX-NEXT:    blr
 ;
 ; CHECK-AIX-LABEL: llvmintr_powf_f32_fast075:
@@ -175,19 +175,19 @@ entry:
 define double @llvmintr_pow_f64_fast075(double %a) #1 {
 ; CHECK-LNX-LABEL: llvmintr_pow_f64_fast075:
 ; CHECK-LNX:       # %bb.0: # %entry
-; CHECK-LNX-NEXT:    vspltisw 2, -3
 ; CHECK-LNX-NEXT:    xstsqrtdp 0, 1
+; CHECK-LNX-NEXT:    vspltisw 2, -3
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
-; CHECK-LNX-NEXT:    lfs 0, .LCPI3_0@toc@l(3)
 ; CHECK-LNX-NEXT:    xvcvsxwdp 2, 34
+; CHECK-LNX-NEXT:    lfs 0, .LCPI3_0@toc@l(3)
 ; CHECK-LNX-NEXT:    bc 12, 2, .LBB3_3
 ; CHECK-LNX-NEXT:  # %bb.1: # %entry
 ; CHECK-LNX-NEXT:    xsrsqrtedp 3, 1
-; CHECK-LNX-NEXT:    xsmuldp 4, 1, 3
-; CHECK-LNX-NEXT:    xsmuldp 4, 4, 3
-; CHECK-LNX-NEXT:    xsmuldp 3, 3, 0
-; CHECK-LNX-NEXT:    xsadddp 4, 4, 2
-; CHECK-LNX-NEXT:    xsmuldp 3, 3, 4
+; CHECK-LNX-NEXT:    xsmuldp 5, 1, 3
+; CHECK-LNX-NEXT:    xsmuldp 4, 3, 0
+; CHECK-LNX-NEXT:    xsmuldp 3, 5, 3
+; CHECK-LNX-NEXT:    xsadddp 3, 3, 2
+; CHECK-LNX-NEXT:    xsmuldp 3, 4, 3
 ; CHECK-LNX-NEXT:    xsmuldp 1, 1, 3
 ; CHECK-LNX-NEXT:    xsmuldp 3, 1, 3
 ; CHECK-LNX-NEXT:    xsmuldp 1, 1, 0

diff  --git a/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll b/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
index 6e6e5326c05bc3e..0171e27e80901d2 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
@@ -10,15 +10,15 @@ define dso_local fastcc void @BuildVectorICE() unnamed_addr {
 ; 32BIT:       # %bb.0: # %entry
 ; 32BIT-NEXT:    stwu 1, -64(1)
 ; 32BIT-NEXT:    .cfi_def_cfa_offset 64
-; 32BIT-NEXT:    li 3, .LCPI0_0@l
-; 32BIT-NEXT:    lis 4, .LCPI0_0@ha
-; 32BIT-NEXT:    addi 5, 1, 16
-; 32BIT-NEXT:    addi 6, 1, 48
-; 32BIT-NEXT:    li 7, 0
+; 32BIT-NEXT:    li 4, .LCPI0_0@l
+; 32BIT-NEXT:    lis 5, .LCPI0_0@ha
 ; 32BIT-NEXT:    lxvw4x 34, 0, 3
-; 32BIT-NEXT:    lxvw4x 35, 4, 3
 ; 32BIT-NEXT:    li 3, 0
+; 32BIT-NEXT:    addi 6, 1, 48
+; 32BIT-NEXT:    li 7, 0
+; 32BIT-NEXT:    lxvw4x 35, 5, 4
 ; 32BIT-NEXT:    addi 4, 1, 32
+; 32BIT-NEXT:    addi 5, 1, 16
 ; 32BIT-NEXT:    .p2align 4
 ; 32BIT-NEXT:  .LBB0_1: # %while.body
 ; 32BIT-NEXT:    #
@@ -36,8 +36,8 @@ define dso_local fastcc void @BuildVectorICE() unnamed_addr {
 ;
 ; 64BIT-LABEL: BuildVectorICE:
 ; 64BIT:       # %bb.0: # %entry
-; 64BIT-NEXT:    li 3, 0
 ; 64BIT-NEXT:    lxvw4x 34, 0, 3
+; 64BIT-NEXT:    li 3, 0
 ; 64BIT-NEXT:    rldimi 3, 3, 32, 0
 ; 64BIT-NEXT:    mtfprd 0, 3
 ; 64BIT-NEXT:    li 3, 0

diff  --git a/llvm/test/CodeGen/PowerPC/ppc-clear-before-return.ll b/llvm/test/CodeGen/PowerPC/ppc-clear-before-return.ll
index f77bd692cc7f0f5..5e808708fdab8ce 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-clear-before-return.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-clear-before-return.ll
@@ -19,11 +19,10 @@ define dso_local i64 @test_xor(ptr nocapture noundef readonly %inp) local_unname
 ;
 ; 32BIT-LABEL: test_xor:
 ; 32BIT:       # %bb.0: # %entry
-; 32BIT-NEXT:    mr r4, r3
+; 32BIT-NEXT:    lbz r4, 0(r3)
+; 32BIT-NEXT:    lbz r3, 1(r3)
+; 32BIT-NEXT:    xor r4, r3, r4
 ; 32BIT-NEXT:    li r3, 0
-; 32BIT-NEXT:    lbz r5, 0(r4)
-; 32BIT-NEXT:    lbz r4, 1(r4)
-; 32BIT-NEXT:    xor r4, r4, r5
 ; 32BIT-NEXT:    blr
 entry:
   %0 = load i8, ptr %inp, align 1
@@ -46,13 +45,12 @@ define dso_local i64 @test_xor2(ptr nocapture noundef readonly %inp) local_unnam
 ;
 ; 32BIT-LABEL: test_xor2:
 ; 32BIT:       # %bb.0: # %entry
-; 32BIT-NEXT:    mr r4, r3
-; 32BIT-NEXT:    li r3, 0
-; 32BIT-NEXT:    lbz r5, 0(r4)
-; 32BIT-NEXT:    lbz r6, 1(r4)
-; 32BIT-NEXT:    lbz r4, 2(r4)
-; 32BIT-NEXT:    xor r5, r6, r5
+; 32BIT-NEXT:    lbz r4, 0(r3)
+; 32BIT-NEXT:    lbz r5, 1(r3)
+; 32BIT-NEXT:    lbz r3, 2(r3)
 ; 32BIT-NEXT:    xor r4, r5, r4
+; 32BIT-NEXT:    xor r4, r4, r3
+; 32BIT-NEXT:    li r3, 0
 ; 32BIT-NEXT:    blr
 entry:
   %0 = load i8, ptr %inp, align 1
@@ -76,11 +74,10 @@ define dso_local i64 @test_or(ptr nocapture noundef readonly %inp) local_unnamed
 ;
 ; 32BIT-LABEL: test_or:
 ; 32BIT:       # %bb.0: # %entry
-; 32BIT-NEXT:    mr r4, r3
+; 32BIT-NEXT:    lbz r4, 0(r3)
+; 32BIT-NEXT:    lbz r3, 1(r3)
+; 32BIT-NEXT:    or r4, r3, r4
 ; 32BIT-NEXT:    li r3, 0
-; 32BIT-NEXT:    lbz r5, 0(r4)
-; 32BIT-NEXT:    lbz r4, 1(r4)
-; 32BIT-NEXT:    or r4, r4, r5
 ; 32BIT-NEXT:    blr
 entry:
   %0 = load i8, ptr %inp, align 1
@@ -103,13 +100,12 @@ define dso_local i64 @test_or2(ptr nocapture noundef readonly %inp) local_unname
 ;
 ; 32BIT-LABEL: test_or2:
 ; 32BIT:       # %bb.0: # %entry
-; 32BIT-NEXT:    mr r4, r3
-; 32BIT-NEXT:    li r3, 0
-; 32BIT-NEXT:    lbz r5, 0(r4)
-; 32BIT-NEXT:    lbz r6, 1(r4)
-; 32BIT-NEXT:    lbz r4, 2(r4)
-; 32BIT-NEXT:    or r5, r6, r5
+; 32BIT-NEXT:    lbz r4, 0(r3)
+; 32BIT-NEXT:    lbz r5, 1(r3)
+; 32BIT-NEXT:    lbz r3, 2(r3)
 ; 32BIT-NEXT:    or r4, r5, r4
+; 32BIT-NEXT:    or r4, r4, r3
+; 32BIT-NEXT:    li r3, 0
 ; 32BIT-NEXT:    blr
 entry:
   %0 = load i8, ptr %inp, align 1
@@ -133,11 +129,10 @@ define dso_local i64 @test_and(ptr nocapture noundef readonly %inp) local_unname
 ;
 ; 32BIT-LABEL: test_and:
 ; 32BIT:       # %bb.0: # %entry
-; 32BIT-NEXT:    mr r4, r3
+; 32BIT-NEXT:    lbz r4, 0(r3)
+; 32BIT-NEXT:    lbz r3, 1(r3)
+; 32BIT-NEXT:    and r4, r3, r4
 ; 32BIT-NEXT:    li r3, 0
-; 32BIT-NEXT:    lbz r5, 0(r4)
-; 32BIT-NEXT:    lbz r4, 1(r4)
-; 32BIT-NEXT:    and r4, r4, r5
 ; 32BIT-NEXT:    blr
 entry:
   %0 = load i8, ptr %inp, align 1
@@ -160,13 +155,12 @@ define dso_local i64 @test_and2(ptr nocapture noundef readonly %inp) local_unnam
 ;
 ; 32BIT-LABEL: test_and2:
 ; 32BIT:       # %bb.0: # %entry
-; 32BIT-NEXT:    mr r4, r3
-; 32BIT-NEXT:    li r3, 0
-; 32BIT-NEXT:    lbz r5, 0(r4)
-; 32BIT-NEXT:    lbz r6, 1(r4)
-; 32BIT-NEXT:    lbz r4, 2(r4)
-; 32BIT-NEXT:    and r5, r6, r5
+; 32BIT-NEXT:    lbz r4, 0(r3)
+; 32BIT-NEXT:    lbz r5, 1(r3)
+; 32BIT-NEXT:    lbz r3, 2(r3)
 ; 32BIT-NEXT:    and r4, r5, r4
+; 32BIT-NEXT:    and r4, r4, r3
+; 32BIT-NEXT:    li r3, 0
 ; 32BIT-NEXT:    blr
 entry:
   %0 = load i8, ptr %inp, align 1
@@ -185,24 +179,23 @@ define dso_local i64 @test_mixed(ptr nocapture noundef readonly %inp) local_unna
 ; 64BIT:       # %bb.0: # %entry
 ; 64BIT-NEXT:    lbz r4, 0(r3)
 ; 64BIT-NEXT:    lbz r5, 1(r3)
-; 64BIT-NEXT:    lbz r6, 2(r3)
-; 64BIT-NEXT:    lbz r3, 3(r3)
 ; 64BIT-NEXT:    and r4, r5, r4
-; 64BIT-NEXT:    xor r4, r4, r6
+; 64BIT-NEXT:    lbz r5, 2(r3)
+; 64BIT-NEXT:    lbz r3, 3(r3)
+; 64BIT-NEXT:    xor r4, r4, r5
 ; 64BIT-NEXT:    or r3, r4, r3
 ; 64BIT-NEXT:    blr
 ;
 ; 32BIT-LABEL: test_mixed:
 ; 32BIT:       # %bb.0: # %entry
-; 32BIT-NEXT:    mr r4, r3
+; 32BIT-NEXT:    lbz r4, 0(r3)
+; 32BIT-NEXT:    lbz r5, 1(r3)
+; 32BIT-NEXT:    and r4, r5, r4
+; 32BIT-NEXT:    lbz r5, 2(r3)
+; 32BIT-NEXT:    lbz r3, 3(r3)
+; 32BIT-NEXT:    xor r4, r4, r5
+; 32BIT-NEXT:    or r4, r4, r3
 ; 32BIT-NEXT:    li r3, 0
-; 32BIT-NEXT:    lbz r5, 0(r4)
-; 32BIT-NEXT:    lbz r6, 1(r4)
-; 32BIT-NEXT:    lbz r7, 2(r4)
-; 32BIT-NEXT:    lbz r4, 3(r4)
-; 32BIT-NEXT:    and r5, r6, r5
-; 32BIT-NEXT:    xor r5, r5, r7
-; 32BIT-NEXT:    or r4, r5, r4
 ; 32BIT-NEXT:    blr
 entry:
   %0 = load i8, ptr %inp, align 1
@@ -268,9 +261,8 @@ define dso_local i64 @test_load(ptr nocapture noundef readonly %inp) local_unnam
 ;
 ; 32BIT-LABEL: test_load:
 ; 32BIT:       # %bb.0: # %entry
-; 32BIT-NEXT:    mr r4, r3
+; 32BIT-NEXT:    lbz r4, 0(r3)
 ; 32BIT-NEXT:    li r3, 0
-; 32BIT-NEXT:    lbz r4, 0(r4)
 ; 32BIT-NEXT:    blr
 entry:
   %0 = load i8, ptr %inp, align 1
@@ -288,11 +280,10 @@ define dso_local i64 @test_and32(ptr nocapture noundef readonly %inp) local_unna
 ;
 ; 32BIT-LABEL: test_and32:
 ; 32BIT:       # %bb.0: # %entry
-; 32BIT-NEXT:    mr r4, r3
+; 32BIT-NEXT:    lwz r4, 0(r3)
+; 32BIT-NEXT:    lwz r3, 4(r3)
+; 32BIT-NEXT:    and r4, r3, r4
 ; 32BIT-NEXT:    li r3, 0
-; 32BIT-NEXT:    lwz r5, 0(r4)
-; 32BIT-NEXT:    lwz r4, 4(r4)
-; 32BIT-NEXT:    and r4, r4, r5
 ; 32BIT-NEXT:    blr
 entry:
   %0 = load i32, ptr %inp, align 4

diff  --git a/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll b/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll
index f3a568a6de44c59..bb8337d237f51bc 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll
@@ -6,51 +6,28 @@
 
 ; Function Attrs: norecurse nounwind readonly
 define signext i32 @limit_loop(i32 signext %iters, ptr nocapture readonly %vec, i32 signext %limit) local_unnamed_addr {
-; V01-LABEL: limit_loop:
-; V01:       # %bb.0: # %entry
-; V01-NEXT:    mr 6, 3
-; V01-NEXT:    li 3, 0
-; V01-NEXT:    cmpwi 6, 0
-; V01-NEXT:    blelr 0
-; V01-NEXT:  # %bb.1: # %for.body.preheader
-; V01-NEXT:    mtctr 6
-; V01-NEXT:    addi 4, 4, -4
-; V01-NEXT:    b .LBB0_3
-; V01-NEXT:    .p2align 4
-; V01-NEXT:  .LBB0_2: # %for.cond
-; V01-NEXT:    #
-; V01-NEXT:    bdzlr
-; V01-NEXT:  .LBB0_3: # %for.body
-; V01-NEXT:    #
-; V01-NEXT:    lwzu 6, 4(4)
-; V01-NEXT:    cmpw 6, 5
-; V01-NEXT:    blt 0, .LBB0_2
-; V01-NEXT:  # %bb.4:
-; V01-NEXT:    li 3, 1
-; V01-NEXT:    blr
-;
-; V23-LABEL: limit_loop:
-; V23:       # %bb.0: # %entry
-; V23-NEXT:    mr 6, 3
-; V23-NEXT:    li 3, 0
-; V23-NEXT:    cmpwi 6, 0
-; V23-NEXT:    blelr 0
-; V23-NEXT:  # %bb.1: # %for.body.preheader
-; V23-NEXT:    addi 4, 4, -4
-; V23-NEXT:    mtctr 6
-; V23-NEXT:    b .LBB0_3
-; V23-NEXT:    .p2align 4
-; V23-NEXT:  .LBB0_2: # %for.cond
-; V23-NEXT:    #
-; V23-NEXT:    bdzlr
-; V23-NEXT:  .LBB0_3: # %for.body
-; V23-NEXT:    #
-; V23-NEXT:    lwzu 6, 4(4)
-; V23-NEXT:    cmpw 6, 5
-; V23-NEXT:    blt 0, .LBB0_2
-; V23-NEXT:  # %bb.4:
-; V23-NEXT:    li 3, 1
-; V23-NEXT:    blr
+; CHECK-LABEL: limit_loop:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mr 6, 3
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    cmpwi 6, 0
+; CHECK-NEXT:    blelr 0
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    mtctr 6
+; CHECK-NEXT:    addi 4, 4, -4
+; CHECK-NEXT:    b .LBB0_3
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_2: # %for.cond
+; CHECK-NEXT:    #
+; CHECK-NEXT:    bdzlr
+; CHECK-NEXT:  .LBB0_3: # %for.body
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lwzu 6, 4(4)
+; CHECK-NEXT:    cmpw 6, 5
+; CHECK-NEXT:    blt 0, .LBB0_2
+; CHECK-NEXT:  # %bb.4:
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    blr
 entry:
   %cmp5 = icmp sgt i32 %iters, 0
   br i1 %cmp5, label %for.body.preheader, label %cleanup
@@ -78,8 +55,9 @@ cleanup:                                          ; preds = %for.body, %for.cond
 
 
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
 ; CHECK-V0: {{.*}}
 ; CHECK-V1: {{.*}}
 ; CHECK-V2: {{.*}}
 ; CHECK-V3: {{.*}}
+; V01: {{.*}}
+; V23: {{.*}}

diff  --git a/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll b/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
index 17f04b9587d7c37..f9d2c259a4c197b 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-rotate-clear.ll
@@ -11,11 +11,11 @@
 define dso_local i64 @rotatemask32(i64 noundef %word) local_unnamed_addr #0 {
 ; AIX32-LABEL: rotatemask32:
 ; AIX32:       # %bb.0: # %entry
-; AIX32-NEXT:    cntlzw r5, r4
+; AIX32-NEXT:    cntlzw r5, r3
 ; AIX32-NEXT:    cmplwi r3, 0
-; AIX32-NEXT:    cntlzw r3, r3
-; AIX32-NEXT:    addi r5, r5, 32
-; AIX32-NEXT:    iseleq r3, r5, r3
+; AIX32-NEXT:    cntlzw r3, r4
+; AIX32-NEXT:    addi r3, r3, 32
+; AIX32-NEXT:    iseleq r3, r3, r5
 ; AIX32-NEXT:    rlwnm r4, r4, r3, 1, 31
 ; AIX32-NEXT:    li r3, 0
 ; AIX32-NEXT:    blr
@@ -53,23 +53,23 @@ declare i32 @llvm.fshl.i32(i32, i32, i32) #0
 define dso_local i64 @rotatemask64(i64 noundef %word) local_unnamed_addr #0 {
 ; AIX32-LABEL: rotatemask64:
 ; AIX32:       # %bb.0: # %entry
-; AIX32-NEXT:    cntlzw r5, r4
-; AIX32-NEXT:    cntlzw r6, r3
 ; AIX32-NEXT:    cmplwi r3, 0
-; AIX32-NEXT:    addi r5, r5, 32
-; AIX32-NEXT:    iseleq r5, r5, r6
+; AIX32-NEXT:    cntlzw r6, r4
+; AIX32-NEXT:    addi r6, r6, 32
+; AIX32-NEXT:    cntlzw r5, r3
+; AIX32-NEXT:    iseleq r5, r6, r5
 ; AIX32-NEXT:    andi. r6, r5, 32
 ; AIX32-NEXT:    clrlwi r5, r5, 27
 ; AIX32-NEXT:    iseleq r6, r3, r4
-; AIX32-NEXT:    subfic r7, r5, 32
 ; AIX32-NEXT:    iseleq r3, r4, r3
-; AIX32-NEXT:    slw r8, r6, r5
-; AIX32-NEXT:    srw r6, r6, r7
-; AIX32-NEXT:    srw r4, r3, r7
-; AIX32-NEXT:    slw r3, r3, r5
-; AIX32-NEXT:    or r5, r8, r4
-; AIX32-NEXT:    or r4, r3, r6
-; AIX32-NEXT:    clrlwi r3, r5, 1
+; AIX32-NEXT:    subfic r7, r5, 32
+; AIX32-NEXT:    slw r4, r3, r5
+; AIX32-NEXT:    srw r3, r3, r7
+; AIX32-NEXT:    slw r5, r6, r5
+; AIX32-NEXT:    srw r8, r6, r7
+; AIX32-NEXT:    or r3, r5, r3
+; AIX32-NEXT:    or r4, r4, r8
+; AIX32-NEXT:    clrlwi r3, r3, 1
 ; AIX32-NEXT:    blr
 ;
 ; AIX64-LABEL: rotatemask64:
@@ -101,23 +101,23 @@ declare i64 @llvm.fshl.i64(i64, i64, i64) #1
 define dso_local i64 @rotatemask64_2(i64 noundef %word) local_unnamed_addr #0 {
 ; AIX32-LABEL: rotatemask64_2:
 ; AIX32:       # %bb.0: # %entry
-; AIX32-NEXT:    cntlzw r5, r4
-; AIX32-NEXT:    cntlzw r6, r3
 ; AIX32-NEXT:    cmplwi r3, 0
-; AIX32-NEXT:    addi r5, r5, 32
-; AIX32-NEXT:    iseleq r5, r5, r6
+; AIX32-NEXT:    cntlzw r6, r4
+; AIX32-NEXT:    addi r6, r6, 32
+; AIX32-NEXT:    cntlzw r5, r3
+; AIX32-NEXT:    iseleq r5, r6, r5
 ; AIX32-NEXT:    andi. r6, r5, 32
 ; AIX32-NEXT:    clrlwi r5, r5, 27
 ; AIX32-NEXT:    iseleq r6, r3, r4
-; AIX32-NEXT:    subfic r7, r5, 32
 ; AIX32-NEXT:    iseleq r3, r4, r3
-; AIX32-NEXT:    slw r8, r6, r5
-; AIX32-NEXT:    srw r6, r6, r7
-; AIX32-NEXT:    srw r4, r3, r7
-; AIX32-NEXT:    slw r3, r3, r5
-; AIX32-NEXT:    or r5, r8, r4
-; AIX32-NEXT:    or r4, r3, r6
-; AIX32-NEXT:    clrlwi r3, r5, 1
+; AIX32-NEXT:    subfic r7, r5, 32
+; AIX32-NEXT:    slw r4, r3, r5
+; AIX32-NEXT:    srw r3, r3, r7
+; AIX32-NEXT:    slw r5, r6, r5
+; AIX32-NEXT:    srw r8, r6, r7
+; AIX32-NEXT:    or r3, r5, r3
+; AIX32-NEXT:    or r4, r4, r8
+; AIX32-NEXT:    clrlwi r3, r3, 1
 ; AIX32-NEXT:    blr
 ;
 ; AIX64-LABEL: rotatemask64_2:
@@ -147,21 +147,21 @@ entry:
 define dso_local i64 @rotatemask64_3(i64 noundef %word) local_unnamed_addr #0 {
 ; AIX32-LABEL: rotatemask64_3:
 ; AIX32:       # %bb.0: # %entry
-; AIX32-NEXT:    cntlzw r5, r4
-; AIX32-NEXT:    cntlzw r6, r3
 ; AIX32-NEXT:    cmplwi r3, 0
-; AIX32-NEXT:    addi r5, r5, 32
-; AIX32-NEXT:    iseleq r5, r5, r6
+; AIX32-NEXT:    cntlzw r6, r4
+; AIX32-NEXT:    addi r6, r6, 32
+; AIX32-NEXT:    cntlzw r5, r3
+; AIX32-NEXT:    iseleq r5, r6, r5
 ; AIX32-NEXT:    andi. r6, r5, 32
 ; AIX32-NEXT:    clrlwi r5, r5, 27
 ; AIX32-NEXT:    iseleq r6, r3, r4
-; AIX32-NEXT:    subfic r7, r5, 32
 ; AIX32-NEXT:    iseleq r3, r4, r3
-; AIX32-NEXT:    srw r4, r6, r7
-; AIX32-NEXT:    slw r8, r3, r5
+; AIX32-NEXT:    subfic r7, r5, 32
+; AIX32-NEXT:    srw r8, r6, r7
+; AIX32-NEXT:    slw r4, r3, r5
 ; AIX32-NEXT:    srw r3, r3, r7
 ; AIX32-NEXT:    slw r5, r6, r5
-; AIX32-NEXT:    or r4, r8, r4
+; AIX32-NEXT:    or r4, r4, r8
 ; AIX32-NEXT:    or r3, r5, r3
 ; AIX32-NEXT:    clrlwi r3, r3, 1
 ; AIX32-NEXT:    rlwinm r4, r4, 0, 0, 23
@@ -201,17 +201,17 @@ define dso_local i64 @rotatemask64_nocount(i64 noundef %word, i64 noundef %clz)
 ; AIX32-LABEL: rotatemask64_nocount:
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    andi. r5, r6, 32
-; AIX32-NEXT:    clrlwi r5, r6, 27
-; AIX32-NEXT:    iseleq r6, r3, r4
-; AIX32-NEXT:    subfic r7, r5, 32
+; AIX32-NEXT:    clrlwi r6, r6, 27
+; AIX32-NEXT:    subfic r7, r6, 32
+; AIX32-NEXT:    iseleq r5, r3, r4
 ; AIX32-NEXT:    iseleq r3, r4, r3
-; AIX32-NEXT:    slw r8, r6, r5
-; AIX32-NEXT:    srw r4, r3, r7
-; AIX32-NEXT:    srw r6, r6, r7
-; AIX32-NEXT:    slw r3, r3, r5
-; AIX32-NEXT:    or r5, r8, r4
-; AIX32-NEXT:    or r4, r3, r6
-; AIX32-NEXT:    clrlwi r3, r5, 8
+; AIX32-NEXT:    srw r8, r5, r7
+; AIX32-NEXT:    slw r4, r3, r6
+; AIX32-NEXT:    srw r3, r3, r7
+; AIX32-NEXT:    slw r5, r5, r6
+; AIX32-NEXT:    or r3, r5, r3
+; AIX32-NEXT:    or r4, r4, r8
+; AIX32-NEXT:    clrlwi r3, r3, 8
 ; AIX32-NEXT:    blr
 ;
 ; AIX64-LABEL: rotatemask64_nocount:
@@ -238,17 +238,17 @@ define dso_local i64 @builtincheck(i64 noundef %word, i64 noundef %shift) local_
 ; AIX32-LABEL: builtincheck:
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    andi. r5, r6, 32
-; AIX32-NEXT:    clrlwi r5, r6, 27
-; AIX32-NEXT:    iseleq r6, r3, r4
-; AIX32-NEXT:    subfic r7, r5, 32
+; AIX32-NEXT:    clrlwi r6, r6, 27
+; AIX32-NEXT:    subfic r7, r6, 32
+; AIX32-NEXT:    iseleq r5, r3, r4
 ; AIX32-NEXT:    iseleq r3, r4, r3
-; AIX32-NEXT:    slw r8, r6, r5
-; AIX32-NEXT:    srw r4, r3, r7
-; AIX32-NEXT:    srw r6, r6, r7
-; AIX32-NEXT:    slw r3, r3, r5
-; AIX32-NEXT:    or r5, r8, r4
-; AIX32-NEXT:    or r4, r3, r6
-; AIX32-NEXT:    clrlwi r3, r5, 1
+; AIX32-NEXT:    srw r8, r5, r7
+; AIX32-NEXT:    slw r4, r3, r6
+; AIX32-NEXT:    srw r3, r3, r7
+; AIX32-NEXT:    slw r5, r5, r6
+; AIX32-NEXT:    or r3, r5, r3
+; AIX32-NEXT:    or r4, r4, r8
+; AIX32-NEXT:    clrlwi r3, r3, 1
 ; AIX32-NEXT:    blr
 ;
 ; AIX64-LABEL: builtincheck:
@@ -275,10 +275,10 @@ define dso_local i64 @immshift(i64 noundef %word) local_unnamed_addr #0 {
 ; AIX32-LABEL: immshift:
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    rotlwi r5, r3, 15
-; AIX32-NEXT:    srwi r6, r4, 17
 ; AIX32-NEXT:    rlwimi r5, r4, 15, 0, 16
-; AIX32-NEXT:    rlwimi r6, r3, 15, 12, 16
-; AIX32-NEXT:    mr r3, r6
+; AIX32-NEXT:    srwi r4, r4, 17
+; AIX32-NEXT:    rlwimi r4, r3, 15, 12, 16
+; AIX32-NEXT:    mr r3, r4
 ; AIX32-NEXT:    mr r4, r5
 ; AIX32-NEXT:    blr
 ;
@@ -307,23 +307,23 @@ define dso_local i64 @twomasks(i64 noundef %word) local_unnamed_addr #0 {
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    mflr r0
 ; AIX32-NEXT:    stwu r1, -64(r1)
-; AIX32-NEXT:    cntlzw r5, r4
-; AIX32-NEXT:    cntlzw r6, r3
-; AIX32-NEXT:    stw r0, 72(r1)
 ; AIX32-NEXT:    cmplwi r3, 0
-; AIX32-NEXT:    addi r5, r5, 32
-; AIX32-NEXT:    iseleq r5, r5, r6
+; AIX32-NEXT:    cntlzw r6, r4
+; AIX32-NEXT:    stw r0, 72(r1)
+; AIX32-NEXT:    addi r6, r6, 32
+; AIX32-NEXT:    cntlzw r5, r3
+; AIX32-NEXT:    iseleq r5, r6, r5
 ; AIX32-NEXT:    andi. r6, r5, 32
 ; AIX32-NEXT:    clrlwi r5, r5, 27
 ; AIX32-NEXT:    iseleq r6, r3, r4
-; AIX32-NEXT:    subfic r7, r5, 32
 ; AIX32-NEXT:    iseleq r3, r4, r3
-; AIX32-NEXT:    slw r8, r6, r5
-; AIX32-NEXT:    srw r6, r6, r7
-; AIX32-NEXT:    srw r4, r3, r7
-; AIX32-NEXT:    slw r3, r3, r5
-; AIX32-NEXT:    or r5, r8, r4
-; AIX32-NEXT:    or r4, r3, r6
+; AIX32-NEXT:    subfic r7, r5, 32
+; AIX32-NEXT:    srw r8, r6, r7
+; AIX32-NEXT:    slw r4, r3, r5
+; AIX32-NEXT:    srw r3, r3, r7
+; AIX32-NEXT:    slw r5, r6, r5
+; AIX32-NEXT:    or r4, r4, r8
+; AIX32-NEXT:    or r5, r5, r3
 ; AIX32-NEXT:    clrlwi r3, r5, 1
 ; AIX32-NEXT:    clrlwi r5, r5, 16
 ; AIX32-NEXT:    mr r6, r4
@@ -397,27 +397,28 @@ define dso_local i64 @tworotates(i64 noundef %word) local_unnamed_addr #0 {
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    mflr r0
 ; AIX32-NEXT:    stwu r1, -64(r1)
-; AIX32-NEXT:    cntlzw r5, r4
-; AIX32-NEXT:    cntlzw r6, r3
-; AIX32-NEXT:    stw r0, 72(r1)
 ; AIX32-NEXT:    cmplwi r3, 0
-; AIX32-NEXT:    addi r5, r5, 32
-; AIX32-NEXT:    iseleq r5, r5, r6
+; AIX32-NEXT:    cntlzw r6, r4
+; AIX32-NEXT:    stw r0, 72(r1)
+; AIX32-NEXT:    addi r6, r6, 32
+; AIX32-NEXT:    cntlzw r5, r3
+; AIX32-NEXT:    iseleq r5, r6, r5
 ; AIX32-NEXT:    andi. r6, r5, 32
 ; AIX32-NEXT:    clrlwi r5, r5, 27
-; AIX32-NEXT:    iseleq r7, r3, r4
-; AIX32-NEXT:    subfic r8, r5, 32
-; AIX32-NEXT:    rotlwi r6, r3, 23
+; AIX32-NEXT:    iseleq r6, r3, r4
 ; AIX32-NEXT:    iseleq r9, r4, r3
-; AIX32-NEXT:    slw r11, r7, r5
-; AIX32-NEXT:    srw r7, r7, r8
-; AIX32-NEXT:    srw r10, r9, r8
-; AIX32-NEXT:    slw r8, r9, r5
-; AIX32-NEXT:    srwi r5, r4, 9
-; AIX32-NEXT:    or r9, r11, r10
+; AIX32-NEXT:    subfic r7, r5, 32
+; AIX32-NEXT:    srw r8, r6, r7
+; AIX32-NEXT:    slw r10, r9, r5
+; AIX32-NEXT:    srw r7, r9, r7
+; AIX32-NEXT:    slw r5, r6, r5
+; AIX32-NEXT:    rotlwi r6, r3, 23
+; AIX32-NEXT:    or r5, r5, r7
+; AIX32-NEXT:    or r8, r10, r8
 ; AIX32-NEXT:    rlwimi r6, r4, 23, 0, 8
-; AIX32-NEXT:    or r4, r8, r7
-; AIX32-NEXT:    clrlwi r7, r9, 1
+; AIX32-NEXT:    clrlwi r7, r5, 1
+; AIX32-NEXT:    srwi r5, r4, 9
+; AIX32-NEXT:    mr r4, r8
 ; AIX32-NEXT:    rlwimi r5, r3, 23, 1, 8
 ; AIX32-NEXT:    mr r3, r7
 ; AIX32-NEXT:    bl .callee[PR]

diff  --git a/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll b/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
index bfa68f27f306a4a..f22aeffdbb466a1 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
@@ -506,11 +506,11 @@ if.end:                                           ; preds = %for.body, %if.else
 ;
 ; CHECK-32: mr 3, 4
 ; CHECK-32-NEXT: mr 5, 4
-; CHECK-32-NEXT: mr 6, 4
+; ENABLE-32-NEXT: stw 0, 72(1)
+; CHECK-32: mr 6, 4
 ; CHECK-32-NEXT: mr 7, 4
 ; CHECK-32-NEXT: mr 8, 4
 ; CHECK-32-NEXT: mr 9, 4
-; ENABLE-32-NEXT: stw 0, 72(1)
 ;
 ; CHECK-NEXT: bl {{.*}}someVariadicFunc
 ; CHECK: slwi 3, 3, 3

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
index 533439ff8af8a49..a2a5c6c5eafb7fc 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
@@ -17,12 +17,12 @@ define i64 @setb1(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb1:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r3, r4
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    addic r7, r6, -1
+; CHECK-PWR8-NEXT:    xor r5, r3, r4
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
-; CHECK-PWR8-NEXT:    subfe r6, r7, r6
-; CHECK-PWR8-NEXT:    isellt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    addic r6, r5, -1
+; CHECK-PWR8-NEXT:    subfe r5, r6, r5
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i64 %a, %b
   %t2 = icmp ne i64 %a, %b
@@ -41,12 +41,12 @@ define i64 @setb2(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb2:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r3, r4
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    addic r7, r6, -1
+; CHECK-PWR8-NEXT:    xor r5, r3, r4
 ; CHECK-PWR8-NEXT:    cmpd r4, r3
-; CHECK-PWR8-NEXT:    subfe r6, r7, r6
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    addic r6, r5, -1
+; CHECK-PWR8-NEXT:    subfe r5, r6, r5
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp sgt i64 %b, %a
   %t2 = icmp ne i64 %a, %b
@@ -65,12 +65,12 @@ define i64 @setb3(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb3:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r4, r3
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    addic r7, r6, -1
+; CHECK-PWR8-NEXT:    xor r5, r4, r3
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
-; CHECK-PWR8-NEXT:    subfe r6, r7, r6
-; CHECK-PWR8-NEXT:    isellt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    addic r6, r5, -1
+; CHECK-PWR8-NEXT:    subfe r5, r6, r5
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i64 %a, %b
   %t2 = icmp ne i64 %b, %a
@@ -89,12 +89,12 @@ define i64 @setb4(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb4:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r4, r3
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    addic r7, r6, -1
+; CHECK-PWR8-NEXT:    xor r5, r4, r3
 ; CHECK-PWR8-NEXT:    cmpd r4, r3
-; CHECK-PWR8-NEXT:    subfe r6, r7, r6
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    addic r6, r5, -1
+; CHECK-PWR8-NEXT:    subfe r5, r6, r5
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp sgt i64 %b, %a
   %t2 = icmp ne i64 %b, %a
@@ -113,14 +113,14 @@ define i64 @setb5(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb5:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    sradi r6, r4, 63
-; CHECK-PWR8-NEXT:    rldicl r7, r3, 1, 63
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    subc r8, r4, r3
+; CHECK-PWR8-NEXT:    sradi r5, r4, 63
+; CHECK-PWR8-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-PWR8-NEXT:    subc r7, r4, r3
+; CHECK-PWR8-NEXT:    adde r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
-; CHECK-PWR8-NEXT:    adde r6, r7, r6
-; CHECK-PWR8-NEXT:    xori r6, r6, 1
-; CHECK-PWR8-NEXT:    isellt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i64 %a, %b
   %t2 = icmp sgt i64 %a, %b
@@ -139,14 +139,14 @@ define i64 @setb6(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb6:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    sradi r6, r4, 63
-; CHECK-PWR8-NEXT:    rldicl r7, r3, 1, 63
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    subc r8, r4, r3
+; CHECK-PWR8-NEXT:    sradi r5, r4, 63
+; CHECK-PWR8-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-PWR8-NEXT:    subc r7, r4, r3
+; CHECK-PWR8-NEXT:    adde r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpd r4, r3
-; CHECK-PWR8-NEXT:    adde r6, r7, r6
-; CHECK-PWR8-NEXT:    xori r6, r6, 1
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp sgt i64 %b, %a
   %t2 = icmp sgt i64 %a, %b
@@ -165,14 +165,14 @@ define i64 @setb7(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb7:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    sradi r6, r4, 63
-; CHECK-PWR8-NEXT:    rldicl r7, r3, 1, 63
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    subc r8, r4, r3
+; CHECK-PWR8-NEXT:    sradi r5, r4, 63
+; CHECK-PWR8-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-PWR8-NEXT:    subc r7, r4, r3
+; CHECK-PWR8-NEXT:    adde r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
-; CHECK-PWR8-NEXT:    adde r6, r7, r6
-; CHECK-PWR8-NEXT:    xori r6, r6, 1
-; CHECK-PWR8-NEXT:    isellt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i64 %a, %b
   %t2 = icmp slt i64 %b, %a
@@ -191,14 +191,14 @@ define i64 @setb8(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb8:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    sradi r6, r4, 63
-; CHECK-PWR8-NEXT:    rldicl r7, r3, 1, 63
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    subc r8, r4, r3
+; CHECK-PWR8-NEXT:    sradi r5, r4, 63
+; CHECK-PWR8-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-PWR8-NEXT:    subc r7, r4, r3
+; CHECK-PWR8-NEXT:    adde r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpd r4, r3
-; CHECK-PWR8-NEXT:    adde r6, r7, r6
-; CHECK-PWR8-NEXT:    xori r6, r6, 1
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp sgt i64 %b, %a
   %t2 = icmp slt i64 %b, %a
@@ -217,12 +217,12 @@ define i64 @setb9(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb9:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r3, r4
-; CHECK-PWR8-NEXT:    li r5, 1
-; CHECK-PWR8-NEXT:    subfic r6, r6, 0
+; CHECK-PWR8-NEXT:    xor r5, r3, r4
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
-; CHECK-PWR8-NEXT:    subfe r3, r6, r6
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, 1
+; CHECK-PWR8-NEXT:    subfic r5, r5, 0
+; CHECK-PWR8-NEXT:    subfe r5, r5, r5
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp sgt i64 %a, %b
   %t2 = icmp ne i64 %a, %b
@@ -241,12 +241,12 @@ define i64 @setb10(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb10:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r3, r4
-; CHECK-PWR8-NEXT:    li r5, 1
-; CHECK-PWR8-NEXT:    subfic r6, r6, 0
+; CHECK-PWR8-NEXT:    xor r5, r3, r4
 ; CHECK-PWR8-NEXT:    cmpd r4, r3
-; CHECK-PWR8-NEXT:    subfe r3, r6, r6
-; CHECK-PWR8-NEXT:    isellt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, 1
+; CHECK-PWR8-NEXT:    subfic r5, r5, 0
+; CHECK-PWR8-NEXT:    subfe r5, r5, r5
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i64 %b, %a
   %t2 = icmp ne i64 %a, %b
@@ -265,12 +265,12 @@ define i64 @setb11(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb11:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r4, r3
-; CHECK-PWR8-NEXT:    li r5, 1
-; CHECK-PWR8-NEXT:    subfic r6, r6, 0
+; CHECK-PWR8-NEXT:    xor r5, r4, r3
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
-; CHECK-PWR8-NEXT:    subfe r3, r6, r6
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, 1
+; CHECK-PWR8-NEXT:    subfic r5, r5, 0
+; CHECK-PWR8-NEXT:    subfe r5, r5, r5
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp sgt i64 %a, %b
   %t2 = icmp ne i64 %b, %a
@@ -289,12 +289,12 @@ define i64 @setb12(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb12:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r4, r3
-; CHECK-PWR8-NEXT:    li r5, 1
-; CHECK-PWR8-NEXT:    subfic r6, r6, 0
+; CHECK-PWR8-NEXT:    xor r5, r4, r3
 ; CHECK-PWR8-NEXT:    cmpd r4, r3
-; CHECK-PWR8-NEXT:    subfe r3, r6, r6
-; CHECK-PWR8-NEXT:    isellt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, 1
+; CHECK-PWR8-NEXT:    subfic r5, r5, 0
+; CHECK-PWR8-NEXT:    subfe r5, r5, r5
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i64 %b, %a
   %t2 = icmp ne i64 %b, %a
@@ -313,15 +313,15 @@ define i64 @setb13(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb13:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    sradi r6, r3, 63
-; CHECK-PWR8-NEXT:    rldicl r7, r4, 1, 63
-; CHECK-PWR8-NEXT:    li r5, 1
-; CHECK-PWR8-NEXT:    subc r8, r3, r4
+; CHECK-PWR8-NEXT:    sradi r5, r3, 63
+; CHECK-PWR8-NEXT:    rldicl r6, r4, 1, 63
+; CHECK-PWR8-NEXT:    subc r7, r3, r4
+; CHECK-PWR8-NEXT:    adde r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
-; CHECK-PWR8-NEXT:    adde r6, r7, r6
-; CHECK-PWR8-NEXT:    xori r6, r6, 1
-; CHECK-PWR8-NEXT:    neg r6, r6
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, 1
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    neg r5, r5
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp sgt i64 %a, %b
   %t2 = icmp slt i64 %a, %b
@@ -340,15 +340,15 @@ define i64 @setb14(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb14:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    sradi r6, r3, 63
-; CHECK-PWR8-NEXT:    rldicl r7, r4, 1, 63
-; CHECK-PWR8-NEXT:    li r5, 1
-; CHECK-PWR8-NEXT:    subc r8, r3, r4
+; CHECK-PWR8-NEXT:    sradi r5, r3, 63
+; CHECK-PWR8-NEXT:    rldicl r6, r4, 1, 63
+; CHECK-PWR8-NEXT:    subc r7, r3, r4
+; CHECK-PWR8-NEXT:    adde r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpd r4, r3
-; CHECK-PWR8-NEXT:    adde r6, r7, r6
-; CHECK-PWR8-NEXT:    xori r6, r6, 1
-; CHECK-PWR8-NEXT:    neg r6, r6
-; CHECK-PWR8-NEXT:    isellt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, 1
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    neg r5, r5
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i64 %b, %a
   %t2 = icmp slt i64 %a, %b
@@ -367,15 +367,15 @@ define i64 @setb15(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb15:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    sradi r6, r3, 63
-; CHECK-PWR8-NEXT:    rldicl r7, r4, 1, 63
-; CHECK-PWR8-NEXT:    li r5, 1
-; CHECK-PWR8-NEXT:    subc r8, r3, r4
+; CHECK-PWR8-NEXT:    sradi r5, r3, 63
+; CHECK-PWR8-NEXT:    rldicl r6, r4, 1, 63
+; CHECK-PWR8-NEXT:    subc r7, r3, r4
+; CHECK-PWR8-NEXT:    adde r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
-; CHECK-PWR8-NEXT:    adde r6, r7, r6
-; CHECK-PWR8-NEXT:    xori r6, r6, 1
-; CHECK-PWR8-NEXT:    neg r6, r6
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, 1
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    neg r5, r5
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp sgt i64 %a, %b
   %t2 = icmp sgt i64 %b, %a
@@ -394,15 +394,15 @@ define i64 @setb16(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb16:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    sradi r6, r3, 63
-; CHECK-PWR8-NEXT:    rldicl r7, r4, 1, 63
-; CHECK-PWR8-NEXT:    li r5, 1
-; CHECK-PWR8-NEXT:    subc r8, r3, r4
+; CHECK-PWR8-NEXT:    sradi r5, r3, 63
+; CHECK-PWR8-NEXT:    rldicl r6, r4, 1, 63
+; CHECK-PWR8-NEXT:    subc r7, r3, r4
+; CHECK-PWR8-NEXT:    adde r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpd r4, r3
-; CHECK-PWR8-NEXT:    adde r6, r7, r6
-; CHECK-PWR8-NEXT:    xori r6, r6, 1
-; CHECK-PWR8-NEXT:    neg r6, r6
-; CHECK-PWR8-NEXT:    isellt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, 1
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    neg r5, r5
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i64 %b, %a
   %t2 = icmp sgt i64 %b, %a
@@ -421,8 +421,8 @@ define i64 @setb17(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb17:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    li r5, -1
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
+; CHECK-PWR8-NEXT:    li r5, -1
 ; CHECK-PWR8-NEXT:    li r6, 1
 ; CHECK-PWR8-NEXT:    iselgt r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpld r3, r4
@@ -445,8 +445,8 @@ define i64 @setb18(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb18:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    li r5, -1
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
+; CHECK-PWR8-NEXT:    li r5, -1
 ; CHECK-PWR8-NEXT:    li r6, 1
 ; CHECK-PWR8-NEXT:    iselgt r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpld r4, r3
@@ -469,8 +469,8 @@ define i64 @setb19(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb19:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    li r5, -1
 ; CHECK-PWR8-NEXT:    cmpd r4, r3
+; CHECK-PWR8-NEXT:    li r5, -1
 ; CHECK-PWR8-NEXT:    li r6, 1
 ; CHECK-PWR8-NEXT:    isellt r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpld r3, r4
@@ -493,8 +493,8 @@ define i64 @setb20(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb20:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    li r5, -1
 ; CHECK-PWR8-NEXT:    cmpd r4, r3
+; CHECK-PWR8-NEXT:    li r5, -1
 ; CHECK-PWR8-NEXT:    li r6, 1
 ; CHECK-PWR8-NEXT:    isellt r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpld r4, r3
@@ -517,8 +517,8 @@ define i64 @setb21(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb21:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    li r5, 1
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
+; CHECK-PWR8-NEXT:    li r5, 1
 ; CHECK-PWR8-NEXT:    li r6, -1
 ; CHECK-PWR8-NEXT:    isellt r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpld r3, r4
@@ -541,8 +541,8 @@ define i64 @setb22(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb22:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    li r5, 1
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
+; CHECK-PWR8-NEXT:    li r5, 1
 ; CHECK-PWR8-NEXT:    li r6, -1
 ; CHECK-PWR8-NEXT:    isellt r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpld r4, r3
@@ -565,8 +565,8 @@ define i64 @setb23(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb23:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    li r5, 1
 ; CHECK-PWR8-NEXT:    cmpd r4, r3
+; CHECK-PWR8-NEXT:    li r5, 1
 ; CHECK-PWR8-NEXT:    li r6, -1
 ; CHECK-PWR8-NEXT:    iselgt r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpld r3, r4
@@ -589,8 +589,8 @@ define i64 @setb24(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb24:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    li r5, 1
 ; CHECK-PWR8-NEXT:    cmpd r4, r3
+; CHECK-PWR8-NEXT:    li r5, 1
 ; CHECK-PWR8-NEXT:    li r6, -1
 ; CHECK-PWR8-NEXT:    iselgt r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpld r4, r3
@@ -616,12 +616,12 @@ define i64 @setb25(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb25:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r4, r3
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    addic r7, r6, -1
+; CHECK-PWR8-NEXT:    xor r5, r4, r3
 ; CHECK-PWR8-NEXT:    cmpd r4, r3
-; CHECK-PWR8-NEXT:    subfe r6, r7, r6
-; CHECK-PWR8-NEXT:    isellt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    addic r6, r5, -1
+; CHECK-PWR8-NEXT:    subfe r5, r6, r5
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i64 %b, %a
   %t2 = icmp ne i64 %b, %a
@@ -640,12 +640,12 @@ define i64 @setb26(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb26:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r4, r3
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    addic r7, r6, -1
+; CHECK-PWR8-NEXT:    xor r5, r4, r3
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
-; CHECK-PWR8-NEXT:    subfe r6, r7, r6
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    addic r6, r5, -1
+; CHECK-PWR8-NEXT:    subfe r5, r6, r5
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp sgt i64 %a, %b
   %t2 = icmp ne i64 %b, %a
@@ -667,12 +667,12 @@ define i64 @setb27(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb27:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r4, r3
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    addic r7, r6, -1
+; CHECK-PWR8-NEXT:    xor r5, r4, r3
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
-; CHECK-PWR8-NEXT:    subfe r6, r7, r6
-; CHECK-PWR8-NEXT:    isellt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    addic r6, r5, -1
+; CHECK-PWR8-NEXT:    subfe r5, r6, r5
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    extsw r3, r3
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i64 %a, %b
@@ -693,12 +693,12 @@ define i64 @setb28(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb28:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r4, r3
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    addic r7, r6, -1
+; CHECK-PWR8-NEXT:    xor r5, r4, r3
 ; CHECK-PWR8-NEXT:    cmpd r4, r3
-; CHECK-PWR8-NEXT:    subfe r6, r7, r6
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    addic r6, r5, -1
+; CHECK-PWR8-NEXT:    subfe r5, r6, r5
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    extsw r3, r3
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp sgt i64 %b, %a
@@ -720,14 +720,14 @@ define i64 @setb29(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setb29:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    sradi r6, r4, 63
-; CHECK-PWR8-NEXT:    rldicl r7, r3, 1, 63
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    subc r8, r4, r3
+; CHECK-PWR8-NEXT:    sradi r5, r4, 63
+; CHECK-PWR8-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-PWR8-NEXT:    subc r7, r4, r3
+; CHECK-PWR8-NEXT:    adde r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
-; CHECK-PWR8-NEXT:    adde r6, r7, r6
-; CHECK-PWR8-NEXT:    xori r6, r6, 1
-; CHECK-PWR8-NEXT:    isellt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    clrldi r3, r3, 56
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i64 %a, %b
@@ -751,13 +751,13 @@ define i64 @setbsw1(i32 %a, i32 %b) {
 ;
 ; CHECK-PWR8-LABEL: setbsw1:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r3, r4
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    cntlzw r6, r6
+; CHECK-PWR8-NEXT:    xor r5, r3, r4
 ; CHECK-PWR8-NEXT:    cmpw r3, r4
-; CHECK-PWR8-NEXT:    srwi r6, r6, 5
-; CHECK-PWR8-NEXT:    xori r3, r6, 1
-; CHECK-PWR8-NEXT:    isellt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    cntlzw r5, r5
+; CHECK-PWR8-NEXT:    srwi r5, r5, 5
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i32 %a, %b
   %t2 = icmp ne i32 %a, %b
@@ -776,13 +776,13 @@ define i64 @setbsw2(i32 %a, i32 %b) {
 ;
 ; CHECK-PWR8-LABEL: setbsw2:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r3, r4
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    cntlzw r6, r6
+; CHECK-PWR8-NEXT:    xor r5, r3, r4
 ; CHECK-PWR8-NEXT:    cmpw r4, r3
-; CHECK-PWR8-NEXT:    srwi r6, r6, 5
-; CHECK-PWR8-NEXT:    xori r3, r6, 1
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    cntlzw r5, r5
+; CHECK-PWR8-NEXT:    srwi r5, r5, 5
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp sgt i32 %b, %a
   %t2 = icmp ne i32 %a, %b
@@ -801,8 +801,8 @@ define i64 @setbsw3(i32 %a, i32 %b) {
 ;
 ; CHECK-PWR8-LABEL: setbsw3:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    li r5, 1
 ; CHECK-PWR8-NEXT:    cmpw r4, r3
+; CHECK-PWR8-NEXT:    li r5, 1
 ; CHECK-PWR8-NEXT:    li r6, -1
 ; CHECK-PWR8-NEXT:    iselgt r5, r6, r5
 ; CHECK-PWR8-NEXT:    cmplw r3, r4
@@ -825,13 +825,13 @@ define i64 @setbsh1(i16 signext %a, i16 signext %b) {
 ;
 ; CHECK-PWR8-LABEL: setbsh1:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r4, r3
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    cntlzw r6, r6
+; CHECK-PWR8-NEXT:    xor r5, r4, r3
 ; CHECK-PWR8-NEXT:    cmpw r3, r4
-; CHECK-PWR8-NEXT:    srwi r6, r6, 5
-; CHECK-PWR8-NEXT:    xori r3, r6, 1
-; CHECK-PWR8-NEXT:    isellt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    cntlzw r5, r5
+; CHECK-PWR8-NEXT:    srwi r5, r5, 5
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i16 %a, %b
   %t2 = icmp ne i16 %b, %a
@@ -850,13 +850,13 @@ define i64 @setbsh2(i16 signext %a, i16 signext %b) {
 ;
 ; CHECK-PWR8-LABEL: setbsh2:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r4, r3
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    cntlzw r6, r6
+; CHECK-PWR8-NEXT:    xor r5, r4, r3
 ; CHECK-PWR8-NEXT:    cmpw r4, r3
-; CHECK-PWR8-NEXT:    srwi r6, r6, 5
-; CHECK-PWR8-NEXT:    xori r3, r6, 1
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    cntlzw r5, r5
+; CHECK-PWR8-NEXT:    srwi r5, r5, 5
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp sgt i16 %b, %a
   %t2 = icmp ne i16 %b, %a
@@ -879,11 +879,11 @@ define i64 @setbsc1(i8 %a, i8 %b) {
 ; CHECK-PWR8:       # %bb.0:
 ; CHECK-PWR8-NEXT:    extsb r4, r4
 ; CHECK-PWR8-NEXT:    extsb r3, r3
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    sub r6, r4, r3
+; CHECK-PWR8-NEXT:    sub r5, r4, r3
 ; CHECK-PWR8-NEXT:    cmpw r3, r4
-; CHECK-PWR8-NEXT:    rldicl r3, r6, 1, 63
-; CHECK-PWR8-NEXT:    isellt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    rldicl r5, r5, 1, 63
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i8 %a, %b
   %t2 = icmp sgt i8 %a, %b
@@ -906,11 +906,11 @@ define i64 @setbsc2(i8 %a, i8 %b) {
 ; CHECK-PWR8:       # %bb.0:
 ; CHECK-PWR8-NEXT:    extsb r4, r4
 ; CHECK-PWR8-NEXT:    extsb r3, r3
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    sub r6, r4, r3
+; CHECK-PWR8-NEXT:    sub r5, r4, r3
 ; CHECK-PWR8-NEXT:    cmpw r4, r3
-; CHECK-PWR8-NEXT:    rldicl r3, r6, 1, 63
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    rldicl r5, r5, 1, 63
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp sgt i8 %b, %a
   %t2 = icmp sgt i8 %a, %b
@@ -935,13 +935,13 @@ define i64 @setbsc3(i4 %a, i4 %b) {
 ; CHECK-PWR8:       # %bb.0:
 ; CHECK-PWR8-NEXT:    slwi r4, r4, 28
 ; CHECK-PWR8-NEXT:    slwi r3, r3, 28
-; CHECK-PWR8-NEXT:    li r5, -1
 ; CHECK-PWR8-NEXT:    srawi r4, r4, 28
 ; CHECK-PWR8-NEXT:    srawi r3, r3, 28
-; CHECK-PWR8-NEXT:    sub r6, r4, r3
 ; CHECK-PWR8-NEXT:    cmpw r3, r4
-; CHECK-PWR8-NEXT:    rldicl r3, r6, 1, 63
-; CHECK-PWR8-NEXT:    isellt r3, r5, r3
+; CHECK-PWR8-NEXT:    sub r5, r4, r3
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    rldicl r5, r5, 1, 63
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i4 %a, %b
   %t2 = icmp slt i4 %b, %a
@@ -962,12 +962,12 @@ define i64 @setbud1(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setbud1:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    subc r6, r4, r3
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    subfe r6, r4, r4
+; CHECK-PWR8-NEXT:    subc r5, r4, r3
 ; CHECK-PWR8-NEXT:    cmpld r4, r3
-; CHECK-PWR8-NEXT:    neg r3, r6
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    subfe r5, r4, r4
+; CHECK-PWR8-NEXT:    neg r5, r5
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp ugt i64 %b, %a
   %t2 = icmp ult i64 %b, %a
@@ -986,12 +986,12 @@ define i64 @setbud2(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setbud2:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r3, r4
-; CHECK-PWR8-NEXT:    li r5, 1
-; CHECK-PWR8-NEXT:    subfic r6, r6, 0
+; CHECK-PWR8-NEXT:    xor r5, r3, r4
 ; CHECK-PWR8-NEXT:    cmpld r3, r4
-; CHECK-PWR8-NEXT:    subfe r3, r6, r6
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, 1
+; CHECK-PWR8-NEXT:    subfic r5, r5, 0
+; CHECK-PWR8-NEXT:    subfe r5, r5, r5
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp ugt i64 %a, %b
   %t2 = icmp ne i64 %a, %b
@@ -1010,10 +1010,10 @@ define i64 @setbud3(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setbud3:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    li r5, 1
 ; CHECK-PWR8-NEXT:    cmpld r4, r3
-; CHECK-PWR8-NEXT:    li r3, -1
-; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
+; CHECK-PWR8-NEXT:    li r3, 1
+; CHECK-PWR8-NEXT:    li r4, -1
+; CHECK-PWR8-NEXT:    iselgt r3, r4, r3
 ; CHECK-PWR8-NEXT:    iseleq r3, 0, r3
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp eq i64 %b, %a
@@ -1033,14 +1033,14 @@ define i64 @setbuw1(i32 %a, i32 %b) {
 ;
 ; CHECK-PWR8-LABEL: setbuw1:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r3, r4
-; CHECK-PWR8-NEXT:    li r5, 1
-; CHECK-PWR8-NEXT:    cntlzw r6, r6
+; CHECK-PWR8-NEXT:    xor r5, r3, r4
 ; CHECK-PWR8-NEXT:    cmplw r4, r3
-; CHECK-PWR8-NEXT:    srwi r6, r6, 5
-; CHECK-PWR8-NEXT:    xori r6, r6, 1
-; CHECK-PWR8-NEXT:    neg r3, r6
-; CHECK-PWR8-NEXT:    isellt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, 1
+; CHECK-PWR8-NEXT:    cntlzw r5, r5
+; CHECK-PWR8-NEXT:    srwi r5, r5, 5
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    neg r5, r5
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp ult i32 %b, %a
   %t2 = icmp ne i32 %a, %b
@@ -1059,14 +1059,14 @@ define i64 @setbuw2(i32 %a, i32 %b) {
 ;
 ; CHECK-PWR8-LABEL: setbuw2:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r4, r3
-; CHECK-PWR8-NEXT:    li r5, 1
-; CHECK-PWR8-NEXT:    cntlzw r6, r6
+; CHECK-PWR8-NEXT:    xor r5, r4, r3
 ; CHECK-PWR8-NEXT:    cmplw r3, r4
-; CHECK-PWR8-NEXT:    srwi r6, r6, 5
-; CHECK-PWR8-NEXT:    xori r6, r6, 1
-; CHECK-PWR8-NEXT:    neg r3, r6
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, 1
+; CHECK-PWR8-NEXT:    cntlzw r5, r5
+; CHECK-PWR8-NEXT:    srwi r5, r5, 5
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    neg r5, r5
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp ugt i32 %a, %b
   %t2 = icmp ne i32 %b, %a
@@ -1089,14 +1089,14 @@ define i64 @setbuh(i16 %a, i16 %b) {
 ; CHECK-PWR8:       # %bb.0:
 ; CHECK-PWR8-NEXT:    clrlwi r3, r3, 16
 ; CHECK-PWR8-NEXT:    clrlwi r4, r4, 16
-; CHECK-PWR8-NEXT:    li r5, 1
-; CHECK-PWR8-NEXT:    xor r6, r4, r3
+; CHECK-PWR8-NEXT:    xor r5, r4, r3
 ; CHECK-PWR8-NEXT:    cmplw r4, r3
-; CHECK-PWR8-NEXT:    cntlzw r6, r6
-; CHECK-PWR8-NEXT:    srwi r6, r6, 5
-; CHECK-PWR8-NEXT:    xori r6, r6, 1
-; CHECK-PWR8-NEXT:    neg r3, r6
-; CHECK-PWR8-NEXT:    isellt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, 1
+; CHECK-PWR8-NEXT:    cntlzw r5, r5
+; CHECK-PWR8-NEXT:    srwi r5, r5, 5
+; CHECK-PWR8-NEXT:    xori r5, r5, 1
+; CHECK-PWR8-NEXT:    neg r5, r5
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp ult i16 %b, %a
   %t2 = icmp ne i16 %b, %a
@@ -1119,13 +1119,13 @@ define i64 @setbuc(i8 %a, i8 %b) {
 ; CHECK-PWR8:       # %bb.0:
 ; CHECK-PWR8-NEXT:    clrlwi r3, r3, 24
 ; CHECK-PWR8-NEXT:    clrlwi r4, r4, 24
-; CHECK-PWR8-NEXT:    li r5, 1
-; CHECK-PWR8-NEXT:    clrldi r6, r3, 32
-; CHECK-PWR8-NEXT:    clrldi r7, r4, 32
-; CHECK-PWR8-NEXT:    sub r6, r6, r7
+; CHECK-PWR8-NEXT:    clrldi r5, r3, 32
+; CHECK-PWR8-NEXT:    clrldi r6, r4, 32
 ; CHECK-PWR8-NEXT:    cmplw r3, r4
-; CHECK-PWR8-NEXT:    sradi r6, r6, 63
-; CHECK-PWR8-NEXT:    iselgt r3, r5, r6
+; CHECK-PWR8-NEXT:    li r3, 1
+; CHECK-PWR8-NEXT:    sub r5, r5, r6
+; CHECK-PWR8-NEXT:    sradi r5, r5, 63
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp ugt i8 %a, %b
   %t2 = icmp ult i8 %a, %b
@@ -1147,12 +1147,12 @@ define i64 @setbf1(float %a, float %b) {
 ; CHECK-PWR8-LABEL: setbf1:
 ; CHECK-PWR8:       # %bb.0:
 ; CHECK-PWR8-NEXT:    fcmpu cr0, f2, f1
-; CHECK-PWR8-NEXT:    fcmpu cr1, f1, f2
 ; CHECK-PWR8-NEXT:    li r3, 0
 ; CHECK-PWR8-NEXT:    li r4, 1
 ; CHECK-PWR8-NEXT:    isellt r3, r4, r3
+; CHECK-PWR8-NEXT:    fcmpu cr0, f1, f2
 ; CHECK-PWR8-NEXT:    li r4, -1
-; CHECK-PWR8-NEXT:    isel r3, r4, r3, 4*cr1+lt
+; CHECK-PWR8-NEXT:    isellt r3, r4, r3
 ; CHECK-PWR8-NEXT:    blr
   %t1 = fcmp nnan olt float %a, %b
   %t2 = fcmp nnan olt float %b, %a
@@ -1219,12 +1219,12 @@ define i64 @setbdf2(double %a, double %b) {
 ; CHECK-PWR8-LABEL: setbdf2:
 ; CHECK-PWR8:       # %bb.0:
 ; CHECK-PWR8-NEXT:    fcmpu cr0, f2, f1
-; CHECK-PWR8-NEXT:    xscmpudp cr1, f2, f1
 ; CHECK-PWR8-NEXT:    li r3, 0
 ; CHECK-PWR8-NEXT:    li r4, -1
 ; CHECK-PWR8-NEXT:    iselgt r3, r4, r3
+; CHECK-PWR8-NEXT:    xscmpudp cr0, f2, f1
 ; CHECK-PWR8-NEXT:    li r4, 1
-; CHECK-PWR8-NEXT:    isel r3, r4, r3, 4*cr1+lt
+; CHECK-PWR8-NEXT:    isellt r3, r4, r3
 ; CHECK-PWR8-NEXT:    blr
   %t1 = fcmp nnan olt double %b, %a
   %t2 = fcmp nnan ogt double %b, %a
@@ -1260,18 +1260,18 @@ define i64 @setbf128(fp128 %a, fp128 %b) {
 ; CHECK-PWR8-NEXT:    bl __ltkf2
 ; CHECK-PWR8-NEXT:    nop
 ; CHECK-PWR8-NEXT:    vmr v2, v30
-; CHECK-PWR8-NEXT:    srawi r30, r3, 31
 ; CHECK-PWR8-NEXT:    vmr v3, v31
+; CHECK-PWR8-NEXT:    srawi r30, r3, 31
 ; CHECK-PWR8-NEXT:    bl __gtkf2
 ; CHECK-PWR8-NEXT:    nop
-; CHECK-PWR8-NEXT:    li r4, 1
-; CHECK-PWR8-NEXT:    cmpwi r3, 0
-; CHECK-PWR8-NEXT:    iselgt r3, r4, r30
 ; CHECK-PWR8-NEXT:    li r4, 64
-; CHECK-PWR8-NEXT:    ld r30, 80(r1) # 8-byte Folded Reload
+; CHECK-PWR8-NEXT:    cmpwi r3, 0
+; CHECK-PWR8-NEXT:    li r3, 1
 ; CHECK-PWR8-NEXT:    lvx v31, r1, r4 # 16-byte Folded Reload
 ; CHECK-PWR8-NEXT:    li r4, 48
 ; CHECK-PWR8-NEXT:    lvx v30, r1, r4 # 16-byte Folded Reload
+; CHECK-PWR8-NEXT:    iselgt r3, r3, r30
+; CHECK-PWR8-NEXT:    ld r30, 80(r1) # 8-byte Folded Reload
 ; CHECK-PWR8-NEXT:    addi r1, r1, 96
 ; CHECK-PWR8-NEXT:    ld r0, 16(r1)
 ; CHECK-PWR8-NEXT:    mtlr r0
@@ -1298,12 +1298,12 @@ define i64 @setbn1(i64 %a, i64 %b) {
 ;
 ; CHECK-PWR8-LABEL: setbn1:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    xor r6, r3, r4
-; CHECK-PWR8-NEXT:    li r5, -1
-; CHECK-PWR8-NEXT:    cntlzd r6, r6
+; CHECK-PWR8-NEXT:    xor r5, r3, r4
 ; CHECK-PWR8-NEXT:    cmpd r3, r4
-; CHECK-PWR8-NEXT:    rldicl r3, r6, 58, 63
-; CHECK-PWR8-NEXT:    isellt r3, r5, r3
+; CHECK-PWR8-NEXT:    li r3, -1
+; CHECK-PWR8-NEXT:    cntlzd r5, r5
+; CHECK-PWR8-NEXT:    rldicl r5, r5, 58, 63
+; CHECK-PWR8-NEXT:    isellt r3, r3, r5
 ; CHECK-PWR8-NEXT:    blr
   %t1 = icmp slt i64 %a, %b
   %t2 = icmp eq i64 %a, %b
@@ -1327,12 +1327,12 @@ define i64 @setbn2(double %a, double %b) {
 ; CHECK-PWR8-LABEL: setbn2:
 ; CHECK-PWR8:       # %bb.0:
 ; CHECK-PWR8-NEXT:    fcmpu cr0, f1, f2
-; CHECK-PWR8-NEXT:    xscmpudp cr1, f1, f2
 ; CHECK-PWR8-NEXT:    li r3, 1
 ; CHECK-PWR8-NEXT:    li r4, -1
 ; CHECK-PWR8-NEXT:    cror 4*cr5+lt, un, eq
+; CHECK-PWR8-NEXT:    xscmpudp cr0, f1, f2
 ; CHECK-PWR8-NEXT:    isel r3, 0, r3, 4*cr5+lt
-; CHECK-PWR8-NEXT:    isel r3, r4, r3, 4*cr1+lt
+; CHECK-PWR8-NEXT:    isellt r3, r4, r3
 ; CHECK-PWR8-NEXT:    blr
   %t1 = fcmp olt double %a, %b
   %t2 = fcmp one double %a, %b
@@ -1357,8 +1357,8 @@ define i64 @setbn3(float %a, float %b) {
 ; CHECK-PWR8-NEXT:    fcmpu cr0, f1, f2
 ; CHECK-PWR8-NEXT:    li r3, 1
 ; CHECK-PWR8-NEXT:    li r4, -1
-; CHECK-PWR8-NEXT:    cror 4*cr5+lt, lt, un
 ; CHECK-PWR8-NEXT:    iseleq r3, 0, r3
+; CHECK-PWR8-NEXT:    cror 4*cr5+lt, lt, un
 ; CHECK-PWR8-NEXT:    isel r3, r4, r3, 4*cr5+lt
 ; CHECK-PWR8-NEXT:    blr
   %t1 = fcmp ult float %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
index 342a9044b9bcc5a..a0008e884e27fab 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -79,9 +79,9 @@ define <2 x i64> @sub_absv_64(<2 x i64> %a, <2 x i64> %b) local_unnamed_addr {
 ;
 ; CHECK-PWR8-LABEL: sub_absv_64:
 ; CHECK-PWR8:       # %bb.0: # %entry
-; CHECK-PWR8-NEXT:    xxlxor v4, v4, v4
 ; CHECK-PWR8-NEXT:    vsubudm v2, v2, v3
-; CHECK-PWR8-NEXT:    vsubudm v3, v4, v2
+; CHECK-PWR8-NEXT:    xxlxor v3, v3, v3
+; CHECK-PWR8-NEXT:    vsubudm v3, v3, v2
 ; CHECK-PWR8-NEXT:    vmaxsd v2, v2, v3
 ; CHECK-PWR8-NEXT:    blr
 ;
@@ -125,13 +125,21 @@ define <4 x i32> @sub_absv_32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr {
 ; CHECK-PWR9-NEXT:    vabsduw v2, v2, v3
 ; CHECK-PWR9-NEXT:    blr
 ;
-; CHECK-PWR78-LABEL: sub_absv_32:
-; CHECK-PWR78:       # %bb.0: # %entry
-; CHECK-PWR78-NEXT:    xxlxor v4, v4, v4
-; CHECK-PWR78-NEXT:    vsubuwm v2, v2, v3
-; CHECK-PWR78-NEXT:    vsubuwm v3, v4, v2
-; CHECK-PWR78-NEXT:    vmaxsw v2, v2, v3
-; CHECK-PWR78-NEXT:    blr
+; CHECK-PWR8-LABEL: sub_absv_32:
+; CHECK-PWR8:       # %bb.0: # %entry
+; CHECK-PWR8-NEXT:    vsubuwm v2, v2, v3
+; CHECK-PWR8-NEXT:    xxlxor v3, v3, v3
+; CHECK-PWR8-NEXT:    vsubuwm v3, v3, v2
+; CHECK-PWR8-NEXT:    vmaxsw v2, v2, v3
+; CHECK-PWR8-NEXT:    blr
+;
+; CHECK-PWR7-LABEL: sub_absv_32:
+; CHECK-PWR7:       # %bb.0: # %entry
+; CHECK-PWR7-NEXT:    xxlxor v4, v4, v4
+; CHECK-PWR7-NEXT:    vsubuwm v2, v2, v3
+; CHECK-PWR7-NEXT:    vsubuwm v3, v4, v2
+; CHECK-PWR7-NEXT:    vmaxsw v2, v2, v3
+; CHECK-PWR7-NEXT:    blr
 entry:
   %0 = sub nsw <4 x i32> %a, %b
   %1 = icmp sgt <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -149,13 +157,21 @@ define <8 x i16> @sub_absv_16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr {
 ; CHECK-PWR9-NEXT:    vmaxsh v2, v2, v3
 ; CHECK-PWR9-NEXT:    blr
 ;
-; CHECK-PWR78-LABEL: sub_absv_16:
-; CHECK-PWR78:       # %bb.0: # %entry
-; CHECK-PWR78-NEXT:    xxlxor v4, v4, v4
-; CHECK-PWR78-NEXT:    vsubuhm v2, v2, v3
-; CHECK-PWR78-NEXT:    vsubuhm v3, v4, v2
-; CHECK-PWR78-NEXT:    vmaxsh v2, v2, v3
-; CHECK-PWR78-NEXT:    blr
+; CHECK-PWR8-LABEL: sub_absv_16:
+; CHECK-PWR8:       # %bb.0: # %entry
+; CHECK-PWR8-NEXT:    vsubuhm v2, v2, v3
+; CHECK-PWR8-NEXT:    xxlxor v3, v3, v3
+; CHECK-PWR8-NEXT:    vsubuhm v3, v3, v2
+; CHECK-PWR8-NEXT:    vmaxsh v2, v2, v3
+; CHECK-PWR8-NEXT:    blr
+;
+; CHECK-PWR7-LABEL: sub_absv_16:
+; CHECK-PWR7:       # %bb.0: # %entry
+; CHECK-PWR7-NEXT:    xxlxor v4, v4, v4
+; CHECK-PWR7-NEXT:    vsubuhm v2, v2, v3
+; CHECK-PWR7-NEXT:    vsubuhm v3, v4, v2
+; CHECK-PWR7-NEXT:    vmaxsh v2, v2, v3
+; CHECK-PWR7-NEXT:    blr
 entry:
   %0 = sub nsw <8 x i16> %a, %b
   %1 = icmp sgt <8 x i16> %0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -173,13 +189,21 @@ define <16 x i8> @sub_absv_8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr {
 ; CHECK-PWR9-NEXT:    vmaxsb v2, v2, v3
 ; CHECK-PWR9-NEXT:    blr
 ;
-; CHECK-PWR78-LABEL: sub_absv_8:
-; CHECK-PWR78:       # %bb.0: # %entry
-; CHECK-PWR78-NEXT:    xxlxor v4, v4, v4
-; CHECK-PWR78-NEXT:    vsububm v2, v2, v3
-; CHECK-PWR78-NEXT:    vsububm v3, v4, v2
-; CHECK-PWR78-NEXT:    vmaxsb v2, v2, v3
-; CHECK-PWR78-NEXT:    blr
+; CHECK-PWR8-LABEL: sub_absv_8:
+; CHECK-PWR8:       # %bb.0: # %entry
+; CHECK-PWR8-NEXT:    vsububm v2, v2, v3
+; CHECK-PWR8-NEXT:    xxlxor v3, v3, v3
+; CHECK-PWR8-NEXT:    vsububm v3, v3, v2
+; CHECK-PWR8-NEXT:    vmaxsb v2, v2, v3
+; CHECK-PWR8-NEXT:    blr
+;
+; CHECK-PWR7-LABEL: sub_absv_8:
+; CHECK-PWR7:       # %bb.0: # %entry
+; CHECK-PWR7-NEXT:    xxlxor v4, v4, v4
+; CHECK-PWR7-NEXT:    vsububm v2, v2, v3
+; CHECK-PWR7-NEXT:    vsububm v3, v4, v2
+; CHECK-PWR7-NEXT:    vmaxsb v2, v2, v3
+; CHECK-PWR7-NEXT:    blr
 entry:
   %0 = sub nsw <16 x i8> %a, %b
   %1 = icmp sgt <16 x i8> %0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -215,12 +239,12 @@ define <8 x i16> @sub_absv_16_ext(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr
 ;
 ; CHECK-PWR8-LABEL: sub_absv_16_ext:
 ; CHECK-PWR8:       # %bb.0: # %entry
-; CHECK-PWR8-NEXT:    vmrglh v5, v2, v2
 ; CHECK-PWR8-NEXT:    vspltisw v4, 8
+; CHECK-PWR8-NEXT:    vmrglh v5, v2, v2
+; CHECK-PWR8-NEXT:    vadduwm v4, v4, v4
 ; CHECK-PWR8-NEXT:    vmrghh v2, v2, v2
 ; CHECK-PWR8-NEXT:    vmrglh v0, v3, v3
 ; CHECK-PWR8-NEXT:    vmrghh v3, v3, v3
-; CHECK-PWR8-NEXT:    vadduwm v4, v4, v4
 ; CHECK-PWR8-NEXT:    vslw v5, v5, v4
 ; CHECK-PWR8-NEXT:    vslw v2, v2, v4
 ; CHECK-PWR8-NEXT:    vslw v0, v0, v4
@@ -230,11 +254,11 @@ define <8 x i16> @sub_absv_16_ext(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr
 ; CHECK-PWR8-NEXT:    vsraw v0, v0, v4
 ; CHECK-PWR8-NEXT:    vsraw v3, v3, v4
 ; CHECK-PWR8-NEXT:    xxlxor v4, v4, v4
-; CHECK-PWR8-NEXT:    vsubuwm v5, v5, v0
 ; CHECK-PWR8-NEXT:    vsubuwm v2, v2, v3
-; CHECK-PWR8-NEXT:    vsubuwm v3, v4, v5
+; CHECK-PWR8-NEXT:    vsubuwm v3, v5, v0
+; CHECK-PWR8-NEXT:    vsubuwm v5, v4, v3
 ; CHECK-PWR8-NEXT:    vsubuwm v4, v4, v2
-; CHECK-PWR8-NEXT:    vmaxsw v3, v5, v3
+; CHECK-PWR8-NEXT:    vmaxsw v3, v3, v5
 ; CHECK-PWR8-NEXT:    vmaxsw v2, v2, v4
 ; CHECK-PWR8-NEXT:    vpkuwum v2, v2, v3
 ; CHECK-PWR8-NEXT:    blr
@@ -667,208 +691,182 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR8-LABEL: sub_absv_8_ext:
 ; CHECK-PWR8:       # %bb.0: # %entry
 ; CHECK-PWR8-NEXT:    xxswapd vs0, v2
-; CHECK-PWR8-NEXT:    mfvsrd r5, v2
-; CHECK-PWR8-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    mfvsrd r6, v3
 ; CHECK-PWR8-NEXT:    xxswapd vs1, v3
-; CHECK-PWR8-NEXT:    clrldi r3, r5, 56
-; CHECK-PWR8-NEXT:    rldicl r7, r5, 56, 56
-; CHECK-PWR8-NEXT:    clrldi r4, r6, 56
-; CHECK-PWR8-NEXT:    rldicl r8, r6, 56, 56
-; CHECK-PWR8-NEXT:    mffprd r26, f0
-; CHECK-PWR8-NEXT:    clrlwi r3, r3, 24
-; CHECK-PWR8-NEXT:    clrlwi r7, r7, 24
 ; CHECK-PWR8-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    rldicl r11, r5, 40, 56
-; CHECK-PWR8-NEXT:    rldicl r12, r6, 40, 56
-; CHECK-PWR8-NEXT:    clrlwi r4, r4, 24
-; CHECK-PWR8-NEXT:    clrlwi r8, r8, 24
-; CHECK-PWR8-NEXT:    rldicl r9, r5, 48, 56
-; CHECK-PWR8-NEXT:    rldicl r10, r6, 48, 56
-; CHECK-PWR8-NEXT:    sub r4, r3, r4
-; CHECK-PWR8-NEXT:    clrlwi r11, r11, 24
-; CHECK-PWR8-NEXT:    rldicl r3, r26, 16, 56
-; CHECK-PWR8-NEXT:    clrlwi r12, r12, 24
-; CHECK-PWR8-NEXT:    sub r7, r7, r8
-; CHECK-PWR8-NEXT:    clrlwi r9, r9, 24
-; CHECK-PWR8-NEXT:    clrlwi r10, r10, 24
-; CHECK-PWR8-NEXT:    std r24, -64(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    mffprd r24, f1
-; CHECK-PWR8-NEXT:    rldicl r0, r5, 32, 56
-; CHECK-PWR8-NEXT:    rldicl r30, r6, 32, 56
-; CHECK-PWR8-NEXT:    std r3, -160(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    sub r11, r11, r12
-; CHECK-PWR8-NEXT:    sub r9, r9, r10
-; CHECK-PWR8-NEXT:    srawi r3, r4, 31
-; CHECK-PWR8-NEXT:    srawi r12, r7, 31
-; CHECK-PWR8-NEXT:    clrlwi r10, r0, 24
-; CHECK-PWR8-NEXT:    clrlwi r0, r30, 24
-; CHECK-PWR8-NEXT:    xor r4, r4, r3
-; CHECK-PWR8-NEXT:    xor r7, r7, r12
-; CHECK-PWR8-NEXT:    sub r10, r10, r0
-; CHECK-PWR8-NEXT:    std r20, -96(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    std r21, -88(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    sub r3, r4, r3
-; CHECK-PWR8-NEXT:    srawi r4, r9, 31
-; CHECK-PWR8-NEXT:    sub r7, r7, r12
-; CHECK-PWR8-NEXT:    std r22, -80(r1) # 8-byte Folded Spill
+; CHECK-PWR8-NEXT:    std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-PWR8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-PWR8-NEXT:    mffprd r5, f0
+; CHECK-PWR8-NEXT:    mffprd r11, f1
+; CHECK-PWR8-NEXT:    std r25, -56(r1) # 8-byte Folded Spill
+; CHECK-PWR8-NEXT:    std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-PWR8-NEXT:    clrldi r3, r5, 56
+; CHECK-PWR8-NEXT:    clrldi r4, r11, 56
+; CHECK-PWR8-NEXT:    rldicl r6, r5, 56, 56
+; CHECK-PWR8-NEXT:    rldicl r7, r11, 56, 56
+; CHECK-PWR8-NEXT:    rldicl r10, r5, 40, 56
+; CHECK-PWR8-NEXT:    rldicl r12, r11, 40, 56
+; CHECK-PWR8-NEXT:    rldicl r8, r5, 48, 56
+; CHECK-PWR8-NEXT:    rldicl r9, r11, 48, 56
 ; CHECK-PWR8-NEXT:    rldicl r29, r5, 24, 56
-; CHECK-PWR8-NEXT:    rldicl r28, r6, 24, 56
-; CHECK-PWR8-NEXT:    xor r9, r9, r4
-; CHECK-PWR8-NEXT:    mtvsrd v3, r7
+; CHECK-PWR8-NEXT:    rldicl r28, r11, 24, 56
 ; CHECK-PWR8-NEXT:    rldicl r27, r5, 16, 56
-; CHECK-PWR8-NEXT:    rldicl r25, r6, 16, 56
-; CHECK-PWR8-NEXT:    clrlwi r30, r29, 24
-; CHECK-PWR8-NEXT:    clrlwi r29, r28, 24
-; CHECK-PWR8-NEXT:    mtvsrd v2, r3
-; CHECK-PWR8-NEXT:    sub r4, r9, r4
-; CHECK-PWR8-NEXT:    srawi r7, r10, 31
-; CHECK-PWR8-NEXT:    srawi r3, r11, 31
-; CHECK-PWR8-NEXT:    clrlwi r9, r27, 24
-; CHECK-PWR8-NEXT:    clrlwi r12, r25, 24
-; CHECK-PWR8-NEXT:    sub r0, r30, r29
-; CHECK-PWR8-NEXT:    mtvsrd v4, r4
-; CHECK-PWR8-NEXT:    std r23, -72(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    xor r10, r10, r7
-; CHECK-PWR8-NEXT:    xor r11, r11, r3
-; CHECK-PWR8-NEXT:    sub r9, r9, r12
-; CHECK-PWR8-NEXT:    std r18, -112(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    std r19, -104(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    vmrghb v2, v3, v2
-; CHECK-PWR8-NEXT:    sub r7, r10, r7
+; CHECK-PWR8-NEXT:    rldicl r0, r5, 32, 56
+; CHECK-PWR8-NEXT:    rldicl r30, r11, 32, 56
 ; CHECK-PWR8-NEXT:    rldicl r5, r5, 8, 56
-; CHECK-PWR8-NEXT:    sub r3, r11, r3
-; CHECK-PWR8-NEXT:    rldicl r6, r6, 8, 56
-; CHECK-PWR8-NEXT:    srawi r4, r0, 31
-; CHECK-PWR8-NEXT:    mtvsrd v0, r7
-; CHECK-PWR8-NEXT:    std r16, -128(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    std r17, -120(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    srawi r7, r9, 31
-; CHECK-PWR8-NEXT:    clrldi r23, r26, 56
-; CHECK-PWR8-NEXT:    mtvsrd v5, r3
-; CHECK-PWR8-NEXT:    clrlwi r3, r5, 24
-; CHECK-PWR8-NEXT:    clrlwi r5, r6, 24
-; CHECK-PWR8-NEXT:    clrldi r22, r24, 56
-; CHECK-PWR8-NEXT:    rldicl r21, r26, 56, 56
-; CHECK-PWR8-NEXT:    xor r10, r0, r4
-; CHECK-PWR8-NEXT:    xor r9, r9, r7
-; CHECK-PWR8-NEXT:    rldicl r20, r24, 56, 56
-; CHECK-PWR8-NEXT:    rldicl r19, r26, 48, 56
-; CHECK-PWR8-NEXT:    sub r3, r3, r5
-; CHECK-PWR8-NEXT:    sub r4, r10, r4
-; CHECK-PWR8-NEXT:    sub r7, r9, r7
-; CHECK-PWR8-NEXT:    clrlwi r9, r23, 24
-; CHECK-PWR8-NEXT:    rldicl r18, r24, 48, 56
-; CHECK-PWR8-NEXT:    clrlwi r10, r22, 24
-; CHECK-PWR8-NEXT:    clrlwi r11, r21, 24
-; CHECK-PWR8-NEXT:    clrlwi r12, r20, 24
-; CHECK-PWR8-NEXT:    mtvsrd v1, r4
-; CHECK-PWR8-NEXT:    std r14, -144(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    std r15, -136(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    rldicl r17, r26, 40, 56
-; CHECK-PWR8-NEXT:    rldicl r16, r24, 40, 56
-; CHECK-PWR8-NEXT:    sub r9, r9, r10
-; CHECK-PWR8-NEXT:    sub r10, r11, r12
-; CHECK-PWR8-NEXT:    mtvsrd v3, r7
-; CHECK-PWR8-NEXT:    srawi r4, r3, 31
-; CHECK-PWR8-NEXT:    clrlwi r11, r19, 24
-; CHECK-PWR8-NEXT:    clrlwi r12, r18, 24
-; CHECK-PWR8-NEXT:    vmrghb v4, v5, v4
-; CHECK-PWR8-NEXT:    std r31, -8(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    xor r3, r3, r4
-; CHECK-PWR8-NEXT:    sub r7, r11, r12
-; CHECK-PWR8-NEXT:    clrlwi r11, r17, 24
-; CHECK-PWR8-NEXT:    clrlwi r12, r16, 24
-; CHECK-PWR8-NEXT:    vmrghb v0, v1, v0
-; CHECK-PWR8-NEXT:    std r2, -152(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    rldicl r15, r26, 32, 56
-; CHECK-PWR8-NEXT:    rldicl r14, r24, 32, 56
-; CHECK-PWR8-NEXT:    sub r3, r3, r4
-; CHECK-PWR8-NEXT:    sub r11, r11, r12
-; CHECK-PWR8-NEXT:    srawi r4, r9, 31
-; CHECK-PWR8-NEXT:    srawi r12, r10, 31
-; CHECK-PWR8-NEXT:    clrlwi r0, r15, 24
-; CHECK-PWR8-NEXT:    clrlwi r30, r14, 24
-; CHECK-PWR8-NEXT:    mtvsrd v5, r3
-; CHECK-PWR8-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    xor r9, r9, r4
-; CHECK-PWR8-NEXT:    xor r10, r10, r12
-; CHECK-PWR8-NEXT:    sub r3, r0, r30
-; CHECK-PWR8-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    ld r23, -72(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    ld r22, -80(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    srawi r28, r11, 31
-; CHECK-PWR8-NEXT:    sub r4, r9, r4
-; CHECK-PWR8-NEXT:    sub r10, r10, r12
-; CHECK-PWR8-NEXT:    vmrghb v3, v5, v3
-; CHECK-PWR8-NEXT:    ld r21, -88(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    ld r20, -96(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    srawi r29, r7, 31
-; CHECK-PWR8-NEXT:    srawi r9, r3, 31
-; CHECK-PWR8-NEXT:    mtvsrd v5, r4
-; CHECK-PWR8-NEXT:    xor r4, r11, r28
-; CHECK-PWR8-NEXT:    ld r19, -104(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    ld r18, -112(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    mtvsrd v1, r10
-; CHECK-PWR8-NEXT:    ld r10, -160(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    rldicl r31, r26, 24, 56
-; CHECK-PWR8-NEXT:    rldicl r2, r24, 24, 56
-; CHECK-PWR8-NEXT:    xor r7, r7, r29
-; CHECK-PWR8-NEXT:    xor r3, r3, r9
-; CHECK-PWR8-NEXT:    rldicl r8, r24, 16, 56
-; CHECK-PWR8-NEXT:    rldicl r6, r26, 8, 56
-; CHECK-PWR8-NEXT:    sub r4, r4, r28
-; CHECK-PWR8-NEXT:    clrlwi r0, r31, 24
-; CHECK-PWR8-NEXT:    clrlwi r30, r2, 24
-; CHECK-PWR8-NEXT:    sub r7, r7, r29
-; CHECK-PWR8-NEXT:    rldicl r5, r24, 8, 56
+; CHECK-PWR8-NEXT:    std r24, -64(r1) # 8-byte Folded Spill
+; CHECK-PWR8-NEXT:    clrlwi r3, r3, 24
+; CHECK-PWR8-NEXT:    clrlwi r4, r4, 24
+; CHECK-PWR8-NEXT:    clrlwi r6, r6, 24
+; CHECK-PWR8-NEXT:    clrlwi r7, r7, 24
 ; CHECK-PWR8-NEXT:    clrlwi r10, r10, 24
+; CHECK-PWR8-NEXT:    clrlwi r12, r12, 24
+; CHECK-PWR8-NEXT:    sub r3, r3, r4
+; CHECK-PWR8-NEXT:    sub r4, r6, r7
+; CHECK-PWR8-NEXT:    sub r7, r10, r12
 ; CHECK-PWR8-NEXT:    clrlwi r8, r8, 24
-; CHECK-PWR8-NEXT:    sub r3, r3, r9
-; CHECK-PWR8-NEXT:    mtvsrd v7, r4
-; CHECK-PWR8-NEXT:    clrlwi r4, r6, 24
+; CHECK-PWR8-NEXT:    clrlwi r9, r9, 24
+; CHECK-PWR8-NEXT:    clrlwi r29, r29, 24
+; CHECK-PWR8-NEXT:    clrlwi r28, r28, 24
+; CHECK-PWR8-NEXT:    sub r6, r8, r9
+; CHECK-PWR8-NEXT:    sub r9, r29, r28
+; CHECK-PWR8-NEXT:    clrlwi r27, r27, 24
+; CHECK-PWR8-NEXT:    clrlwi r0, r0, 24
+; CHECK-PWR8-NEXT:    clrlwi r30, r30, 24
+; CHECK-PWR8-NEXT:    sub r8, r0, r30
 ; CHECK-PWR8-NEXT:    clrlwi r5, r5, 24
-; CHECK-PWR8-NEXT:    sub r0, r0, r30
+; CHECK-PWR8-NEXT:    srawi r10, r3, 31
+; CHECK-PWR8-NEXT:    srawi r12, r4, 31
+; CHECK-PWR8-NEXT:    srawi r28, r9, 31
+; CHECK-PWR8-NEXT:    srawi r0, r6, 31
+; CHECK-PWR8-NEXT:    srawi r29, r8, 31
+; CHECK-PWR8-NEXT:    srawi r30, r7, 31
+; CHECK-PWR8-NEXT:    xor r3, r3, r10
+; CHECK-PWR8-NEXT:    sub r10, r3, r10
+; CHECK-PWR8-NEXT:    rldicl r3, r11, 16, 56
+; CHECK-PWR8-NEXT:    xor r4, r4, r12
+; CHECK-PWR8-NEXT:    rldicl r11, r11, 8, 56
+; CHECK-PWR8-NEXT:    xor r25, r9, r28
+; CHECK-PWR8-NEXT:    sub r9, r4, r12
+; CHECK-PWR8-NEXT:    sub r4, r25, r28
+; CHECK-PWR8-NEXT:    mtvsrd v1, r9
+; CHECK-PWR8-NEXT:    clrlwi r3, r3, 24
+; CHECK-PWR8-NEXT:    mtvsrd v7, r4
+; CHECK-PWR8-NEXT:    sub r3, r27, r3
+; CHECK-PWR8-NEXT:    clrlwi r11, r11, 24
+; CHECK-PWR8-NEXT:    xor r6, r6, r0
+; CHECK-PWR8-NEXT:    sub r5, r5, r11
+; CHECK-PWR8-NEXT:    xor r26, r8, r29
+; CHECK-PWR8-NEXT:    sub r8, r6, r0
+; CHECK-PWR8-NEXT:    mfvsrd r0, v3
+; CHECK-PWR8-NEXT:    xor r7, r7, r30
+; CHECK-PWR8-NEXT:    sub r7, r7, r30
+; CHECK-PWR8-NEXT:    sub r6, r26, r29
 ; CHECK-PWR8-NEXT:    mtvsrd v6, r7
-; CHECK-PWR8-NEXT:    sub r7, r10, r8
-; CHECK-PWR8-NEXT:    ld r2, -152(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    ld r31, -8(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    mtvsrd v8, r3
-; CHECK-PWR8-NEXT:    sub r3, r4, r5
-; CHECK-PWR8-NEXT:    srawi r12, r0, 31
+; CHECK-PWR8-NEXT:    clrldi r30, r0, 56
+; CHECK-PWR8-NEXT:    rldicl r29, r0, 56, 56
+; CHECK-PWR8-NEXT:    rldicl r28, r0, 48, 56
+; CHECK-PWR8-NEXT:    rldicl r27, r0, 40, 56
+; CHECK-PWR8-NEXT:    rldicl r26, r0, 32, 56
+; CHECK-PWR8-NEXT:    rldicl r25, r0, 24, 56
+; CHECK-PWR8-NEXT:    rldicl r24, r0, 16, 56
+; CHECK-PWR8-NEXT:    rldicl r0, r0, 8, 56
+; CHECK-PWR8-NEXT:    srawi r12, r3, 31
+; CHECK-PWR8-NEXT:    srawi r11, r5, 31
+; CHECK-PWR8-NEXT:    clrlwi r30, r30, 24
+; CHECK-PWR8-NEXT:    clrlwi r29, r29, 24
+; CHECK-PWR8-NEXT:    clrlwi r28, r28, 24
+; CHECK-PWR8-NEXT:    clrlwi r27, r27, 24
+; CHECK-PWR8-NEXT:    clrlwi r26, r26, 24
+; CHECK-PWR8-NEXT:    clrlwi r25, r25, 24
+; CHECK-PWR8-NEXT:    clrlwi r24, r24, 24
+; CHECK-PWR8-NEXT:    clrlwi r0, r0, 24
+; CHECK-PWR8-NEXT:    xor r3, r3, r12
+; CHECK-PWR8-NEXT:    sub r3, r3, r12
+; CHECK-PWR8-NEXT:    mfvsrd r12, v2
+; CHECK-PWR8-NEXT:    xor r5, r5, r11
+; CHECK-PWR8-NEXT:    sub r5, r5, r11
+; CHECK-PWR8-NEXT:    mtvsrd v8, r5
+; CHECK-PWR8-NEXT:    clrldi r11, r12, 56
+; CHECK-PWR8-NEXT:    clrlwi r11, r11, 24
+; CHECK-PWR8-NEXT:    sub r11, r11, r30
+; CHECK-PWR8-NEXT:    srawi r30, r11, 31
+; CHECK-PWR8-NEXT:    xor r11, r11, r30
+; CHECK-PWR8-NEXT:    sub r11, r11, r30
+; CHECK-PWR8-NEXT:    rldicl r30, r12, 56, 56
+; CHECK-PWR8-NEXT:    clrlwi r30, r30, 24
+; CHECK-PWR8-NEXT:    mtvsrd v2, r11
+; CHECK-PWR8-NEXT:    sub r30, r30, r29
+; CHECK-PWR8-NEXT:    srawi r29, r30, 31
+; CHECK-PWR8-NEXT:    xor r30, r30, r29
+; CHECK-PWR8-NEXT:    sub r30, r30, r29
+; CHECK-PWR8-NEXT:    rldicl r29, r12, 48, 56
+; CHECK-PWR8-NEXT:    clrlwi r29, r29, 24
+; CHECK-PWR8-NEXT:    mtvsrd v3, r30
 ; CHECK-PWR8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-PWR8-NEXT:    sub r29, r29, r28
+; CHECK-PWR8-NEXT:    srawi r28, r29, 31
+; CHECK-PWR8-NEXT:    xor r29, r29, r28
+; CHECK-PWR8-NEXT:    sub r29, r29, r28
+; CHECK-PWR8-NEXT:    rldicl r28, r12, 40, 56
+; CHECK-PWR8-NEXT:    clrlwi r28, r28, 24
+; CHECK-PWR8-NEXT:    sub r28, r28, r27
+; CHECK-PWR8-NEXT:    srawi r27, r28, 31
+; CHECK-PWR8-NEXT:    xor r28, r28, r27
+; CHECK-PWR8-NEXT:    sub r28, r28, r27
+; CHECK-PWR8-NEXT:    rldicl r27, r12, 32, 56
+; CHECK-PWR8-NEXT:    clrlwi r27, r27, 24
+; CHECK-PWR8-NEXT:    mtvsrd v4, r28
 ; CHECK-PWR8-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    srawi r6, r7, 31
-; CHECK-PWR8-NEXT:    srawi r5, r3, 31
-; CHECK-PWR8-NEXT:    xor r8, r0, r12
-; CHECK-PWR8-NEXT:    vmrghb v5, v1, v5
+; CHECK-PWR8-NEXT:    sub r27, r27, r26
+; CHECK-PWR8-NEXT:    srawi r26, r27, 31
+; CHECK-PWR8-NEXT:    xor r27, r27, r26
+; CHECK-PWR8-NEXT:    sub r27, r27, r26
+; CHECK-PWR8-NEXT:    rldicl r26, r12, 24, 56
+; CHECK-PWR8-NEXT:    clrlwi r26, r26, 24
+; CHECK-PWR8-NEXT:    sub r26, r26, r25
+; CHECK-PWR8-NEXT:    srawi r25, r26, 31
+; CHECK-PWR8-NEXT:    xor r26, r26, r25
+; CHECK-PWR8-NEXT:    sub r26, r26, r25
+; CHECK-PWR8-NEXT:    rldicl r25, r12, 16, 56
+; CHECK-PWR8-NEXT:    rldicl r12, r12, 8, 56
+; CHECK-PWR8-NEXT:    clrlwi r25, r25, 24
+; CHECK-PWR8-NEXT:    clrlwi r12, r12, 24
+; CHECK-PWR8-NEXT:    mtvsrd v5, r26
 ; CHECK-PWR8-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-PWR8-NEXT:    sub r25, r25, r24
+; CHECK-PWR8-NEXT:    sub r12, r12, r0
+; CHECK-PWR8-NEXT:    srawi r24, r25, 31
+; CHECK-PWR8-NEXT:    srawi r0, r12, 31
+; CHECK-PWR8-NEXT:    xor r25, r25, r24
+; CHECK-PWR8-NEXT:    xor r12, r12, r0
+; CHECK-PWR8-NEXT:    sub r25, r25, r24
+; CHECK-PWR8-NEXT:    sub r12, r12, r0
 ; CHECK-PWR8-NEXT:    ld r24, -64(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    xor r4, r7, r6
-; CHECK-PWR8-NEXT:    xor r3, r3, r5
-; CHECK-PWR8-NEXT:    sub r8, r8, r12
+; CHECK-PWR8-NEXT:    mtvsrd v0, r12
+; CHECK-PWR8-NEXT:    vmrghb v2, v3, v2
+; CHECK-PWR8-NEXT:    mtvsrd v3, r29
+; CHECK-PWR8-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-PWR8-NEXT:    vmrghb v3, v4, v3
+; CHECK-PWR8-NEXT:    mtvsrd v4, r27
+; CHECK-PWR8-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-PWR8-NEXT:    vmrglh v2, v3, v2
+; CHECK-PWR8-NEXT:    vmrghb v4, v5, v4
+; CHECK-PWR8-NEXT:    mtvsrd v5, r25
+; CHECK-PWR8-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
+; CHECK-PWR8-NEXT:    vmrghb v5, v0, v5
+; CHECK-PWR8-NEXT:    mtvsrd v0, r10
+; CHECK-PWR8-NEXT:    vmrglh v3, v5, v4
+; CHECK-PWR8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-PWR8-NEXT:    vmrghb v0, v1, v0
+; CHECK-PWR8-NEXT:    mtvsrd v1, r8
+; CHECK-PWR8-NEXT:    vmrghb v1, v6, v1
+; CHECK-PWR8-NEXT:    mtvsrd v6, r6
+; CHECK-PWR8-NEXT:    vmrglh v4, v1, v0
 ; CHECK-PWR8-NEXT:    vmrghb v6, v7, v6
-; CHECK-PWR8-NEXT:    ld r17, -120(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    ld r16, -128(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    sub r4, r4, r6
-; CHECK-PWR8-NEXT:    sub r3, r3, r5
-; CHECK-PWR8-NEXT:    mtvsrd v9, r8
-; CHECK-PWR8-NEXT:    ld r15, -136(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    ld r14, -144(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    mtvsrd v1, r4
 ; CHECK-PWR8-NEXT:    mtvsrd v7, r3
-; CHECK-PWR8-NEXT:    vmrghb v8, v9, v8
-; CHECK-PWR8-NEXT:    vmrghb v1, v7, v1
-; CHECK-PWR8-NEXT:    vmrglh v2, v4, v2
-; CHECK-PWR8-NEXT:    vmrglh v3, v3, v0
-; CHECK-PWR8-NEXT:    vmrglh v4, v6, v5
-; CHECK-PWR8-NEXT:    vmrglh v5, v1, v8
-; CHECK-PWR8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-PWR8-NEXT:    vmrghb v7, v8, v7
+; CHECK-PWR8-NEXT:    vmrglh v5, v7, v6
 ; CHECK-PWR8-NEXT:    xxmrglw vs1, v5, v4
 ; CHECK-PWR8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-PWR8-NEXT:    blr
@@ -1238,13 +1236,21 @@ define <4 x i32> @sub_absv_vec_32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr
 ; CHECK-PWR9-NEXT:    vabsduw v2, v2, v3
 ; CHECK-PWR9-NEXT:    blr
 ;
-; CHECK-PWR78-LABEL: sub_absv_vec_32:
-; CHECK-PWR78:       # %bb.0: # %entry
-; CHECK-PWR78-NEXT:    xxlxor v4, v4, v4
-; CHECK-PWR78-NEXT:    vsubuwm v2, v2, v3
-; CHECK-PWR78-NEXT:    vsubuwm v3, v4, v2
-; CHECK-PWR78-NEXT:    vmaxsw v2, v2, v3
-; CHECK-PWR78-NEXT:    blr
+; CHECK-PWR8-LABEL: sub_absv_vec_32:
+; CHECK-PWR8:       # %bb.0: # %entry
+; CHECK-PWR8-NEXT:    vsubuwm v2, v2, v3
+; CHECK-PWR8-NEXT:    xxlxor v3, v3, v3
+; CHECK-PWR8-NEXT:    vsubuwm v3, v3, v2
+; CHECK-PWR8-NEXT:    vmaxsw v2, v2, v3
+; CHECK-PWR8-NEXT:    blr
+;
+; CHECK-PWR7-LABEL: sub_absv_vec_32:
+; CHECK-PWR7:       # %bb.0: # %entry
+; CHECK-PWR7-NEXT:    xxlxor v4, v4, v4
+; CHECK-PWR7-NEXT:    vsubuwm v2, v2, v3
+; CHECK-PWR7-NEXT:    vsubuwm v3, v4, v2
+; CHECK-PWR7-NEXT:    vmaxsw v2, v2, v3
+; CHECK-PWR7-NEXT:    blr
 entry:
   %sub = sub nsw <4 x i32> %a, %b
   %sub.i = sub <4 x i32> zeroinitializer, %sub
@@ -1261,13 +1267,21 @@ define <8 x i16> @sub_absv_vec_16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr
 ; CHECK-PWR9-NEXT:    vmaxsh v2, v2, v3
 ; CHECK-PWR9-NEXT:    blr
 ;
-; CHECK-PWR78-LABEL: sub_absv_vec_16:
-; CHECK-PWR78:       # %bb.0: # %entry
-; CHECK-PWR78-NEXT:    xxlxor v4, v4, v4
-; CHECK-PWR78-NEXT:    vsubuhm v2, v2, v3
-; CHECK-PWR78-NEXT:    vsubuhm v3, v4, v2
-; CHECK-PWR78-NEXT:    vmaxsh v2, v2, v3
-; CHECK-PWR78-NEXT:    blr
+; CHECK-PWR8-LABEL: sub_absv_vec_16:
+; CHECK-PWR8:       # %bb.0: # %entry
+; CHECK-PWR8-NEXT:    vsubuhm v2, v2, v3
+; CHECK-PWR8-NEXT:    xxlxor v3, v3, v3
+; CHECK-PWR8-NEXT:    vsubuhm v3, v3, v2
+; CHECK-PWR8-NEXT:    vmaxsh v2, v2, v3
+; CHECK-PWR8-NEXT:    blr
+;
+; CHECK-PWR7-LABEL: sub_absv_vec_16:
+; CHECK-PWR7:       # %bb.0: # %entry
+; CHECK-PWR7-NEXT:    xxlxor v4, v4, v4
+; CHECK-PWR7-NEXT:    vsubuhm v2, v2, v3
+; CHECK-PWR7-NEXT:    vsubuhm v3, v4, v2
+; CHECK-PWR7-NEXT:    vmaxsh v2, v2, v3
+; CHECK-PWR7-NEXT:    blr
 entry:
   %sub = sub nsw <8 x i16> %a, %b
   %sub.i = sub <8 x i16> zeroinitializer, %sub
@@ -1284,13 +1298,21 @@ define <16 x i8> @sub_absv_vec_8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-NEXT:    vmaxsb v2, v2, v3
 ; CHECK-PWR9-NEXT:    blr
 ;
-; CHECK-PWR78-LABEL: sub_absv_vec_8:
-; CHECK-PWR78:       # %bb.0: # %entry
-; CHECK-PWR78-NEXT:    xxlxor v4, v4, v4
-; CHECK-PWR78-NEXT:    vsububm v2, v2, v3
-; CHECK-PWR78-NEXT:    vsububm v3, v4, v2
-; CHECK-PWR78-NEXT:    vmaxsb v2, v2, v3
-; CHECK-PWR78-NEXT:    blr
+; CHECK-PWR8-LABEL: sub_absv_vec_8:
+; CHECK-PWR8:       # %bb.0: # %entry
+; CHECK-PWR8-NEXT:    vsububm v2, v2, v3
+; CHECK-PWR8-NEXT:    xxlxor v3, v3, v3
+; CHECK-PWR8-NEXT:    vsububm v3, v3, v2
+; CHECK-PWR8-NEXT:    vmaxsb v2, v2, v3
+; CHECK-PWR8-NEXT:    blr
+;
+; CHECK-PWR7-LABEL: sub_absv_vec_8:
+; CHECK-PWR7:       # %bb.0: # %entry
+; CHECK-PWR7-NEXT:    xxlxor v4, v4, v4
+; CHECK-PWR7-NEXT:    vsububm v2, v2, v3
+; CHECK-PWR7-NEXT:    vsububm v3, v4, v2
+; CHECK-PWR7-NEXT:    vmaxsb v2, v2, v3
+; CHECK-PWR7-NEXT:    blr
 entry:
   %sub = sub nsw <16 x i8> %a, %b
   %sub.i = sub <16 x i8> zeroinitializer, %sub
@@ -1444,10 +1466,10 @@ define <4 x i32> @sext_sub_absd32(<4 x i16>, <4 x i16>) local_unnamed_addr {
 ;
 ; CHECK-PWR8-LABEL: sext_sub_absd32:
 ; CHECK-PWR8:       # %bb.0:
-; CHECK-PWR8-NEXT:    vmrglh v2, v2, v2
 ; CHECK-PWR8-NEXT:    vspltisw v4, 8
-; CHECK-PWR8-NEXT:    vmrglh v3, v3, v3
+; CHECK-PWR8-NEXT:    vmrglh v2, v2, v2
 ; CHECK-PWR8-NEXT:    vadduwm v4, v4, v4
+; CHECK-PWR8-NEXT:    vmrglh v3, v3, v3
 ; CHECK-PWR8-NEXT:    vslw v2, v2, v4
 ; CHECK-PWR8-NEXT:    vslw v3, v3, v4
 ; CHECK-PWR8-NEXT:    vsraw v2, v2, v4
@@ -1516,8 +1538,8 @@ define <8 x i16> @sext_sub_absd16(<8 x i8>, <8 x i8>) local_unnamed_addr {
 ; CHECK-PWR8:       # %bb.0:
 ; CHECK-PWR8-NEXT:    vmrglb v2, v2, v2
 ; CHECK-PWR8-NEXT:    vspltish v4, 8
-; CHECK-PWR8-NEXT:    vmrglb v3, v3, v3
 ; CHECK-PWR8-NEXT:    vslh v2, v2, v4
+; CHECK-PWR8-NEXT:    vmrglb v3, v3, v3
 ; CHECK-PWR8-NEXT:    vslh v3, v3, v4
 ; CHECK-PWR8-NEXT:    vsrah v2, v2, v4
 ; CHECK-PWR8-NEXT:    vsrah v3, v3, v4

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll b/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll
index 39b06619063dfac..8c88e07140e5a93 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll
@@ -19,12 +19,12 @@ define signext i8 @caller_9(ptr nocapture readonly byval([9 x i8]) %data) #0 {
 ; P8LE-NEXT:    stdu r1, -80(r1)
 ; P8LE-NEXT:    std r0, 96(r1)
 ; P8LE-NEXT:    stb r4, 56(r1)
-; P8LE-NEXT:    addi r5, r1, 71
+; P8LE-NEXT:    addi r4, r1, 71
+; P8LE-NEXT:    lbz r5, 56(r1)
 ; P8LE-NEXT:    std r3, 48(r1)
-; P8LE-NEXT:    lbz r4, 56(r1)
-; P8LE-NEXT:    stdx r3, 0, r5
-; P8LE-NEXT:    mr r3, r5
-; P8LE-NEXT:    stb r4, 79(r1)
+; P8LE-NEXT:    stdx r3, 0, r4
+; P8LE-NEXT:    mr r3, r4
+; P8LE-NEXT:    stb r5, 79(r1)
 ; P8LE-NEXT:    bl callee
 ; P8LE-NEXT:    nop
 ; P8LE-NEXT:    li r3, 0
@@ -78,12 +78,12 @@ define signext i8 @caller_9(ptr nocapture readonly byval([9 x i8]) %data) #0 {
 ; P8BE-NEXT:    stdu r1, -144(r1)
 ; P8BE-NEXT:    std r0, 160(r1)
 ; P8BE-NEXT:    stb r4, 200(r1)
-; P8BE-NEXT:    addi r5, r1, 135
+; P8BE-NEXT:    addi r4, r1, 135
+; P8BE-NEXT:    lbz r5, 200(r1)
 ; P8BE-NEXT:    std r3, 192(r1)
-; P8BE-NEXT:    lbz r4, 200(r1)
-; P8BE-NEXT:    stdx r3, 0, r5
-; P8BE-NEXT:    mr r3, r5
-; P8BE-NEXT:    stb r4, 143(r1)
+; P8BE-NEXT:    stdx r3, 0, r4
+; P8BE-NEXT:    mr r3, r4
+; P8BE-NEXT:    stb r5, 143(r1)
 ; P8BE-NEXT:    bl callee
 ; P8BE-NEXT:    nop
 ; P8BE-NEXT:    li r3, 0
@@ -179,8 +179,8 @@ define signext i8 @caller_9_callee_9(ptr nocapture readonly byval([9 x i8]) %dat
 ; P8LE-NEXT:    std r0, 96(r1)
 ; P8LE-NEXT:    stb r4, 56(r1)
 ; P8LE-NEXT:    addi r5, r1, 71
-; P8LE-NEXT:    std r3, 48(r1)
 ; P8LE-NEXT:    lbz r4, 56(r1)
+; P8LE-NEXT:    std r3, 48(r1)
 ; P8LE-NEXT:    stdx r3, 0, r5
 ; P8LE-NEXT:    stb r4, 79(r1)
 ; P8LE-NEXT:    lbz r4, 56(r1)
@@ -238,8 +238,8 @@ define signext i8 @caller_9_callee_9(ptr nocapture readonly byval([9 x i8]) %dat
 ; P8BE-NEXT:    std r0, 160(r1)
 ; P8BE-NEXT:    stb r4, 200(r1)
 ; P8BE-NEXT:    addi r5, r1, 135
-; P8BE-NEXT:    std r3, 192(r1)
 ; P8BE-NEXT:    lbz r4, 200(r1)
+; P8BE-NEXT:    std r3, 192(r1)
 ; P8BE-NEXT:    stdx r3, 0, r5
 ; P8BE-NEXT:    stb r4, 143(r1)
 ; P8BE-NEXT:    lbz r4, 200(r1)
@@ -337,12 +337,12 @@ define signext i8 @caller_10(ptr nocapture readonly byval([10 x i8]) %data) #0 {
 ; P8LE-NEXT:    stdu r1, -80(r1)
 ; P8LE-NEXT:    std r0, 96(r1)
 ; P8LE-NEXT:    sth r4, 56(r1)
-; P8LE-NEXT:    addi r5, r1, 70
+; P8LE-NEXT:    addi r4, r1, 70
+; P8LE-NEXT:    lhz r5, 56(r1)
 ; P8LE-NEXT:    std r3, 48(r1)
-; P8LE-NEXT:    lhz r4, 56(r1)
-; P8LE-NEXT:    stdx r3, 0, r5
-; P8LE-NEXT:    mr r3, r5
-; P8LE-NEXT:    sth r4, 78(r1)
+; P8LE-NEXT:    stdx r3, 0, r4
+; P8LE-NEXT:    mr r3, r4
+; P8LE-NEXT:    sth r5, 78(r1)
 ; P8LE-NEXT:    bl callee
 ; P8LE-NEXT:    nop
 ; P8LE-NEXT:    li r3, 0
@@ -396,12 +396,12 @@ define signext i8 @caller_10(ptr nocapture readonly byval([10 x i8]) %data) #0 {
 ; P8BE-NEXT:    stdu r1, -144(r1)
 ; P8BE-NEXT:    std r0, 160(r1)
 ; P8BE-NEXT:    sth r4, 200(r1)
-; P8BE-NEXT:    addi r5, r1, 134
+; P8BE-NEXT:    addi r4, r1, 134
+; P8BE-NEXT:    lhz r5, 200(r1)
 ; P8BE-NEXT:    std r3, 192(r1)
-; P8BE-NEXT:    lhz r4, 200(r1)
-; P8BE-NEXT:    stdx r3, 0, r5
-; P8BE-NEXT:    mr r3, r5
-; P8BE-NEXT:    sth r4, 142(r1)
+; P8BE-NEXT:    stdx r3, 0, r4
+; P8BE-NEXT:    mr r3, r4
+; P8BE-NEXT:    sth r5, 142(r1)
 ; P8BE-NEXT:    bl callee
 ; P8BE-NEXT:    nop
 ; P8BE-NEXT:    li r3, 0
@@ -500,12 +500,12 @@ define signext i8 @caller_12(ptr nocapture readonly byval([12 x i8]) %data) #0 {
 ; P8LE-NEXT:    stdu r1, -80(r1)
 ; P8LE-NEXT:    std r0, 96(r1)
 ; P8LE-NEXT:    stw r4, 56(r1)
-; P8LE-NEXT:    addi r5, r1, 68
+; P8LE-NEXT:    addi r4, r1, 68
+; P8LE-NEXT:    lwz r5, 56(r1)
 ; P8LE-NEXT:    std r3, 48(r1)
-; P8LE-NEXT:    lwz r4, 56(r1)
 ; P8LE-NEXT:    std r3, 68(r1)
-; P8LE-NEXT:    mr r3, r5
-; P8LE-NEXT:    stw r4, 76(r1)
+; P8LE-NEXT:    mr r3, r4
+; P8LE-NEXT:    stw r5, 76(r1)
 ; P8LE-NEXT:    bl callee
 ; P8LE-NEXT:    nop
 ; P8LE-NEXT:    li r3, 0
@@ -559,12 +559,12 @@ define signext i8 @caller_12(ptr nocapture readonly byval([12 x i8]) %data) #0 {
 ; P8BE-NEXT:    stdu r1, -144(r1)
 ; P8BE-NEXT:    std r0, 160(r1)
 ; P8BE-NEXT:    stw r4, 200(r1)
-; P8BE-NEXT:    addi r5, r1, 132
+; P8BE-NEXT:    addi r4, r1, 132
+; P8BE-NEXT:    lwz r5, 200(r1)
 ; P8BE-NEXT:    std r3, 192(r1)
-; P8BE-NEXT:    lwz r4, 200(r1)
 ; P8BE-NEXT:    std r3, 132(r1)
-; P8BE-NEXT:    mr r3, r5
-; P8BE-NEXT:    stw r4, 140(r1)
+; P8BE-NEXT:    mr r3, r4
+; P8BE-NEXT:    stw r5, 140(r1)
 ; P8BE-NEXT:    bl callee
 ; P8BE-NEXT:    nop
 ; P8BE-NEXT:    li r3, 0
@@ -671,14 +671,14 @@ define signext i8 @caller_14(ptr nocapture readonly byval([14 x i8]) %data) #0 {
 ; P8LE-NEXT:    stdu r1, -80(r1)
 ; P8LE-NEXT:    std r0, 96(r1)
 ; P8LE-NEXT:    stw r4, 56(r1)
-; P8LE-NEXT:    addi r5, r1, 66
 ; P8LE-NEXT:    rldicl r4, r4, 32, 32
+; P8LE-NEXT:    lwz r5, 56(r1)
 ; P8LE-NEXT:    std r3, 48(r1)
-; P8LE-NEXT:    lwz r6, 56(r1)
-; P8LE-NEXT:    stdx r3, 0, r5
-; P8LE-NEXT:    mr r3, r5
 ; P8LE-NEXT:    sth r4, 60(r1)
-; P8LE-NEXT:    stw r6, 74(r1)
+; P8LE-NEXT:    addi r4, r1, 66
+; P8LE-NEXT:    stdx r3, 0, r4
+; P8LE-NEXT:    mr r3, r4
+; P8LE-NEXT:    stw r5, 74(r1)
 ; P8LE-NEXT:    bl callee
 ; P8LE-NEXT:    nop
 ; P8LE-NEXT:    li r3, 0
@@ -734,16 +734,16 @@ define signext i8 @caller_14(ptr nocapture readonly byval([14 x i8]) %data) #0 {
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
 ; P8BE-NEXT:    stdu r1, -144(r1)
-; P8BE-NEXT:    rldicl r6, r4, 48, 16
+; P8BE-NEXT:    rldicl r5, r4, 48, 16
 ; P8BE-NEXT:    std r0, 160(r1)
-; P8BE-NEXT:    addi r5, r1, 130
-; P8BE-NEXT:    std r3, 192(r1)
-; P8BE-NEXT:    stdx r3, 0, r5
-; P8BE-NEXT:    mr r3, r5
-; P8BE-NEXT:    stw r6, 200(r1)
-; P8BE-NEXT:    lwz r6, 200(r1)
 ; P8BE-NEXT:    sth r4, 204(r1)
-; P8BE-NEXT:    stw r6, 138(r1)
+; P8BE-NEXT:    addi r4, r1, 130
+; P8BE-NEXT:    std r3, 192(r1)
+; P8BE-NEXT:    stw r5, 200(r1)
+; P8BE-NEXT:    stdx r3, 0, r4
+; P8BE-NEXT:    mr r3, r4
+; P8BE-NEXT:    lwz r5, 200(r1)
+; P8BE-NEXT:    stw r5, 138(r1)
 ; P8BE-NEXT:    bl callee
 ; P8BE-NEXT:    nop
 ; P8BE-NEXT:    li r3, 0
@@ -856,9 +856,9 @@ define signext i8 @caller_16(ptr nocapture readonly byval([16 x i8]) %data) #0 {
 ; P8LE-NEXT:    std r0, 96(r1)
 ; P8LE-NEXT:    std r3, 48(r1)
 ; P8LE-NEXT:    std r4, 56(r1)
+; P8LE-NEXT:    stw r4, 72(r1)
 ; P8LE-NEXT:    std r3, 64(r1)
 ; P8LE-NEXT:    mr r3, r5
-; P8LE-NEXT:    stw r4, 72(r1)
 ; P8LE-NEXT:    bl callee
 ; P8LE-NEXT:    nop
 ; P8LE-NEXT:    li r3, 0
@@ -908,14 +908,14 @@ define signext i8 @caller_16(ptr nocapture readonly byval([16 x i8]) %data) #0 {
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
 ; P8BE-NEXT:    stdu r1, -144(r1)
-; P8BE-NEXT:    addi r5, r1, 128
 ; P8BE-NEXT:    std r0, 160(r1)
-; P8BE-NEXT:    rldicl r6, r4, 32, 32
-; P8BE-NEXT:    std r3, 192(r1)
 ; P8BE-NEXT:    std r4, 200(r1)
+; P8BE-NEXT:    rldicl r5, r4, 32, 32
+; P8BE-NEXT:    addi r4, r1, 128
+; P8BE-NEXT:    std r3, 192(r1)
 ; P8BE-NEXT:    std r3, 128(r1)
-; P8BE-NEXT:    mr r3, r5
-; P8BE-NEXT:    stw r6, 136(r1)
+; P8BE-NEXT:    mr r3, r4
+; P8BE-NEXT:    stw r5, 136(r1)
 ; P8BE-NEXT:    bl callee
 ; P8BE-NEXT:    nop
 ; P8BE-NEXT:    li r3, 0
@@ -1020,14 +1020,14 @@ define signext i8 @caller_18(ptr nocapture readonly byval([18 x i8]) %data) #0 {
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
 ; P8LE-NEXT:    stdu r1, -96(r1)
-; P8LE-NEXT:    addi r6, r1, 78
 ; P8LE-NEXT:    std r0, 112(r1)
+; P8LE-NEXT:    sth r5, 64(r1)
+; P8LE-NEXT:    addi r5, r1, 78
 ; P8LE-NEXT:    std r3, 48(r1)
 ; P8LE-NEXT:    std r4, 56(r1)
-; P8LE-NEXT:    stdx r3, 0, r6
-; P8LE-NEXT:    mr r3, r6
-; P8LE-NEXT:    sth r5, 64(r1)
 ; P8LE-NEXT:    stw r4, 86(r1)
+; P8LE-NEXT:    stdx r3, 0, r5
+; P8LE-NEXT:    mr r3, r5
 ; P8LE-NEXT:    bl callee
 ; P8LE-NEXT:    nop
 ; P8LE-NEXT:    li r3, 0
@@ -1079,14 +1079,14 @@ define signext i8 @caller_18(ptr nocapture readonly byval([18 x i8]) %data) #0 {
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
 ; P8BE-NEXT:    stdu r1, -144(r1)
-; P8BE-NEXT:    addi r6, r1, 126
 ; P8BE-NEXT:    std r0, 160(r1)
+; P8BE-NEXT:    std r4, 200(r1)
 ; P8BE-NEXT:    sth r5, 208(r1)
 ; P8BE-NEXT:    rldicl r5, r4, 32, 32
+; P8BE-NEXT:    addi r4, r1, 126
 ; P8BE-NEXT:    std r3, 192(r1)
-; P8BE-NEXT:    std r4, 200(r1)
-; P8BE-NEXT:    stdx r3, 0, r6
-; P8BE-NEXT:    mr r3, r6
+; P8BE-NEXT:    stdx r3, 0, r4
+; P8BE-NEXT:    mr r3, r4
 ; P8BE-NEXT:    stw r5, 134(r1)
 ; P8BE-NEXT:    bl callee
 ; P8BE-NEXT:    nop

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll b/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll
index 42b19e5e96a6830..4aedd1cc714d951 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll
@@ -240,9 +240,9 @@ define signext i8 @caller_3(ptr nocapture readonly byval([3 x i8]) %data) #0 {
 ; P8LE-NEXT:    mflr r0
 ; P8LE-NEXT:    stdu r1, -64(r1)
 ; P8LE-NEXT:    std r0, 80(r1)
-; P8LE-NEXT:    rldicl r4, r3, 48, 16
 ; P8LE-NEXT:    sth r3, 48(r1)
-; P8LE-NEXT:    stb r4, 50(r1)
+; P8LE-NEXT:    rldicl r3, r3, 48, 16
+; P8LE-NEXT:    stb r3, 50(r1)
 ; P8LE-NEXT:    lhz r3, 48(r1)
 ; P8LE-NEXT:    lbz r4, 50(r1)
 ; P8LE-NEXT:    sth r3, 61(r1)
@@ -504,10 +504,10 @@ define signext i8 @caller_5(ptr nocapture readonly byval([5 x i8]) %data) #0 {
 ; P8LE-NEXT:    rldicl r4, r3, 32, 32
 ; P8LE-NEXT:    std r0, 80(r1)
 ; P8LE-NEXT:    stw r3, 48(r1)
-; P8LE-NEXT:    stb r4, 52(r1)
-; P8LE-NEXT:    lbz r4, 52(r1)
 ; P8LE-NEXT:    stw r3, 59(r1)
 ; P8LE-NEXT:    addi r3, r1, 59
+; P8LE-NEXT:    stb r4, 52(r1)
+; P8LE-NEXT:    lbz r4, 52(r1)
 ; P8LE-NEXT:    stb r4, 63(r1)
 ; P8LE-NEXT:    bl callee
 ; P8LE-NEXT:    nop
@@ -645,9 +645,9 @@ define signext i8 @caller_6(ptr nocapture readonly byval([6 x i8]) %data) #0 {
 ; P8LE-NEXT:    mflr r0
 ; P8LE-NEXT:    stdu r1, -64(r1)
 ; P8LE-NEXT:    std r0, 80(r1)
-; P8LE-NEXT:    rldicl r4, r3, 32, 32
 ; P8LE-NEXT:    stw r3, 48(r1)
-; P8LE-NEXT:    sth r4, 52(r1)
+; P8LE-NEXT:    rldicl r3, r3, 32, 32
+; P8LE-NEXT:    sth r3, 52(r1)
 ; P8LE-NEXT:    lwz r3, 48(r1)
 ; P8LE-NEXT:    lhz r4, 52(r1)
 ; P8LE-NEXT:    stw r3, 58(r1)
@@ -798,17 +798,17 @@ define signext i8 @caller_7(ptr nocapture readonly byval([7 x i8]) %data) #0 {
 ; P8LE-NEXT:    mflr r0
 ; P8LE-NEXT:    stdu r1, -64(r1)
 ; P8LE-NEXT:    rldicl r4, r3, 32, 32
-; P8LE-NEXT:    rldicl r5, r3, 16, 48
 ; P8LE-NEXT:    std r0, 80(r1)
 ; P8LE-NEXT:    stw r3, 48(r1)
+; P8LE-NEXT:    stw r3, 57(r1)
 ; P8LE-NEXT:    sth r4, 52(r1)
-; P8LE-NEXT:    stb r5, 54(r1)
+; P8LE-NEXT:    rldicl r4, r3, 16, 48
+; P8LE-NEXT:    stb r4, 54(r1)
 ; P8LE-NEXT:    lhz r4, 52(r1)
-; P8LE-NEXT:    lbz r5, 54(r1)
-; P8LE-NEXT:    stw r3, 57(r1)
-; P8LE-NEXT:    addi r3, r1, 57
+; P8LE-NEXT:    lbz r3, 54(r1)
 ; P8LE-NEXT:    sth r4, 61(r1)
-; P8LE-NEXT:    stb r5, 63(r1)
+; P8LE-NEXT:    stb r3, 63(r1)
+; P8LE-NEXT:    addi r3, r1, 57
 ; P8LE-NEXT:    bl callee
 ; P8LE-NEXT:    nop
 ; P8LE-NEXT:    li r3, 0
@@ -868,18 +868,18 @@ define signext i8 @caller_7(ptr nocapture readonly byval([7 x i8]) %data) #0 {
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
 ; P8BE-NEXT:    stdu r1, -128(r1)
-; P8BE-NEXT:    rldicl r4, r3, 56, 8
 ; P8BE-NEXT:    std r0, 144(r1)
+; P8BE-NEXT:    rldicl r4, r3, 56, 8
 ; P8BE-NEXT:    stb r3, 183(r1)
 ; P8BE-NEXT:    rldicl r3, r3, 40, 24
-; P8BE-NEXT:    sth r4, 181(r1)
-; P8BE-NEXT:    lbz r5, 183(r1)
-; P8BE-NEXT:    lhz r4, 181(r1)
 ; P8BE-NEXT:    stw r3, 177(r1)
 ; P8BE-NEXT:    stw r3, 121(r1)
+; P8BE-NEXT:    lbz r3, 183(r1)
+; P8BE-NEXT:    sth r4, 181(r1)
+; P8BE-NEXT:    lhz r4, 181(r1)
+; P8BE-NEXT:    stb r3, 127(r1)
 ; P8BE-NEXT:    addi r3, r1, 121
 ; P8BE-NEXT:    sth r4, 125(r1)
-; P8BE-NEXT:    stb r5, 127(r1)
 ; P8BE-NEXT:    bl callee
 ; P8BE-NEXT:    nop
 ; P8BE-NEXT:    li r3, 0

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll
index 56105426e2f6848..bd528731d239483 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll
@@ -97,8 +97,8 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-P8-NEXT:    clrldi r3, r3, 32
 ; BE-P8-NEXT:    addi r1, r1, 128
 ; BE-P8-NEXT:    ld r0, 16(r1)
-; BE-P8-NEXT:    hashchk r0, -16(r1)
 ; BE-P8-NEXT:    mtlr r0
+; BE-P8-NEXT:    hashchk r0, -16(r1)
 ; BE-P8-NEXT:    blr
 ;
 ; BE-32BIT-P10-LABEL: caller:
@@ -151,8 +151,8 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-32BIT-P8-NEXT:    lwz r31, 76(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    addi r1, r1, 80
 ; BE-32BIT-P8-NEXT:    lwz r0, 8(r1)
-; BE-32BIT-P8-NEXT:    hashchk r0, -16(r1)
 ; BE-32BIT-P8-NEXT:    mtlr r0
+; BE-32BIT-P8-NEXT:    hashchk r0, -16(r1)
 ; BE-32BIT-P8-NEXT:    blr
 ;
 ; BE-P10-PRIV-LABEL: caller:
@@ -208,8 +208,8 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-P8-PRIV-NEXT:    clrldi r3, r3, 32
 ; BE-P8-PRIV-NEXT:    addi r1, r1, 128
 ; BE-P8-PRIV-NEXT:    ld r0, 16(r1)
-; BE-P8-PRIV-NEXT:    hashchkp r0, -16(r1)
 ; BE-P8-PRIV-NEXT:    mtlr r0
+; BE-P8-PRIV-NEXT:    hashchkp r0, -16(r1)
 ; BE-P8-PRIV-NEXT:    blr
 ;
 ; BE-32BIT-P10-PRIV-LABEL: caller:
@@ -262,8 +262,8 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r31, 76(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    addi r1, r1, 80
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r0, 8(r1)
-; BE-32BIT-P8-PRIV-NEXT:    hashchkp r0, -16(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    mtlr r0
+; BE-32BIT-P8-PRIV-NEXT:    hashchkp r0, -16(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    blr
 entry:
   %call = tail call zeroext i32 @callee(i32 zeroext %in)
@@ -534,8 +534,8 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-LABEL: spill:
 ; BE-P8:       # %bb.0: # %entry
 ; BE-P8-NEXT:    mfcr r12
-; BE-P8-NEXT:    mflr r0
 ; BE-P8-NEXT:    stw r12, 8(r1)
+; BE-P8-NEXT:    mflr r0
 ; BE-P8-NEXT:    stdu r1, -624(r1)
 ; BE-P8-NEXT:    li r4, 144
 ; BE-P8-NEXT:    std r0, 640(r1)
@@ -543,64 +543,64 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-NEXT:    std r14, 336(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r16, 352(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; BE-P8-NEXT:    li r4, 160
 ; BE-P8-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
+; BE-P8-NEXT:    li r4, 160
 ; BE-P8-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r20, 384(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; BE-P8-NEXT:    li r4, 176
 ; BE-P8-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r24, 416(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
+; BE-P8-NEXT:    li r4, 176
 ; BE-P8-NEXT:    std r25, 424(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r26, 432(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r27, 440(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r28, 448(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r29, 456(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r30, 464(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    std r31, 472(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 192
+; BE-P8-NEXT:    std r31, 472(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stfd f14, 480(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 208
-; BE-P8-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f23, 552(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f24, 560(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 224
-; BE-P8-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f25, 568(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f26, 576(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f27, 584(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f28, 592(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f29, 600(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f30, 608(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 240
-; BE-P8-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f31, 616(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 256
-; BE-P8-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 272
-; BE-P8-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 288
-; BE-P8-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 304
-; BE-P8-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 320
-; BE-P8-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    lwz r4, 12(r3)
-; BE-P8-NEXT:    stfd f23, 552(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f24, 560(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f25, 568(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f26, 576(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f27, 584(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f28, 592(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f29, 600(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f30, 608(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f31, 616(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stw r4, 132(r1)
 ; BE-P8-NEXT:    #APP
 ; BE-P8-NEXT:    nop
@@ -616,55 +616,55 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-NEXT:    lfd f28, 592(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r30, 464(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r29, 456(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    lwz r4, 16(r4)
 ; BE-P8-NEXT:    lfd f27, 584(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    lfd f26, 576(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r28, 448(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    ld r27, 440(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lwz r4, 16(r4)
 ; BE-P8-NEXT:    lfd f25, 568(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    lfd f24, 560(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    ld r27, 440(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r26, 432(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    add r3, r4, r3
-; BE-P8-NEXT:    li r4, 320
 ; BE-P8-NEXT:    lfd f23, 552(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    lfd f22, 544(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r25, 424(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r24, 416(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
-; BE-P8-NEXT:    li r4, 304
 ; BE-P8-NEXT:    lfd f21, 536(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lfd f20, 528(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r23, 408(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r22, 400(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    clrldi r3, r3, 32
-; BE-P8-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
-; BE-P8-NEXT:    li r4, 288
-; BE-P8-NEXT:    lfd f20, 528(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    add r3, r4, r3
+; BE-P8-NEXT:    li r4, 320
+; BE-P8-NEXT:    lfd f19, 520(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lfd f18, 512(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lfd f17, 504(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lfd f16, 496(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r21, 392(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r20, 384(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
-; BE-P8-NEXT:    li r4, 272
-; BE-P8-NEXT:    lfd f19, 520(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
+; BE-P8-NEXT:    li r4, 304
+; BE-P8-NEXT:    lfd f15, 488(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r19, 376(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lfd f14, 480(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r18, 368(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
-; BE-P8-NEXT:    li r4, 256
-; BE-P8-NEXT:    lfd f18, 512(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r17, 360(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r16, 352(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
-; BE-P8-NEXT:    li r4, 240
-; BE-P8-NEXT:    lfd f17, 504(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
+; BE-P8-NEXT:    li r4, 288
 ; BE-P8-NEXT:    ld r15, 344(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r14, 336(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    clrldi r3, r3, 32
+; BE-P8-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
+; BE-P8-NEXT:    li r4, 272
+; BE-P8-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
+; BE-P8-NEXT:    li r4, 256
+; BE-P8-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
+; BE-P8-NEXT:    li r4, 240
 ; BE-P8-NEXT:    lxvd2x v26, r1, r4 # 16-byte Folded Reload
 ; BE-P8-NEXT:    li r4, 224
-; BE-P8-NEXT:    lfd f16, 496(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    lxvd2x v25, r1, r4 # 16-byte Folded Reload
 ; BE-P8-NEXT:    li r4, 208
-; BE-P8-NEXT:    lfd f15, 488(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    lxvd2x v24, r1, r4 # 16-byte Folded Reload
 ; BE-P8-NEXT:    li r4, 192
-; BE-P8-NEXT:    lfd f14, 480(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    lxvd2x v23, r1, r4 # 16-byte Folded Reload
 ; BE-P8-NEXT:    li r4, 176
 ; BE-P8-NEXT:    lxvd2x v22, r1, r4 # 16-byte Folded Reload
@@ -675,9 +675,9 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-NEXT:    addi r1, r1, 624
 ; BE-P8-NEXT:    ld r0, 16(r1)
 ; BE-P8-NEXT:    lwz r12, 8(r1)
-; BE-P8-NEXT:    mtocrf 32, r12
 ; BE-P8-NEXT:    hashchk r0, -488(r1)
 ; BE-P8-NEXT:    mtlr r0
+; BE-P8-NEXT:    mtocrf 32, r12
 ; BE-P8-NEXT:    mtocrf 16, r12
 ; BE-P8-NEXT:    mtocrf 8, r12
 ; BE-P8-NEXT:    blr
@@ -941,74 +941,74 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-LABEL: spill:
 ; BE-32BIT-P8:       # %bb.0: # %entry
 ; BE-32BIT-P8-NEXT:    mfcr r12
-; BE-32BIT-P8-NEXT:    mflr r0
 ; BE-32BIT-P8-NEXT:    stw r12, 4(r1)
+; BE-32BIT-P8-NEXT:    mflr r0
 ; BE-32BIT-P8-NEXT:    stwu r1, -496(r1)
 ; BE-32BIT-P8-NEXT:    li r4, 80
 ; BE-32BIT-P8-NEXT:    stw r0, 504(r1)
 ; BE-32BIT-P8-NEXT:    hashst r0, -424(r1)
 ; BE-32BIT-P8-NEXT:    stw r13, 276(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-NEXT:    li r4, 96
 ; BE-32BIT-P8-NEXT:    stw r14, 280(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-NEXT:    li r4, 112
 ; BE-32BIT-P8-NEXT:    stw r15, 284(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-NEXT:    li r4, 128
 ; BE-32BIT-P8-NEXT:    stw r16, 288(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-NEXT:    li r4, 144
 ; BE-32BIT-P8-NEXT:    stw r17, 292(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-NEXT:    li r4, 160
+; BE-32BIT-P8-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-NEXT:    li r4, 96
 ; BE-32BIT-P8-NEXT:    stw r18, 296(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-NEXT:    li r4, 176
 ; BE-32BIT-P8-NEXT:    stw r19, 300(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-NEXT:    li r4, 192
 ; BE-32BIT-P8-NEXT:    stw r20, 304(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-NEXT:    li r4, 208
 ; BE-32BIT-P8-NEXT:    stw r21, 308(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-NEXT:    li r4, 224
 ; BE-32BIT-P8-NEXT:    stw r22, 312(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-NEXT:    li r4, 240
 ; BE-32BIT-P8-NEXT:    stw r23, 316(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-NEXT:    li r4, 256
+; BE-32BIT-P8-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-NEXT:    li r4, 112
 ; BE-32BIT-P8-NEXT:    stw r24, 320(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-NEXT:    lwz r4, 12(r3)
 ; BE-32BIT-P8-NEXT:    stw r25, 324(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stw r26, 328(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stw r27, 332(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stw r28, 336(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stw r29, 340(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-NEXT:    li r4, 128
 ; BE-32BIT-P8-NEXT:    stw r30, 344(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stw r31, 348(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f14, 352(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f15, 360(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f16, 368(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f17, 376(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-NEXT:    li r4, 144
 ; BE-32BIT-P8-NEXT:    stfd f18, 384(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f19, 392(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f20, 400(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f21, 408(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f22, 416(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f23, 424(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-NEXT:    li r4, 160
 ; BE-32BIT-P8-NEXT:    stfd f24, 432(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f25, 440(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f26, 448(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f27, 456(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f28, 464(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f29, 472(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-NEXT:    li r4, 176
 ; BE-32BIT-P8-NEXT:    stfd f30, 480(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f31, 488(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stw r3, 64(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-NEXT:    li r4, 192
+; BE-32BIT-P8-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-NEXT:    li r4, 208
+; BE-32BIT-P8-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-NEXT:    li r4, 224
+; BE-32BIT-P8-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-NEXT:    li r4, 240
+; BE-32BIT-P8-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-NEXT:    li r4, 256
+; BE-32BIT-P8-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-NEXT:    lwz r4, 12(r3)
 ; BE-32BIT-P8-NEXT:    stw r4, 68(r1)
 ; BE-32BIT-P8-NEXT:    #APP
 ; BE-32BIT-P8-NEXT:    nop
@@ -1024,55 +1024,55 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-NEXT:    lfd f28, 464(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r30, 344(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r29, 340(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-NEXT:    lwz r4, 16(r4)
 ; BE-32BIT-P8-NEXT:    lfd f27, 456(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lfd f26, 448(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r28, 336(r1) # 4-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lwz r27, 332(r1) # 4-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lwz r4, 16(r4)
 ; BE-32BIT-P8-NEXT:    lfd f25, 440(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lfd f24, 432(r1) # 8-byte Folded Reload
-; BE-32BIT-P8-NEXT:    lwz r27, 332(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r26, 328(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-NEXT:    add r3, r4, r3
-; BE-32BIT-P8-NEXT:    li r4, 256
 ; BE-32BIT-P8-NEXT:    lfd f23, 424(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lfd f22, 416(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r25, 324(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r24, 320(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
-; BE-32BIT-P8-NEXT:    li r4, 240
 ; BE-32BIT-P8-NEXT:    lfd f21, 408(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lfd f20, 400(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r23, 316(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r22, 312(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
-; BE-32BIT-P8-NEXT:    li r4, 224
-; BE-32BIT-P8-NEXT:    lfd f20, 400(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-NEXT:    add r3, r4, r3
+; BE-32BIT-P8-NEXT:    li r4, 256
+; BE-32BIT-P8-NEXT:    lfd f19, 392(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lfd f18, 384(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lfd f17, 376(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lfd f16, 368(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r21, 308(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r20, 304(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
-; BE-32BIT-P8-NEXT:    li r4, 208
-; BE-32BIT-P8-NEXT:    lfd f19, 392(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
+; BE-32BIT-P8-NEXT:    li r4, 240
+; BE-32BIT-P8-NEXT:    lfd f15, 360(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r19, 300(r1) # 4-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lfd f14, 352(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r18, 296(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
-; BE-32BIT-P8-NEXT:    li r4, 192
-; BE-32BIT-P8-NEXT:    lfd f18, 384(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r17, 292(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r16, 288(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
-; BE-32BIT-P8-NEXT:    li r4, 176
-; BE-32BIT-P8-NEXT:    lfd f17, 376(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
+; BE-32BIT-P8-NEXT:    li r4, 224
 ; BE-32BIT-P8-NEXT:    lwz r15, 284(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r14, 280(r1) # 4-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lwz r13, 276(r1) # 4-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
+; BE-32BIT-P8-NEXT:    li r4, 208
+; BE-32BIT-P8-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
+; BE-32BIT-P8-NEXT:    li r4, 192
+; BE-32BIT-P8-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
+; BE-32BIT-P8-NEXT:    li r4, 176
 ; BE-32BIT-P8-NEXT:    lxvd2x v26, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    li r4, 160
-; BE-32BIT-P8-NEXT:    lfd f16, 368(r1) # 8-byte Folded Reload
-; BE-32BIT-P8-NEXT:    lwz r13, 276(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lxvd2x v25, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    li r4, 144
-; BE-32BIT-P8-NEXT:    lfd f15, 360(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lxvd2x v24, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    li r4, 128
-; BE-32BIT-P8-NEXT:    lfd f14, 352(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lxvd2x v23, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    li r4, 112
 ; BE-32BIT-P8-NEXT:    lxvd2x v22, r1, r4 # 16-byte Folded Reload
@@ -1083,9 +1083,9 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-NEXT:    addi r1, r1, 496
 ; BE-32BIT-P8-NEXT:    lwz r0, 8(r1)
 ; BE-32BIT-P8-NEXT:    lwz r12, 4(r1)
-; BE-32BIT-P8-NEXT:    mtocrf 32, r12
 ; BE-32BIT-P8-NEXT:    hashchk r0, -424(r1)
 ; BE-32BIT-P8-NEXT:    mtlr r0
+; BE-32BIT-P8-NEXT:    mtocrf 32, r12
 ; BE-32BIT-P8-NEXT:    mtocrf 16, r12
 ; BE-32BIT-P8-NEXT:    mtocrf 8, r12
 ; BE-32BIT-P8-NEXT:    blr
@@ -1347,8 +1347,8 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-PRIV-LABEL: spill:
 ; BE-P8-PRIV:       # %bb.0: # %entry
 ; BE-P8-PRIV-NEXT:    mfcr r12
-; BE-P8-PRIV-NEXT:    mflr r0
 ; BE-P8-PRIV-NEXT:    stw r12, 8(r1)
+; BE-P8-PRIV-NEXT:    mflr r0
 ; BE-P8-PRIV-NEXT:    stdu r1, -624(r1)
 ; BE-P8-PRIV-NEXT:    li r4, 144
 ; BE-P8-PRIV-NEXT:    std r0, 640(r1)
@@ -1356,64 +1356,64 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-PRIV-NEXT:    std r14, 336(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r16, 352(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; BE-P8-PRIV-NEXT:    li r4, 160
 ; BE-P8-PRIV-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
+; BE-P8-PRIV-NEXT:    li r4, 160
 ; BE-P8-PRIV-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r20, 384(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; BE-P8-PRIV-NEXT:    li r4, 176
 ; BE-P8-PRIV-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r24, 416(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
+; BE-P8-PRIV-NEXT:    li r4, 176
 ; BE-P8-PRIV-NEXT:    std r25, 424(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r26, 432(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r27, 440(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r28, 448(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r29, 456(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r30, 464(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    std r31, 472(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 192
+; BE-P8-PRIV-NEXT:    std r31, 472(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stfd f14, 480(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 208
-; BE-P8-PRIV-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f23, 552(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f24, 560(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 224
-; BE-P8-PRIV-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f25, 568(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f26, 576(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f27, 584(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f28, 592(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f29, 600(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f30, 608(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 240
-; BE-P8-PRIV-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f31, 616(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 256
-; BE-P8-PRIV-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 272
-; BE-P8-PRIV-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 288
-; BE-P8-PRIV-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 304
-; BE-P8-PRIV-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 320
-; BE-P8-PRIV-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    lwz r4, 12(r3)
-; BE-P8-PRIV-NEXT:    stfd f23, 552(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f24, 560(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f25, 568(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f26, 576(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f27, 584(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f28, 592(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f29, 600(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f30, 608(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f31, 616(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stw r4, 132(r1)
 ; BE-P8-PRIV-NEXT:    #APP
 ; BE-P8-PRIV-NEXT:    nop
@@ -1429,55 +1429,55 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-PRIV-NEXT:    lfd f28, 592(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r30, 464(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r29, 456(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    lwz r4, 16(r4)
 ; BE-P8-PRIV-NEXT:    lfd f27, 584(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    lfd f26, 576(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r28, 448(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    ld r27, 440(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lwz r4, 16(r4)
 ; BE-P8-PRIV-NEXT:    lfd f25, 568(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    lfd f24, 560(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    ld r27, 440(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r26, 432(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    add r3, r4, r3
-; BE-P8-PRIV-NEXT:    li r4, 320
 ; BE-P8-PRIV-NEXT:    lfd f23, 552(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    lfd f22, 544(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r25, 424(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r24, 416(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
-; BE-P8-PRIV-NEXT:    li r4, 304
 ; BE-P8-PRIV-NEXT:    lfd f21, 536(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lfd f20, 528(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r23, 408(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r22, 400(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    clrldi r3, r3, 32
-; BE-P8-PRIV-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
-; BE-P8-PRIV-NEXT:    li r4, 288
-; BE-P8-PRIV-NEXT:    lfd f20, 528(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    add r3, r4, r3
+; BE-P8-PRIV-NEXT:    li r4, 320
+; BE-P8-PRIV-NEXT:    lfd f19, 520(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lfd f18, 512(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lfd f17, 504(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lfd f16, 496(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r21, 392(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r20, 384(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
-; BE-P8-PRIV-NEXT:    li r4, 272
-; BE-P8-PRIV-NEXT:    lfd f19, 520(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
+; BE-P8-PRIV-NEXT:    li r4, 304
+; BE-P8-PRIV-NEXT:    lfd f15, 488(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r19, 376(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lfd f14, 480(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r18, 368(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
-; BE-P8-PRIV-NEXT:    li r4, 256
-; BE-P8-PRIV-NEXT:    lfd f18, 512(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r17, 360(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r16, 352(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
-; BE-P8-PRIV-NEXT:    li r4, 240
-; BE-P8-PRIV-NEXT:    lfd f17, 504(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
+; BE-P8-PRIV-NEXT:    li r4, 288
 ; BE-P8-PRIV-NEXT:    ld r15, 344(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r14, 336(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    clrldi r3, r3, 32
+; BE-P8-PRIV-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
+; BE-P8-PRIV-NEXT:    li r4, 272
+; BE-P8-PRIV-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
+; BE-P8-PRIV-NEXT:    li r4, 256
+; BE-P8-PRIV-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
+; BE-P8-PRIV-NEXT:    li r4, 240
 ; BE-P8-PRIV-NEXT:    lxvd2x v26, r1, r4 # 16-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    li r4, 224
-; BE-P8-PRIV-NEXT:    lfd f16, 496(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    lxvd2x v25, r1, r4 # 16-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    li r4, 208
-; BE-P8-PRIV-NEXT:    lfd f15, 488(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    lxvd2x v24, r1, r4 # 16-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    li r4, 192
-; BE-P8-PRIV-NEXT:    lfd f14, 480(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    lxvd2x v23, r1, r4 # 16-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    li r4, 176
 ; BE-P8-PRIV-NEXT:    lxvd2x v22, r1, r4 # 16-byte Folded Reload
@@ -1488,9 +1488,9 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-PRIV-NEXT:    addi r1, r1, 624
 ; BE-P8-PRIV-NEXT:    ld r0, 16(r1)
 ; BE-P8-PRIV-NEXT:    lwz r12, 8(r1)
-; BE-P8-PRIV-NEXT:    mtocrf 32, r12
 ; BE-P8-PRIV-NEXT:    hashchkp r0, -488(r1)
 ; BE-P8-PRIV-NEXT:    mtlr r0
+; BE-P8-PRIV-NEXT:    mtocrf 32, r12
 ; BE-P8-PRIV-NEXT:    mtocrf 16, r12
 ; BE-P8-PRIV-NEXT:    mtocrf 8, r12
 ; BE-P8-PRIV-NEXT:    blr
@@ -1754,74 +1754,74 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-PRIV-LABEL: spill:
 ; BE-32BIT-P8-PRIV:       # %bb.0: # %entry
 ; BE-32BIT-P8-PRIV-NEXT:    mfcr r12
-; BE-32BIT-P8-PRIV-NEXT:    mflr r0
 ; BE-32BIT-P8-PRIV-NEXT:    stw r12, 4(r1)
+; BE-32BIT-P8-PRIV-NEXT:    mflr r0
 ; BE-32BIT-P8-PRIV-NEXT:    stwu r1, -496(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 80
 ; BE-32BIT-P8-PRIV-NEXT:    stw r0, 504(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    hashstp r0, -424(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    stw r13, 276(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 96
 ; BE-32BIT-P8-PRIV-NEXT:    stw r14, 280(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 112
 ; BE-32BIT-P8-PRIV-NEXT:    stw r15, 284(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 128
 ; BE-32BIT-P8-PRIV-NEXT:    stw r16, 288(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 144
 ; BE-32BIT-P8-PRIV-NEXT:    stw r17, 292(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 160
+; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 96
 ; BE-32BIT-P8-PRIV-NEXT:    stw r18, 296(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 176
 ; BE-32BIT-P8-PRIV-NEXT:    stw r19, 300(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 192
 ; BE-32BIT-P8-PRIV-NEXT:    stw r20, 304(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 208
 ; BE-32BIT-P8-PRIV-NEXT:    stw r21, 308(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 224
 ; BE-32BIT-P8-PRIV-NEXT:    stw r22, 312(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 240
 ; BE-32BIT-P8-PRIV-NEXT:    stw r23, 316(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 256
+; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 112
 ; BE-32BIT-P8-PRIV-NEXT:    stw r24, 320(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    lwz r4, 12(r3)
 ; BE-32BIT-P8-PRIV-NEXT:    stw r25, 324(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stw r26, 328(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stw r27, 332(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stw r28, 336(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stw r29, 340(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 128
 ; BE-32BIT-P8-PRIV-NEXT:    stw r30, 344(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stw r31, 348(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f14, 352(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f15, 360(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f16, 368(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f17, 376(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 144
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f18, 384(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f19, 392(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f20, 400(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f21, 408(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f22, 416(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f23, 424(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 160
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f24, 432(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f25, 440(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f26, 448(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f27, 456(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f28, 464(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f29, 472(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 176
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f30, 480(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stfd f31, 488(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stw r3, 64(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 192
+; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 208
+; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 224
+; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 240
+; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 256
+; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    lwz r4, 12(r3)
 ; BE-32BIT-P8-PRIV-NEXT:    stw r4, 68(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    #APP
 ; BE-32BIT-P8-PRIV-NEXT:    nop
@@ -1837,55 +1837,55 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-PRIV-NEXT:    lfd f28, 464(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r30, 344(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r29, 340(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-PRIV-NEXT:    lwz r4, 16(r4)
 ; BE-32BIT-P8-PRIV-NEXT:    lfd f27, 456(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lfd f26, 448(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r28, 336(r1) # 4-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    lwz r27, 332(r1) # 4-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    lwz r4, 16(r4)
 ; BE-32BIT-P8-PRIV-NEXT:    lfd f25, 440(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lfd f24, 432(r1) # 8-byte Folded Reload
-; BE-32BIT-P8-PRIV-NEXT:    lwz r27, 332(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r26, 328(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-PRIV-NEXT:    add r3, r4, r3
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 256
 ; BE-32BIT-P8-PRIV-NEXT:    lfd f23, 424(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lfd f22, 416(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r25, 324(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r24, 320(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-PRIV-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 240
 ; BE-32BIT-P8-PRIV-NEXT:    lfd f21, 408(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    lfd f20, 400(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r23, 316(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r22, 312(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-PRIV-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 224
-; BE-32BIT-P8-PRIV-NEXT:    lfd f20, 400(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    add r3, r4, r3
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 256
+; BE-32BIT-P8-PRIV-NEXT:    lfd f19, 392(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    lfd f18, 384(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    lfd f17, 376(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    lfd f16, 368(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r21, 308(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r20, 304(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-PRIV-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 208
-; BE-32BIT-P8-PRIV-NEXT:    lfd f19, 392(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 240
+; BE-32BIT-P8-PRIV-NEXT:    lfd f15, 360(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r19, 300(r1) # 4-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    lfd f14, 352(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r18, 296(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-PRIV-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 192
-; BE-32BIT-P8-PRIV-NEXT:    lfd f18, 384(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r17, 292(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r16, 288(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-PRIV-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
-; BE-32BIT-P8-PRIV-NEXT:    li r4, 176
-; BE-32BIT-P8-PRIV-NEXT:    lfd f17, 376(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 224
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r15, 284(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r14, 280(r1) # 4-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    lwz r13, 276(r1) # 4-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 208
+; BE-32BIT-P8-PRIV-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 192
+; BE-32BIT-P8-PRIV-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
+; BE-32BIT-P8-PRIV-NEXT:    li r4, 176
 ; BE-32BIT-P8-PRIV-NEXT:    lxvd2x v26, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 160
-; BE-32BIT-P8-PRIV-NEXT:    lfd f16, 368(r1) # 8-byte Folded Reload
-; BE-32BIT-P8-PRIV-NEXT:    lwz r13, 276(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lxvd2x v25, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 144
-; BE-32BIT-P8-PRIV-NEXT:    lfd f15, 360(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lxvd2x v24, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 128
-; BE-32BIT-P8-PRIV-NEXT:    lfd f14, 352(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    lxvd2x v23, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 112
 ; BE-32BIT-P8-PRIV-NEXT:    lxvd2x v22, r1, r4 # 16-byte Folded Reload
@@ -1896,9 +1896,9 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-PRIV-NEXT:    addi r1, r1, 496
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r0, 8(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r12, 4(r1)
-; BE-32BIT-P8-PRIV-NEXT:    mtocrf 32, r12
 ; BE-32BIT-P8-PRIV-NEXT:    hashchkp r0, -424(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    mtlr r0
+; BE-32BIT-P8-PRIV-NEXT:    mtocrf 32, r12
 ; BE-32BIT-P8-PRIV-NEXT:    mtocrf 16, r12
 ; BE-32BIT-P8-PRIV-NEXT:    mtocrf 8, r12
 ; BE-32BIT-P8-PRIV-NEXT:    blr
@@ -2082,8 +2082,8 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P8-NEXT:    add r3, r4, r3
 ; BE-32BIT-P8-NEXT:    addi r1, r1, 80
 ; BE-32BIT-P8-NEXT:    lwz r0, 8(r1)
-; BE-32BIT-P8-NEXT:    hashchk r0, -16(r1)
 ; BE-32BIT-P8-NEXT:    mtlr r0
+; BE-32BIT-P8-NEXT:    hashchk r0, -16(r1)
 ; BE-32BIT-P8-NEXT:    blr
 ; BE-32BIT-P8-NEXT:  L..BB2_2:
 ; BE-32BIT-P8-NEXT:    li r3, 0
@@ -2253,8 +2253,8 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P8-PRIV-NEXT:    add r3, r4, r3
 ; BE-32BIT-P8-PRIV-NEXT:    addi r1, r1, 80
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r0, 8(r1)
-; BE-32BIT-P8-PRIV-NEXT:    hashchkp r0, -16(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    mtlr r0
+; BE-32BIT-P8-PRIV-NEXT:    hashchkp r0, -16(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    blr
 ; BE-32BIT-P8-PRIV-NEXT:  L..BB2_2:
 ; BE-32BIT-P8-PRIV-NEXT:    li r3, 0
@@ -2379,25 +2379,25 @@ define dso_local zeroext i32 @aligned(ptr nocapture readonly %in) #0 {
 ; BE-P8-NEXT:    clrldi r0, r1, 49
 ; BE-P8-NEXT:    subc r0, r12, r0
 ; BE-P8-NEXT:    stdux r1, r1, r0
+; BE-P8-NEXT:    lis r4, 0
 ; BE-P8-NEXT:    std r31, -8(r30) # 8-byte Folded Spill
 ; BE-P8-NEXT:    mr r31, r3
 ; BE-P8-NEXT:    lwz r3, 4(r3)
-; BE-P8-NEXT:    lis r6, 0
-; BE-P8-NEXT:    ori r6, r6, 65508
-; BE-P8-NEXT:    lwz r4, 12(r31)
-; BE-P8-NEXT:    lwz r5, 20(r31)
-; BE-P8-NEXT:    stwx r3, r1, r6
-; BE-P8-NEXT:    lis r3, 0
-; BE-P8-NEXT:    ori r3, r3, 32768
-; BE-P8-NEXT:    stw r5, 32764(r1)
 ; BE-P8-NEXT:    addi r5, r1, 32764
-; BE-P8-NEXT:    stwx r4, r1, r3
-; BE-P8-NEXT:    lis r3, 0
+; BE-P8-NEXT:    ori r4, r4, 65508
+; BE-P8-NEXT:    stwx r3, r1, r4
+; BE-P8-NEXT:    lis r4, 0
+; BE-P8-NEXT:    lwz r3, 12(r31)
+; BE-P8-NEXT:    ori r4, r4, 32768
+; BE-P8-NEXT:    stwx r3, r1, r4
+; BE-P8-NEXT:    lwz r3, 20(r31)
 ; BE-P8-NEXT:    lis r4, 0
-; BE-P8-NEXT:    ori r3, r3, 32768
 ; BE-P8-NEXT:    ori r4, r4, 65508
-; BE-P8-NEXT:    add r3, r1, r3
+; BE-P8-NEXT:    stw r3, 32764(r1)
+; BE-P8-NEXT:    lis r3, 0
 ; BE-P8-NEXT:    add r4, r1, r4
+; BE-P8-NEXT:    ori r3, r3, 32768
+; BE-P8-NEXT:    add r3, r1, r3
 ; BE-P8-NEXT:    bl .callee3[PR]
 ; BE-P8-NEXT:    nop
 ; BE-P8-NEXT:    lwz r4, 16(r31)
@@ -2506,25 +2506,25 @@ define dso_local zeroext i32 @aligned(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-NEXT:    clrlwi r0, r1, 17
 ; BE-32BIT-P8-NEXT:    subc r0, r12, r0
 ; BE-32BIT-P8-NEXT:    stwux r1, r1, r0
+; BE-32BIT-P8-NEXT:    lis r4, 0
 ; BE-32BIT-P8-NEXT:    stw r31, -4(r30) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    mr r31, r3
 ; BE-32BIT-P8-NEXT:    lwz r3, 4(r3)
-; BE-32BIT-P8-NEXT:    lis r6, 0
-; BE-32BIT-P8-NEXT:    ori r6, r6, 65516
-; BE-32BIT-P8-NEXT:    lwz r4, 12(r31)
-; BE-32BIT-P8-NEXT:    lwz r5, 20(r31)
-; BE-32BIT-P8-NEXT:    stwx r3, r1, r6
-; BE-32BIT-P8-NEXT:    lis r3, 0
-; BE-32BIT-P8-NEXT:    ori r3, r3, 32768
-; BE-32BIT-P8-NEXT:    stw r5, 32764(r1)
 ; BE-32BIT-P8-NEXT:    addi r5, r1, 32764
-; BE-32BIT-P8-NEXT:    stwx r4, r1, r3
-; BE-32BIT-P8-NEXT:    lis r3, 0
+; BE-32BIT-P8-NEXT:    ori r4, r4, 65516
+; BE-32BIT-P8-NEXT:    stwx r3, r1, r4
+; BE-32BIT-P8-NEXT:    lis r4, 0
+; BE-32BIT-P8-NEXT:    lwz r3, 12(r31)
+; BE-32BIT-P8-NEXT:    ori r4, r4, 32768
+; BE-32BIT-P8-NEXT:    stwx r3, r1, r4
+; BE-32BIT-P8-NEXT:    lwz r3, 20(r31)
 ; BE-32BIT-P8-NEXT:    lis r4, 0
-; BE-32BIT-P8-NEXT:    ori r3, r3, 32768
 ; BE-32BIT-P8-NEXT:    ori r4, r4, 65516
-; BE-32BIT-P8-NEXT:    add r3, r1, r3
+; BE-32BIT-P8-NEXT:    stw r3, 32764(r1)
+; BE-32BIT-P8-NEXT:    lis r3, 0
 ; BE-32BIT-P8-NEXT:    add r4, r1, r4
+; BE-32BIT-P8-NEXT:    ori r3, r3, 32768
+; BE-32BIT-P8-NEXT:    add r3, r1, r3
 ; BE-32BIT-P8-NEXT:    bl .callee3[PR]
 ; BE-32BIT-P8-NEXT:    nop
 ; BE-32BIT-P8-NEXT:    lwz r4, 16(r31)
@@ -2634,25 +2634,25 @@ define dso_local zeroext i32 @aligned(ptr nocapture readonly %in) #0 {
 ; BE-P8-PRIV-NEXT:    clrldi r0, r1, 49
 ; BE-P8-PRIV-NEXT:    subc r0, r12, r0
 ; BE-P8-PRIV-NEXT:    stdux r1, r1, r0
+; BE-P8-PRIV-NEXT:    lis r4, 0
 ; BE-P8-PRIV-NEXT:    std r31, -8(r30) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    mr r31, r3
 ; BE-P8-PRIV-NEXT:    lwz r3, 4(r3)
-; BE-P8-PRIV-NEXT:    lis r6, 0
-; BE-P8-PRIV-NEXT:    ori r6, r6, 65508
-; BE-P8-PRIV-NEXT:    lwz r4, 12(r31)
-; BE-P8-PRIV-NEXT:    lwz r5, 20(r31)
-; BE-P8-PRIV-NEXT:    stwx r3, r1, r6
-; BE-P8-PRIV-NEXT:    lis r3, 0
-; BE-P8-PRIV-NEXT:    ori r3, r3, 32768
-; BE-P8-PRIV-NEXT:    stw r5, 32764(r1)
 ; BE-P8-PRIV-NEXT:    addi r5, r1, 32764
-; BE-P8-PRIV-NEXT:    stwx r4, r1, r3
-; BE-P8-PRIV-NEXT:    lis r3, 0
+; BE-P8-PRIV-NEXT:    ori r4, r4, 65508
+; BE-P8-PRIV-NEXT:    stwx r3, r1, r4
+; BE-P8-PRIV-NEXT:    lis r4, 0
+; BE-P8-PRIV-NEXT:    lwz r3, 12(r31)
+; BE-P8-PRIV-NEXT:    ori r4, r4, 32768
+; BE-P8-PRIV-NEXT:    stwx r3, r1, r4
+; BE-P8-PRIV-NEXT:    lwz r3, 20(r31)
 ; BE-P8-PRIV-NEXT:    lis r4, 0
-; BE-P8-PRIV-NEXT:    ori r3, r3, 32768
 ; BE-P8-PRIV-NEXT:    ori r4, r4, 65508
-; BE-P8-PRIV-NEXT:    add r3, r1, r3
+; BE-P8-PRIV-NEXT:    stw r3, 32764(r1)
+; BE-P8-PRIV-NEXT:    lis r3, 0
 ; BE-P8-PRIV-NEXT:    add r4, r1, r4
+; BE-P8-PRIV-NEXT:    ori r3, r3, 32768
+; BE-P8-PRIV-NEXT:    add r3, r1, r3
 ; BE-P8-PRIV-NEXT:    bl .callee3[PR]
 ; BE-P8-PRIV-NEXT:    nop
 ; BE-P8-PRIV-NEXT:    lwz r4, 16(r31)
@@ -2761,25 +2761,25 @@ define dso_local zeroext i32 @aligned(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-PRIV-NEXT:    clrlwi r0, r1, 17
 ; BE-32BIT-P8-PRIV-NEXT:    subc r0, r12, r0
 ; BE-32BIT-P8-PRIV-NEXT:    stwux r1, r1, r0
+; BE-32BIT-P8-PRIV-NEXT:    lis r4, 0
 ; BE-32BIT-P8-PRIV-NEXT:    stw r31, -4(r30) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    mr r31, r3
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r3, 4(r3)
-; BE-32BIT-P8-PRIV-NEXT:    lis r6, 0
-; BE-32BIT-P8-PRIV-NEXT:    ori r6, r6, 65516
-; BE-32BIT-P8-PRIV-NEXT:    lwz r4, 12(r31)
-; BE-32BIT-P8-PRIV-NEXT:    lwz r5, 20(r31)
-; BE-32BIT-P8-PRIV-NEXT:    stwx r3, r1, r6
-; BE-32BIT-P8-PRIV-NEXT:    lis r3, 0
-; BE-32BIT-P8-PRIV-NEXT:    ori r3, r3, 32768
-; BE-32BIT-P8-PRIV-NEXT:    stw r5, 32764(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    addi r5, r1, 32764
-; BE-32BIT-P8-PRIV-NEXT:    stwx r4, r1, r3
-; BE-32BIT-P8-PRIV-NEXT:    lis r3, 0
+; BE-32BIT-P8-PRIV-NEXT:    ori r4, r4, 65516
+; BE-32BIT-P8-PRIV-NEXT:    stwx r3, r1, r4
+; BE-32BIT-P8-PRIV-NEXT:    lis r4, 0
+; BE-32BIT-P8-PRIV-NEXT:    lwz r3, 12(r31)
+; BE-32BIT-P8-PRIV-NEXT:    ori r4, r4, 32768
+; BE-32BIT-P8-PRIV-NEXT:    stwx r3, r1, r4
+; BE-32BIT-P8-PRIV-NEXT:    lwz r3, 20(r31)
 ; BE-32BIT-P8-PRIV-NEXT:    lis r4, 0
-; BE-32BIT-P8-PRIV-NEXT:    ori r3, r3, 32768
 ; BE-32BIT-P8-PRIV-NEXT:    ori r4, r4, 65516
-; BE-32BIT-P8-PRIV-NEXT:    add r3, r1, r3
+; BE-32BIT-P8-PRIV-NEXT:    stw r3, 32764(r1)
+; BE-32BIT-P8-PRIV-NEXT:    lis r3, 0
 ; BE-32BIT-P8-PRIV-NEXT:    add r4, r1, r4
+; BE-32BIT-P8-PRIV-NEXT:    ori r3, r3, 32768
+; BE-32BIT-P8-PRIV-NEXT:    add r3, r1, r3
 ; BE-32BIT-P8-PRIV-NEXT:    bl .callee3[PR]
 ; BE-32BIT-P8-PRIV-NEXT:    nop
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r4, 16(r31)

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
index 46b16728d9259ba..1cfa811807baf73 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
@@ -113,9 +113,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; LE-P8-NEXT:    clrldi r3, r3, 32
 ; LE-P8-NEXT:    addi r1, r1, 64
 ; LE-P8-NEXT:    ld r0, 16(r1)
+; LE-P8-NEXT:    mtlr r0
 ; LE-P8-NEXT:    hashchk r0, -24(r1)
 ; LE-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; LE-P8-NEXT:    mtlr r0
 ; LE-P8-NEXT:    blr
 ;
 ; LE-P10-O0-LABEL: caller:
@@ -236,8 +236,8 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-P8-NEXT:    clrldi r3, r3, 32
 ; BE-P8-NEXT:    addi r1, r1, 144
 ; BE-P8-NEXT:    ld r0, 16(r1)
-; BE-P8-NEXT:    hashchk r0, -24(r1)
 ; BE-P8-NEXT:    mtlr r0
+; BE-P8-NEXT:    hashchk r0, -24(r1)
 ; BE-P8-NEXT:    blr
 ;
 ; BE-32BIT-P10-LABEL: caller:
@@ -342,9 +342,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; LE-P8-PRIV-NEXT:    clrldi r3, r3, 32
 ; LE-P8-PRIV-NEXT:    addi r1, r1, 64
 ; LE-P8-PRIV-NEXT:    ld r0, 16(r1)
+; LE-P8-PRIV-NEXT:    mtlr r0
 ; LE-P8-PRIV-NEXT:    hashchkp r0, -24(r1)
 ; LE-P8-PRIV-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; LE-P8-PRIV-NEXT:    mtlr r0
 ; LE-P8-PRIV-NEXT:    blr
 ;
 ; BE-P10-PRIV-LABEL: caller:
@@ -400,8 +400,8 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-P8-PRIV-NEXT:    clrldi r3, r3, 32
 ; BE-P8-PRIV-NEXT:    addi r1, r1, 144
 ; BE-P8-PRIV-NEXT:    ld r0, 16(r1)
-; BE-P8-PRIV-NEXT:    hashchkp r0, -24(r1)
 ; BE-P8-PRIV-NEXT:    mtlr r0
+; BE-P8-PRIV-NEXT:    hashchkp r0, -24(r1)
 ; BE-P8-PRIV-NEXT:    blr
 entry:
   %call = tail call zeroext i32 @callee(i32 zeroext %in)
@@ -670,8 +670,8 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; LE-P8-LABEL: spill:
 ; LE-P8:       # %bb.0: # %entry
 ; LE-P8-NEXT:    mfcr r12
-; LE-P8-NEXT:    mflr r0
 ; LE-P8-NEXT:    stw r12, 8(r1)
+; LE-P8-NEXT:    mflr r0
 ; LE-P8-NEXT:    stdu r1, -544(r1)
 ; LE-P8-NEXT:    li r4, 64
 ; LE-P8-NEXT:    std r0, 560(r1)
@@ -679,64 +679,64 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; LE-P8-NEXT:    std r14, 256(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r15, 264(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r16, 272(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; LE-P8-NEXT:    li r4, 80
 ; LE-P8-NEXT:    std r17, 280(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r18, 288(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
+; LE-P8-NEXT:    li r4, 80
 ; LE-P8-NEXT:    std r19, 296(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r20, 304(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; LE-P8-NEXT:    li r4, 96
 ; LE-P8-NEXT:    std r21, 312(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r22, 320(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r23, 328(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r24, 336(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
+; LE-P8-NEXT:    li r4, 96
 ; LE-P8-NEXT:    std r25, 344(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r26, 352(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r27, 360(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r28, 368(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r29, 376(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r30, 384(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    std r31, 392(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    std r3, 40(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 112
+; LE-P8-NEXT:    std r31, 392(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stfd f14, 400(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f15, 408(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f16, 416(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f17, 424(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f18, 432(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 128
-; LE-P8-NEXT:    stfd f15, 408(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f19, 440(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f20, 448(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f21, 456(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f22, 464(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f23, 472(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f24, 480(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 144
-; LE-P8-NEXT:    stfd f16, 416(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f25, 488(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f26, 496(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f27, 504(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f28, 512(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f29, 520(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f30, 528(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 160
-; LE-P8-NEXT:    stfd f17, 424(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f31, 536(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    std r3, 40(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 176
-; LE-P8-NEXT:    stfd f18, 432(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 192
-; LE-P8-NEXT:    stfd f19, 440(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 208
-; LE-P8-NEXT:    stfd f20, 448(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 224
-; LE-P8-NEXT:    stfd f21, 456(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 240
-; LE-P8-NEXT:    stfd f22, 464(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    lwz r4, 12(r3)
-; LE-P8-NEXT:    stfd f23, 472(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    stfd f24, 480(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    stfd f25, 488(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    stfd f26, 496(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    stfd f27, 504(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    stfd f28, 512(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    stfd f29, 520(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    stfd f30, 528(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    stfd f31, 536(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stw r4, 52(r1)
 ; LE-P8-NEXT:    #APP
 ; LE-P8-NEXT:    nop
@@ -752,55 +752,55 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; LE-P8-NEXT:    lfd f28, 512(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    ld r30, 384(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    ld r29, 376(r1) # 8-byte Folded Reload
-; LE-P8-NEXT:    lwz r4, 16(r4)
 ; LE-P8-NEXT:    lfd f27, 504(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    lfd f26, 496(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    ld r28, 368(r1) # 8-byte Folded Reload
+; LE-P8-NEXT:    ld r27, 360(r1) # 8-byte Folded Reload
+; LE-P8-NEXT:    lwz r4, 16(r4)
 ; LE-P8-NEXT:    lfd f25, 488(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    lfd f24, 480(r1) # 8-byte Folded Reload
-; LE-P8-NEXT:    ld r27, 360(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    ld r26, 352(r1) # 8-byte Folded Reload
-; LE-P8-NEXT:    add r3, r4, r3
-; LE-P8-NEXT:    li r4, 240
 ; LE-P8-NEXT:    lfd f23, 472(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    lfd f22, 464(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    ld r25, 344(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    ld r24, 336(r1) # 8-byte Folded Reload
-; LE-P8-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
-; LE-P8-NEXT:    li r4, 224
 ; LE-P8-NEXT:    lfd f21, 456(r1) # 8-byte Folded Reload
+; LE-P8-NEXT:    lfd f20, 448(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    ld r23, 328(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    ld r22, 320(r1) # 8-byte Folded Reload
-; LE-P8-NEXT:    clrldi r3, r3, 32
-; LE-P8-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
-; LE-P8-NEXT:    li r4, 208
-; LE-P8-NEXT:    lfd f20, 448(r1) # 8-byte Folded Reload
+; LE-P8-NEXT:    add r3, r4, r3
+; LE-P8-NEXT:    li r4, 240
+; LE-P8-NEXT:    lfd f19, 440(r1) # 8-byte Folded Reload
+; LE-P8-NEXT:    lfd f18, 432(r1) # 8-byte Folded Reload
+; LE-P8-NEXT:    lfd f17, 424(r1) # 8-byte Folded Reload
+; LE-P8-NEXT:    lfd f16, 416(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    ld r21, 312(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    ld r20, 304(r1) # 8-byte Folded Reload
-; LE-P8-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
-; LE-P8-NEXT:    li r4, 192
-; LE-P8-NEXT:    lfd f19, 440(r1) # 8-byte Folded Reload
+; LE-P8-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
+; LE-P8-NEXT:    li r4, 224
+; LE-P8-NEXT:    lfd f15, 408(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    ld r19, 296(r1) # 8-byte Folded Reload
+; LE-P8-NEXT:    lfd f14, 400(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    ld r18, 288(r1) # 8-byte Folded Reload
-; LE-P8-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
-; LE-P8-NEXT:    li r4, 176
-; LE-P8-NEXT:    lfd f18, 432(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    ld r17, 280(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    ld r16, 272(r1) # 8-byte Folded Reload
-; LE-P8-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
-; LE-P8-NEXT:    li r4, 160
-; LE-P8-NEXT:    lfd f17, 424(r1) # 8-byte Folded Reload
+; LE-P8-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
+; LE-P8-NEXT:    li r4, 208
 ; LE-P8-NEXT:    ld r15, 264(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    ld r14, 256(r1) # 8-byte Folded Reload
+; LE-P8-NEXT:    clrldi r3, r3, 32
+; LE-P8-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
+; LE-P8-NEXT:    li r4, 192
+; LE-P8-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
+; LE-P8-NEXT:    li r4, 176
+; LE-P8-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
+; LE-P8-NEXT:    li r4, 160
 ; LE-P8-NEXT:    lxvd2x v26, r1, r4 # 16-byte Folded Reload
 ; LE-P8-NEXT:    li r4, 144
-; LE-P8-NEXT:    lfd f16, 416(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    lxvd2x v25, r1, r4 # 16-byte Folded Reload
 ; LE-P8-NEXT:    li r4, 128
-; LE-P8-NEXT:    lfd f15, 408(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    lxvd2x v24, r1, r4 # 16-byte Folded Reload
 ; LE-P8-NEXT:    li r4, 112
-; LE-P8-NEXT:    lfd f14, 400(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    lxvd2x v23, r1, r4 # 16-byte Folded Reload
 ; LE-P8-NEXT:    li r4, 96
 ; LE-P8-NEXT:    lxvd2x v22, r1, r4 # 16-byte Folded Reload
@@ -811,9 +811,9 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; LE-P8-NEXT:    addi r1, r1, 544
 ; LE-P8-NEXT:    ld r0, 16(r1)
 ; LE-P8-NEXT:    lwz r12, 8(r1)
-; LE-P8-NEXT:    mtocrf 32, r12
 ; LE-P8-NEXT:    hashchk r0, -488(r1)
 ; LE-P8-NEXT:    mtlr r0
+; LE-P8-NEXT:    mtocrf 32, r12
 ; LE-P8-NEXT:    mtocrf 16, r12
 ; LE-P8-NEXT:    mtocrf 8, r12
 ; LE-P8-NEXT:    blr
@@ -1483,8 +1483,8 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-LABEL: spill:
 ; BE-P8:       # %bb.0: # %entry
 ; BE-P8-NEXT:    mfcr r12
-; BE-P8-NEXT:    mflr r0
 ; BE-P8-NEXT:    stw r12, 8(r1)
+; BE-P8-NEXT:    mflr r0
 ; BE-P8-NEXT:    stdu r1, -624(r1)
 ; BE-P8-NEXT:    li r4, 144
 ; BE-P8-NEXT:    std r0, 640(r1)
@@ -1492,64 +1492,64 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-NEXT:    std r14, 336(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r16, 352(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; BE-P8-NEXT:    li r4, 160
 ; BE-P8-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
+; BE-P8-NEXT:    li r4, 160
 ; BE-P8-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r20, 384(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; BE-P8-NEXT:    li r4, 176
 ; BE-P8-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r24, 416(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
+; BE-P8-NEXT:    li r4, 176
 ; BE-P8-NEXT:    std r25, 424(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r26, 432(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r27, 440(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r28, 448(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r29, 456(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r30, 464(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    std r31, 472(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 192
+; BE-P8-NEXT:    std r31, 472(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stfd f14, 480(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 208
-; BE-P8-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f23, 552(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f24, 560(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 224
-; BE-P8-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f25, 568(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f26, 576(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f27, 584(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f28, 592(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f29, 600(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f30, 608(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 240
-; BE-P8-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f31, 616(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 256
-; BE-P8-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 272
-; BE-P8-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 288
-; BE-P8-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 304
-; BE-P8-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 320
-; BE-P8-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    lwz r4, 12(r3)
-; BE-P8-NEXT:    stfd f23, 552(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f24, 560(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f25, 568(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f26, 576(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f27, 584(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f28, 592(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f29, 600(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f30, 608(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    stfd f31, 616(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stw r4, 132(r1)
 ; BE-P8-NEXT:    #APP
 ; BE-P8-NEXT:    nop
@@ -1565,55 +1565,55 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-NEXT:    lfd f28, 592(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r30, 464(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r29, 456(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    lwz r4, 16(r4)
 ; BE-P8-NEXT:    lfd f27, 584(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    lfd f26, 576(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r28, 448(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    ld r27, 440(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lwz r4, 16(r4)
 ; BE-P8-NEXT:    lfd f25, 568(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    lfd f24, 560(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    ld r27, 440(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r26, 432(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    add r3, r4, r3
-; BE-P8-NEXT:    li r4, 320
 ; BE-P8-NEXT:    lfd f23, 552(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    lfd f22, 544(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r25, 424(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r24, 416(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
-; BE-P8-NEXT:    li r4, 304
 ; BE-P8-NEXT:    lfd f21, 536(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lfd f20, 528(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r23, 408(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r22, 400(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    clrldi r3, r3, 32
-; BE-P8-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
-; BE-P8-NEXT:    li r4, 288
-; BE-P8-NEXT:    lfd f20, 528(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    add r3, r4, r3
+; BE-P8-NEXT:    li r4, 320
+; BE-P8-NEXT:    lfd f19, 520(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lfd f18, 512(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lfd f17, 504(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lfd f16, 496(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r21, 392(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r20, 384(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
-; BE-P8-NEXT:    li r4, 272
-; BE-P8-NEXT:    lfd f19, 520(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
+; BE-P8-NEXT:    li r4, 304
+; BE-P8-NEXT:    lfd f15, 488(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r19, 376(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lfd f14, 480(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r18, 368(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
-; BE-P8-NEXT:    li r4, 256
-; BE-P8-NEXT:    lfd f18, 512(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r17, 360(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r16, 352(r1) # 8-byte Folded Reload
-; BE-P8-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
-; BE-P8-NEXT:    li r4, 240
-; BE-P8-NEXT:    lfd f17, 504(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
+; BE-P8-NEXT:    li r4, 288
 ; BE-P8-NEXT:    ld r15, 344(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    ld r14, 336(r1) # 8-byte Folded Reload
+; BE-P8-NEXT:    clrldi r3, r3, 32
+; BE-P8-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
+; BE-P8-NEXT:    li r4, 272
+; BE-P8-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
+; BE-P8-NEXT:    li r4, 256
+; BE-P8-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
+; BE-P8-NEXT:    li r4, 240
 ; BE-P8-NEXT:    lxvd2x v26, r1, r4 # 16-byte Folded Reload
 ; BE-P8-NEXT:    li r4, 224
-; BE-P8-NEXT:    lfd f16, 496(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    lxvd2x v25, r1, r4 # 16-byte Folded Reload
 ; BE-P8-NEXT:    li r4, 208
-; BE-P8-NEXT:    lfd f15, 488(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    lxvd2x v24, r1, r4 # 16-byte Folded Reload
 ; BE-P8-NEXT:    li r4, 192
-; BE-P8-NEXT:    lfd f14, 480(r1) # 8-byte Folded Reload
 ; BE-P8-NEXT:    lxvd2x v23, r1, r4 # 16-byte Folded Reload
 ; BE-P8-NEXT:    li r4, 176
 ; BE-P8-NEXT:    lxvd2x v22, r1, r4 # 16-byte Folded Reload
@@ -1624,9 +1624,9 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-NEXT:    addi r1, r1, 624
 ; BE-P8-NEXT:    ld r0, 16(r1)
 ; BE-P8-NEXT:    lwz r12, 8(r1)
-; BE-P8-NEXT:    mtocrf 32, r12
 ; BE-P8-NEXT:    hashchk r0, -488(r1)
 ; BE-P8-NEXT:    mtlr r0
+; BE-P8-NEXT:    mtocrf 32, r12
 ; BE-P8-NEXT:    mtocrf 16, r12
 ; BE-P8-NEXT:    mtocrf 8, r12
 ; BE-P8-NEXT:    blr
@@ -1885,8 +1885,8 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8:       # %bb.0: # %entry
 ; BE-32BIT-P8-NEXT:    mflr r0
 ; BE-32BIT-P8-NEXT:    stwu r1, -448(r1)
-; BE-32BIT-P8-NEXT:    mfcr r12
 ; BE-32BIT-P8-NEXT:    li r4, 32
+; BE-32BIT-P8-NEXT:    mfcr r12
 ; BE-32BIT-P8-NEXT:    stw r0, 452(r1)
 ; BE-32BIT-P8-NEXT:    hashst r0, -424(r1)
 ; BE-32BIT-P8-NEXT:    stw r14, 232(r1) # 4-byte Folded Spill
@@ -1910,47 +1910,47 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-NEXT:    stw r12, 228(r1)
 ; BE-32BIT-P8-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 48
-; BE-32BIT-P8-NEXT:    stfd f14, 304(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 64
-; BE-32BIT-P8-NEXT:    stfd f15, 312(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 80
+; BE-32BIT-P8-NEXT:    stfd f14, 304(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f15, 312(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f16, 320(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 96
 ; BE-32BIT-P8-NEXT:    stfd f17, 328(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f18, 336(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f19, 344(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f20, 352(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f21, 360(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f22, 368(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 112
-; BE-32BIT-P8-NEXT:    stfd f18, 336(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f23, 376(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f24, 384(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f25, 392(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f26, 400(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f27, 408(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f28, 416(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 128
-; BE-32BIT-P8-NEXT:    stfd f19, 344(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f29, 424(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f30, 432(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f31, 440(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stw r3, 16(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 144
-; BE-32BIT-P8-NEXT:    stfd f20, 352(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 160
-; BE-32BIT-P8-NEXT:    stfd f21, 360(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 176
-; BE-32BIT-P8-NEXT:    stfd f22, 368(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 192
-; BE-32BIT-P8-NEXT:    stfd f23, 376(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 208
-; BE-32BIT-P8-NEXT:    stfd f24, 384(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    lwz r4, 12(r3)
-; BE-32BIT-P8-NEXT:    stfd f25, 392(r1) # 8-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stfd f26, 400(r1) # 8-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stfd f27, 408(r1) # 8-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stfd f28, 416(r1) # 8-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stfd f29, 424(r1) # 8-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stfd f30, 432(r1) # 8-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stfd f31, 440(r1) # 8-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stw r3, 16(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stw r4, 20(r1)
 ; BE-32BIT-P8-NEXT:    #APP
 ; BE-32BIT-P8-NEXT:    nop
@@ -1960,9 +1960,9 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-NEXT:    lwz r4, 16(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lfd f31, 440(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lfd f30, 432(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lwz r4, 16(r4)
 ; BE-32BIT-P8-NEXT:    lfd f29, 424(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lfd f28, 416(r1) # 8-byte Folded Reload
-; BE-32BIT-P8-NEXT:    lwz r4, 16(r4)
 ; BE-32BIT-P8-NEXT:    lfd f27, 408(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lfd f26, 400(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lfd f25, 392(r1) # 8-byte Folded Reload
@@ -1971,27 +1971,27 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-NEXT:    li r4, 208
 ; BE-32BIT-P8-NEXT:    lfd f23, 376(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lfd f22, 368(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lfd f21, 360(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lfd f20, 352(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lfd f19, 344(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lfd f18, 336(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lfd f17, 328(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lfd f16, 320(r1) # 8-byte Folded Reload
+; BE-32BIT-P8-NEXT:    lfd f15, 312(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    li r4, 192
-; BE-32BIT-P8-NEXT:    lfd f21, 360(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    li r4, 176
-; BE-32BIT-P8-NEXT:    lfd f20, 352(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    li r4, 160
-; BE-32BIT-P8-NEXT:    lfd f19, 344(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    li r4, 144
-; BE-32BIT-P8-NEXT:    lfd f18, 336(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    li r4, 128
-; BE-32BIT-P8-NEXT:    lfd f17, 328(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lxvd2x v26, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    li r4, 112
-; BE-32BIT-P8-NEXT:    lfd f16, 320(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lxvd2x v25, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    li r4, 96
-; BE-32BIT-P8-NEXT:    lfd f15, 312(r1) # 8-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lxvd2x v24, r1, r4 # 16-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    li r4, 80
 ; BE-32BIT-P8-NEXT:    lxvd2x v23, r1, r4 # 16-byte Folded Reload
@@ -2012,19 +2012,19 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-NEXT:    lwz r25, 276(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r24, 272(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r23, 268(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-NEXT:    mtocrf 32, r12
 ; BE-32BIT-P8-NEXT:    lwz r22, 264(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r21, 260(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r20, 256(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r19, 252(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-NEXT:    mtocrf 16, r12
 ; BE-32BIT-P8-NEXT:    lwz r18, 248(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r17, 244(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r16, 240(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r15, 236(r1) # 4-byte Folded Reload
-; BE-32BIT-P8-NEXT:    mtocrf 8, r12
 ; BE-32BIT-P8-NEXT:    lwz r14, 232(r1) # 4-byte Folded Reload
 ; BE-32BIT-P8-NEXT:    lwz r0, 452(r1)
+; BE-32BIT-P8-NEXT:    mtocrf 32, r12
+; BE-32BIT-P8-NEXT:    mtocrf 16, r12
+; BE-32BIT-P8-NEXT:    mtocrf 8, r12
 ; BE-32BIT-P8-NEXT:    addi r1, r1, 448
 ; BE-32BIT-P8-NEXT:    mtlr r0
 ; BE-32BIT-P8-NEXT:    hashchk r0, -424(r1)
@@ -2286,8 +2286,8 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; LE-P8-PRIV-LABEL: spill:
 ; LE-P8-PRIV:       # %bb.0: # %entry
 ; LE-P8-PRIV-NEXT:    mfcr r12
-; LE-P8-PRIV-NEXT:    mflr r0
 ; LE-P8-PRIV-NEXT:    stw r12, 8(r1)
+; LE-P8-PRIV-NEXT:    mflr r0
 ; LE-P8-PRIV-NEXT:    stdu r1, -544(r1)
 ; LE-P8-PRIV-NEXT:    li r4, 64
 ; LE-P8-PRIV-NEXT:    std r0, 560(r1)
@@ -2295,64 +2295,64 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; LE-P8-PRIV-NEXT:    std r14, 256(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r15, 264(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r16, 272(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; LE-P8-PRIV-NEXT:    li r4, 80
 ; LE-P8-PRIV-NEXT:    std r17, 280(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r18, 288(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
+; LE-P8-PRIV-NEXT:    li r4, 80
 ; LE-P8-PRIV-NEXT:    std r19, 296(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r20, 304(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; LE-P8-PRIV-NEXT:    li r4, 96
 ; LE-P8-PRIV-NEXT:    std r21, 312(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r22, 320(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r23, 328(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r24, 336(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
+; LE-P8-PRIV-NEXT:    li r4, 96
 ; LE-P8-PRIV-NEXT:    std r25, 344(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r26, 352(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r27, 360(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r28, 368(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r29, 376(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r30, 384(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    std r31, 392(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    std r3, 40(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 112
+; LE-P8-PRIV-NEXT:    std r31, 392(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stfd f14, 400(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f15, 408(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f16, 416(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f17, 424(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f18, 432(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 128
-; LE-P8-PRIV-NEXT:    stfd f15, 408(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f19, 440(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f20, 448(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f21, 456(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f22, 464(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f23, 472(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f24, 480(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 144
-; LE-P8-PRIV-NEXT:    stfd f16, 416(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f25, 488(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f26, 496(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f27, 504(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f28, 512(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f29, 520(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f30, 528(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 160
-; LE-P8-PRIV-NEXT:    stfd f17, 424(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f31, 536(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    std r3, 40(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 176
-; LE-P8-PRIV-NEXT:    stfd f18, 432(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 192
-; LE-P8-PRIV-NEXT:    stfd f19, 440(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 208
-; LE-P8-PRIV-NEXT:    stfd f20, 448(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 224
-; LE-P8-PRIV-NEXT:    stfd f21, 456(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 240
-; LE-P8-PRIV-NEXT:    stfd f22, 464(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    lwz r4, 12(r3)
-; LE-P8-PRIV-NEXT:    stfd f23, 472(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    stfd f24, 480(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    stfd f25, 488(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    stfd f26, 496(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    stfd f27, 504(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    stfd f28, 512(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    stfd f29, 520(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    stfd f30, 528(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    stfd f31, 536(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stw r4, 52(r1)
 ; LE-P8-PRIV-NEXT:    #APP
 ; LE-P8-PRIV-NEXT:    nop
@@ -2368,55 +2368,55 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; LE-P8-PRIV-NEXT:    lfd f28, 512(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    ld r30, 384(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    ld r29, 376(r1) # 8-byte Folded Reload
-; LE-P8-PRIV-NEXT:    lwz r4, 16(r4)
 ; LE-P8-PRIV-NEXT:    lfd f27, 504(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    lfd f26, 496(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    ld r28, 368(r1) # 8-byte Folded Reload
+; LE-P8-PRIV-NEXT:    ld r27, 360(r1) # 8-byte Folded Reload
+; LE-P8-PRIV-NEXT:    lwz r4, 16(r4)
 ; LE-P8-PRIV-NEXT:    lfd f25, 488(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    lfd f24, 480(r1) # 8-byte Folded Reload
-; LE-P8-PRIV-NEXT:    ld r27, 360(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    ld r26, 352(r1) # 8-byte Folded Reload
-; LE-P8-PRIV-NEXT:    add r3, r4, r3
-; LE-P8-PRIV-NEXT:    li r4, 240
 ; LE-P8-PRIV-NEXT:    lfd f23, 472(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    lfd f22, 464(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    ld r25, 344(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    ld r24, 336(r1) # 8-byte Folded Reload
-; LE-P8-PRIV-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
-; LE-P8-PRIV-NEXT:    li r4, 224
 ; LE-P8-PRIV-NEXT:    lfd f21, 456(r1) # 8-byte Folded Reload
+; LE-P8-PRIV-NEXT:    lfd f20, 448(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    ld r23, 328(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    ld r22, 320(r1) # 8-byte Folded Reload
-; LE-P8-PRIV-NEXT:    clrldi r3, r3, 32
-; LE-P8-PRIV-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
-; LE-P8-PRIV-NEXT:    li r4, 208
-; LE-P8-PRIV-NEXT:    lfd f20, 448(r1) # 8-byte Folded Reload
+; LE-P8-PRIV-NEXT:    add r3, r4, r3
+; LE-P8-PRIV-NEXT:    li r4, 240
+; LE-P8-PRIV-NEXT:    lfd f19, 440(r1) # 8-byte Folded Reload
+; LE-P8-PRIV-NEXT:    lfd f18, 432(r1) # 8-byte Folded Reload
+; LE-P8-PRIV-NEXT:    lfd f17, 424(r1) # 8-byte Folded Reload
+; LE-P8-PRIV-NEXT:    lfd f16, 416(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    ld r21, 312(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    ld r20, 304(r1) # 8-byte Folded Reload
-; LE-P8-PRIV-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
-; LE-P8-PRIV-NEXT:    li r4, 192
-; LE-P8-PRIV-NEXT:    lfd f19, 440(r1) # 8-byte Folded Reload
+; LE-P8-PRIV-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
+; LE-P8-PRIV-NEXT:    li r4, 224
+; LE-P8-PRIV-NEXT:    lfd f15, 408(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    ld r19, 296(r1) # 8-byte Folded Reload
+; LE-P8-PRIV-NEXT:    lfd f14, 400(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    ld r18, 288(r1) # 8-byte Folded Reload
-; LE-P8-PRIV-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
-; LE-P8-PRIV-NEXT:    li r4, 176
-; LE-P8-PRIV-NEXT:    lfd f18, 432(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    ld r17, 280(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    ld r16, 272(r1) # 8-byte Folded Reload
-; LE-P8-PRIV-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
-; LE-P8-PRIV-NEXT:    li r4, 160
-; LE-P8-PRIV-NEXT:    lfd f17, 424(r1) # 8-byte Folded Reload
+; LE-P8-PRIV-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
+; LE-P8-PRIV-NEXT:    li r4, 208
 ; LE-P8-PRIV-NEXT:    ld r15, 264(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    ld r14, 256(r1) # 8-byte Folded Reload
+; LE-P8-PRIV-NEXT:    clrldi r3, r3, 32
+; LE-P8-PRIV-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
+; LE-P8-PRIV-NEXT:    li r4, 192
+; LE-P8-PRIV-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
+; LE-P8-PRIV-NEXT:    li r4, 176
+; LE-P8-PRIV-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
+; LE-P8-PRIV-NEXT:    li r4, 160
 ; LE-P8-PRIV-NEXT:    lxvd2x v26, r1, r4 # 16-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    li r4, 144
-; LE-P8-PRIV-NEXT:    lfd f16, 416(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    lxvd2x v25, r1, r4 # 16-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    li r4, 128
-; LE-P8-PRIV-NEXT:    lfd f15, 408(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    lxvd2x v24, r1, r4 # 16-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    li r4, 112
-; LE-P8-PRIV-NEXT:    lfd f14, 400(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    lxvd2x v23, r1, r4 # 16-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    li r4, 96
 ; LE-P8-PRIV-NEXT:    lxvd2x v22, r1, r4 # 16-byte Folded Reload
@@ -2427,9 +2427,9 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; LE-P8-PRIV-NEXT:    addi r1, r1, 544
 ; LE-P8-PRIV-NEXT:    ld r0, 16(r1)
 ; LE-P8-PRIV-NEXT:    lwz r12, 8(r1)
-; LE-P8-PRIV-NEXT:    mtocrf 32, r12
 ; LE-P8-PRIV-NEXT:    hashchkp r0, -488(r1)
 ; LE-P8-PRIV-NEXT:    mtlr r0
+; LE-P8-PRIV-NEXT:    mtocrf 32, r12
 ; LE-P8-PRIV-NEXT:    mtocrf 16, r12
 ; LE-P8-PRIV-NEXT:    mtocrf 8, r12
 ; LE-P8-PRIV-NEXT:    blr
@@ -2691,8 +2691,8 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-PRIV-LABEL: spill:
 ; BE-P8-PRIV:       # %bb.0: # %entry
 ; BE-P8-PRIV-NEXT:    mfcr r12
-; BE-P8-PRIV-NEXT:    mflr r0
 ; BE-P8-PRIV-NEXT:    stw r12, 8(r1)
+; BE-P8-PRIV-NEXT:    mflr r0
 ; BE-P8-PRIV-NEXT:    stdu r1, -624(r1)
 ; BE-P8-PRIV-NEXT:    li r4, 144
 ; BE-P8-PRIV-NEXT:    std r0, 640(r1)
@@ -2700,64 +2700,64 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-PRIV-NEXT:    std r14, 336(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r16, 352(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
-; BE-P8-PRIV-NEXT:    li r4, 160
 ; BE-P8-PRIV-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
+; BE-P8-PRIV-NEXT:    li r4, 160
 ; BE-P8-PRIV-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r20, 384(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
-; BE-P8-PRIV-NEXT:    li r4, 176
 ; BE-P8-PRIV-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r24, 416(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
+; BE-P8-PRIV-NEXT:    li r4, 176
 ; BE-P8-PRIV-NEXT:    std r25, 424(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r26, 432(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r27, 440(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r28, 448(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r29, 456(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r30, 464(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    std r31, 472(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 192
+; BE-P8-PRIV-NEXT:    std r31, 472(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stfd f14, 480(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 208
-; BE-P8-PRIV-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f23, 552(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f24, 560(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 224
-; BE-P8-PRIV-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f25, 568(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f26, 576(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f27, 584(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f28, 592(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f29, 600(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f30, 608(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 240
-; BE-P8-PRIV-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f31, 616(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 256
-; BE-P8-PRIV-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 272
-; BE-P8-PRIV-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 288
-; BE-P8-PRIV-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 304
-; BE-P8-PRIV-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 320
-; BE-P8-PRIV-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    lwz r4, 12(r3)
-; BE-P8-PRIV-NEXT:    stfd f23, 552(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f24, 560(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f25, 568(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f26, 576(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f27, 584(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f28, 592(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f29, 600(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f30, 608(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    stfd f31, 616(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stw r4, 132(r1)
 ; BE-P8-PRIV-NEXT:    #APP
 ; BE-P8-PRIV-NEXT:    nop
@@ -2773,55 +2773,55 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-PRIV-NEXT:    lfd f28, 592(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r30, 464(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r29, 456(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    lwz r4, 16(r4)
 ; BE-P8-PRIV-NEXT:    lfd f27, 584(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    lfd f26, 576(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r28, 448(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    ld r27, 440(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lwz r4, 16(r4)
 ; BE-P8-PRIV-NEXT:    lfd f25, 568(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    lfd f24, 560(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    ld r27, 440(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r26, 432(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    add r3, r4, r3
-; BE-P8-PRIV-NEXT:    li r4, 320
 ; BE-P8-PRIV-NEXT:    lfd f23, 552(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    lfd f22, 544(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r25, 424(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r24, 416(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
-; BE-P8-PRIV-NEXT:    li r4, 304
 ; BE-P8-PRIV-NEXT:    lfd f21, 536(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lfd f20, 528(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r23, 408(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r22, 400(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    clrldi r3, r3, 32
-; BE-P8-PRIV-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
-; BE-P8-PRIV-NEXT:    li r4, 288
-; BE-P8-PRIV-NEXT:    lfd f20, 528(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    add r3, r4, r3
+; BE-P8-PRIV-NEXT:    li r4, 320
+; BE-P8-PRIV-NEXT:    lfd f19, 520(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lfd f18, 512(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lfd f17, 504(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lfd f16, 496(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r21, 392(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r20, 384(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
-; BE-P8-PRIV-NEXT:    li r4, 272
-; BE-P8-PRIV-NEXT:    lfd f19, 520(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lxvd2x v31, r1, r4 # 16-byte Folded Reload
+; BE-P8-PRIV-NEXT:    li r4, 304
+; BE-P8-PRIV-NEXT:    lfd f15, 488(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r19, 376(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lfd f14, 480(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r18, 368(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
-; BE-P8-PRIV-NEXT:    li r4, 256
-; BE-P8-PRIV-NEXT:    lfd f18, 512(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r17, 360(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r16, 352(r1) # 8-byte Folded Reload
-; BE-P8-PRIV-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
-; BE-P8-PRIV-NEXT:    li r4, 240
-; BE-P8-PRIV-NEXT:    lfd f17, 504(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    lxvd2x v30, r1, r4 # 16-byte Folded Reload
+; BE-P8-PRIV-NEXT:    li r4, 288
 ; BE-P8-PRIV-NEXT:    ld r15, 344(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    ld r14, 336(r1) # 8-byte Folded Reload
+; BE-P8-PRIV-NEXT:    clrldi r3, r3, 32
+; BE-P8-PRIV-NEXT:    lxvd2x v29, r1, r4 # 16-byte Folded Reload
+; BE-P8-PRIV-NEXT:    li r4, 272
+; BE-P8-PRIV-NEXT:    lxvd2x v28, r1, r4 # 16-byte Folded Reload
+; BE-P8-PRIV-NEXT:    li r4, 256
+; BE-P8-PRIV-NEXT:    lxvd2x v27, r1, r4 # 16-byte Folded Reload
+; BE-P8-PRIV-NEXT:    li r4, 240
 ; BE-P8-PRIV-NEXT:    lxvd2x v26, r1, r4 # 16-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    li r4, 224
-; BE-P8-PRIV-NEXT:    lfd f16, 496(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    lxvd2x v25, r1, r4 # 16-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    li r4, 208
-; BE-P8-PRIV-NEXT:    lfd f15, 488(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    lxvd2x v24, r1, r4 # 16-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    li r4, 192
-; BE-P8-PRIV-NEXT:    lfd f14, 480(r1) # 8-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    lxvd2x v23, r1, r4 # 16-byte Folded Reload
 ; BE-P8-PRIV-NEXT:    li r4, 176
 ; BE-P8-PRIV-NEXT:    lxvd2x v22, r1, r4 # 16-byte Folded Reload
@@ -2832,9 +2832,9 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-PRIV-NEXT:    addi r1, r1, 624
 ; BE-P8-PRIV-NEXT:    ld r0, 16(r1)
 ; BE-P8-PRIV-NEXT:    lwz r12, 8(r1)
-; BE-P8-PRIV-NEXT:    mtocrf 32, r12
 ; BE-P8-PRIV-NEXT:    hashchkp r0, -488(r1)
 ; BE-P8-PRIV-NEXT:    mtlr r0
+; BE-P8-PRIV-NEXT:    mtocrf 32, r12
 ; BE-P8-PRIV-NEXT:    mtocrf 16, r12
 ; BE-P8-PRIV-NEXT:    mtocrf 8, r12
 ; BE-P8-PRIV-NEXT:    blr
@@ -2933,8 +2933,8 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P8-NEXT:    ld r0, 16(r1)
 ; LE-P8-NEXT:    clrldi r3, r3, 32
 ; LE-P8-NEXT:    hashchk r0, -24(r1)
-; LE-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    mtlr r0
+; LE-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; LE-P8-NEXT:    blr
 ; LE-P8-NEXT:  .LBB2_2:
 ; LE-P8-NEXT:    li r3, 0
@@ -3286,8 +3286,8 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P8-PRIV-NEXT:    ld r0, 16(r1)
 ; LE-P8-PRIV-NEXT:    clrldi r3, r3, 32
 ; LE-P8-PRIV-NEXT:    hashchkp r0, -24(r1)
-; LE-P8-PRIV-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    mtlr r0
+; LE-P8-PRIV-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; LE-P8-PRIV-NEXT:    blr
 ; LE-P8-PRIV-NEXT:  .LBB2_2:
 ; LE-P8-PRIV-NEXT:    li r3, 0
@@ -3498,25 +3498,25 @@ define dso_local zeroext i32 @aligned(ptr nocapture readonly %in) #0 {
 ; LE-P8-NEXT:    clrldi r0, r1, 49
 ; LE-P8-NEXT:    subc r0, r12, r0
 ; LE-P8-NEXT:    stdux r1, r1, r0
+; LE-P8-NEXT:    lis r4, 0
 ; LE-P8-NEXT:    std r29, -24(r30) # 8-byte Folded Spill
 ; LE-P8-NEXT:    mr r29, r3
 ; LE-P8-NEXT:    lwz r3, 4(r3)
-; LE-P8-NEXT:    lis r6, 0
-; LE-P8-NEXT:    ori r6, r6, 65500
-; LE-P8-NEXT:    lwz r4, 12(r29)
-; LE-P8-NEXT:    lwz r5, 20(r29)
-; LE-P8-NEXT:    stwx r3, r1, r6
-; LE-P8-NEXT:    lis r3, 0
-; LE-P8-NEXT:    ori r3, r3, 32768
-; LE-P8-NEXT:    stw r5, 32764(r1)
 ; LE-P8-NEXT:    addi r5, r1, 32764
-; LE-P8-NEXT:    stwx r4, r1, r3
-; LE-P8-NEXT:    lis r3, 0
+; LE-P8-NEXT:    ori r4, r4, 65500
+; LE-P8-NEXT:    stwx r3, r1, r4
+; LE-P8-NEXT:    lis r4, 0
+; LE-P8-NEXT:    lwz r3, 12(r29)
+; LE-P8-NEXT:    ori r4, r4, 32768
+; LE-P8-NEXT:    stwx r3, r1, r4
+; LE-P8-NEXT:    lwz r3, 20(r29)
 ; LE-P8-NEXT:    lis r4, 0
-; LE-P8-NEXT:    ori r3, r3, 32768
 ; LE-P8-NEXT:    ori r4, r4, 65500
-; LE-P8-NEXT:    add r3, r1, r3
+; LE-P8-NEXT:    stw r3, 32764(r1)
+; LE-P8-NEXT:    lis r3, 0
 ; LE-P8-NEXT:    add r4, r1, r4
+; LE-P8-NEXT:    ori r3, r3, 32768
+; LE-P8-NEXT:    add r3, r1, r3
 ; LE-P8-NEXT:    bl callee3
 ; LE-P8-NEXT:    nop
 ; LE-P8-NEXT:    lwz r4, 16(r29)
@@ -3756,25 +3756,25 @@ define dso_local zeroext i32 @aligned(ptr nocapture readonly %in) #0 {
 ; BE-P8-NEXT:    clrldi r0, r1, 49
 ; BE-P8-NEXT:    subc r0, r12, r0
 ; BE-P8-NEXT:    stdux r1, r1, r0
+; BE-P8-NEXT:    lis r4, 0
 ; BE-P8-NEXT:    std r29, -24(r30) # 8-byte Folded Spill
 ; BE-P8-NEXT:    mr r29, r3
 ; BE-P8-NEXT:    lwz r3, 4(r3)
-; BE-P8-NEXT:    lis r6, 0
-; BE-P8-NEXT:    ori r6, r6, 65500
-; BE-P8-NEXT:    lwz r4, 12(r29)
-; BE-P8-NEXT:    lwz r5, 20(r29)
-; BE-P8-NEXT:    stwx r3, r1, r6
-; BE-P8-NEXT:    lis r3, 0
-; BE-P8-NEXT:    ori r3, r3, 32768
-; BE-P8-NEXT:    stw r5, 32764(r1)
 ; BE-P8-NEXT:    addi r5, r1, 32764
-; BE-P8-NEXT:    stwx r4, r1, r3
-; BE-P8-NEXT:    lis r3, 0
+; BE-P8-NEXT:    ori r4, r4, 65500
+; BE-P8-NEXT:    stwx r3, r1, r4
+; BE-P8-NEXT:    lis r4, 0
+; BE-P8-NEXT:    lwz r3, 12(r29)
+; BE-P8-NEXT:    ori r4, r4, 32768
+; BE-P8-NEXT:    stwx r3, r1, r4
+; BE-P8-NEXT:    lwz r3, 20(r29)
 ; BE-P8-NEXT:    lis r4, 0
-; BE-P8-NEXT:    ori r3, r3, 32768
 ; BE-P8-NEXT:    ori r4, r4, 65500
-; BE-P8-NEXT:    add r3, r1, r3
+; BE-P8-NEXT:    stw r3, 32764(r1)
+; BE-P8-NEXT:    lis r3, 0
 ; BE-P8-NEXT:    add r4, r1, r4
+; BE-P8-NEXT:    ori r3, r3, 32768
+; BE-P8-NEXT:    add r3, r1, r3
 ; BE-P8-NEXT:    bl callee3
 ; BE-P8-NEXT:    nop
 ; BE-P8-NEXT:    lwz r4, 16(r29)
@@ -3890,35 +3890,35 @@ define dso_local zeroext i32 @aligned(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-NEXT:    subc r0, r12, r0
 ; BE-32BIT-P8-NEXT:    stwux r1, r1, r0
 ; BE-32BIT-P8-NEXT:    sub r0, r1, r0
-; BE-32BIT-P8-NEXT:    lis r6, 0
+; BE-32BIT-P8-NEXT:    lis r4, 0
+; BE-32BIT-P8-NEXT:    addi r5, r1, 32764
 ; BE-32BIT-P8-NEXT:    addic r0, r0, -8
-; BE-32BIT-P8-NEXT:    ori r6, r6, 65508
+; BE-32BIT-P8-NEXT:    ori r4, r4, 65508
 ; BE-32BIT-P8-NEXT:    stwx r30, 0, r0
 ; BE-32BIT-P8-NEXT:    addic r30, r0, 8
 ; BE-32BIT-P8-NEXT:    stw r29, -12(r30) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    mr r29, r3
 ; BE-32BIT-P8-NEXT:    lwz r3, 4(r3)
-; BE-32BIT-P8-NEXT:    lwz r4, 12(r29)
-; BE-32BIT-P8-NEXT:    lwz r5, 20(r29)
-; BE-32BIT-P8-NEXT:    stwx r3, r1, r6
-; BE-32BIT-P8-NEXT:    lis r3, 0
-; BE-32BIT-P8-NEXT:    ori r3, r3, 32768
-; BE-32BIT-P8-NEXT:    stw r5, 32764(r1)
-; BE-32BIT-P8-NEXT:    addi r5, r1, 32764
-; BE-32BIT-P8-NEXT:    stwx r4, r1, r3
-; BE-32BIT-P8-NEXT:    lis r3, 0
+; BE-32BIT-P8-NEXT:    stwx r3, r1, r4
+; BE-32BIT-P8-NEXT:    lis r4, 0
+; BE-32BIT-P8-NEXT:    lwz r3, 12(r29)
+; BE-32BIT-P8-NEXT:    ori r4, r4, 32768
+; BE-32BIT-P8-NEXT:    stwx r3, r1, r4
+; BE-32BIT-P8-NEXT:    lwz r3, 20(r29)
 ; BE-32BIT-P8-NEXT:    lis r4, 0
-; BE-32BIT-P8-NEXT:    ori r3, r3, 32768
 ; BE-32BIT-P8-NEXT:    ori r4, r4, 65508
-; BE-32BIT-P8-NEXT:    add r3, r1, r3
+; BE-32BIT-P8-NEXT:    stw r3, 32764(r1)
+; BE-32BIT-P8-NEXT:    lis r3, 0
 ; BE-32BIT-P8-NEXT:    add r4, r1, r4
+; BE-32BIT-P8-NEXT:    ori r3, r3, 32768
+; BE-32BIT-P8-NEXT:    add r3, r1, r3
 ; BE-32BIT-P8-NEXT:    bl callee3
 ; BE-32BIT-P8-NEXT:    lwz r4, 16(r29)
 ; BE-32BIT-P8-NEXT:    lwz r29, -12(r30) # 4-byte Folded Reload
+; BE-32BIT-P8-NEXT:    add r3, r4, r3
 ; BE-32BIT-P8-NEXT:    mr r0, r31
 ; BE-32BIT-P8-NEXT:    lwz r31, 0(r1)
 ; BE-32BIT-P8-NEXT:    lwz r30, -8(r31)
-; BE-32BIT-P8-NEXT:    add r3, r4, r3
 ; BE-32BIT-P8-NEXT:    mr r1, r31
 ; BE-32BIT-P8-NEXT:    mr r31, r0
 ; BE-32BIT-P8-NEXT:    lwz r0, 4(r1)
@@ -4022,25 +4022,25 @@ define dso_local zeroext i32 @aligned(ptr nocapture readonly %in) #0 {
 ; LE-P8-PRIV-NEXT:    clrldi r0, r1, 49
 ; LE-P8-PRIV-NEXT:    subc r0, r12, r0
 ; LE-P8-PRIV-NEXT:    stdux r1, r1, r0
+; LE-P8-PRIV-NEXT:    lis r4, 0
 ; LE-P8-PRIV-NEXT:    std r29, -24(r30) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    mr r29, r3
 ; LE-P8-PRIV-NEXT:    lwz r3, 4(r3)
-; LE-P8-PRIV-NEXT:    lis r6, 0
-; LE-P8-PRIV-NEXT:    ori r6, r6, 65500
-; LE-P8-PRIV-NEXT:    lwz r4, 12(r29)
-; LE-P8-PRIV-NEXT:    lwz r5, 20(r29)
-; LE-P8-PRIV-NEXT:    stwx r3, r1, r6
-; LE-P8-PRIV-NEXT:    lis r3, 0
-; LE-P8-PRIV-NEXT:    ori r3, r3, 32768
-; LE-P8-PRIV-NEXT:    stw r5, 32764(r1)
 ; LE-P8-PRIV-NEXT:    addi r5, r1, 32764
-; LE-P8-PRIV-NEXT:    stwx r4, r1, r3
-; LE-P8-PRIV-NEXT:    lis r3, 0
+; LE-P8-PRIV-NEXT:    ori r4, r4, 65500
+; LE-P8-PRIV-NEXT:    stwx r3, r1, r4
+; LE-P8-PRIV-NEXT:    lis r4, 0
+; LE-P8-PRIV-NEXT:    lwz r3, 12(r29)
+; LE-P8-PRIV-NEXT:    ori r4, r4, 32768
+; LE-P8-PRIV-NEXT:    stwx r3, r1, r4
+; LE-P8-PRIV-NEXT:    lwz r3, 20(r29)
 ; LE-P8-PRIV-NEXT:    lis r4, 0
-; LE-P8-PRIV-NEXT:    ori r3, r3, 32768
 ; LE-P8-PRIV-NEXT:    ori r4, r4, 65500
-; LE-P8-PRIV-NEXT:    add r3, r1, r3
+; LE-P8-PRIV-NEXT:    stw r3, 32764(r1)
+; LE-P8-PRIV-NEXT:    lis r3, 0
 ; LE-P8-PRIV-NEXT:    add r4, r1, r4
+; LE-P8-PRIV-NEXT:    ori r3, r3, 32768
+; LE-P8-PRIV-NEXT:    add r3, r1, r3
 ; LE-P8-PRIV-NEXT:    bl callee3
 ; LE-P8-PRIV-NEXT:    nop
 ; LE-P8-PRIV-NEXT:    lwz r4, 16(r29)
@@ -4151,25 +4151,25 @@ define dso_local zeroext i32 @aligned(ptr nocapture readonly %in) #0 {
 ; BE-P8-PRIV-NEXT:    clrldi r0, r1, 49
 ; BE-P8-PRIV-NEXT:    subc r0, r12, r0
 ; BE-P8-PRIV-NEXT:    stdux r1, r1, r0
+; BE-P8-PRIV-NEXT:    lis r4, 0
 ; BE-P8-PRIV-NEXT:    std r29, -24(r30) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    mr r29, r3
 ; BE-P8-PRIV-NEXT:    lwz r3, 4(r3)
-; BE-P8-PRIV-NEXT:    lis r6, 0
-; BE-P8-PRIV-NEXT:    ori r6, r6, 65500
-; BE-P8-PRIV-NEXT:    lwz r4, 12(r29)
-; BE-P8-PRIV-NEXT:    lwz r5, 20(r29)
-; BE-P8-PRIV-NEXT:    stwx r3, r1, r6
-; BE-P8-PRIV-NEXT:    lis r3, 0
-; BE-P8-PRIV-NEXT:    ori r3, r3, 32768
-; BE-P8-PRIV-NEXT:    stw r5, 32764(r1)
 ; BE-P8-PRIV-NEXT:    addi r5, r1, 32764
-; BE-P8-PRIV-NEXT:    stwx r4, r1, r3
-; BE-P8-PRIV-NEXT:    lis r3, 0
+; BE-P8-PRIV-NEXT:    ori r4, r4, 65500
+; BE-P8-PRIV-NEXT:    stwx r3, r1, r4
+; BE-P8-PRIV-NEXT:    lis r4, 0
+; BE-P8-PRIV-NEXT:    lwz r3, 12(r29)
+; BE-P8-PRIV-NEXT:    ori r4, r4, 32768
+; BE-P8-PRIV-NEXT:    stwx r3, r1, r4
+; BE-P8-PRIV-NEXT:    lwz r3, 20(r29)
 ; BE-P8-PRIV-NEXT:    lis r4, 0
-; BE-P8-PRIV-NEXT:    ori r3, r3, 32768
 ; BE-P8-PRIV-NEXT:    ori r4, r4, 65500
-; BE-P8-PRIV-NEXT:    add r3, r1, r3
+; BE-P8-PRIV-NEXT:    stw r3, 32764(r1)
+; BE-P8-PRIV-NEXT:    lis r3, 0
 ; BE-P8-PRIV-NEXT:    add r4, r1, r4
+; BE-P8-PRIV-NEXT:    ori r3, r3, 32768
+; BE-P8-PRIV-NEXT:    add r3, r1, r3
 ; BE-P8-PRIV-NEXT:    bl callee3
 ; BE-P8-PRIV-NEXT:    nop
 ; BE-P8-PRIV-NEXT:    lwz r4, 16(r29)

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-varargs.ll b/llvm/test/CodeGen/PowerPC/ppc64-varargs.ll
index e22dfe9f080312d..712d0cb2646685f 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-varargs.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-varargs.ll
@@ -38,16 +38,16 @@ define i32 @f1(...) nounwind {
 ; LE-LABEL: f1:
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    std r3, 32(r1)
+; LE-NEXT:    addi r3, r1, 32
 ; LE-NEXT:    std r4, 40(r1)
-; LE-NEXT:    addi r4, r1, 32
-; LE-NEXT:    li r3, 0
 ; LE-NEXT:    std r5, 48(r1)
 ; LE-NEXT:    std r6, 56(r1)
 ; LE-NEXT:    std r7, 64(r1)
 ; LE-NEXT:    std r8, 72(r1)
 ; LE-NEXT:    std r9, 80(r1)
+; LE-NEXT:    std r3, -8(r1)
+; LE-NEXT:    li r3, 0
 ; LE-NEXT:    std r10, 88(r1)
-; LE-NEXT:    std r4, -8(r1)
 ; LE-NEXT:    blr
 entry:
   %va = alloca ptr, align 8

diff  --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
index 9bc88c93ccb0a3c..033728500abc8cf 100644
--- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
@@ -1293,11 +1293,11 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-NEXT:    xxlxor 3, 3, 3
 ; PC64LE-NEXT:    std 0, 64(1)
 ; PC64LE-NEXT:    lfs 0, .LCPI31_0@toc@l(3)
+; PC64LE-NEXT:    fcmpo 1, 2, 3
 ; PC64LE-NEXT:    lis 3, -32768
-; PC64LE-NEXT:    fcmpo 0, 2, 3
-; PC64LE-NEXT:    fcmpo 1, 1, 0
-; PC64LE-NEXT:    crand 20, 6, 0
-; PC64LE-NEXT:    crandc 21, 4, 6
+; PC64LE-NEXT:    fcmpo 0, 1, 0
+; PC64LE-NEXT:    crand 20, 2, 4
+; PC64LE-NEXT:    crandc 21, 0, 2
 ; PC64LE-NEXT:    cror 20, 21, 20
 ; PC64LE-NEXT:    isel 30, 0, 3, 20
 ; PC64LE-NEXT:    bc 12, 20, .LBB31_2
@@ -1424,10 +1424,10 @@ define void @test_constrained_libcall_multichain(ptr %firstptr, ptr %result) #0
 ; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    mr 29, 3
 ; PC64LE-NEXT:    xxlxor 2, 2, 2
+; PC64LE-NEXT:    xxlxor 4, 4, 4
+; PC64LE-NEXT:    lfs 31, 0(3)
 ; PC64LE-NEXT:    li 3, 0
 ; PC64LE-NEXT:    mr 30, 4
-; PC64LE-NEXT:    lfs 31, 0(29)
-; PC64LE-NEXT:    xxlxor 4, 4, 4
 ; PC64LE-NEXT:    std 3, 8(4)
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    fmr 3, 31
@@ -1436,10 +1436,10 @@ define void @test_constrained_libcall_multichain(ptr %firstptr, ptr %result) #0
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    fmr 4, 2
-; PC64LE-NEXT:    fmr 30, 1
-; PC64LE-NEXT:    fmr 29, 2
 ; PC64LE-NEXT:    stfd 2, 24(30)
 ; PC64LE-NEXT:    stfd 1, 16(30)
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    fmr 29, 2
 ; PC64LE-NEXT:    bl __gcc_qmul
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    fmr 1, 31

diff  --git a/llvm/test/CodeGen/PowerPC/pr25080.ll b/llvm/test/CodeGen/PowerPC/pr25080.ll
index 4d9dc128a0c6db0..c696f0b6bd3f22b 100644
--- a/llvm/test/CodeGen/PowerPC/pr25080.ll
+++ b/llvm/test/CodeGen/PowerPC/pr25080.ll
@@ -13,39 +13,39 @@ define <8 x i16> @pr25080(<8 x i32> %a) {
 ; LE-NEXT:    xxland 35, 35, 0
 ; LE-NEXT:    vcmpequw 2, 2, 4
 ; LE-NEXT:    vcmpequw 3, 3, 4
-; LE-NEXT:    xxswapd 0, 34
-; LE-NEXT:    mfvsrwz 3, 34
-; LE-NEXT:    xxsldwi 1, 34, 34, 1
-; LE-NEXT:    mfvsrwz 4, 35
-; LE-NEXT:    xxsldwi 2, 34, 34, 3
-; LE-NEXT:    mtvsrd 36, 3
-; LE-NEXT:    mffprwz 3, 0
+; LE-NEXT:    xxswapd 1, 34
+; LE-NEXT:    xxsldwi 2, 34, 34, 1
+; LE-NEXT:    xxsldwi 3, 34, 34, 3
 ; LE-NEXT:    xxswapd 0, 35
-; LE-NEXT:    mtvsrd 37, 4
-; LE-NEXT:    mffprwz 4, 1
-; LE-NEXT:    xxsldwi 1, 35, 35, 1
-; LE-NEXT:    mtvsrd 34, 3
-; LE-NEXT:    mffprwz 3, 2
-; LE-NEXT:    mtvsrd 32, 4
-; LE-NEXT:    mffprwz 4, 0
-; LE-NEXT:    xxsldwi 0, 35, 35, 3
-; LE-NEXT:    mtvsrd 33, 3
+; LE-NEXT:    xxsldwi 4, 35, 35, 1
+; LE-NEXT:    xxsldwi 5, 35, 35, 3
 ; LE-NEXT:    mffprwz 3, 1
-; LE-NEXT:    mtvsrd 38, 4
-; LE-NEXT:    mtvsrd 35, 3
+; LE-NEXT:    mtvsrd 36, 3
+; LE-NEXT:    mffprwz 3, 2
+; LE-NEXT:    mtvsrd 37, 3
+; LE-NEXT:    mfvsrwz 3, 34
+; LE-NEXT:    mtvsrd 34, 3
+; LE-NEXT:    mffprwz 3, 3
+; LE-NEXT:    vmrghh 4, 5, 4
+; LE-NEXT:    mtvsrd 37, 3
 ; LE-NEXT:    mffprwz 3, 0
-; LE-NEXT:    vmrghh 2, 0, 2
+; LE-NEXT:    vmrghh 2, 5, 2
+; LE-NEXT:    mtvsrd 37, 3
+; LE-NEXT:    mffprwz 3, 4
+; LE-NEXT:    mtvsrd 32, 3
+; LE-NEXT:    mfvsrwz 3, 35
+; LE-NEXT:    mtvsrd 35, 3
+; LE-NEXT:    mffprwz 3, 5
+; LE-NEXT:    xxmrglw 0, 34, 36
+; LE-NEXT:    vmrghh 5, 0, 5
 ; LE-NEXT:    mtvsrd 32, 3
 ; LE-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
-; LE-NEXT:    vmrghh 4, 1, 4
 ; LE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
-; LE-NEXT:    vmrghh 3, 3, 6
-; LE-NEXT:    lxvd2x 2, 0, 3
-; LE-NEXT:    vmrghh 5, 0, 5
-; LE-NEXT:    xxmrglw 0, 36, 34
-; LE-NEXT:    xxmrglw 1, 37, 35
-; LE-NEXT:    xxswapd 35, 2
+; LE-NEXT:    vmrghh 3, 0, 3
+; LE-NEXT:    xxmrglw 1, 35, 37
 ; LE-NEXT:    xxmrgld 34, 1, 0
+; LE-NEXT:    lxvd2x 0, 0, 3
+; LE-NEXT:    xxswapd 35, 0
 ; LE-NEXT:    xxlor 34, 34, 35
 ; LE-NEXT:    blr
 ;
@@ -55,43 +55,43 @@ define <8 x i16> @pr25080(<8 x i32> %a) {
 ; BE-NEXT:    xxlxor 36, 36, 36
 ; BE-NEXT:    addi 3, 3, .LCPI0_0@toc@l
 ; BE-NEXT:    lxvw4x 0, 0, 3
+; BE-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
+; BE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
 ; BE-NEXT:    xxland 35, 35, 0
 ; BE-NEXT:    xxland 34, 34, 0
 ; BE-NEXT:    vcmpequw 3, 3, 4
 ; BE-NEXT:    vcmpequw 2, 2, 4
-; BE-NEXT:    xxswapd 0, 35
-; BE-NEXT:    mfvsrwz 3, 35
-; BE-NEXT:    xxsldwi 1, 35, 35, 1
-; BE-NEXT:    mfvsrwz 4, 34
-; BE-NEXT:    mtvsrwz 36, 3
-; BE-NEXT:    xxsldwi 2, 35, 35, 3
-; BE-NEXT:    mffprwz 3, 0
+; BE-NEXT:    lxvw4x 36, 0, 3
+; BE-NEXT:    xxswapd 1, 35
+; BE-NEXT:    xxsldwi 2, 35, 35, 1
+; BE-NEXT:    xxsldwi 3, 35, 35, 3
 ; BE-NEXT:    xxswapd 0, 34
-; BE-NEXT:    mtvsrwz 35, 4
-; BE-NEXT:    mffprwz 4, 1
-; BE-NEXT:    xxsldwi 1, 34, 34, 1
-; BE-NEXT:    mtvsrwz 37, 3
-; BE-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
-; BE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
-; BE-NEXT:    mtvsrwz 32, 4
-; BE-NEXT:    mffprwz 4, 0
-; BE-NEXT:    lxvw4x 33, 0, 3
-; BE-NEXT:    xxsldwi 0, 34, 34, 3
+; BE-NEXT:    xxsldwi 4, 34, 34, 1
+; BE-NEXT:    xxsldwi 5, 34, 34, 3
 ; BE-NEXT:    mffprwz 3, 1
-; BE-NEXT:    mffprwz 5, 2
-; BE-NEXT:    vperm 2, 0, 5, 1
 ; BE-NEXT:    mtvsrwz 37, 3
+; BE-NEXT:    mffprwz 3, 2
+; BE-NEXT:    mtvsrwz 32, 3
+; BE-NEXT:    mfvsrwz 3, 35
+; BE-NEXT:    mtvsrwz 35, 3
+; BE-NEXT:    mffprwz 3, 3
+; BE-NEXT:    vperm 5, 0, 5, 4
+; BE-NEXT:    mtvsrwz 32, 3
 ; BE-NEXT:    mffprwz 3, 0
-; BE-NEXT:    mtvsrwz 38, 5
-; BE-NEXT:    mtvsrwz 39, 4
+; BE-NEXT:    vperm 3, 0, 3, 4
 ; BE-NEXT:    mtvsrwz 32, 3
+; BE-NEXT:    mffprwz 3, 4
+; BE-NEXT:    mtvsrwz 33, 3
+; BE-NEXT:    mfvsrwz 3, 34
+; BE-NEXT:    mtvsrwz 34, 3
+; BE-NEXT:    mffprwz 3, 5
+; BE-NEXT:    xxmrghw 0, 35, 37
+; BE-NEXT:    vperm 0, 1, 0, 4
+; BE-NEXT:    mtvsrwz 33, 3
 ; BE-NEXT:    addis 3, 2, .LCPI0_2@toc@ha
-; BE-NEXT:    vperm 4, 6, 4, 1
 ; BE-NEXT:    addi 3, 3, .LCPI0_2@toc@l
-; BE-NEXT:    vperm 5, 5, 7, 1
-; BE-NEXT:    vperm 3, 0, 3, 1
-; BE-NEXT:    xxmrghw 0, 36, 34
-; BE-NEXT:    xxmrghw 1, 35, 37
+; BE-NEXT:    vperm 2, 1, 2, 4
+; BE-NEXT:    xxmrghw 1, 34, 32
 ; BE-NEXT:    xxmrghd 34, 1, 0
 ; BE-NEXT:    lxvw4x 0, 0, 3
 ; BE-NEXT:    xxlor 34, 34, 0

diff  --git a/llvm/test/CodeGen/PowerPC/pr27078.ll b/llvm/test/CodeGen/PowerPC/pr27078.ll
index 6036e4e5830004e..ee4d4ff9c6c790a 100644
--- a/llvm/test/CodeGen/PowerPC/pr27078.ll
+++ b/llvm/test/CodeGen/PowerPC/pr27078.ll
@@ -5,24 +5,24 @@ define <4 x float> @bar(ptr %p, ptr %q) {
 ; CHECK-LABEL: bar:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li 5, 16
-; CHECK-NEXT:    lxvw4x 2, 0, 3
+; CHECK-NEXT:    lxvw4x 1, 0, 3
 ; CHECK-NEXT:    lxvw4x 3, 0, 4
-; CHECK-NEXT:    addis 6, 2, .LCPI0_0@toc@ha
+; CHECK-NEXT:    xvsubsp 35, 3, 1
 ; CHECK-NEXT:    lxvw4x 0, 3, 5
-; CHECK-NEXT:    lxvw4x 1, 4, 5
+; CHECK-NEXT:    lxvw4x 2, 4, 5
+; CHECK-NEXT:    addis 5, 2, .LCPI0_0@toc@ha
+; CHECK-NEXT:    addi 5, 5, .LCPI0_0@toc@l
+; CHECK-NEXT:    lxvw4x 36, 0, 5
 ; CHECK-NEXT:    li 5, 32
-; CHECK-NEXT:    xvsubsp 35, 3, 2
-; CHECK-NEXT:    xvsubsp 34, 1, 0
+; CHECK-NEXT:    xvsubsp 34, 2, 0
 ; CHECK-NEXT:    lxvw4x 0, 3, 5
-; CHECK-NEXT:    addi 3, 6, .LCPI0_0@toc@l
 ; CHECK-NEXT:    lxvw4x 1, 4, 5
-; CHECK-NEXT:    lxvw4x 36, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI0_1@toc@l
-; CHECK-NEXT:    xvsubsp 37, 1, 0
 ; CHECK-NEXT:    vperm 2, 3, 2, 4
-; CHECK-NEXT:    lxvw4x 35, 0, 3
-; CHECK-NEXT:    vperm 2, 2, 5, 3
+; CHECK-NEXT:    xvsubsp 35, 1, 0
+; CHECK-NEXT:    lxvw4x 36, 0, 3
+; CHECK-NEXT:    vperm 2, 2, 3, 4
 ; CHECK-NEXT:    blr
   %1 = load <12 x float>, ptr %p, align 16
   %2 = load <12 x float>, ptr %q, align 16

diff  --git a/llvm/test/CodeGen/PowerPC/pr33093.ll b/llvm/test/CodeGen/PowerPC/pr33093.ll
index 2a8da6ef62b9f1c..4992ae287b5a691 100644
--- a/llvm/test/CodeGen/PowerPC/pr33093.ll
+++ b/llvm/test/CodeGen/PowerPC/pr33093.ll
@@ -7,30 +7,30 @@ define zeroext i32 @ReverseBits(i32 zeroext %n) {
 ; CHECK-LABEL: ReverseBits:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lis 4, -21846
-; CHECK-NEXT:    lis 5, 21845
-; CHECK-NEXT:    slwi 6, 3, 1
+; CHECK-NEXT:    slwi 5, 3, 1
 ; CHECK-NEXT:    srwi 3, 3, 1
 ; CHECK-NEXT:    ori 4, 4, 43690
+; CHECK-NEXT:    and 4, 5, 4
+; CHECK-NEXT:    lis 5, 21845
 ; CHECK-NEXT:    ori 5, 5, 21845
-; CHECK-NEXT:    and 4, 6, 4
 ; CHECK-NEXT:    and 3, 3, 5
-; CHECK-NEXT:    lis 5, 13107
+; CHECK-NEXT:    lis 5, -13108
 ; CHECK-NEXT:    or 3, 3, 4
-; CHECK-NEXT:    lis 4, -13108
-; CHECK-NEXT:    ori 5, 5, 13107
-; CHECK-NEXT:    slwi 6, 3, 2
-; CHECK-NEXT:    ori 4, 4, 52428
+; CHECK-NEXT:    ori 5, 5, 52428
+; CHECK-NEXT:    slwi 4, 3, 2
 ; CHECK-NEXT:    srwi 3, 3, 2
-; CHECK-NEXT:    and 4, 6, 4
+; CHECK-NEXT:    and 4, 4, 5
+; CHECK-NEXT:    lis 5, 13107
+; CHECK-NEXT:    ori 5, 5, 13107
 ; CHECK-NEXT:    and 3, 3, 5
-; CHECK-NEXT:    lis 5, 3855
+; CHECK-NEXT:    lis 5, -3856
 ; CHECK-NEXT:    or 3, 3, 4
-; CHECK-NEXT:    lis 4, -3856
-; CHECK-NEXT:    ori 5, 5, 3855
-; CHECK-NEXT:    slwi 6, 3, 4
-; CHECK-NEXT:    ori 4, 4, 61680
+; CHECK-NEXT:    ori 5, 5, 61680
+; CHECK-NEXT:    slwi 4, 3, 4
 ; CHECK-NEXT:    srwi 3, 3, 4
-; CHECK-NEXT:    and 4, 6, 4
+; CHECK-NEXT:    and 4, 4, 5
+; CHECK-NEXT:    lis 5, 3855
+; CHECK-NEXT:    ori 5, 5, 3855
 ; CHECK-NEXT:    and 3, 3, 5
 ; CHECK-NEXT:    or 3, 3, 4
 ; CHECK-NEXT:    rotlwi 4, 3, 24
@@ -71,59 +71,59 @@ define i64 @ReverseBits64(i64 %n) {
 ; CHECK-LABEL: ReverseBits64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lis 4, -21846
-; CHECK-NEXT:    lis 5, 21845
-; CHECK-NEXT:    lis 7, -13108
-; CHECK-NEXT:    lis 8, 13107
+; CHECK-NEXT:    sldi 5, 3, 1
+; CHECK-NEXT:    rldicl 3, 3, 63, 1
 ; CHECK-NEXT:    ori 4, 4, 43690
-; CHECK-NEXT:    ori 5, 5, 21845
-; CHECK-NEXT:    ori 7, 7, 52428
-; CHECK-NEXT:    ori 8, 8, 13107
 ; CHECK-NEXT:    sldi 4, 4, 32
-; CHECK-NEXT:    sldi 5, 5, 32
 ; CHECK-NEXT:    oris 4, 4, 43690
-; CHECK-NEXT:    oris 5, 5, 21845
-; CHECK-NEXT:    sldi 6, 3, 1
-; CHECK-NEXT:    rldicl 3, 3, 63, 1
 ; CHECK-NEXT:    ori 4, 4, 43690
+; CHECK-NEXT:    and 4, 5, 4
+; CHECK-NEXT:    lis 5, 21845
+; CHECK-NEXT:    ori 5, 5, 21845
+; CHECK-NEXT:    sldi 5, 5, 32
+; CHECK-NEXT:    oris 5, 5, 21845
 ; CHECK-NEXT:    ori 5, 5, 21845
-; CHECK-NEXT:    sldi 7, 7, 32
-; CHECK-NEXT:    sldi 8, 8, 32
-; CHECK-NEXT:    and 4, 6, 4
 ; CHECK-NEXT:    and 3, 3, 5
-; CHECK-NEXT:    lis 5, -3856
-; CHECK-NEXT:    oris 6, 7, 52428
-; CHECK-NEXT:    oris 7, 8, 13107
+; CHECK-NEXT:    lis 5, -13108
+; CHECK-NEXT:    ori 5, 5, 52428
 ; CHECK-NEXT:    or 3, 3, 4
-; CHECK-NEXT:    lis 4, 3855
-; CHECK-NEXT:    ori 5, 5, 61680
-; CHECK-NEXT:    ori 6, 6, 52428
-; CHECK-NEXT:    ori 7, 7, 13107
-; CHECK-NEXT:    ori 4, 4, 3855
-; CHECK-NEXT:    sldi 8, 3, 2
+; CHECK-NEXT:    sldi 5, 5, 32
+; CHECK-NEXT:    sldi 4, 3, 2
 ; CHECK-NEXT:    rldicl 3, 3, 62, 2
-; CHECK-NEXT:    and 6, 8, 6
-; CHECK-NEXT:    and 3, 3, 7
+; CHECK-NEXT:    oris 5, 5, 52428
+; CHECK-NEXT:    ori 5, 5, 52428
+; CHECK-NEXT:    and 4, 4, 5
+; CHECK-NEXT:    lis 5, 13107
+; CHECK-NEXT:    ori 5, 5, 13107
 ; CHECK-NEXT:    sldi 5, 5, 32
-; CHECK-NEXT:    sldi 4, 4, 32
-; CHECK-NEXT:    or 3, 3, 6
-; CHECK-NEXT:    oris 5, 5, 61680
-; CHECK-NEXT:    oris 4, 4, 3855
-; CHECK-NEXT:    sldi 6, 3, 4
+; CHECK-NEXT:    oris 5, 5, 13107
+; CHECK-NEXT:    ori 5, 5, 13107
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    lis 5, -3856
 ; CHECK-NEXT:    ori 5, 5, 61680
-; CHECK-NEXT:    ori 4, 4, 3855
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    sldi 5, 5, 32
+; CHECK-NEXT:    sldi 4, 3, 4
 ; CHECK-NEXT:    rldicl 3, 3, 60, 4
-; CHECK-NEXT:    and 5, 6, 5
-; CHECK-NEXT:    and 3, 3, 4
-; CHECK-NEXT:    or 3, 3, 5
+; CHECK-NEXT:    oris 5, 5, 61680
+; CHECK-NEXT:    ori 5, 5, 61680
+; CHECK-NEXT:    and 4, 4, 5
+; CHECK-NEXT:    lis 5, 3855
+; CHECK-NEXT:    ori 5, 5, 3855
+; CHECK-NEXT:    sldi 5, 5, 32
+; CHECK-NEXT:    oris 5, 5, 3855
+; CHECK-NEXT:    ori 5, 5, 3855
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    or 3, 3, 4
 ; CHECK-NEXT:    rldicl 4, 3, 32, 32
-; CHECK-NEXT:    rotlwi 5, 3, 24
-; CHECK-NEXT:    rotlwi 6, 4, 24
-; CHECK-NEXT:    rlwimi 5, 3, 8, 8, 15
-; CHECK-NEXT:    rlwimi 5, 3, 8, 24, 31
-; CHECK-NEXT:    rlwimi 6, 4, 8, 8, 15
-; CHECK-NEXT:    rlwimi 6, 4, 8, 24, 31
-; CHECK-NEXT:    sldi 3, 5, 32
-; CHECK-NEXT:    or 3, 3, 6
+; CHECK-NEXT:    rotlwi 5, 4, 24
+; CHECK-NEXT:    rlwimi 5, 4, 8, 8, 15
+; CHECK-NEXT:    rlwimi 5, 4, 8, 24, 31
+; CHECK-NEXT:    rotlwi 4, 3, 24
+; CHECK-NEXT:    rlwimi 4, 3, 8, 8, 15
+; CHECK-NEXT:    rlwimi 4, 3, 8, 24, 31
+; CHECK-NEXT:    sldi 3, 4, 32
+; CHECK-NEXT:    or 3, 3, 5
 ; CHECK-NEXT:    blr
 entry:
   %shr = lshr i64 %n, 1

diff  --git a/llvm/test/CodeGen/PowerPC/pr33547.ll b/llvm/test/CodeGen/PowerPC/pr33547.ll
index 1a41b3d6522a6e5..057f8b2b833edea 100644
--- a/llvm/test/CodeGen/PowerPC/pr33547.ll
+++ b/llvm/test/CodeGen/PowerPC/pr33547.ll
@@ -16,9 +16,9 @@ define void @main() {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
-; CHECK-NEXT:    addis 4, 2, .LC1@toc@ha
-; CHECK-NEXT:    ld 3, .LC0@toc@l(3)
-; CHECK-NEXT:    ld 4, .LC1@toc@l(4)
+; CHECK-NEXT:    addis 5, 2, .LC1@toc@ha
+; CHECK-NEXT:    ld 4, .LC0@toc@l(3)
+; CHECK-NEXT:    ld 3, .LC1@toc@l(5)
 ; CHECK-NEXT:    addi 3, 3, 124
 ; CHECK-NEXT:    bl testFunc
 ; CHECK-NEXT:    nop

diff  --git a/llvm/test/CodeGen/PowerPC/pr35402.ll b/llvm/test/CodeGen/PowerPC/pr35402.ll
index 566beeddeef4cf6..bcbae3bbbb97359 100644
--- a/llvm/test/CodeGen/PowerPC/pr35402.ll
+++ b/llvm/test/CodeGen/PowerPC/pr35402.ll
@@ -7,15 +7,15 @@ define void @test(ptr %p, i64 %data) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    rotldi 5, 4, 16
 ; CHECK-NEXT:    rldicl 6, 4, 8, 56
-; CHECK-NEXT:    rotldi 7, 4, 24
 ; CHECK-NEXT:    rldimi 6, 5, 8, 48
-; CHECK-NEXT:    rldimi 6, 7, 16, 40
+; CHECK-NEXT:    rotldi 5, 4, 24
+; CHECK-NEXT:    rldimi 6, 5, 16, 40
 ; CHECK-NEXT:    rotldi 5, 4, 32
-; CHECK-NEXT:    rlwinm 7, 4, 8, 24, 31
 ; CHECK-NEXT:    rldimi 6, 5, 24, 32
-; CHECK-NEXT:    rlwimi 7, 4, 24, 16, 23
-; CHECK-NEXT:    sth 7, 4(3)
+; CHECK-NEXT:    rlwinm 5, 4, 8, 24, 31
+; CHECK-NEXT:    rlwimi 5, 4, 24, 16, 23
 ; CHECK-NEXT:    stw 6, 0(3)
+; CHECK-NEXT:    sth 5, 4(3)
 ; CHECK-NEXT:    blr
 entry:
   %0 = tail call i64 @llvm.bswap.i64(i64 %data)

diff  --git a/llvm/test/CodeGen/PowerPC/pr36292.ll b/llvm/test/CodeGen/PowerPC/pr36292.ll
index c1bfb5227ddb728..1794b3ba526ed69 100644
--- a/llvm/test/CodeGen/PowerPC/pr36292.ll
+++ b/llvm/test/CodeGen/PowerPC/pr36292.ll
@@ -30,8 +30,8 @@ define void @test() nounwind comdat {
 ; CHECK-NEXT:    xxlxor 1, 1, 1
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    addi 30, 30, 1
 ; CHECK-NEXT:    stfs 1, 0(3)
+; CHECK-NEXT:    addi 30, 30, 1
 ; CHECK-NEXT:    b .LBB0_1
 ; CHECK-NEXT:  .LBB0_3: # %bounds.fail
 ; CHECK-NEXT:    std 30, 32(1)

diff  --git a/llvm/test/CodeGen/PowerPC/pr45628.ll b/llvm/test/CodeGen/PowerPC/pr45628.ll
index ceda941d8ab3e32..9d11b1f0402c9da 100644
--- a/llvm/test/CodeGen/PowerPC/pr45628.ll
+++ b/llvm/test/CodeGen/PowerPC/pr45628.ll
@@ -232,14 +232,14 @@ define <1 x i128> @rotl_28(<1 x i128> %num) {
 ; P8-VSX-LABEL: rotl_28:
 ; P8-VSX:       # %bb.0: # %entry
 ; P8-VSX-NEXT:    xxswapd vs0, v2
-; P8-VSX-NEXT:    mfvsrd r3, v2
-; P8-VSX-NEXT:    rotldi r5, r3, 28
-; P8-VSX-NEXT:    mffprd r4, f0
-; P8-VSX-NEXT:    rldimi r5, r4, 28, 0
-; P8-VSX-NEXT:    rotldi r4, r4, 28
-; P8-VSX-NEXT:    rldimi r4, r3, 28, 0
+; P8-VSX-NEXT:    mfvsrd r4, v2
+; P8-VSX-NEXT:    rotldi r5, r4, 28
+; P8-VSX-NEXT:    mffprd r3, f0
+; P8-VSX-NEXT:    rldimi r5, r3, 28, 0
+; P8-VSX-NEXT:    rotldi r3, r3, 28
+; P8-VSX-NEXT:    rldimi r3, r4, 28, 0
 ; P8-VSX-NEXT:    mtfprd f0, r5
-; P8-VSX-NEXT:    mtfprd f1, r4
+; P8-VSX-NEXT:    mtfprd f1, r3
 ; P8-VSX-NEXT:    xxmrghd v2, vs1, vs0
 ; P8-VSX-NEXT:    blr
 ;
@@ -247,15 +247,15 @@ define <1 x i128> @rotl_28(<1 x i128> %num) {
 ; P8-NOVSX:       # %bb.0: # %entry
 ; P8-NOVSX-NEXT:    addi r3, r1, -32
 ; P8-NOVSX-NEXT:    stvx v2, 0, r3
-; P8-NOVSX-NEXT:    ld r3, -24(r1)
 ; P8-NOVSX-NEXT:    ld r4, -32(r1)
+; P8-NOVSX-NEXT:    ld r3, -24(r1)
 ; P8-NOVSX-NEXT:    rotldi r5, r4, 28
-; P8-NOVSX-NEXT:    rotldi r6, r3, 28
 ; P8-NOVSX-NEXT:    rldimi r5, r3, 28, 0
-; P8-NOVSX-NEXT:    rldimi r6, r4, 28, 0
-; P8-NOVSX-NEXT:    addi r3, r1, -16
+; P8-NOVSX-NEXT:    rotldi r3, r3, 28
+; P8-NOVSX-NEXT:    rldimi r3, r4, 28, 0
 ; P8-NOVSX-NEXT:    std r5, -8(r1)
-; P8-NOVSX-NEXT:    std r6, -16(r1)
+; P8-NOVSX-NEXT:    std r3, -16(r1)
+; P8-NOVSX-NEXT:    addi r3, r1, -16
 ; P8-NOVSX-NEXT:    lvx v2, 0, r3
 ; P8-NOVSX-NEXT:    blr
 entry:
@@ -303,36 +303,36 @@ define <1 x i128> @NO_rotl(<1 x i128> %num) {
 ; P8-VSX-LABEL: NO_rotl:
 ; P8-VSX:       # %bb.0: # %entry
 ; P8-VSX-NEXT:    xxswapd vs0, v2
+; P8-VSX-NEXT:    mfvsrd r4, v2
+; P8-VSX-NEXT:    mffprd r3, f0
+; P8-VSX-NEXT:    rotldi r5, r3, 20
+; P8-VSX-NEXT:    sldi r3, r3, 20
+; P8-VSX-NEXT:    rldimi r5, r4, 20, 0
+; P8-VSX-NEXT:    mtfprd f0, r3
 ; P8-VSX-NEXT:    li r3, 0
-; P8-VSX-NEXT:    mfvsrd r5, v2
-; P8-VSX-NEXT:    mffprd r4, f0
+; P8-VSX-NEXT:    mtfprd f1, r5
+; P8-VSX-NEXT:    xxmrghd v2, vs1, vs0
 ; P8-VSX-NEXT:    mtfprd f0, r3
-; P8-VSX-NEXT:    rotldi r3, r4, 20
-; P8-VSX-NEXT:    sldi r4, r4, 20
-; P8-VSX-NEXT:    rldimi r3, r5, 20, 0
-; P8-VSX-NEXT:    mtfprd f1, r4
-; P8-VSX-NEXT:    rldicl r4, r5, 28, 36
-; P8-VSX-NEXT:    mtfprd f2, r3
-; P8-VSX-NEXT:    mtfprd f3, r4
-; P8-VSX-NEXT:    xxmrghd v2, vs2, vs1
-; P8-VSX-NEXT:    xxmrghd v3, vs0, vs3
+; P8-VSX-NEXT:    rldicl r3, r4, 28, 36
+; P8-VSX-NEXT:    mtfprd f1, r3
+; P8-VSX-NEXT:    xxmrghd v3, vs0, vs1
 ; P8-VSX-NEXT:    xxlor v2, v2, v3
 ; P8-VSX-NEXT:    blr
 ;
 ; P8-NOVSX-LABEL: NO_rotl:
 ; P8-NOVSX:       # %bb.0: # %entry
 ; P8-NOVSX-NEXT:    addis r3, r2, .LCPI8_0@toc@ha
-; P8-NOVSX-NEXT:    addis r4, r2, .LCPI8_1@toc@ha
 ; P8-NOVSX-NEXT:    addi r3, r3, .LCPI8_0@toc@l
 ; P8-NOVSX-NEXT:    lvx v3, 0, r3
-; P8-NOVSX-NEXT:    addi r3, r4, .LCPI8_1@toc@l
-; P8-NOVSX-NEXT:    lvx v4, 0, r3
-; P8-NOVSX-NEXT:    vslo v5, v2, v3
+; P8-NOVSX-NEXT:    addis r3, r2, .LCPI8_1@toc@ha
+; P8-NOVSX-NEXT:    addi r3, r3, .LCPI8_1@toc@l
+; P8-NOVSX-NEXT:    lvx v5, 0, r3
+; P8-NOVSX-NEXT:    vslo v4, v2, v3
 ; P8-NOVSX-NEXT:    vspltb v3, v3, 15
-; P8-NOVSX-NEXT:    vsro v2, v2, v4
-; P8-NOVSX-NEXT:    vspltb v4, v4, 15
-; P8-NOVSX-NEXT:    vsl v3, v5, v3
-; P8-NOVSX-NEXT:    vsr v2, v2, v4
+; P8-NOVSX-NEXT:    vsl v3, v4, v3
+; P8-NOVSX-NEXT:    vsro v2, v2, v5
+; P8-NOVSX-NEXT:    vspltb v5, v5, 15
+; P8-NOVSX-NEXT:    vsr v2, v2, v5
 ; P8-NOVSX-NEXT:    vor v2, v3, v2
 ; P8-NOVSX-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll
index 16186480fe00ada..d1129b1825aeef3 100644
--- a/llvm/test/CodeGen/PowerPC/pr46759.ll
+++ b/llvm/test/CodeGen/PowerPC/pr46759.ll
@@ -30,20 +30,20 @@ define void @foo(i32 %vla_size) #0 {
 ; CHECK-LE-NEXT:    .cfi_offset r31, -8
 ; CHECK-LE-NEXT:    .cfi_offset r30, -16
 ; CHECK-LE-NEXT:    clrldi r3, r3, 32
-; CHECK-LE-NEXT:    li r5, -2048
+; CHECK-LE-NEXT:    li r4, -2048
+; CHECK-LE-NEXT:    li r6, -4096
 ; CHECK-LE-NEXT:    mr r31, r1
 ; CHECK-LE-NEXT:    addi r3, r3, 15
 ; CHECK-LE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-LE-NEXT:    rldicl r3, r3, 4, 31
-; CHECK-LE-NEXT:    neg r4, r3
+; CHECK-LE-NEXT:    neg r5, r3
 ; CHECK-LE-NEXT:    ld r3, 0(r1)
-; CHECK-LE-NEXT:    and r5, r4, r5
-; CHECK-LE-NEXT:    mr r4, r5
-; CHECK-LE-NEXT:    li r5, -4096
-; CHECK-LE-NEXT:    divd r6, r4, r5
-; CHECK-LE-NEXT:    mulld r5, r6, r5
-; CHECK-LE-NEXT:    sub r5, r4, r5
-; CHECK-LE-NEXT:    add r4, r1, r4
+; CHECK-LE-NEXT:    and r4, r5, r4
+; CHECK-LE-NEXT:    mr r5, r4
+; CHECK-LE-NEXT:    divd r7, r5, r6
+; CHECK-LE-NEXT:    add r4, r1, r5
+; CHECK-LE-NEXT:    mulld r6, r7, r6
+; CHECK-LE-NEXT:    sub r5, r5, r6
 ; CHECK-LE-NEXT:    stdux r3, r1, r5
 ; CHECK-LE-NEXT:    cmpd r1, r4
 ; CHECK-LE-NEXT:    beq cr0, .LBB0_4

diff  --git a/llvm/test/CodeGen/PowerPC/pr47707.ll b/llvm/test/CodeGen/PowerPC/pr47707.ll
index 047087ed71a27d4..4a99eef4cbb3fc9 100644
--- a/llvm/test/CodeGen/PowerPC/pr47707.ll
+++ b/llvm/test/CodeGen/PowerPC/pr47707.ll
@@ -7,13 +7,12 @@ target triple = "powerpc64le-grtev4-linux-gnu"
 define void @foo(ptr %p1, i64 %v1, i8 %v2, i64 %v3) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    li 7, 0
+; CHECK-NEXT:    std 7, 0(3)
 ; CHECK-NEXT:    mr 7, 5
 ; CHECK-NEXT:    rldimi. 7, 4, 8, 0
-; CHECK-NEXT:    mcrf 1, 0
+; CHECK-NEXT:    crnot 20, 2
 ; CHECK-NEXT:    andi. 5, 5, 1
-; CHECK-NEXT:    li 5, 0
-; CHECK-NEXT:    std 5, 0(3)
-; CHECK-NEXT:    crnot 20, 6
 ; CHECK-NEXT:    bc 4, 1, .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %bb1
 ; CHECK-NEXT:    std 4, 0(3)

diff  --git a/llvm/test/CodeGen/PowerPC/pr47830.ll b/llvm/test/CodeGen/PowerPC/pr47830.ll
index bd320907a341872..3c0a077bdc39c6b 100644
--- a/llvm/test/CodeGen/PowerPC/pr47830.ll
+++ b/llvm/test/CodeGen/PowerPC/pr47830.ll
@@ -5,8 +5,8 @@
 define i64 @f(i64 %a, i64 %b) {
 ; CHECK-LABEL: f:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    sub r5, r3, r4
 ; CHECK-NEXT:    cmpd r3, r4
+; CHECK-NEXT:    sub r5, r3, r4
 ; CHECK-NEXT:    isellt r3, 0, r5
 ; CHECK-NEXT:    blr
   %c = icmp slt i64 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/pr47891.ll b/llvm/test/CodeGen/PowerPC/pr47891.ll
index 13626638639a590..0949b814a131017 100644
--- a/llvm/test/CodeGen/PowerPC/pr47891.ll
+++ b/llvm/test/CodeGen/PowerPC/pr47891.ll
@@ -8,55 +8,55 @@ define dso_local void @poly2_lshift1(ptr nocapture %p) local_unnamed_addr #0 {
 ; CHECK-LABEL: poly2_lshift1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    li r4, 72
+; CHECK-NEXT:    addis r6, r2, .LCPI0_1@toc@ha
 ; CHECK-NEXT:    ld r5, 64(r3)
-; CHECK-NEXT:    addis r6, r2, .LCPI0_0@toc@ha
-; CHECK-NEXT:    addis r7, r2, .LCPI0_1@toc@ha
-; CHECK-NEXT:    ld r8, 0(r3)
-; CHECK-NEXT:    ld r10, 24(r3)
-; CHECK-NEXT:    ld r11, 32(r3)
 ; CHECK-NEXT:    lxvd2x vs0, r3, r4
+; CHECK-NEXT:    addi r6, r6, .LCPI0_1@toc@l
+; CHECK-NEXT:    lxvd2x v4, 0, r6
+; CHECK-NEXT:    addis r6, r2, .LCPI0_0@toc@ha
 ; CHECK-NEXT:    addi r6, r6, .LCPI0_0@toc@l
-; CHECK-NEXT:    addi r7, r7, .LCPI0_1@toc@l
-; CHECK-NEXT:    ld r12, 56(r3)
-; CHECK-NEXT:    lxvd2x v3, 0, r6
-; CHECK-NEXT:    lxvd2x v5, 0, r7
-; CHECK-NEXT:    ld r6, 8(r3)
-; CHECK-NEXT:    ld r7, 16(r3)
-; CHECK-NEXT:    rotldi r9, r8, 1
-; CHECK-NEXT:    sldi r8, r8, 1
-; CHECK-NEXT:    std r8, 0(r3)
-; CHECK-NEXT:    rotldi r8, r10, 1
 ; CHECK-NEXT:    xxswapd v2, vs0
 ; CHECK-NEXT:    mtfprd f0, r5
-; CHECK-NEXT:    rldimi r9, r6, 1, 0
+; CHECK-NEXT:    xxpermdi v3, v2, vs0, 2
+; CHECK-NEXT:    vsld v2, v2, v4
+; CHECK-NEXT:    lxvd2x v4, 0, r6
+; CHECK-NEXT:    ld r6, 0(r3)
+; CHECK-NEXT:    sldi r7, r6, 1
+; CHECK-NEXT:    rotldi r6, r6, 1
+; CHECK-NEXT:    std r7, 0(r3)
+; CHECK-NEXT:    ld r7, 8(r3)
+; CHECK-NEXT:    vsrd v3, v3, v4
+; CHECK-NEXT:    xxlor vs0, v2, v3
+; CHECK-NEXT:    rldimi r6, r7, 1, 0
+; CHECK-NEXT:    rotldi r7, r7, 1
+; CHECK-NEXT:    std r6, 8(r3)
+; CHECK-NEXT:    ld r6, 16(r3)
+; CHECK-NEXT:    rldimi r7, r6, 1, 0
 ; CHECK-NEXT:    rotldi r6, r6, 1
+; CHECK-NEXT:    std r7, 16(r3)
+; CHECK-NEXT:    ld r7, 24(r3)
 ; CHECK-NEXT:    rldimi r6, r7, 1, 0
 ; CHECK-NEXT:    rotldi r7, r7, 1
-; CHECK-NEXT:    std r9, 8(r3)
-; CHECK-NEXT:    ld r9, 40(r3)
-; CHECK-NEXT:    rldimi r7, r10, 1, 0
-; CHECK-NEXT:    rldimi r8, r11, 1, 0
-; CHECK-NEXT:    std r6, 16(r3)
-; CHECK-NEXT:    xxpermdi v4, v2, vs0, 2
-; CHECK-NEXT:    vsld v2, v2, v5
-; CHECK-NEXT:    rotldi r10, r11, 1
-; CHECK-NEXT:    ld r11, 48(r3)
-; CHECK-NEXT:    std r7, 24(r3)
-; CHECK-NEXT:    rotldi r7, r12, 1
-; CHECK-NEXT:    rldimi r10, r9, 1, 0
-; CHECK-NEXT:    rotldi r9, r9, 1
-; CHECK-NEXT:    std r8, 32(r3)
-; CHECK-NEXT:    rotldi r6, r11, 1
-; CHECK-NEXT:    rldimi r9, r11, 1, 0
-; CHECK-NEXT:    std r10, 40(r3)
-; CHECK-NEXT:    vsrd v3, v4, v3
-; CHECK-NEXT:    rldimi r6, r12, 1, 0
-; CHECK-NEXT:    rldimi r7, r5, 1, 0
-; CHECK-NEXT:    std r9, 48(r3)
+; CHECK-NEXT:    std r6, 24(r3)
+; CHECK-NEXT:    ld r6, 32(r3)
+; CHECK-NEXT:    rldimi r7, r6, 1, 0
+; CHECK-NEXT:    rotldi r6, r6, 1
+; CHECK-NEXT:    std r7, 32(r3)
+; CHECK-NEXT:    ld r7, 40(r3)
+; CHECK-NEXT:    rldimi r6, r7, 1, 0
+; CHECK-NEXT:    rotldi r7, r7, 1
+; CHECK-NEXT:    std r6, 40(r3)
+; CHECK-NEXT:    ld r6, 48(r3)
+; CHECK-NEXT:    rldimi r7, r6, 1, 0
+; CHECK-NEXT:    rotldi r6, r6, 1
+; CHECK-NEXT:    std r7, 48(r3)
+; CHECK-NEXT:    ld r7, 56(r3)
+; CHECK-NEXT:    rldimi r6, r7, 1, 0
 ; CHECK-NEXT:    std r6, 56(r3)
-; CHECK-NEXT:    std r7, 64(r3)
-; CHECK-NEXT:    xxlor vs0, v2, v3
+; CHECK-NEXT:    rotldi r6, r7, 1
 ; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    rldimi r6, r5, 1, 0
+; CHECK-NEXT:    std r6, 64(r3)
 ; CHECK-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/pr48388.ll b/llvm/test/CodeGen/PowerPC/pr48388.ll
index 822e5d85231719e..04efd989f5e86ca 100644
--- a/llvm/test/CodeGen/PowerPC/pr48388.ll
+++ b/llvm/test/CodeGen/PowerPC/pr48388.ll
@@ -5,25 +5,25 @@
 define i64 @julia_div_i64(i64 %0, i64 %1) local_unnamed_addr #0 {
 ; CHECK-LABEL: julia_div_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    divd r6, r3, r4
-; CHECK-NEXT:    lis r5, -1592
-; CHECK-NEXT:    ori r7, r5, 21321
-; CHECK-NEXT:    ori r5, r5, 65519
+; CHECK-NEXT:    divd r5, r3, r4
+; CHECK-NEXT:    lis r6, -1592
 ; CHECK-NEXT:    cmpdi r3, 0
+; CHECK-NEXT:    ori r7, r6, 21321
+; CHECK-NEXT:    ori r6, r6, 65519
 ; CHECK-NEXT:    rldic r7, r7, 4, 17
-; CHECK-NEXT:    rldic r5, r5, 4, 17
-; CHECK-NEXT:    iselgt r9, r5, r7
+; CHECK-NEXT:    rldic r6, r6, 4, 17
+; CHECK-NEXT:    iselgt r8, r6, r7
 ; CHECK-NEXT:    cmpdi r4, 0
-; CHECK-NEXT:    mulld r8, r6, r4
-; CHECK-NEXT:    iselgt r4, r5, r7
-; CHECK-NEXT:    xor r4, r9, r4
-; CHECK-NEXT:    cntlzd r4, r4
-; CHECK-NEXT:    rldicl r4, r4, 58, 63
-; CHECK-NEXT:    xor r3, r8, r3
-; CHECK-NEXT:    addic r5, r3, -1
-; CHECK-NEXT:    subfe r3, r5, r3
-; CHECK-NEXT:    and r3, r4, r3
-; CHECK-NEXT:    add r3, r6, r3
+; CHECK-NEXT:    iselgt r6, r6, r7
+; CHECK-NEXT:    xor r6, r8, r6
+; CHECK-NEXT:    cntlzd r6, r6
+; CHECK-NEXT:    rldicl r6, r6, 58, 63
+; CHECK-NEXT:    mulld r4, r5, r4
+; CHECK-NEXT:    xor r3, r4, r3
+; CHECK-NEXT:    addic r4, r3, -1
+; CHECK-NEXT:    subfe r3, r4, r3
+; CHECK-NEXT:    and r3, r6, r3
+; CHECK-NEXT:    add r3, r5, r3
 ; CHECK-NEXT:    blr
 entry:
   %2 = sdiv i64 %0, %1

diff  --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll
index 2ab0c1ef96c6ccf..002dd8f0d167a90 100644
--- a/llvm/test/CodeGen/PowerPC/pr48519.ll
+++ b/llvm/test/CodeGen/PowerPC/pr48519.ll
@@ -147,8 +147,8 @@ define void @func_48786() #0 {
 ; CHECK-LABEL: func_48786:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    mfocrf r12, 32
-; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stw r12, 8(r1)
+; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -48(r1)
 ; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    std r30, 32(r1) # 8-byte Folded Spill
@@ -183,8 +183,8 @@ define void @func_48786() #0 {
 ; CHECK-NEXT:    addi r1, r1, 48
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    lwz r12, 8(r1)
-; CHECK-NEXT:    mtocrf 32, r12
 ; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    mtocrf 32, r12
 ; CHECK-NEXT:    blr
 ; CHECK-NEXT:  .LBB2_6: # %bb15
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/pr48527.ll b/llvm/test/CodeGen/PowerPC/pr48527.ll
index 3eb0f6885ed76e8..de855b2d1b70f68 100644
--- a/llvm/test/CodeGen/PowerPC/pr48527.ll
+++ b/llvm/test/CodeGen/PowerPC/pr48527.ll
@@ -20,12 +20,12 @@ define void @_ZNK1q1rEv() local_unnamed_addr #0 align 2 {
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stdu 1, -64(1)
 ; CHECK-NEXT:    std 0, 80(1)
-; CHECK-NEXT:    addis 4, 2, .LC0@toc@ha
 ; CHECK-NEXT:    lwz 3, 0(3)
-; CHECK-NEXT:    ld 29, .LC0@toc@l(4)
 ; CHECK-NEXT:    addi 3, 3, -1
 ; CHECK-NEXT:    clrldi 3, 3, 32
 ; CHECK-NEXT:    addi 30, 3, 1
+; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:    ld 29, .LC0@toc@l(3)
 ; CHECK-NEXT:    addis 3, 2, aj@got@tlsgd@ha
 ; CHECK-NEXT:    addi 3, 3, aj@got@tlsgd@l
 ; CHECK-NEXT:    bl __tls_get_addr(aj@tlsgd)

diff  --git a/llvm/test/CodeGen/PowerPC/pr52894-32bit.ll b/llvm/test/CodeGen/PowerPC/pr52894-32bit.ll
index 5ecb8c3ce663e80..ed6de3ea482f3b3 100644
--- a/llvm/test/CodeGen/PowerPC/pr52894-32bit.ll
+++ b/llvm/test/CodeGen/PowerPC/pr52894-32bit.ll
@@ -9,12 +9,12 @@
 ; Function Attrs: mustprogress uwtable
 define dso_local void @_Z1g1dILi17EE(ptr nocapture noundef readnone byval(%struct.d) align 4 %0) local_unnamed_addr #0 {
 ; CHECK-LABEL: _Z1g1dILi17EE:
-; CHECK:    mtfprwz f0, r4
-; CHECK:    stwx [[REG:r[0-9]+]], r1, r4
-; CHECK:    mffprwz r4, f0
-; CHECK:    mtfprwz f0, r4
-; CHECK:    lwzx [[REG]], r1, r4
-; CHECK:    mffprwz r4, f0
+; CHECK-NOT:    mtfprwz f0, r4
+; CHECK-NOT:    stwx r3, r1, r4
+; CHECK-NOT:    mffprwz r4, f0
+; CHECK-NOT:    mtfprwz f0, r4
+; CHECK-NOT:    lwzx r3, r1, r4
+; CHECK-NOT:    mffprwz r4, f0
 entry:
   %c = alloca %struct.d, align 4
   call void @llvm.lifetime.start.p0(i64 524288, ptr nonnull %c) #3

diff  --git a/llvm/test/CodeGen/PowerPC/pr52894.ll b/llvm/test/CodeGen/PowerPC/pr52894.ll
index 9fcb1b523a18815..65a9e2accb680d7 100644
--- a/llvm/test/CodeGen/PowerPC/pr52894.ll
+++ b/llvm/test/CodeGen/PowerPC/pr52894.ll
@@ -9,18 +9,18 @@
 ; Function Attrs: mustprogress uwtable
 define dso_local void @_Z1g1dILi17EE(ptr nocapture noundef readnone byval(%struct.d) align 8 %0) local_unnamed_addr #0 {
 ; CHECK-LABEL: _Z1g1dILi17EE:
-; CHECK:    mtfprd f0, r4
-; CHECK:    stdx [[REG:r[0-9]+]], r1, r4
-; CHECK:    mffprd r4, f0
-; CHECK:    mtfprd f0, r4
-; CHECK:    ldx [[REG]], r1, r4
-; CHECK:    mffprd r4, f0
-; CHECK:    mtfprd f0, r4
-; CHECK:    stdx [[REG2:r[0-9]+]], r1, r4
-; CHECK:    mffprd r4, f0
-; CHECK:    mtfprd f0, r4
-; CHECK:    ldx [[REG2]], r1, r4
-; CHECK:    mffprd r4, f0
+; CHECK-NOT:    mtfprd f0, r4
+; CHECK-NOT:    stdx r3, r1, r4
+; CHECK-NOT:    mffprd r4, f0
+; CHECK-NOT:    mtfprd f0, r4
+; CHECK-NOT:    ldx r3, r1, r4
+; CHECK-NOT:    mffprd r4, f0
+; CHECK-NOT:    mtfprd f0, r4
+; CHECK-NOT:    stdx r3, r1, r4
+; CHECK-NOT:    mffprd r4, f0
+; CHECK-NOT:    mtfprd f0, r4
+; CHECK-NOT:    ldx r3, r1, r4
+; CHECK-NOT:    mffprd r4, f0
 entry:
   %c = alloca %struct.d, align 8
   call void @llvm.lifetime.start.p0(i64 524288, ptr nonnull %c) #3

diff  --git a/llvm/test/CodeGen/PowerPC/pr61882.ll b/llvm/test/CodeGen/PowerPC/pr61882.ll
index bce1d911b849196..37dab4097b795f9 100644
--- a/llvm/test/CodeGen/PowerPC/pr61882.ll
+++ b/llvm/test/CodeGen/PowerPC/pr61882.ll
@@ -34,8 +34,8 @@ define void @foo(ptr %a, i32 %x) {
 ;
 ; PWR8-LABEL: foo:
 ; PWR8:       # %bb.0:
-; PWR8-NEXT:    extsb r4, r4
 ; PWR8-NEXT:    sync
+; PWR8-NEXT:    extsb r4, r4
 ; PWR8-NEXT:  .LBB0_1:
 ; PWR8-NEXT:    lbarx r5, 0, r3
 ; PWR8-NEXT:    extsb r5, r5

diff  --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
index 63f9c73a9ff7735..328d38f93ac0756 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -35,17 +35,17 @@ define double @foo_fmf(double %a, double %b) nounwind {
 ; CHECK-P8-NEXT:    vspltisw 2, -3
 ; CHECK-P8-NEXT:    xsrsqrtedp 0, 2
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; CHECK-P8-NEXT:    lfs 5, .LCPI0_0@toc@l(3)
-; CHECK-P8-NEXT:    xvcvsxwdp 3, 34
-; CHECK-P8-NEXT:    xsmuldp 4, 2, 0
-; CHECK-P8-NEXT:    fmr 6, 3
-; CHECK-P8-NEXT:    xsmaddadp 6, 4, 0
+; CHECK-P8-NEXT:    xvcvsxwdp 4, 34
+; CHECK-P8-NEXT:    xsmuldp 3, 2, 0
+; CHECK-P8-NEXT:    fmr 5, 4
+; CHECK-P8-NEXT:    xsmaddadp 5, 3, 0
+; CHECK-P8-NEXT:    lfs 3, .LCPI0_0@toc@l(3)
+; CHECK-P8-NEXT:    xsmuldp 0, 0, 3
 ; CHECK-P8-NEXT:    xsmuldp 0, 0, 5
-; CHECK-P8-NEXT:    xsmuldp 0, 0, 6
 ; CHECK-P8-NEXT:    xsmuldp 2, 2, 0
-; CHECK-P8-NEXT:    xsmaddadp 3, 2, 0
-; CHECK-P8-NEXT:    xsmuldp 0, 0, 5
+; CHECK-P8-NEXT:    xsmaddadp 4, 2, 0
 ; CHECK-P8-NEXT:    xsmuldp 0, 0, 3
+; CHECK-P8-NEXT:    xsmuldp 0, 0, 4
 ; CHECK-P8-NEXT:    xsmuldp 1, 1, 0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -138,11 +138,11 @@ define double @foof_fmf(double %a, float %b) nounwind {
 ; CHECK-P8-NEXT:    xsrsqrtesp 0, 2
 ; CHECK-P8-NEXT:    vspltisw 2, -3
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
-; CHECK-P8-NEXT:    lfs 4, .LCPI3_0@toc@l(3)
 ; CHECK-P8-NEXT:    xvcvsxwdp 3, 34
 ; CHECK-P8-NEXT:    xsmulsp 2, 2, 0
 ; CHECK-P8-NEXT:    xsmaddasp 3, 2, 0
-; CHECK-P8-NEXT:    xsmulsp 0, 0, 4
+; CHECK-P8-NEXT:    lfs 2, .LCPI3_0@toc@l(3)
+; CHECK-P8-NEXT:    xsmulsp 0, 0, 2
 ; CHECK-P8-NEXT:    xsmulsp 0, 0, 3
 ; CHECK-P8-NEXT:    xsmuldp 1, 1, 0
 ; CHECK-P8-NEXT:    blr
@@ -215,17 +215,17 @@ define float @food_fmf(float %a, double %b) nounwind {
 ; CHECK-P8-NEXT:    vspltisw 2, -3
 ; CHECK-P8-NEXT:    xsrsqrtedp 0, 2
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
-; CHECK-P8-NEXT:    lfs 5, .LCPI5_0@toc@l(3)
-; CHECK-P8-NEXT:    xvcvsxwdp 3, 34
-; CHECK-P8-NEXT:    xsmuldp 4, 2, 0
-; CHECK-P8-NEXT:    fmr 6, 3
-; CHECK-P8-NEXT:    xsmaddadp 6, 4, 0
+; CHECK-P8-NEXT:    xvcvsxwdp 4, 34
+; CHECK-P8-NEXT:    xsmuldp 3, 2, 0
+; CHECK-P8-NEXT:    fmr 5, 4
+; CHECK-P8-NEXT:    xsmaddadp 5, 3, 0
+; CHECK-P8-NEXT:    lfs 3, .LCPI5_0@toc@l(3)
+; CHECK-P8-NEXT:    xsmuldp 0, 0, 3
 ; CHECK-P8-NEXT:    xsmuldp 0, 0, 5
-; CHECK-P8-NEXT:    xsmuldp 0, 0, 6
 ; CHECK-P8-NEXT:    xsmuldp 2, 2, 0
-; CHECK-P8-NEXT:    xsmaddadp 3, 2, 0
-; CHECK-P8-NEXT:    xsmuldp 0, 0, 5
+; CHECK-P8-NEXT:    xsmaddadp 4, 2, 0
 ; CHECK-P8-NEXT:    xsmuldp 0, 0, 3
+; CHECK-P8-NEXT:    xsmuldp 0, 0, 4
 ; CHECK-P8-NEXT:    xsrsp 0, 0
 ; CHECK-P8-NEXT:    xsmulsp 1, 1, 0
 ; CHECK-P8-NEXT:    blr
@@ -302,11 +302,11 @@ define float @goo_fmf(float %a, float %b) nounwind {
 ; CHECK-P8-NEXT:    xsrsqrtesp 0, 2
 ; CHECK-P8-NEXT:    vspltisw 2, -3
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
-; CHECK-P8-NEXT:    lfs 4, .LCPI7_0@toc@l(3)
 ; CHECK-P8-NEXT:    xvcvsxwdp 3, 34
 ; CHECK-P8-NEXT:    xsmulsp 2, 2, 0
 ; CHECK-P8-NEXT:    xsmaddasp 3, 2, 0
-; CHECK-P8-NEXT:    xsmulsp 0, 0, 4
+; CHECK-P8-NEXT:    lfs 2, .LCPI7_0@toc@l(3)
+; CHECK-P8-NEXT:    xsmulsp 0, 0, 2
 ; CHECK-P8-NEXT:    xsmulsp 0, 0, 3
 ; CHECK-P8-NEXT:    xsmulsp 1, 1, 0
 ; CHECK-P8-NEXT:    blr
@@ -399,11 +399,11 @@ define float @rsqrt_fmul_fmf(float %a, float %b, float %c) {
 ; CHECK-P8-NEXT:    xsrsqrtesp 0, 1
 ; CHECK-P8-NEXT:    vspltisw 2, -3
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
-; CHECK-P8-NEXT:    lfs 5, .LCPI10_0@toc@l(3)
 ; CHECK-P8-NEXT:    xvcvsxwdp 4, 34
 ; CHECK-P8-NEXT:    xsmulsp 1, 1, 0
 ; CHECK-P8-NEXT:    xsmaddasp 4, 1, 0
-; CHECK-P8-NEXT:    xsmulsp 0, 0, 5
+; CHECK-P8-NEXT:    lfs 1, .LCPI10_0@toc@l(3)
+; CHECK-P8-NEXT:    xsmulsp 0, 0, 1
 ; CHECK-P8-NEXT:    xsresp 1, 2
 ; CHECK-P8-NEXT:    xsmulsp 0, 0, 4
 ; CHECK-P8-NEXT:    xsmulsp 4, 0, 1
@@ -485,14 +485,14 @@ define <4 x float> @hoo_fmf(<4 x float> %a, <4 x float> %b) nounwind {
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    xvrsqrtesp 0, 35
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
-; CHECK-P8-NEXT:    addis 4, 2, .LCPI12_1@toc@ha
 ; CHECK-P8-NEXT:    addi 3, 3, .LCPI12_0@toc@l
 ; CHECK-P8-NEXT:    lxvd2x 2, 0, 3
-; CHECK-P8-NEXT:    addi 3, 4, .LCPI12_1@toc@l
-; CHECK-P8-NEXT:    lxvd2x 3, 0, 3
+; CHECK-P8-NEXT:    addis 3, 2, .LCPI12_1@toc@ha
 ; CHECK-P8-NEXT:    xvmulsp 1, 35, 0
+; CHECK-P8-NEXT:    addi 3, 3, .LCPI12_1@toc@l
 ; CHECK-P8-NEXT:    xvmaddasp 2, 1, 0
-; CHECK-P8-NEXT:    xvmulsp 0, 0, 3
+; CHECK-P8-NEXT:    lxvd2x 1, 0, 3
+; CHECK-P8-NEXT:    xvmulsp 0, 0, 1
 ; CHECK-P8-NEXT:    xvmulsp 0, 0, 2
 ; CHECK-P8-NEXT:    xvmulsp 34, 34, 0
 ; CHECK-P8-NEXT:    blr
@@ -778,17 +778,17 @@ define double @foo3_fmf(double %a) nounwind {
 ; CHECK-P8-NEXT:    vspltisw 2, -3
 ; CHECK-P8-NEXT:    xsrsqrtedp 0, 1
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI20_0@toc@ha
-; CHECK-P8-NEXT:    lfs 4, .LCPI20_0@toc@l(3)
-; CHECK-P8-NEXT:    xvcvsxwdp 2, 34
-; CHECK-P8-NEXT:    xsmuldp 3, 1, 0
-; CHECK-P8-NEXT:    fmr 5, 2
-; CHECK-P8-NEXT:    xsmaddadp 5, 3, 0
+; CHECK-P8-NEXT:    xvcvsxwdp 3, 34
+; CHECK-P8-NEXT:    xsmuldp 2, 1, 0
+; CHECK-P8-NEXT:    fmr 4, 3
+; CHECK-P8-NEXT:    xsmaddadp 4, 2, 0
+; CHECK-P8-NEXT:    lfs 2, .LCPI20_0@toc@l(3)
+; CHECK-P8-NEXT:    xsmuldp 0, 0, 2
 ; CHECK-P8-NEXT:    xsmuldp 0, 0, 4
-; CHECK-P8-NEXT:    xsmuldp 0, 0, 5
 ; CHECK-P8-NEXT:    xsmuldp 1, 1, 0
-; CHECK-P8-NEXT:    xsmaddadp 2, 1, 0
-; CHECK-P8-NEXT:    xsmuldp 0, 1, 4
-; CHECK-P8-NEXT:    xsmuldp 1, 0, 2
+; CHECK-P8-NEXT:    xsmaddadp 3, 1, 0
+; CHECK-P8-NEXT:    xsmuldp 0, 1, 2
+; CHECK-P8-NEXT:    xsmuldp 1, 0, 3
 ; CHECK-P8-NEXT:    blr
 ; CHECK-P8-NEXT:  .LBB20_2:
 ; CHECK-P8-NEXT:    xssqrtdp 1, 1
@@ -850,8 +850,8 @@ define double @foo3_fmf_crbits_off(double %a) #2 {
 ;
 ; CHECK-P8-LABEL: foo3_fmf_crbits_off:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    xsabsdp 0, 1
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI21_1@toc@ha
+; CHECK-P8-NEXT:    xsabsdp 0, 1
 ; CHECK-P8-NEXT:    lfd 2, .LCPI21_1@toc@l(3)
 ; CHECK-P8-NEXT:    xscmpudp 0, 0, 2
 ; CHECK-P8-NEXT:    blt 0, .LBB21_2
@@ -859,17 +859,17 @@ define double @foo3_fmf_crbits_off(double %a) #2 {
 ; CHECK-P8-NEXT:    vspltisw 2, -3
 ; CHECK-P8-NEXT:    xsrsqrtedp 0, 1
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI21_0@toc@ha
-; CHECK-P8-NEXT:    lfs 4, .LCPI21_0@toc@l(3)
-; CHECK-P8-NEXT:    xvcvsxwdp 2, 34
-; CHECK-P8-NEXT:    xsmuldp 3, 1, 0
-; CHECK-P8-NEXT:    fmr 5, 2
-; CHECK-P8-NEXT:    xsmaddadp 5, 3, 0
+; CHECK-P8-NEXT:    xvcvsxwdp 3, 34
+; CHECK-P8-NEXT:    xsmuldp 2, 1, 0
+; CHECK-P8-NEXT:    fmr 4, 3
+; CHECK-P8-NEXT:    xsmaddadp 4, 2, 0
+; CHECK-P8-NEXT:    lfs 2, .LCPI21_0@toc@l(3)
+; CHECK-P8-NEXT:    xsmuldp 0, 0, 2
 ; CHECK-P8-NEXT:    xsmuldp 0, 0, 4
-; CHECK-P8-NEXT:    xsmuldp 0, 0, 5
 ; CHECK-P8-NEXT:    xsmuldp 1, 1, 0
-; CHECK-P8-NEXT:    xsmaddadp 2, 1, 0
-; CHECK-P8-NEXT:    xsmuldp 0, 1, 4
-; CHECK-P8-NEXT:    xsmuldp 1, 0, 2
+; CHECK-P8-NEXT:    xsmaddadp 3, 1, 0
+; CHECK-P8-NEXT:    xsmuldp 0, 1, 2
+; CHECK-P8-NEXT:    xsmuldp 1, 0, 3
 ; CHECK-P8-NEXT:    blr
 ; CHECK-P8-NEXT:  .LBB21_2:
 ; CHECK-P8-NEXT:    xssqrtdp 1, 1
@@ -950,8 +950,8 @@ define float @goo3_fmf(float %a) nounwind {
 ;
 ; CHECK-P8-LABEL: goo3_fmf:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    xsabsdp 0, 1
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI23_1@toc@ha
+; CHECK-P8-NEXT:    xsabsdp 0, 1
 ; CHECK-P8-NEXT:    lfs 2, .LCPI23_1@toc@l(3)
 ; CHECK-P8-NEXT:    fcmpu 0, 0, 2
 ; CHECK-P8-NEXT:    xxlxor 0, 0, 0
@@ -960,11 +960,11 @@ define float @goo3_fmf(float %a) nounwind {
 ; CHECK-P8-NEXT:    xsrsqrtesp 0, 1
 ; CHECK-P8-NEXT:    vspltisw 2, -3
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI23_0@toc@ha
-; CHECK-P8-NEXT:    lfs 3, .LCPI23_0@toc@l(3)
 ; CHECK-P8-NEXT:    xvcvsxwdp 2, 34
 ; CHECK-P8-NEXT:    xsmulsp 1, 1, 0
 ; CHECK-P8-NEXT:    xsmaddasp 2, 1, 0
-; CHECK-P8-NEXT:    xsmulsp 0, 1, 3
+; CHECK-P8-NEXT:    lfs 0, .LCPI23_0@toc@l(3)
+; CHECK-P8-NEXT:    xsmulsp 0, 1, 0
 ; CHECK-P8-NEXT:    xsmulsp 0, 0, 2
 ; CHECK-P8-NEXT:  .LBB23_2:
 ; CHECK-P8-NEXT:    fmr 1, 0
@@ -1042,14 +1042,14 @@ define <4 x float> @hoo3_fmf(<4 x float> %a) #1 {
 ; CHECK-P8-NEXT:  # %bb.1:
 ; CHECK-P8-NEXT:    xvrsqrtesp 0, 34
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI25_0@toc@ha
-; CHECK-P8-NEXT:    addis 4, 2, .LCPI25_1@toc@ha
 ; CHECK-P8-NEXT:    addi 3, 3, .LCPI25_0@toc@l
 ; CHECK-P8-NEXT:    lxvd2x 2, 0, 3
-; CHECK-P8-NEXT:    addi 3, 4, .LCPI25_1@toc@l
-; CHECK-P8-NEXT:    lxvd2x 3, 0, 3
+; CHECK-P8-NEXT:    addis 3, 2, .LCPI25_1@toc@ha
 ; CHECK-P8-NEXT:    xvmulsp 1, 34, 0
+; CHECK-P8-NEXT:    addi 3, 3, .LCPI25_1@toc@l
 ; CHECK-P8-NEXT:    xvmaddasp 2, 1, 0
-; CHECK-P8-NEXT:    xvmulsp 0, 1, 3
+; CHECK-P8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-P8-NEXT:    xvmulsp 0, 1, 0
 ; CHECK-P8-NEXT:    xvmulsp 34, 0, 2
 ; CHECK-P8-NEXT:    blr
 ; CHECK-P8-NEXT:  .LBB25_2:
@@ -1164,16 +1164,16 @@ define <2 x double> @hoo4_fmf(<2 x double> %a) #1 {
 ; CHECK-P8-NEXT:    addi 3, 3, .LCPI27_0@toc@l
 ; CHECK-P8-NEXT:    lxvd2x 2, 0, 3
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI27_1@toc@ha
-; CHECK-P8-NEXT:    addi 3, 3, .LCPI27_1@toc@l
-; CHECK-P8-NEXT:    lxvd2x 3, 0, 3
-; CHECK-P8-NEXT:    xxlor 4, 2, 2
 ; CHECK-P8-NEXT:    xvmuldp 1, 34, 0
-; CHECK-P8-NEXT:    xvmaddadp 4, 1, 0
+; CHECK-P8-NEXT:    addi 3, 3, .LCPI27_1@toc@l
+; CHECK-P8-NEXT:    xxlor 3, 2, 2
+; CHECK-P8-NEXT:    xvmaddadp 3, 1, 0
+; CHECK-P8-NEXT:    lxvd2x 1, 0, 3
+; CHECK-P8-NEXT:    xvmuldp 0, 0, 1
 ; CHECK-P8-NEXT:    xvmuldp 0, 0, 3
-; CHECK-P8-NEXT:    xvmuldp 0, 0, 4
-; CHECK-P8-NEXT:    xvmuldp 1, 34, 0
-; CHECK-P8-NEXT:    xvmaddadp 2, 1, 0
-; CHECK-P8-NEXT:    xvmuldp 0, 1, 3
+; CHECK-P8-NEXT:    xvmuldp 3, 34, 0
+; CHECK-P8-NEXT:    xvmaddadp 2, 3, 0
+; CHECK-P8-NEXT:    xvmuldp 0, 3, 1
 ; CHECK-P8-NEXT:    xvmuldp 34, 0, 2
 ; CHECK-P8-NEXT:    blr
 ; CHECK-P8-NEXT:  .LBB27_2:

diff  --git a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
index b7c7fe2f93f57c7..6d188ce3b4a5ee6 100644
--- a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
+++ b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
@@ -70,11 +70,11 @@ define dso_local <2 x double> @test2(ptr nocapture readonly %a, ptr nocapture re
 ; AIX-32:       # %bb.0: # %entry
 ; AIX-32-NEXT:    li r5, 4
 ; AIX-32-NEXT:    lxsiwzx v3, 0, r3
-; AIX-32-NEXT:    lxsiwzx v5, 0, r4
+; AIX-32-NEXT:    lxsiwzx v4, 0, r4
 ; AIX-32-NEXT:    lxsiwzx v2, r3, r5
-; AIX-32-NEXT:    lxsiwzx v4, r4, r5
 ; AIX-32-NEXT:    vmrgow v2, v3, v2
-; AIX-32-NEXT:    vmrgow v3, v5, v4
+; AIX-32-NEXT:    lxsiwzx v3, r4, r5
+; AIX-32-NEXT:    vmrgow v3, v4, v3
 ; AIX-32-NEXT:    xvsubsp vs0, v2, v3
 ; AIX-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; AIX-32-NEXT:    xscvspdpn f0, vs0
@@ -114,11 +114,11 @@ define dso_local <2 x double> @test3(ptr nocapture readonly %a, ptr nocapture re
 ; AIX-32:       # %bb.0: # %entry
 ; AIX-32-NEXT:    li r5, 4
 ; AIX-32-NEXT:    lxsiwzx v3, 0, r3
-; AIX-32-NEXT:    lxsiwzx v5, 0, r4
+; AIX-32-NEXT:    lxsiwzx v4, 0, r4
 ; AIX-32-NEXT:    lxsiwzx v2, r3, r5
-; AIX-32-NEXT:    lxsiwzx v4, r4, r5
 ; AIX-32-NEXT:    vmrgow v2, v3, v2
-; AIX-32-NEXT:    vmrgow v3, v5, v4
+; AIX-32-NEXT:    lxsiwzx v3, r4, r5
+; AIX-32-NEXT:    vmrgow v3, v4, v3
 ; AIX-32-NEXT:    xvaddsp vs0, v2, v3
 ; AIX-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; AIX-32-NEXT:    xscvspdpn f0, vs0
@@ -158,11 +158,11 @@ define dso_local <2 x double> @test4(ptr nocapture readonly %a, ptr nocapture re
 ; AIX-32:       # %bb.0: # %entry
 ; AIX-32-NEXT:    li r5, 4
 ; AIX-32-NEXT:    lxsiwzx v3, 0, r3
-; AIX-32-NEXT:    lxsiwzx v5, 0, r4
+; AIX-32-NEXT:    lxsiwzx v4, 0, r4
 ; AIX-32-NEXT:    lxsiwzx v2, r3, r5
-; AIX-32-NEXT:    lxsiwzx v4, r4, r5
 ; AIX-32-NEXT:    vmrgow v2, v3, v2
-; AIX-32-NEXT:    vmrgow v3, v5, v4
+; AIX-32-NEXT:    lxsiwzx v3, r4, r5
+; AIX-32-NEXT:    vmrgow v3, v4, v3
 ; AIX-32-NEXT:    xvmulsp vs0, v2, v3
 ; AIX-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; AIX-32-NEXT:    xscvspdpn f0, vs0
@@ -260,8 +260,8 @@ define dso_local i32 @test6() #0 {
 ;
 ; AIX-64-LABEL: test6:
 ; AIX-64:       # %bb.0: # %bb
-; AIX-64-NEXT:    ld r3, L..C1(r2) # @Glob1
 ; AIX-64-NEXT:    lis r4, 8
+; AIX-64-NEXT:    ld r3, L..C1(r2) # @Glob1
 ; AIX-64-NEXT:    xxlxor vs1, vs1, vs1
 ; AIX-64-NEXT:    ori r4, r4, 38248
 ; AIX-64-NEXT:    lfdx f0, r3, r4
@@ -278,8 +278,8 @@ define dso_local i32 @test6() #0 {
 ;
 ; AIX-32-LABEL: test6:
 ; AIX-32:       # %bb.0: # %bb
-; AIX-32-NEXT:    lwz r3, L..C1(r2) # @Glob1
 ; AIX-32-NEXT:    lis r4, 8
+; AIX-32-NEXT:    lwz r3, L..C1(r2) # @Glob1
 ; AIX-32-NEXT:    ori r4, r4, 38248
 ; AIX-32-NEXT:    lfsux f0, r3, r4
 ; AIX-32-NEXT:    lfs f1, 4(r3)

diff  --git a/llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll b/llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll
index 87f6ffaa2481d53..114d0f5ceaf063f 100644
--- a/llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll
+++ b/llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll
@@ -20,8 +20,8 @@ define float @foo_float(float %0, float %1, float %2, float %3) {
 ;
 ; CHECK-P8-LABEL: foo_float:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    xsmulsp f1, f2, f1
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-P8-NEXT:    xsmulsp f1, f2, f1
 ; CHECK-P8-NEXT:    xssubsp f0, f3, f4
 ; CHECK-P8-NEXT:    lfs f2, .LCPI0_0@toc@l(r3)
 ; CHECK-P8-NEXT:    xsmaddasp f1, f0, f2
@@ -56,8 +56,8 @@ define double @foo_double(double %0, double %1, double %2, double %3) {
 ;
 ; CHECK-P8-LABEL: foo_double:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    xsmuldp f1, f2, f1
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-P8-NEXT:    xsmuldp f1, f2, f1
 ; CHECK-P8-NEXT:    xssubdp f0, f3, f4
 ; CHECK-P8-NEXT:    lfd f2, .LCPI1_0@toc@l(r3)
 ; CHECK-P8-NEXT:    xsmaddadp f1, f0, f2
@@ -98,16 +98,16 @@ define float @foo_float_reuse_const(float %0, float %1, float %2, float %3) {
 ;
 ; CHECK-P8-LABEL: foo_float_reuse_const:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    xsmulsp f1, f2, f1
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-P8-NEXT:    xsmulsp f1, f2, f1
 ; CHECK-P8-NEXT:    xssubsp f0, f3, f4
 ; CHECK-P8-NEXT:    lfs f3, .LCPI2_0@toc@l(r3)
 ; CHECK-P8-NEXT:    addis r3, r2, .LCPI2_1@toc@ha
-; CHECK-P8-NEXT:    lfs f4, .LCPI2_1@toc@l(r3)
+; CHECK-P8-NEXT:    xsmaddasp f1, f0, f3
+; CHECK-P8-NEXT:    lfs f0, .LCPI2_1@toc@l(r3)
 ; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
 ; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
-; CHECK-P8-NEXT:    xsmaddasp f1, f0, f3
-; CHECK-P8-NEXT:    xsmulsp f0, f2, f4
+; CHECK-P8-NEXT:    xsmulsp f0, f2, f0
 ; CHECK-P8-NEXT:    stfs f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
index 68db90ad2e19873..228c1308b49c3f0 100644
--- a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
+++ b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
@@ -5,15 +5,15 @@
 define <4 x float> @repeated_fp_divisor_noest(float %a, <4 x float> %b) {
 ; CHECK-LABEL: repeated_fp_divisor_noest:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
 ; CHECK-NEXT:    xscvdpspn 0, 1
+; CHECK-NEXT:    addi 3, 3, .LCPI0_0@toc@l
+; CHECK-NEXT:    lxvd2x 1, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI0_1@toc@l
-; CHECK-NEXT:    lxvd2x 1, 0, 3
-; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI0_0@toc@l
+; CHECK-NEXT:    lxvd2x 2, 0, 3
 ; CHECK-NEXT:    xxspltw 0, 0, 0
-; CHECK-NEXT:    xvdivsp 0, 1, 0
-; CHECK-NEXT:    lxvd2x 1, 0, 3
+; CHECK-NEXT:    xvdivsp 0, 2, 0
 ; CHECK-NEXT:    xxswapd 35, 1
 ; CHECK-NEXT:    xvmulsp 1, 34, 35
 ; CHECK-NEXT:    xvmulsp 34, 1, 0
@@ -29,19 +29,19 @@ define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
 ; CHECK-LABEL: repeated_fp_divisor:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xscvdpspn 0, 1
-; CHECK-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI1_0@toc@l
-; CHECK-NEXT:    lxvd2x 2, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI1_1@toc@l
+; CHECK-NEXT:    lxvd2x 1, 0, 3
+; CHECK-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-NEXT:    addi 3, 3, .LCPI1_0@toc@l
 ; CHECK-NEXT:    xxspltw 0, 0, 0
-; CHECK-NEXT:    xvresp 1, 0
-; CHECK-NEXT:    xvmaddasp 2, 0, 1
-; CHECK-NEXT:    lxvd2x 0, 0, 3
-; CHECK-NEXT:    xxswapd 35, 0
-; CHECK-NEXT:    xvnmsubasp 1, 1, 2
+; CHECK-NEXT:    xvresp 2, 0
+; CHECK-NEXT:    xxswapd 35, 1
+; CHECK-NEXT:    lxvd2x 1, 0, 3
+; CHECK-NEXT:    xvmaddasp 1, 0, 2
 ; CHECK-NEXT:    xvmulsp 0, 34, 35
-; CHECK-NEXT:    xvmulsp 34, 0, 1
+; CHECK-NEXT:    xvnmsubasp 2, 2, 1
+; CHECK-NEXT:    xvmulsp 34, 0, 2
 ; CHECK-NEXT:    blr
   %ins = insertelement <4 x float> undef, float %a, i32 0
   %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer

diff  --git a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
index 1e94332bec42363..fd5f26ba35742f3 100644
--- a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
+++ b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
@@ -17,11 +17,11 @@ define i1 @test_saddo_i8(i8 %a, i8 %b) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    extsb 4, 4
 ; CHECK-NEXT:    extsb 3, 3
-; CHECK-NEXT:    li 5, 1
 ; CHECK-NEXT:    add 3, 3, 4
 ; CHECK-NEXT:    extsb 4, 3
 ; CHECK-NEXT:    cmpw 4, 3
-; CHECK-NEXT:    iseleq 3, 0, 5
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    iseleq 3, 0, 3
 ; CHECK-NEXT:    blr
 entry:
   %res = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %a, i8 %b) nounwind
@@ -34,11 +34,11 @@ define i1 @test_saddo_i16(i16 %a, i16 %b) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    extsh 4, 4
 ; CHECK-NEXT:    extsh 3, 3
-; CHECK-NEXT:    li 5, 1
 ; CHECK-NEXT:    add 3, 3, 4
 ; CHECK-NEXT:    extsh 4, 3
 ; CHECK-NEXT:    cmpw 4, 3
-; CHECK-NEXT:    iseleq 3, 0, 5
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    iseleq 3, 0, 3
 ; CHECK-NEXT:    blr
 entry:
   %res = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 %a, i16 %b) nounwind
@@ -97,11 +97,11 @@ define i1 @test_ssubo_i8(i8 %a, i8 %b) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    extsb 3, 3
 ; CHECK-NEXT:    extsb 4, 4
-; CHECK-NEXT:    li 5, 1
 ; CHECK-NEXT:    sub 3, 3, 4
 ; CHECK-NEXT:    extsb 4, 3
 ; CHECK-NEXT:    cmpw 4, 3
-; CHECK-NEXT:    iseleq 3, 0, 5
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    iseleq 3, 0, 3
 ; CHECK-NEXT:    blr
 entry:
   %res = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 %b) nounwind
@@ -114,11 +114,11 @@ define i1 @test_ssubo_i16(i16 %a, i16 %b) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    extsh 3, 3
 ; CHECK-NEXT:    extsh 4, 4
-; CHECK-NEXT:    li 5, 1
 ; CHECK-NEXT:    sub 3, 3, 4
 ; CHECK-NEXT:    extsh 4, 3
 ; CHECK-NEXT:    cmpw 4, 3
-; CHECK-NEXT:    iseleq 3, 0, 5
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    iseleq 3, 0, 3
 ; CHECK-NEXT:    blr
 entry:
   %res = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 %a, i16 %b) nounwind

diff  --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll
index 766f48809304c6e..9fea8e5f8ab47ed 100644
--- a/llvm/test/CodeGen/PowerPC/sat-add.ll
+++ b/llvm/test/CodeGen/PowerPC/sat-add.ll
@@ -9,9 +9,9 @@
 define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i8_using_min:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    clrlwi 5, 3, 24
+; CHECK-NEXT:    clrlwi 4, 3, 24
+; CHECK-NEXT:    cmplwi 4, 213
 ; CHECK-NEXT:    li 4, -43
-; CHECK-NEXT:    cmplwi 5, 213
 ; CHECK-NEXT:    isellt 3, 3, 4
 ; CHECK-NEXT:    addi 3, 3, 42
 ; CHECK-NEXT:    blr
@@ -39,10 +39,10 @@ define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
 define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_notval:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    clrlwi 5, 3, 24
-; CHECK-NEXT:    li 4, -1
+; CHECK-NEXT:    clrlwi 4, 3, 24
 ; CHECK-NEXT:    addi 3, 3, 42
-; CHECK-NEXT:    cmplwi 5, 213
+; CHECK-NEXT:    cmplwi 4, 213
+; CHECK-NEXT:    li 4, -1
 ; CHECK-NEXT:    iselgt 3, 4, 3
 ; CHECK-NEXT:    blr
   %a = add i8 %x, 42
@@ -54,9 +54,9 @@ define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
 define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i16_using_min:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    clrlwi 5, 3, 16
+; CHECK-NEXT:    clrlwi 4, 3, 16
+; CHECK-NEXT:    cmplwi 4, 65493
 ; CHECK-NEXT:    li 4, -43
-; CHECK-NEXT:    cmplwi 5, 65493
 ; CHECK-NEXT:    isellt 3, 3, 4
 ; CHECK-NEXT:    addi 3, 3, 42
 ; CHECK-NEXT:    blr
@@ -84,10 +84,10 @@ define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
 define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    clrlwi 5, 3, 16
-; CHECK-NEXT:    li 4, -1
+; CHECK-NEXT:    clrlwi 4, 3, 16
 ; CHECK-NEXT:    addi 3, 3, 42
-; CHECK-NEXT:    cmplwi 5, 65493
+; CHECK-NEXT:    cmplwi 4, 65493
+; CHECK-NEXT:    li 4, -1
 ; CHECK-NEXT:    iselgt 3, 4, 3
 ; CHECK-NEXT:    blr
   %a = add i16 %x, 42
@@ -113,10 +113,10 @@ define i32 @unsigned_sat_constant_i32_using_min(i32 %x) {
 define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi 5, 3, 42
-; CHECK-NEXT:    li 4, -1
-; CHECK-NEXT:    cmplw 5, 3
-; CHECK-NEXT:    isellt 3, 4, 5
+; CHECK-NEXT:    addi 4, 3, 42
+; CHECK-NEXT:    cmplw 4, 3
+; CHECK-NEXT:    li 3, -1
+; CHECK-NEXT:    isellt 3, 3, 4
 ; CHECK-NEXT:    blr
   %a = add i32 %x, 42
   %c = icmp ugt i32 %x, %a
@@ -127,11 +127,11 @@ define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
 define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li 4, -43
-; CHECK-NEXT:    addi 5, 3, 42
-; CHECK-NEXT:    cmplw 3, 4
+; CHECK-NEXT:    li 5, -43
+; CHECK-NEXT:    addi 4, 3, 42
+; CHECK-NEXT:    cmplw 3, 5
 ; CHECK-NEXT:    li 3, -1
-; CHECK-NEXT:    iselgt 3, 3, 5
+; CHECK-NEXT:    iselgt 3, 3, 4
 ; CHECK-NEXT:    blr
   %a = add i32 %x, 42
   %c = icmp ugt i32 %x, -43
@@ -156,10 +156,10 @@ define i64 @unsigned_sat_constant_i64_using_min(i64 %x) {
 define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_sum:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi 5, 3, 42
-; CHECK-NEXT:    li 4, -1
-; CHECK-NEXT:    cmpld 5, 3
-; CHECK-NEXT:    isellt 3, 4, 5
+; CHECK-NEXT:    addi 4, 3, 42
+; CHECK-NEXT:    cmpld 4, 3
+; CHECK-NEXT:    li 3, -1
+; CHECK-NEXT:    isellt 3, 3, 4
 ; CHECK-NEXT:    blr
   %a = add i64 %x, 42
   %c = icmp ugt i64 %x, %a
@@ -170,11 +170,11 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
 define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li 4, -43
-; CHECK-NEXT:    addi 5, 3, 42
-; CHECK-NEXT:    cmpld 3, 4
+; CHECK-NEXT:    li 5, -43
+; CHECK-NEXT:    addi 4, 3, 42
+; CHECK-NEXT:    cmpld 3, 5
 ; CHECK-NEXT:    li 3, -1
-; CHECK-NEXT:    iselgt 3, 3, 5
+; CHECK-NEXT:    iselgt 3, 3, 4
 ; CHECK-NEXT:    blr
   %a = add i64 %x, 42
   %c = icmp ugt i64 %x, -43
@@ -185,11 +185,11 @@ define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
 define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i8_using_min:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    not 5, 4
-; CHECK-NEXT:    clrlwi 6, 3, 24
-; CHECK-NEXT:    clrlwi 7, 5, 24
-; CHECK-NEXT:    cmplw 6, 7
-; CHECK-NEXT:    isellt 3, 3, 5
+; CHECK-NEXT:    not 6, 4
+; CHECK-NEXT:    clrlwi 5, 3, 24
+; CHECK-NEXT:    clrlwi 7, 6, 24
+; CHECK-NEXT:    cmplw 5, 7
+; CHECK-NEXT:    isellt 3, 3, 6
 ; CHECK-NEXT:    add 3, 3, 4
 ; CHECK-NEXT:    blr
   %noty = xor i8 %y, -1
@@ -219,12 +219,12 @@ define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    not 6, 4
-; CHECK-NEXT:    clrlwi 7, 3, 24
-; CHECK-NEXT:    li 5, -1
+; CHECK-NEXT:    clrlwi 5, 3, 24
 ; CHECK-NEXT:    add 3, 3, 4
+; CHECK-NEXT:    li 4, -1
 ; CHECK-NEXT:    clrlwi 6, 6, 24
-; CHECK-NEXT:    cmplw 7, 6
-; CHECK-NEXT:    iselgt 3, 5, 3
+; CHECK-NEXT:    cmplw 5, 6
+; CHECK-NEXT:    iselgt 3, 4, 3
 ; CHECK-NEXT:    blr
   %noty = xor i8 %y, -1
   %a = add i8 %x, %y
@@ -236,11 +236,11 @@ define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
 define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i16_using_min:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    not 5, 4
-; CHECK-NEXT:    clrlwi 6, 3, 16
-; CHECK-NEXT:    clrlwi 7, 5, 16
-; CHECK-NEXT:    cmplw 6, 7
-; CHECK-NEXT:    isellt 3, 3, 5
+; CHECK-NEXT:    not 6, 4
+; CHECK-NEXT:    clrlwi 5, 3, 16
+; CHECK-NEXT:    clrlwi 7, 6, 16
+; CHECK-NEXT:    cmplw 5, 7
+; CHECK-NEXT:    isellt 3, 3, 6
 ; CHECK-NEXT:    add 3, 3, 4
 ; CHECK-NEXT:    blr
   %noty = xor i16 %y, -1
@@ -270,12 +270,12 @@ define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    not 6, 4
-; CHECK-NEXT:    clrlwi 7, 3, 16
-; CHECK-NEXT:    li 5, -1
+; CHECK-NEXT:    clrlwi 5, 3, 16
 ; CHECK-NEXT:    add 3, 3, 4
+; CHECK-NEXT:    li 4, -1
 ; CHECK-NEXT:    clrlwi 6, 6, 16
-; CHECK-NEXT:    cmplw 7, 6
-; CHECK-NEXT:    iselgt 3, 5, 3
+; CHECK-NEXT:    cmplw 5, 6
+; CHECK-NEXT:    iselgt 3, 4, 3
 ; CHECK-NEXT:    blr
   %noty = xor i16 %y, -1
   %a = add i16 %x, %y
@@ -303,9 +303,9 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    add 4, 3, 4
-; CHECK-NEXT:    li 5, -1
 ; CHECK-NEXT:    cmplw 4, 3
-; CHECK-NEXT:    isellt 3, 5, 4
+; CHECK-NEXT:    li 3, -1
+; CHECK-NEXT:    isellt 3, 3, 4
 ; CHECK-NEXT:    blr
   %a = add i32 %x, %y
   %c = icmp ugt i32 %x, %a
@@ -316,11 +316,11 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
 define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    not 6, 4
-; CHECK-NEXT:    li 5, -1
-; CHECK-NEXT:    cmplw 3, 6
-; CHECK-NEXT:    add 3, 3, 4
-; CHECK-NEXT:    iselgt 3, 5, 3
+; CHECK-NEXT:    not 5, 4
+; CHECK-NEXT:    add 4, 3, 4
+; CHECK-NEXT:    cmplw 3, 5
+; CHECK-NEXT:    li 3, -1
+; CHECK-NEXT:    iselgt 3, 3, 4
 ; CHECK-NEXT:    blr
   %noty = xor i32 %y, -1
   %a = add i32 %x, %y
@@ -348,9 +348,9 @@ define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i64_using_cmp_sum:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    add 4, 3, 4
-; CHECK-NEXT:    li 5, -1
 ; CHECK-NEXT:    cmpld 4, 3
-; CHECK-NEXT:    isellt 3, 5, 4
+; CHECK-NEXT:    li 3, -1
+; CHECK-NEXT:    isellt 3, 3, 4
 ; CHECK-NEXT:    blr
   %a = add i64 %x, %y
   %c = icmp ugt i64 %x, %a
@@ -361,11 +361,11 @@ define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
 define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    not 6, 4
-; CHECK-NEXT:    li 5, -1
-; CHECK-NEXT:    cmpld 3, 6
-; CHECK-NEXT:    add 3, 3, 4
-; CHECK-NEXT:    iselgt 3, 5, 3
+; CHECK-NEXT:    not 5, 4
+; CHECK-NEXT:    add 4, 3, 4
+; CHECK-NEXT:    cmpld 3, 5
+; CHECK-NEXT:    li 3, -1
+; CHECK-NEXT:    iselgt 3, 3, 4
 ; CHECK-NEXT:    blr
   %noty = xor i64 %y, -1
   %a = add i64 %x, %y
@@ -550,16 +550,16 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
 define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
 ; CHECK-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addis 3, 2, .LCPI35_1@toc@ha
-; CHECK-NEXT:    xxleqv 0, 0, 0
-; CHECK-NEXT:    addi 3, 3, .LCPI35_1@toc@l
-; CHECK-NEXT:    lxvd2x 35, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI35_0@toc@ha
+; CHECK-NEXT:    xxleqv 0, 0, 0
 ; CHECK-NEXT:    addi 3, 3, .LCPI35_0@toc@l
+; CHECK-NEXT:    lxvd2x 35, 0, 3
+; CHECK-NEXT:    addis 3, 2, .LCPI35_1@toc@ha
+; CHECK-NEXT:    addi 3, 3, .LCPI35_1@toc@l
 ; CHECK-NEXT:    lxvd2x 36, 0, 3
-; CHECK-NEXT:    vcmpgtud 3, 2, 3
-; CHECK-NEXT:    vaddudm 2, 2, 4
-; CHECK-NEXT:    xxsel 34, 34, 0, 35
+; CHECK-NEXT:    vaddudm 3, 2, 3
+; CHECK-NEXT:    vcmpgtud 2, 2, 4
+; CHECK-NEXT:    xxsel 34, 35, 0, 34
 ; CHECK-NEXT:    blr
   %a = add <2 x i64> %x, <i64 42, i64 42>
   %c = icmp ugt <2 x i64> %x, <i64 -43, i64 -43>
@@ -596,10 +596,10 @@ define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_notval(<16 x i8> %x, <16
 ; CHECK-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xxlnor 36, 35, 35
+; CHECK-NEXT:    vaddubm 3, 2, 3
 ; CHECK-NEXT:    xxleqv 0, 0, 0
-; CHECK-NEXT:    vcmpgtub 4, 2, 4
-; CHECK-NEXT:    vaddubm 2, 2, 3
-; CHECK-NEXT:    xxsel 34, 34, 0, 36
+; CHECK-NEXT:    vcmpgtub 2, 2, 4
+; CHECK-NEXT:    xxsel 34, 35, 0, 34
 ; CHECK-NEXT:    blr
   %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   %a = add <16 x i8> %x, %y
@@ -637,10 +637,10 @@ define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_notval(<8 x i16> %x, <8
 ; CHECK-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xxlnor 36, 35, 35
+; CHECK-NEXT:    vadduhm 3, 2, 3
 ; CHECK-NEXT:    xxleqv 0, 0, 0
-; CHECK-NEXT:    vcmpgtuh 4, 2, 4
-; CHECK-NEXT:    vadduhm 2, 2, 3
-; CHECK-NEXT:    xxsel 34, 34, 0, 36
+; CHECK-NEXT:    vcmpgtuh 2, 2, 4
+; CHECK-NEXT:    xxsel 34, 35, 0, 34
 ; CHECK-NEXT:    blr
   %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
   %a = add <8 x i16> %x, %y
@@ -678,10 +678,10 @@ define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_notval(<4 x i32> %x, <4
 ; CHECK-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xxlnor 36, 35, 35
+; CHECK-NEXT:    vadduwm 3, 2, 3
 ; CHECK-NEXT:    xxleqv 0, 0, 0
-; CHECK-NEXT:    vcmpgtuw 4, 2, 4
-; CHECK-NEXT:    vadduwm 2, 2, 3
-; CHECK-NEXT:    xxsel 34, 34, 0, 36
+; CHECK-NEXT:    vcmpgtuw 2, 2, 4
+; CHECK-NEXT:    xxsel 34, 35, 0, 34
 ; CHECK-NEXT:    blr
   %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
   %a = add <4 x i32> %x, %y
@@ -722,10 +722,10 @@ define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_notval(<2 x i64> %x, <2
 ; CHECK-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xxlnor 36, 35, 35
+; CHECK-NEXT:    vaddudm 3, 2, 3
 ; CHECK-NEXT:    xxleqv 0, 0, 0
-; CHECK-NEXT:    vcmpgtud 4, 2, 4
-; CHECK-NEXT:    vaddudm 2, 2, 3
-; CHECK-NEXT:    xxsel 34, 34, 0, 36
+; CHECK-NEXT:    vcmpgtud 2, 2, 4
+; CHECK-NEXT:    xxsel 34, 35, 0, 34
 ; CHECK-NEXT:    blr
   %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
   %a = add <2 x i64> %x, %y
@@ -740,116 +740,118 @@ define <4 x i128> @sadd(<4 x i128> %a, <4 x i128> %b) local_unnamed_addr {
 ; CHECK-LABEL: sadd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vadduqm 0, 2, 6
+; CHECK-NEXT:    vadduqm 10, 4, 8
+; CHECK-NEXT:    mfocrf 12, 32
+; CHECK-NEXT:    stw 12, 8(1)
 ; CHECK-NEXT:    xxswapd 0, 34
-; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT:    addis 3, 2, .LCPI48_0@toc@ha
+; CHECK-NEXT:    xxswapd 4, 36
 ; CHECK-NEXT:    vadduqm 1, 3, 7
-; CHECK-NEXT:    xxswapd 1, 35
-; CHECK-NEXT:    addi 3, 3, .LCPI48_0@toc@l
-; CHECK-NEXT:    xxswapd 3, 32
-; CHECK-NEXT:    mfvsrd 4, 34
-; CHECK-NEXT:    mfvsrd 8, 32
-; CHECK-NEXT:    xxswapd 2, 36
-; CHECK-NEXT:    mffprd 12, 0
-; CHECK-NEXT:    xxswapd 0, 33
-; CHECK-NEXT:    vadduqm 10, 4, 8
-; CHECK-NEXT:    cmpld 8, 4
-; CHECK-NEXT:    cmpd 1, 8, 4
-; CHECK-NEXT:    mffprd 4, 3
-; CHECK-NEXT:    lxvd2x 3, 0, 3
-; CHECK-NEXT:    sradi 3, 8, 63
-; CHECK-NEXT:    mffprd 0, 1
-; CHECK-NEXT:    xxswapd 1, 37
-; CHECK-NEXT:    mfvsrd 5, 35
 ; CHECK-NEXT:    vadduqm 11, 5, 9
-; CHECK-NEXT:    xxswapd 34, 3
-; CHECK-NEXT:    mfvsrd 9, 33
-; CHECK-NEXT:    crandc 20, 4, 2
-; CHECK-NEXT:    cmpld 1, 4, 12
-; CHECK-NEXT:    mffprd 4, 0
-; CHECK-NEXT:    xxswapd 0, 42
-; CHECK-NEXT:    mfvsrd 6, 36
-; CHECK-NEXT:    mfvsrd 10, 42
-; CHECK-NEXT:    cmpld 6, 4, 0
-; CHECK-NEXT:    crand 21, 2, 4
-; CHECK-NEXT:    cmpld 9, 5
-; CHECK-NEXT:    cmpd 1, 9, 5
+; CHECK-NEXT:    mffprd 3, 0
+; CHECK-NEXT:    mffprd 6, 4
+; CHECK-NEXT:    lwz 12, 8(1)
+; CHECK-NEXT:    xxswapd 2, 35
+; CHECK-NEXT:    xxswapd 5, 37
+; CHECK-NEXT:    mffprd 4, 2
+; CHECK-NEXT:    xxswapd 1, 32
+; CHECK-NEXT:    xxswapd 6, 42
 ; CHECK-NEXT:    mffprd 5, 1
-; CHECK-NEXT:    xxswapd 1, 43
-; CHECK-NEXT:    mffprd 30, 2
-; CHECK-NEXT:    mffprd 4, 0
-; CHECK-NEXT:    mfvsrd 7, 37
-; CHECK-NEXT:    mfvsrd 11, 43
-; CHECK-NEXT:    crandc 22, 4, 2
-; CHECK-NEXT:    cmpd 1, 10, 6
-; CHECK-NEXT:    crand 23, 2, 24
-; CHECK-NEXT:    cmpld 10, 6
-; CHECK-NEXT:    crandc 24, 4, 2
-; CHECK-NEXT:    cmpld 1, 4, 30
-; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
-; CHECK-NEXT:    mffprd 4, 1
-; CHECK-NEXT:    mfvsrd 6, 38
+; CHECK-NEXT:    cmpld 5, 3
+; CHECK-NEXT:    mffprd 7, 6
+; CHECK-NEXT:    xxswapd 3, 33
+; CHECK-NEXT:    xxswapd 7, 43
+; CHECK-NEXT:    mffprd 3, 3
+; CHECK-NEXT:    cmpld 5, 7, 6
+; CHECK-NEXT:    mffprd 6, 5
+; CHECK-NEXT:    mffprd 7, 7
+; CHECK-NEXT:    mfvsrd 5, 36
+; CHECK-NEXT:    cmpld 1, 3, 4
+; CHECK-NEXT:    mfvsrd 3, 34
+; CHECK-NEXT:    cmpld 6, 7, 6
+; CHECK-NEXT:    mfvsrd 7, 32
+; CHECK-NEXT:    mfvsrd 4, 35
+; CHECK-NEXT:    mfvsrd 6, 37
+; CHECK-NEXT:    cmpld 7, 7, 3
+; CHECK-NEXT:    cmpd 2, 7, 3
+; CHECK-NEXT:    mfvsrd 3, 33
+; CHECK-NEXT:    crandc 21, 8, 30
+; CHECK-NEXT:    crand 22, 30, 0
+; CHECK-NEXT:    cmpld 3, 4
+; CHECK-NEXT:    cmpd 7, 3, 4
+; CHECK-NEXT:    mfvsrd 4, 42
+; CHECK-NEXT:    sradi 3, 3, 63
+; CHECK-NEXT:    mtocrf 32, 12
+; CHECK-NEXT:    crnor 21, 22, 21
+; CHECK-NEXT:    crandc 23, 28, 2
 ; CHECK-NEXT:    crand 25, 2, 4
-; CHECK-NEXT:    cmpld 11, 7
-; CHECK-NEXT:    cmpd 1, 11, 7
-; CHECK-NEXT:    crandc 26, 4, 2
-; CHECK-NEXT:    cmpld 1, 4, 5
-; CHECK-NEXT:    sradi 4, 6, 63
-; CHECK-NEXT:    mtfprd 0, 4
-; CHECK-NEXT:    mfvsrd 4, 39
-; CHECK-NEXT:    mfvsrd 5, 40
-; CHECK-NEXT:    mfvsrd 6, 41
-; CHECK-NEXT:    sradi 4, 4, 63
-; CHECK-NEXT:    mtfprd 1, 4
-; CHECK-NEXT:    sradi 4, 5, 63
-; CHECK-NEXT:    mtfprd 2, 4
-; CHECK-NEXT:    sradi 4, 6, 63
+; CHECK-NEXT:    cmpld 4, 5
+; CHECK-NEXT:    cmpd 1, 4, 5
+; CHECK-NEXT:    mfvsrd 5, 43
+; CHECK-NEXT:    crnor 22, 25, 23
 ; CHECK-NEXT:    mtfprd 5, 3
-; CHECK-NEXT:    sradi 3, 10, 63
-; CHECK-NEXT:    mtfprd 4, 4
-; CHECK-NEXT:    sradi 4, 9, 63
+; CHECK-NEXT:    sradi 4, 4, 63
 ; CHECK-NEXT:    mtfprd 6, 4
-; CHECK-NEXT:    xxspltd 35, 5, 0
-; CHECK-NEXT:    sradi 4, 11, 63
-; CHECK-NEXT:    crnor 20, 21, 20
-; CHECK-NEXT:    xxspltd 38, 4, 0
-; CHECK-NEXT:    mtfprd 3, 3
-; CHECK-NEXT:    li 3, -1
-; CHECK-NEXT:    xxspltd 36, 6, 0
-; CHECK-NEXT:    mtfprd 5, 4
-; CHECK-NEXT:    crand 27, 2, 4
-; CHECK-NEXT:    xxspltd 37, 3, 0
-; CHECK-NEXT:    xxlxor 3, 35, 34
-; CHECK-NEXT:    xxspltd 35, 5, 0
-; CHECK-NEXT:    isel 4, 0, 3, 20
-; CHECK-NEXT:    mtfprd 8, 4
-; CHECK-NEXT:    crnor 20, 23, 22
-; CHECK-NEXT:    crnor 21, 25, 24
-; CHECK-NEXT:    crnor 22, 27, 26
-; CHECK-NEXT:    xxlxor 5, 36, 34
-; CHECK-NEXT:    xxspltd 36, 2, 0
-; CHECK-NEXT:    xxlxor 6, 37, 34
-; CHECK-NEXT:    xxlxor 7, 35, 34
+; CHECK-NEXT:    crandc 26, 4, 2
+; CHECK-NEXT:    crand 20, 2, 20
+; CHECK-NEXT:    cmpld 5, 6
+; CHECK-NEXT:    cmpd 1, 5, 6
+; CHECK-NEXT:    mfvsrd 6, 38
+; CHECK-NEXT:    sradi 5, 5, 63
+; CHECK-NEXT:    crnor 20, 20, 26
+; CHECK-NEXT:    mtfprd 7, 5
+; CHECK-NEXT:    sradi 6, 6, 63
+; CHECK-NEXT:    crandc 27, 4, 2
+; CHECK-NEXT:    crand 24, 2, 24
+; CHECK-NEXT:    crnor 23, 24, 27
+; CHECK-NEXT:    mtfprd 0, 6
+; CHECK-NEXT:    mfvsrd 6, 39
+; CHECK-NEXT:    sradi 6, 6, 63
+; CHECK-NEXT:    mtfprd 1, 6
+; CHECK-NEXT:    mfvsrd 6, 40
+; CHECK-NEXT:    sradi 6, 6, 63
+; CHECK-NEXT:    mtfprd 2, 6
+; CHECK-NEXT:    mfvsrd 6, 41
+; CHECK-NEXT:    sradi 6, 6, 63
+; CHECK-NEXT:    mtfprd 3, 6
+; CHECK-NEXT:    sradi 6, 7, 63
+; CHECK-NEXT:    mtfprd 4, 6
+; CHECK-NEXT:    li 6, -1
+; CHECK-NEXT:    isel 3, 0, 6, 21
+; CHECK-NEXT:    isel 4, 0, 6, 22
+; CHECK-NEXT:    isel 5, 0, 6, 20
+; CHECK-NEXT:    isel 6, 0, 6, 23
+; CHECK-NEXT:    mtfprd 8, 3
+; CHECK-NEXT:    addis 3, 2, .LCPI48_0@toc@ha
+; CHECK-NEXT:    mtfprd 10, 4
+; CHECK-NEXT:    mtfprd 11, 5
+; CHECK-NEXT:    mtfprd 12, 6
+; CHECK-NEXT:    addi 3, 3, .LCPI48_0@toc@l
+; CHECK-NEXT:    lxvd2x 9, 0, 3
+; CHECK-NEXT:    xxspltd 45, 6, 0
+; CHECK-NEXT:    xxspltd 46, 7, 0
 ; CHECK-NEXT:    xxspltd 34, 0, 0
-; CHECK-NEXT:    xxspltd 35, 8, 0
-; CHECK-NEXT:    isel 4, 0, 3, 20
-; CHECK-NEXT:    isel 5, 0, 3, 21
-; CHECK-NEXT:    isel 3, 0, 3, 22
-; CHECK-NEXT:    xxlxor 0, 34, 35
-; CHECK-NEXT:    xxspltd 34, 1, 0
-; CHECK-NEXT:    mtfprd 8, 4
-; CHECK-NEXT:    mtfprd 1, 5
-; CHECK-NEXT:    mtfprd 9, 3
-; CHECK-NEXT:    xxspltd 35, 8, 0
-; CHECK-NEXT:    xxspltd 37, 1, 0
-; CHECK-NEXT:    xxspltd 39, 9, 0
-; CHECK-NEXT:    xxlxor 1, 34, 35
-; CHECK-NEXT:    xxsel 34, 32, 3, 0
-; CHECK-NEXT:    xxlxor 2, 36, 37
-; CHECK-NEXT:    xxlxor 4, 38, 39
-; CHECK-NEXT:    xxsel 35, 33, 5, 1
-; CHECK-NEXT:    xxsel 36, 42, 6, 2
-; CHECK-NEXT:    xxsel 37, 43, 7, 4
+; CHECK-NEXT:    xxspltd 40, 5, 0
+; CHECK-NEXT:    xxspltd 35, 1, 0
+; CHECK-NEXT:    xxspltd 36, 2, 0
+; CHECK-NEXT:    xxspltd 38, 3, 0
+; CHECK-NEXT:    xxspltd 39, 4, 0
+; CHECK-NEXT:    xxspltd 41, 8, 0
+; CHECK-NEXT:    xxspltd 44, 10, 0
+; CHECK-NEXT:    xxspltd 47, 11, 0
+; CHECK-NEXT:    xxspltd 48, 12, 0
+; CHECK-NEXT:    xxlxor 0, 34, 41
+; CHECK-NEXT:    xxlxor 1, 35, 44
+; CHECK-NEXT:    xxswapd 37, 9
+; CHECK-NEXT:    xxlxor 2, 39, 37
+; CHECK-NEXT:    xxlxor 3, 40, 37
+; CHECK-NEXT:    xxsel 34, 32, 2, 0
+; CHECK-NEXT:    xxsel 35, 33, 3, 1
+; CHECK-NEXT:    xxlxor 0, 36, 47
+; CHECK-NEXT:    xxlxor 1, 45, 37
+; CHECK-NEXT:    xxsel 36, 42, 1, 0
+; CHECK-NEXT:    xxlxor 0, 38, 48
+; CHECK-NEXT:    xxlxor 1, 46, 37
+; CHECK-NEXT:    xxsel 37, 43, 1, 0
 ; CHECK-NEXT:    blr
   %c = call <4 x i128> @llvm.sadd.sat.v4i128(<4 x i128> %a, <4 x i128> %b)
   ret <4 x i128> %c
@@ -870,8 +872,8 @@ define i64 @unsigned_sat_constant_i64_with_single_use(i64 %x) {
 define i64 @unsigned_sat_constant_i64_with_multiple_use(i64 %x, i64 %y) {
 ; CHECK-LABEL: unsigned_sat_constant_i64_with_multiple_use:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li 5, 4
 ; CHECK-NEXT:    cmpldi 3, 4
+; CHECK-NEXT:    li 5, 4
 ; CHECK-NEXT:    isellt 5, 3, 5
 ; CHECK-NEXT:    sub 3, 3, 5
 ; CHECK-NEXT:    add 4, 4, 5

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll
index db02004df54dba4..6f68679325c5799 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll
@@ -274,9 +274,9 @@ define dso_local double @ld_disjoint_align32_double_uint8_t(i64 %ptr) {
 ; CHECK-P8-LABEL: ld_disjoint_align32_double_uint8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-P8-NEXT:    xscvuxddp f1, f0
@@ -735,9 +735,9 @@ define dso_local double @ld_disjoint_align32_double_int8_t(i64 %ptr) {
 ; CHECK-P8-LABEL: ld_disjoint_align32_double_int8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-NEXT:    extsb r3, r3
 ; CHECK-P8-NEXT:    mtfprwa f0, r3
@@ -1189,9 +1189,9 @@ define dso_local double @ld_disjoint_align32_double_uint16_t(i64 %ptr) {
 ; CHECK-P8-LABEL: ld_disjoint_align32_double_uint16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    lhzx r3, r3, r4
 ; CHECK-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-P8-NEXT:    xscvuxddp f1, f0
@@ -1641,9 +1641,9 @@ define dso_local double @ld_disjoint_align32_double_int16_t(i64 %ptr) {
 ; CHECK-P8-LABEL: ld_disjoint_align32_double_int16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    lhax r3, r3, r4
 ; CHECK-P8-NEXT:    mtfprwa f0, r3
 ; CHECK-P8-NEXT:    xscvsxddp f1, f0
@@ -2003,25 +2003,15 @@ define dso_local double @ld_disjoint_align32_double_uint32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    xscvuxddp f1, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_double_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfiwzx f0, r3, r4
-; CHECK-P9-NEXT:    xscvuxddp f1, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_double_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfiwzx f0, r3, r4
-; CHECK-P8-NEXT:    xscvuxddp f1, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_double_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfiwzx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvuxddp f1, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2320,25 +2310,15 @@ define dso_local double @ld_disjoint_align32_double_int32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    xscvsxddp f1, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_double_int32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfiwax f0, r3, r4
-; CHECK-P9-NEXT:    xscvsxddp f1, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_double_int32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfiwax f0, r3, r4
-; CHECK-P8-NEXT:    xscvsxddp f1, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_double_int32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfiwax f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvsxddp f1, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2633,25 +2613,15 @@ define dso_local double @ld_disjoint_align32_double_uint64_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    xscvuxddp f1, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_double_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfdx f0, r3, r4
-; CHECK-P9-NEXT:    xscvuxddp f1, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_double_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfdx f0, r3, r4
-; CHECK-P8-NEXT:    xscvuxddp f1, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_double_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfdx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvuxddp f1, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2937,25 +2907,15 @@ define dso_local double @ld_disjoint_align32_double_int64_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    xscvsxddp f1, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_double_int64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfdx f0, r3, r4
-; CHECK-P9-NEXT:    xscvsxddp f1, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_double_int64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfdx f0, r3, r4
-; CHECK-P8-NEXT:    xscvsxddp f1, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_double_int64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfdx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvsxddp f1, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3229,23 +3189,14 @@ define dso_local double @ld_disjoint_align32_double_float(i64 %ptr) {
 ; CHECK-P10-NEXT:    plfs f1, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_double_float:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfsx f1, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_double_float:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfsx f1, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_double_float:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfsx f1, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3502,23 +3453,14 @@ define dso_local double @ld_disjoint_align32_double_double(i64 %ptr) {
 ; CHECK-P10-NEXT:    plfd f1, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_double_double:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfdx f1, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_double_double:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfdx f1, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_double_double:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfdx f1, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3687,10 +3629,10 @@ define dso_local void @st_align32_double_uint8_t(ptr nocapture %ptr, double %str
 ; CHECK-P8-LABEL: st_align32_double_uint8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 1525
-; CHECK-P8-NEXT:    ori r4, r4, 56600
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 1525
+; CHECK-P8-NEXT:    ori r5, r5, 56600
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui double %str to i8
@@ -3721,11 +3663,11 @@ define dso_local void @st_align64_double_uint8_t(ptr nocapture %ptr, double %str
 ; CHECK-P8-LABEL: st_align64_double_uint8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui double %str to i8
@@ -3881,11 +3823,11 @@ define dso_local void @st_disjoint_align32_double_uint8_t(i64 %ptr, double %str)
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
+; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r6, 41712
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 41712
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
@@ -3922,13 +3864,13 @@ define dso_local void @st_not_disjoint64_double_uint8_t(i64 %ptr, double %str) {
 ; CHECK-P8-LABEL: st_not_disjoint64_double_uint8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    li r4, 29
-; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P8-NEXT:    oris r4, r4, 54437
-; CHECK-P8-NEXT:    ori r4, r4, 4097
-; CHECK-P8-NEXT:    or r3, r3, r4
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stb r5, 0(r3)
+; CHECK-P8-NEXT:    li r5, 29
+; CHECK-P8-NEXT:    rldic r5, r5, 35, 24
+; CHECK-P8-NEXT:    oris r5, r5, 54437
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    ori r5, r5, 4097
+; CHECK-P8-NEXT:    or r3, r3, r5
+; CHECK-P8-NEXT:    stb r4, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui double %str to i8
@@ -3962,12 +3904,12 @@ define dso_local void @st_disjoint_align64_double_uint8_t(i64 %ptr, double %str)
 ; CHECK-P8-LABEL: st_disjoint_align64_double_uint8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    lis r5, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1099511627776
@@ -4051,11 +3993,11 @@ define dso_local void @st_cst_align64_double_uint8_t(double %str) {
 ; CHECK-P8-LABEL: st_cst_align64_double_uint8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r3, 3725
-; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    stb r4, 0(r3)
+; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    ori r4, r4, 19025
+; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    stb r3, 0(r4)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui double %str to i8
@@ -4126,10 +4068,10 @@ define dso_local void @st_align32_double_int8_t(ptr nocapture %ptr, double %str)
 ; CHECK-P8-LABEL: st_align32_double_int8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 1525
-; CHECK-P8-NEXT:    ori r4, r4, 56600
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 1525
+; CHECK-P8-NEXT:    ori r5, r5, 56600
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi double %str to i8
@@ -4160,11 +4102,11 @@ define dso_local void @st_align64_double_int8_t(ptr nocapture %ptr, double %str)
 ; CHECK-P8-LABEL: st_align64_double_int8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi double %str to i8
@@ -4320,11 +4262,11 @@ define dso_local void @st_disjoint_align32_double_int8_t(i64 %ptr, double %str)
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
+; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r6, 41712
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 41712
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
@@ -4361,13 +4303,13 @@ define dso_local void @st_not_disjoint64_double_int8_t(i64 %ptr, double %str) {
 ; CHECK-P8-LABEL: st_not_disjoint64_double_int8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    li r4, 29
-; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P8-NEXT:    oris r4, r4, 54437
-; CHECK-P8-NEXT:    ori r4, r4, 4097
-; CHECK-P8-NEXT:    or r3, r3, r4
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stb r5, 0(r3)
+; CHECK-P8-NEXT:    li r5, 29
+; CHECK-P8-NEXT:    rldic r5, r5, 35, 24
+; CHECK-P8-NEXT:    oris r5, r5, 54437
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    ori r5, r5, 4097
+; CHECK-P8-NEXT:    or r3, r3, r5
+; CHECK-P8-NEXT:    stb r4, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi double %str to i8
@@ -4401,12 +4343,12 @@ define dso_local void @st_disjoint_align64_double_int8_t(i64 %ptr, double %str)
 ; CHECK-P8-LABEL: st_disjoint_align64_double_int8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    lis r5, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1099511627776
@@ -4490,11 +4432,11 @@ define dso_local void @st_cst_align64_double_int8_t(double %str) {
 ; CHECK-P8-LABEL: st_cst_align64_double_int8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r3, 3725
-; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    stb r4, 0(r3)
+; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    ori r4, r4, 19025
+; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    stb r3, 0(r4)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi double %str to i8
@@ -4565,10 +4507,10 @@ define dso_local void @st_align32_double_uint16_t(ptr nocapture %ptr, double %st
 ; CHECK-P8-LABEL: st_align32_double_uint16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 1525
-; CHECK-P8-NEXT:    ori r4, r4, 56600
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 1525
+; CHECK-P8-NEXT:    ori r5, r5, 56600
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui double %str to i16
@@ -4599,11 +4541,11 @@ define dso_local void @st_align64_double_uint16_t(ptr nocapture %ptr, double %st
 ; CHECK-P8-LABEL: st_align64_double_uint16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui double %str to i16
@@ -4759,11 +4701,11 @@ define dso_local void @st_disjoint_align32_double_uint16_t(i64 %ptr, double %str
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
+; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r6, 41712
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 41712
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
@@ -4800,13 +4742,13 @@ define dso_local void @st_not_disjoint64_double_uint16_t(i64 %ptr, double %str)
 ; CHECK-P8-LABEL: st_not_disjoint64_double_uint16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    li r4, 29
-; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P8-NEXT:    oris r4, r4, 54437
-; CHECK-P8-NEXT:    ori r4, r4, 4097
-; CHECK-P8-NEXT:    or r3, r3, r4
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sth r5, 0(r3)
+; CHECK-P8-NEXT:    li r5, 29
+; CHECK-P8-NEXT:    rldic r5, r5, 35, 24
+; CHECK-P8-NEXT:    oris r5, r5, 54437
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    ori r5, r5, 4097
+; CHECK-P8-NEXT:    or r3, r3, r5
+; CHECK-P8-NEXT:    sth r4, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui double %str to i16
@@ -4840,12 +4782,12 @@ define dso_local void @st_disjoint_align64_double_uint16_t(i64 %ptr, double %str
 ; CHECK-P8-LABEL: st_disjoint_align64_double_uint16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    lis r5, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1099511627776
@@ -4929,11 +4871,11 @@ define dso_local void @st_cst_align64_double_uint16_t(double %str) {
 ; CHECK-P8-LABEL: st_cst_align64_double_uint16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r3, 3725
-; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    sth r4, 0(r3)
+; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    ori r4, r4, 19025
+; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    sth r3, 0(r4)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui double %str to i16
@@ -5004,10 +4946,10 @@ define dso_local void @st_align32_double_int16_t(ptr nocapture %ptr, double %str
 ; CHECK-P8-LABEL: st_align32_double_int16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 1525
-; CHECK-P8-NEXT:    ori r4, r4, 56600
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 1525
+; CHECK-P8-NEXT:    ori r5, r5, 56600
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi double %str to i16
@@ -5038,11 +4980,11 @@ define dso_local void @st_align64_double_int16_t(ptr nocapture %ptr, double %str
 ; CHECK-P8-LABEL: st_align64_double_int16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi double %str to i16
@@ -5198,11 +5140,11 @@ define dso_local void @st_disjoint_align32_double_int16_t(i64 %ptr, double %str)
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
+; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r6, 41712
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 41712
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
@@ -5239,13 +5181,13 @@ define dso_local void @st_not_disjoint64_double_int16_t(i64 %ptr, double %str) {
 ; CHECK-P8-LABEL: st_not_disjoint64_double_int16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    li r4, 29
-; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P8-NEXT:    oris r4, r4, 54437
-; CHECK-P8-NEXT:    ori r4, r4, 4097
-; CHECK-P8-NEXT:    or r3, r3, r4
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sth r5, 0(r3)
+; CHECK-P8-NEXT:    li r5, 29
+; CHECK-P8-NEXT:    rldic r5, r5, 35, 24
+; CHECK-P8-NEXT:    oris r5, r5, 54437
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    ori r5, r5, 4097
+; CHECK-P8-NEXT:    or r3, r3, r5
+; CHECK-P8-NEXT:    sth r4, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi double %str to i16
@@ -5279,12 +5221,12 @@ define dso_local void @st_disjoint_align64_double_int16_t(i64 %ptr, double %str)
 ; CHECK-P8-LABEL: st_disjoint_align64_double_int16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    lis r5, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1099511627776
@@ -5368,11 +5310,11 @@ define dso_local void @st_cst_align64_double_int16_t(double %str) {
 ; CHECK-P8-LABEL: st_cst_align64_double_int16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r3, 3725
-; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    sth r4, 0(r3)
+; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    ori r4, r4, 19025
+; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    sth r3, 0(r4)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi double %str to i16
@@ -5562,11 +5504,11 @@ define dso_local void @st_disjoint_align32_double_uint32_t(i64 %ptr, double %str
 ;
 ; CHECK-P8-LABEL: st_disjoint_align32_double_uint32_t:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
+; CHECK-P8-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    stfiwx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -5590,27 +5532,16 @@ define dso_local void @st_not_disjoint64_double_uint32_t(i64 %ptr, double %str)
 ; CHECK-P10-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_not_disjoint64_double_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li r4, 29
-; CHECK-P9-NEXT:    xscvdpuxws f0, f1
-; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P9-NEXT:    oris r4, r4, 54437
-; CHECK-P9-NEXT:    ori r4, r4, 4097
-; CHECK-P9-NEXT:    or r3, r3, r4
-; CHECK-P9-NEXT:    stfiwx f0, 0, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_not_disjoint64_double_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpuxws f0, f1
-; CHECK-P8-NEXT:    li r4, 29
-; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P8-NEXT:    oris r4, r4, 54437
-; CHECK-P8-NEXT:    ori r4, r4, 4097
-; CHECK-P8-NEXT:    or r3, r3, r4
-; CHECK-P8-NEXT:    stfiwx f0, 0, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_not_disjoint64_double_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    li r4, 29
+; CHECK-PREP10-NEXT:    xscvdpuxws f0, f1
+; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
+; CHECK-PREP10-NEXT:    oris r4, r4, 54437
+; CHECK-PREP10-NEXT:    ori r4, r4, 4097
+; CHECK-PREP10-NEXT:    or r3, r3, r4
+; CHECK-PREP10-NEXT:    stfiwx f0, 0, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %conv = fptoui double %str to i32
   %or = or i64 %ptr, 1000000000001
@@ -5890,11 +5821,11 @@ define dso_local void @st_disjoint_align32_double_int32_t(i64 %ptr, double %str)
 ;
 ; CHECK-P8-LABEL: st_disjoint_align32_double_int32_t:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
+; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    stfiwx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -5918,27 +5849,16 @@ define dso_local void @st_not_disjoint64_double_int32_t(i64 %ptr, double %str) {
 ; CHECK-P10-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_not_disjoint64_double_int32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li r4, 29
-; CHECK-P9-NEXT:    xscvdpsxws f0, f1
-; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P9-NEXT:    oris r4, r4, 54437
-; CHECK-P9-NEXT:    ori r4, r4, 4097
-; CHECK-P9-NEXT:    or r3, r3, r4
-; CHECK-P9-NEXT:    stfiwx f0, 0, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_not_disjoint64_double_int32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    li r4, 29
-; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P8-NEXT:    oris r4, r4, 54437
-; CHECK-P8-NEXT:    ori r4, r4, 4097
-; CHECK-P8-NEXT:    or r3, r3, r4
-; CHECK-P8-NEXT:    stfiwx f0, 0, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_not_disjoint64_double_int32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    li r4, 29
+; CHECK-PREP10-NEXT:    xscvdpsxws f0, f1
+; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
+; CHECK-PREP10-NEXT:    oris r4, r4, 54437
+; CHECK-PREP10-NEXT:    ori r4, r4, 4097
+; CHECK-PREP10-NEXT:    or r3, r3, r4
+; CHECK-PREP10-NEXT:    stfiwx f0, 0, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %conv = fptosi double %str to i32
   %or = or i64 %ptr, 1000000000001
@@ -6257,11 +6177,11 @@ define dso_local void @st_disjoint_align32_double_uint64_t(i64 %ptr, double %str
 ;
 ; CHECK-P8-LABEL: st_disjoint_align32_double_uint64_t:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpuxds f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
+; CHECK-P8-NEXT:    xscvdpuxds f0, f1
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    stxsdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6298,8 +6218,8 @@ define dso_local void @st_not_disjoint64_double_uint64_t(i64 %ptr, double %str)
 ;
 ; CHECK-P8-LABEL: st_not_disjoint64_double_uint64_t:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpuxds f0, f1
 ; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    xscvdpuxds f0, f1
 ; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
 ; CHECK-P8-NEXT:    oris r4, r4, 54437
 ; CHECK-P8-NEXT:    ori r4, r4, 4097
@@ -6639,11 +6559,11 @@ define dso_local void @st_disjoint_align32_double_int64_t(i64 %ptr, double %str)
 ;
 ; CHECK-P8-LABEL: st_disjoint_align32_double_int64_t:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpsxds f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
+; CHECK-P8-NEXT:    xscvdpsxds f0, f1
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    stxsdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6680,8 +6600,8 @@ define dso_local void @st_not_disjoint64_double_int64_t(i64 %ptr, double %str) {
 ;
 ; CHECK-P8-LABEL: st_not_disjoint64_double_int64_t:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpsxds f0, f1
 ; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    xscvdpsxds f0, f1
 ; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
 ; CHECK-P8-NEXT:    oris r4, r4, 54437
 ; CHECK-P8-NEXT:    ori r4, r4, 4097
@@ -6978,11 +6898,11 @@ define dso_local void @st_disjoint_align32_double_float(i64 %ptr, double %str) {
 ;
 ; CHECK-P8-LABEL: st_disjoint_align32_double_float:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xsrsp f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
+; CHECK-P8-NEXT:    xsrsp f0, f1
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7006,27 +6926,16 @@ define dso_local void @st_not_disjoint64_double_float(i64 %ptr, double %str) {
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_not_disjoint64_double_float:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li r4, 29
-; CHECK-P9-NEXT:    xsrsp f0, f1
-; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P9-NEXT:    oris r4, r4, 54437
-; CHECK-P9-NEXT:    ori r4, r4, 4097
-; CHECK-P9-NEXT:    or r3, r3, r4
-; CHECK-P9-NEXT:    stfs f0, 0(r3)
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_not_disjoint64_double_float:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xsrsp f0, f1
-; CHECK-P8-NEXT:    li r4, 29
-; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P8-NEXT:    oris r4, r4, 54437
-; CHECK-P8-NEXT:    ori r4, r4, 4097
-; CHECK-P8-NEXT:    or r3, r3, r4
-; CHECK-P8-NEXT:    stfs f0, 0(r3)
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_not_disjoint64_double_float:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    li r4, 29
+; CHECK-PREP10-NEXT:    xsrsp f0, f1
+; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
+; CHECK-PREP10-NEXT:    oris r4, r4, 54437
+; CHECK-PREP10-NEXT:    ori r4, r4, 4097
+; CHECK-PREP10-NEXT:    or r3, r3, r4
+; CHECK-PREP10-NEXT:    stfs f0, 0(r3)
+; CHECK-PREP10-NEXT:    blr
 entry:
   %conv = fptrunc double %str to float
   %or = or i64 %ptr, 1000000000001
@@ -7260,23 +7169,14 @@ define dso_local void @st_disjoint_align32_double_double(i64 %ptr, double %str)
 ; CHECK-P10-NEXT:    pstfd f1, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_double_double:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    stfdx f1, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_double_double:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    stfdx f1, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_double_double:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    stfdx f1, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-equal.ll b/llvm/test/CodeGen/PowerPC/scalar-equal.ll
index 90e1655fd94c3aa..1832475e7795b6b 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-equal.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-equal.ll
@@ -20,9 +20,9 @@ define double @testoeq(double %a, double %b, double %c, double %d) {
 ; FAST-P8-LABEL: testoeq:
 ; FAST-P8:       # %bb.0: # %entry
 ; FAST-P8-NEXT:    xssubdp f0, f1, f2
-; FAST-P8-NEXT:    xsnegdp f1, f0
-; FAST-P8-NEXT:    fsel f0, f0, f3, f4
-; FAST-P8-NEXT:    fsel f1, f1, f0, f4
+; FAST-P8-NEXT:    fsel f1, f0, f3, f4
+; FAST-P8-NEXT:    xsnegdp f0, f0
+; FAST-P8-NEXT:    fsel f1, f0, f1, f4
 ; FAST-P8-NEXT:    blr
 ;
 ; FAST-P9-LABEL: testoeq:
@@ -46,10 +46,11 @@ define double @testoeq(double %a, double %b, double %c, double %d) {
 ; NO-FAST-P8-LABEL: testoeq:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    xscmpudp cr0, f1, f2
-; NO-FAST-P8-NEXT:    fmr f1, f3
-; NO-FAST-P8-NEXT:    beqlr cr0
+; NO-FAST-P8-NEXT:    beq cr0, .LBB0_2
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f4
+; NO-FAST-P8-NEXT:    fmr f3, f4
+; NO-FAST-P8-NEXT:  .LBB0_2: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp oeq double %a, %b
@@ -61,9 +62,9 @@ define double @testoeq_fast(double %a, double %b, double %c, double %d) {
 ; FAST-P8-LABEL: testoeq_fast:
 ; FAST-P8:       # %bb.0: # %entry
 ; FAST-P8-NEXT:    xssubdp f0, f1, f2
-; FAST-P8-NEXT:    xsnegdp f1, f0
-; FAST-P8-NEXT:    fsel f0, f0, f3, f4
-; FAST-P8-NEXT:    fsel f1, f1, f0, f4
+; FAST-P8-NEXT:    fsel f1, f0, f3, f4
+; FAST-P8-NEXT:    xsnegdp f0, f0
+; FAST-P8-NEXT:    fsel f1, f0, f1, f4
 ; FAST-P8-NEXT:    blr
 ;
 ; FAST-P9-LABEL: testoeq_fast:
@@ -85,9 +86,9 @@ define double @testoeq_fast(double %a, double %b, double %c, double %d) {
 ; NO-FAST-P8-LABEL: testoeq_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    xssubdp f0, f1, f2
-; NO-FAST-P8-NEXT:    xsnegdp f1, f0
-; NO-FAST-P8-NEXT:    fsel f0, f0, f3, f4
-; NO-FAST-P8-NEXT:    fsel f1, f1, f0, f4
+; NO-FAST-P8-NEXT:    fsel f1, f0, f3, f4
+; NO-FAST-P8-NEXT:    xsnegdp f0, f0
+; NO-FAST-P8-NEXT:    fsel f1, f0, f1, f4
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf nsz oeq double %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll
index 0a1613de4da1eb9..824dd4c4db6cb70 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll
@@ -274,9 +274,9 @@ define dso_local float @ld_disjoint_align32_float_uint8_t(i64 %ptr) {
 ; CHECK-P8-LABEL: ld_disjoint_align32_float_uint8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-P8-NEXT:    xscvuxdsp f1, f0
@@ -735,9 +735,9 @@ define dso_local float @ld_disjoint_align32_float_int8_t(i64 %ptr) {
 ; CHECK-P8-LABEL: ld_disjoint_align32_float_int8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-NEXT:    extsb r3, r3
 ; CHECK-P8-NEXT:    mtfprwa f0, r3
@@ -1189,9 +1189,9 @@ define dso_local float @ld_disjoint_align32_float_uint16_t(i64 %ptr) {
 ; CHECK-P8-LABEL: ld_disjoint_align32_float_uint16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    lhzx r3, r3, r4
 ; CHECK-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-P8-NEXT:    xscvuxdsp f1, f0
@@ -1641,9 +1641,9 @@ define dso_local float @ld_disjoint_align32_float_int16_t(i64 %ptr) {
 ; CHECK-P8-LABEL: ld_disjoint_align32_float_int16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    lhax r3, r3, r4
 ; CHECK-P8-NEXT:    mtfprwa f0, r3
 ; CHECK-P8-NEXT:    xscvsxdsp f1, f0
@@ -2003,25 +2003,15 @@ define dso_local float @ld_disjoint_align32_float_uint32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    xscvuxdsp f1, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_float_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfiwzx f0, r3, r4
-; CHECK-P9-NEXT:    xscvuxdsp f1, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_float_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfiwzx f0, r3, r4
-; CHECK-P8-NEXT:    xscvuxdsp f1, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_float_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfiwzx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvuxdsp f1, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2320,25 +2310,15 @@ define dso_local float @ld_disjoint_align32_float_int32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    xscvsxdsp f1, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_float_int32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfiwax f0, r3, r4
-; CHECK-P9-NEXT:    xscvsxdsp f1, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_float_int32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfiwax f0, r3, r4
-; CHECK-P8-NEXT:    xscvsxdsp f1, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_float_int32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfiwax f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvsxdsp f1, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2633,25 +2613,15 @@ define dso_local float @ld_disjoint_align32_float_uint64_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    xscvuxdsp f1, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_float_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfdx f0, r3, r4
-; CHECK-P9-NEXT:    xscvuxdsp f1, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_float_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfdx f0, r3, r4
-; CHECK-P8-NEXT:    xscvuxdsp f1, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_float_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfdx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvuxdsp f1, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2937,25 +2907,15 @@ define dso_local float @ld_disjoint_align32_float_int64_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    xscvsxdsp f1, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_float_int64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfdx f0, r3, r4
-; CHECK-P9-NEXT:    xscvsxdsp f1, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_float_int64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfdx f0, r3, r4
-; CHECK-P8-NEXT:    xscvsxdsp f1, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_float_int64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfdx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvsxdsp f1, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3220,23 +3180,14 @@ define dso_local float @ld_disjoint_align32_float_float(i64 %ptr) {
 ; CHECK-P10-NEXT:    plfs f1, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_float_float:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfsx f1, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_float_float:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfsx f1, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_float_float:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfsx f1, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3508,25 +3459,15 @@ define dso_local float @ld_disjoint_align32_float_double(i64 %ptr) {
 ; CHECK-P10-NEXT:    xsrsp f1, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_float_double:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfdx f0, r3, r4
-; CHECK-P9-NEXT:    xsrsp f1, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_float_double:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfdx f0, r3, r4
-; CHECK-P8-NEXT:    xsrsp f1, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_float_double:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfdx f0, r3, r4
+; CHECK-PREP10-NEXT:    xsrsp f1, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3709,10 +3650,10 @@ define dso_local void @st_align32_float_uint8_t(ptr nocapture %ptr, float %str)
 ; CHECK-P8-LABEL: st_align32_float_uint8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 1525
-; CHECK-P8-NEXT:    ori r4, r4, 56600
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 1525
+; CHECK-P8-NEXT:    ori r5, r5, 56600
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui float %str to i8
@@ -3743,11 +3684,11 @@ define dso_local void @st_align64_float_uint8_t(ptr nocapture %ptr, float %str)
 ; CHECK-P8-LABEL: st_align64_float_uint8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui float %str to i8
@@ -3903,11 +3844,11 @@ define dso_local void @st_disjoint_align32_float_uint8_t(i64 %ptr, float %str) {
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
+; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r6, 41712
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 41712
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
@@ -3944,13 +3885,13 @@ define dso_local void @st_not_disjoint64_float_uint8_t(i64 %ptr, float %str) {
 ; CHECK-P8-LABEL: st_not_disjoint64_float_uint8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    li r4, 29
-; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P8-NEXT:    oris r4, r4, 54437
-; CHECK-P8-NEXT:    ori r4, r4, 4097
-; CHECK-P8-NEXT:    or r3, r3, r4
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stb r5, 0(r3)
+; CHECK-P8-NEXT:    li r5, 29
+; CHECK-P8-NEXT:    rldic r5, r5, 35, 24
+; CHECK-P8-NEXT:    oris r5, r5, 54437
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    ori r5, r5, 4097
+; CHECK-P8-NEXT:    or r3, r3, r5
+; CHECK-P8-NEXT:    stb r4, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui float %str to i8
@@ -3984,12 +3925,12 @@ define dso_local void @st_disjoint_align64_float_uint8_t(i64 %ptr, float %str) {
 ; CHECK-P8-LABEL: st_disjoint_align64_float_uint8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    lis r5, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1099511627776
@@ -4073,11 +4014,11 @@ define dso_local void @st_cst_align64_float_uint8_t(float %str) {
 ; CHECK-P8-LABEL: st_cst_align64_float_uint8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r3, 3725
-; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    stb r4, 0(r3)
+; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    ori r4, r4, 19025
+; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    stb r3, 0(r4)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui float %str to i8
@@ -4148,10 +4089,10 @@ define dso_local void @st_align32_float_int8_t(ptr nocapture %ptr, float %str) {
 ; CHECK-P8-LABEL: st_align32_float_int8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 1525
-; CHECK-P8-NEXT:    ori r4, r4, 56600
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 1525
+; CHECK-P8-NEXT:    ori r5, r5, 56600
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi float %str to i8
@@ -4182,11 +4123,11 @@ define dso_local void @st_align64_float_int8_t(ptr nocapture %ptr, float %str) {
 ; CHECK-P8-LABEL: st_align64_float_int8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi float %str to i8
@@ -4342,11 +4283,11 @@ define dso_local void @st_disjoint_align32_float_int8_t(i64 %ptr, float %str) {
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
+; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r6, 41712
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 41712
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
@@ -4383,13 +4324,13 @@ define dso_local void @st_not_disjoint64_float_int8_t(i64 %ptr, float %str) {
 ; CHECK-P8-LABEL: st_not_disjoint64_float_int8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    li r4, 29
-; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P8-NEXT:    oris r4, r4, 54437
-; CHECK-P8-NEXT:    ori r4, r4, 4097
-; CHECK-P8-NEXT:    or r3, r3, r4
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stb r5, 0(r3)
+; CHECK-P8-NEXT:    li r5, 29
+; CHECK-P8-NEXT:    rldic r5, r5, 35, 24
+; CHECK-P8-NEXT:    oris r5, r5, 54437
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    ori r5, r5, 4097
+; CHECK-P8-NEXT:    or r3, r3, r5
+; CHECK-P8-NEXT:    stb r4, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi float %str to i8
@@ -4423,12 +4364,12 @@ define dso_local void @st_disjoint_align64_float_int8_t(i64 %ptr, float %str) {
 ; CHECK-P8-LABEL: st_disjoint_align64_float_int8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    lis r5, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    stbx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    stbx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1099511627776
@@ -4512,11 +4453,11 @@ define dso_local void @st_cst_align64_float_int8_t(float %str) {
 ; CHECK-P8-LABEL: st_cst_align64_float_int8_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r3, 3725
-; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    stb r4, 0(r3)
+; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    ori r4, r4, 19025
+; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    stb r3, 0(r4)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi float %str to i8
@@ -4587,10 +4528,10 @@ define dso_local void @st_align32_float_uint16_t(ptr nocapture %ptr, float %str)
 ; CHECK-P8-LABEL: st_align32_float_uint16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 1525
-; CHECK-P8-NEXT:    ori r4, r4, 56600
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 1525
+; CHECK-P8-NEXT:    ori r5, r5, 56600
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui float %str to i16
@@ -4621,11 +4562,11 @@ define dso_local void @st_align64_float_uint16_t(ptr nocapture %ptr, float %str)
 ; CHECK-P8-LABEL: st_align64_float_uint16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui float %str to i16
@@ -4781,11 +4722,11 @@ define dso_local void @st_disjoint_align32_float_uint16_t(i64 %ptr, float %str)
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
+; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r6, 41712
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 41712
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
@@ -4822,13 +4763,13 @@ define dso_local void @st_not_disjoint64_float_uint16_t(i64 %ptr, float %str) {
 ; CHECK-P8-LABEL: st_not_disjoint64_float_uint16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    li r4, 29
-; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P8-NEXT:    oris r4, r4, 54437
-; CHECK-P8-NEXT:    ori r4, r4, 4097
-; CHECK-P8-NEXT:    or r3, r3, r4
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sth r5, 0(r3)
+; CHECK-P8-NEXT:    li r5, 29
+; CHECK-P8-NEXT:    rldic r5, r5, 35, 24
+; CHECK-P8-NEXT:    oris r5, r5, 54437
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    ori r5, r5, 4097
+; CHECK-P8-NEXT:    or r3, r3, r5
+; CHECK-P8-NEXT:    sth r4, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui float %str to i16
@@ -4862,12 +4803,12 @@ define dso_local void @st_disjoint_align64_float_uint16_t(i64 %ptr, float %str)
 ; CHECK-P8-LABEL: st_disjoint_align64_float_uint16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    lis r5, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1099511627776
@@ -4951,11 +4892,11 @@ define dso_local void @st_cst_align64_float_uint16_t(float %str) {
 ; CHECK-P8-LABEL: st_cst_align64_float_uint16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r3, 3725
-; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    sth r4, 0(r3)
+; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    ori r4, r4, 19025
+; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    sth r3, 0(r4)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptoui float %str to i16
@@ -5026,10 +4967,10 @@ define dso_local void @st_align32_float_int16_t(ptr nocapture %ptr, float %str)
 ; CHECK-P8-LABEL: st_align32_float_int16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 1525
-; CHECK-P8-NEXT:    ori r4, r4, 56600
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 1525
+; CHECK-P8-NEXT:    ori r5, r5, 56600
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi float %str to i16
@@ -5060,11 +5001,11 @@ define dso_local void @st_align64_float_int16_t(ptr nocapture %ptr, float %str)
 ; CHECK-P8-LABEL: st_align64_float_int16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi float %str to i16
@@ -5220,11 +5161,11 @@ define dso_local void @st_disjoint_align32_float_int16_t(i64 %ptr, float %str) {
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
+; CHECK-P8-NEXT:    lis r5, 15258
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r6, 41712
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 41712
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
@@ -5261,13 +5202,13 @@ define dso_local void @st_not_disjoint64_float_int16_t(i64 %ptr, float %str) {
 ; CHECK-P8-LABEL: st_not_disjoint64_float_int16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    li r4, 29
-; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P8-NEXT:    oris r4, r4, 54437
-; CHECK-P8-NEXT:    ori r4, r4, 4097
-; CHECK-P8-NEXT:    or r3, r3, r4
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sth r5, 0(r3)
+; CHECK-P8-NEXT:    li r5, 29
+; CHECK-P8-NEXT:    rldic r5, r5, 35, 24
+; CHECK-P8-NEXT:    oris r5, r5, 54437
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    ori r5, r5, 4097
+; CHECK-P8-NEXT:    or r3, r3, r5
+; CHECK-P8-NEXT:    sth r4, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi float %str to i16
@@ -5301,12 +5242,12 @@ define dso_local void @st_disjoint_align64_float_int16_t(i64 %ptr, float %str) {
 ; CHECK-P8-LABEL: st_disjoint_align64_float_int16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    lis r5, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
-; CHECK-P8-NEXT:    mffprwz r5, f0
-; CHECK-P8-NEXT:    sthx r5, r3, r4
+; CHECK-P8-NEXT:    ori r5, r5, 19025
+; CHECK-P8-NEXT:    rldic r5, r5, 12, 24
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    sthx r4, r3, r5
 ; CHECK-P8-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1099511627776
@@ -5390,11 +5331,11 @@ define dso_local void @st_cst_align64_float_int16_t(float %str) {
 ; CHECK-P8-LABEL: st_cst_align64_float_int16_t:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    lis r3, 3725
-; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    sth r4, 0(r3)
+; CHECK-P8-NEXT:    lis r4, 3725
+; CHECK-P8-NEXT:    ori r4, r4, 19025
+; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    sth r3, 0(r4)
 ; CHECK-P8-NEXT:    blr
 entry:
   %conv = fptosi float %str to i16
@@ -5584,11 +5525,11 @@ define dso_local void @st_disjoint_align32_float_uint32_t(i64 %ptr, float %str)
 ;
 ; CHECK-P8-LABEL: st_disjoint_align32_float_uint32_t:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
+; CHECK-P8-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    stfiwx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -5612,27 +5553,16 @@ define dso_local void @st_not_disjoint64_float_uint32_t(i64 %ptr, float %str) {
 ; CHECK-P10-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_not_disjoint64_float_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li r4, 29
-; CHECK-P9-NEXT:    xscvdpuxws f0, f1
-; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P9-NEXT:    oris r4, r4, 54437
-; CHECK-P9-NEXT:    ori r4, r4, 4097
-; CHECK-P9-NEXT:    or r3, r3, r4
-; CHECK-P9-NEXT:    stfiwx f0, 0, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_not_disjoint64_float_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpuxws f0, f1
-; CHECK-P8-NEXT:    li r4, 29
-; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P8-NEXT:    oris r4, r4, 54437
-; CHECK-P8-NEXT:    ori r4, r4, 4097
-; CHECK-P8-NEXT:    or r3, r3, r4
-; CHECK-P8-NEXT:    stfiwx f0, 0, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_not_disjoint64_float_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    li r4, 29
+; CHECK-PREP10-NEXT:    xscvdpuxws f0, f1
+; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
+; CHECK-PREP10-NEXT:    oris r4, r4, 54437
+; CHECK-PREP10-NEXT:    ori r4, r4, 4097
+; CHECK-PREP10-NEXT:    or r3, r3, r4
+; CHECK-PREP10-NEXT:    stfiwx f0, 0, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %conv = fptoui float %str to i32
   %or = or i64 %ptr, 1000000000001
@@ -5912,11 +5842,11 @@ define dso_local void @st_disjoint_align32_float_int32_t(i64 %ptr, float %str) {
 ;
 ; CHECK-P8-LABEL: st_disjoint_align32_float_int32_t:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
+; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    stfiwx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -5940,27 +5870,16 @@ define dso_local void @st_not_disjoint64_float_int32_t(i64 %ptr, float %str) {
 ; CHECK-P10-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_not_disjoint64_float_int32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li r4, 29
-; CHECK-P9-NEXT:    xscvdpsxws f0, f1
-; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P9-NEXT:    oris r4, r4, 54437
-; CHECK-P9-NEXT:    ori r4, r4, 4097
-; CHECK-P9-NEXT:    or r3, r3, r4
-; CHECK-P9-NEXT:    stfiwx f0, 0, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_not_disjoint64_float_int32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpsxws f0, f1
-; CHECK-P8-NEXT:    li r4, 29
-; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
-; CHECK-P8-NEXT:    oris r4, r4, 54437
-; CHECK-P8-NEXT:    ori r4, r4, 4097
-; CHECK-P8-NEXT:    or r3, r3, r4
-; CHECK-P8-NEXT:    stfiwx f0, 0, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_not_disjoint64_float_int32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    li r4, 29
+; CHECK-PREP10-NEXT:    xscvdpsxws f0, f1
+; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
+; CHECK-PREP10-NEXT:    oris r4, r4, 54437
+; CHECK-PREP10-NEXT:    ori r4, r4, 4097
+; CHECK-PREP10-NEXT:    or r3, r3, r4
+; CHECK-PREP10-NEXT:    stfiwx f0, 0, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %conv = fptosi float %str to i32
   %or = or i64 %ptr, 1000000000001
@@ -6279,11 +6198,11 @@ define dso_local void @st_disjoint_align32_float_uint64_t(i64 %ptr, float %str)
 ;
 ; CHECK-P8-LABEL: st_disjoint_align32_float_uint64_t:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpuxds f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
+; CHECK-P8-NEXT:    xscvdpuxds f0, f1
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    stxsdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6320,8 +6239,8 @@ define dso_local void @st_not_disjoint64_float_uint64_t(i64 %ptr, float %str) {
 ;
 ; CHECK-P8-LABEL: st_not_disjoint64_float_uint64_t:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpuxds f0, f1
 ; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    xscvdpuxds f0, f1
 ; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
 ; CHECK-P8-NEXT:    oris r4, r4, 54437
 ; CHECK-P8-NEXT:    ori r4, r4, 4097
@@ -6661,11 +6580,11 @@ define dso_local void @st_disjoint_align32_float_int64_t(i64 %ptr, float %str) {
 ;
 ; CHECK-P8-LABEL: st_disjoint_align32_float_int64_t:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpsxds f0, f1
 ; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
+; CHECK-P8-NEXT:    xscvdpsxds f0, f1
 ; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    stxsdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6702,8 +6621,8 @@ define dso_local void @st_not_disjoint64_float_int64_t(i64 %ptr, float %str) {
 ;
 ; CHECK-P8-LABEL: st_not_disjoint64_float_int64_t:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xscvdpsxds f0, f1
 ; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    xscvdpsxds f0, f1
 ; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
 ; CHECK-P8-NEXT:    oris r4, r4, 54437
 ; CHECK-P8-NEXT:    ori r4, r4, 4097
@@ -6967,23 +6886,14 @@ define dso_local void @st_disjoint_align32_float_float(i64 %ptr, float %str) {
 ; CHECK-P10-NEXT:    pstfs f1, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_float_float:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    stfsx f1, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_float_float:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    stfsx f1, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_float_float:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    stfsx f1, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -7243,23 +7153,14 @@ define dso_local void @st_disjoint_align32_float_double(i64 %ptr, float %str) {
 ; CHECK-P10-NEXT:    pstfd f1, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_float_double:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    stfdx f1, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_float_double:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    stfdx f1, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_float_double:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    stfdx f1, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = fpext float %str to double

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll
index 5f11d98c253b08c..6940b85ff43df87 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll
@@ -184,25 +184,15 @@ define dso_local signext i16 @ld_disjoint_align32_int16_t_int8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsb r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int16_t_int8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int16_t_int8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int16_t_int8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -467,23 +457,14 @@ define dso_local signext i16 @ld_disjoint_align32_int16_t_uint16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plha r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int16_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhax r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int16_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhax r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int16_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhax r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -872,18 +853,18 @@ define dso_local signext i16 @ld_disjoint_align32_int16_t_uint32_t(i64 %ptr) {
 ; CHECK-P8-LE-LABEL: ld_disjoint_align32_int16_t_uint32_t:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, -15264
-; CHECK-P8-LE-NEXT:    lis r5, 15258
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
-; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-LE-NEXT:    lis r4, 15258
+; CHECK-P8-LE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-LE-NEXT:    lhax r3, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_align32_int16_t_uint32_t:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    lis r4, -15264
-; CHECK-P8-BE-NEXT:    lis r5, 15258
 ; CHECK-P8-BE-NEXT:    and r3, r3, r4
-; CHECK-P8-BE-NEXT:    ori r4, r5, 41714
+; CHECK-P8-BE-NEXT:    lis r4, 15258
+; CHECK-P8-BE-NEXT:    ori r4, r4, 41714
 ; CHECK-P8-BE-NEXT:    lhax r3, r3, r4
 ; CHECK-P8-BE-NEXT:    blr
 entry:
@@ -1402,18 +1383,18 @@ define dso_local signext i16 @ld_disjoint_align32_int16_t_uint64_t(i64 %ptr) {
 ; CHECK-P8-LE-LABEL: ld_disjoint_align32_int16_t_uint64_t:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, -15264
-; CHECK-P8-LE-NEXT:    lis r5, 15258
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
-; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-LE-NEXT:    lis r4, 15258
+; CHECK-P8-LE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-LE-NEXT:    lhax r3, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_align32_int16_t_uint64_t:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    lis r4, -15264
-; CHECK-P8-BE-NEXT:    lis r5, 15258
 ; CHECK-P8-BE-NEXT:    and r3, r3, r4
-; CHECK-P8-BE-NEXT:    ori r4, r5, 41718
+; CHECK-P8-BE-NEXT:    lis r4, 15258
+; CHECK-P8-BE-NEXT:    ori r4, r4, 41718
 ; CHECK-P8-BE-NEXT:    lhax r3, r3, r4
 ; CHECK-P8-BE-NEXT:    blr
 entry:
@@ -1839,29 +1820,17 @@ define dso_local signext i16 @ld_disjoint_align32_int16_t_float(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsw r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int16_t_float:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfsx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    extsw r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int16_t_float:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfsx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    extsw r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int16_t_float:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfsx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpsxws f0, f0
+; CHECK-PREP10-NEXT:    mffprwz r3, f0
+; CHECK-PREP10-NEXT:    extsw r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2188,29 +2157,17 @@ define dso_local signext i16 @ld_disjoint_align32_int16_t_double(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsw r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int16_t_double:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfdx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    extsw r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int16_t_double:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfdx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    extsw r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int16_t_double:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfdx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpsxws f0, f0
+; CHECK-PREP10-NEXT:    mffprwz r3, f0
+; CHECK-PREP10-NEXT:    extsw r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2500,23 +2457,14 @@ define dso_local zeroext i16 @ld_disjoint_align32_uint16_t_uint8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plbz r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint16_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint16_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint16_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2806,27 +2754,16 @@ define dso_local zeroext i16 @ld_disjoint_align32_uint16_t_int8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 48
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint16_t_int8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    clrldi r3, r3, 48
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint16_t_int8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    clrldi r3, r3, 48
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint16_t_int8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    clrldi r3, r3, 48
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3099,23 +3036,14 @@ define dso_local zeroext i16 @ld_disjoint_align32_uint16_t_uint16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plhz r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint16_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint16_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint16_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3504,18 +3432,18 @@ define dso_local zeroext i16 @ld_disjoint_align32_uint16_t_uint32_t(i64 %ptr) {
 ; CHECK-P8-LE-LABEL: ld_disjoint_align32_uint16_t_uint32_t:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, -15264
-; CHECK-P8-LE-NEXT:    lis r5, 15258
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
-; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-LE-NEXT:    lis r4, 15258
+; CHECK-P8-LE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-LE-NEXT:    lhzx r3, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_align32_uint16_t_uint32_t:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    lis r4, -15264
-; CHECK-P8-BE-NEXT:    lis r5, 15258
 ; CHECK-P8-BE-NEXT:    and r3, r3, r4
-; CHECK-P8-BE-NEXT:    ori r4, r5, 41714
+; CHECK-P8-BE-NEXT:    lis r4, 15258
+; CHECK-P8-BE-NEXT:    ori r4, r4, 41714
 ; CHECK-P8-BE-NEXT:    lhzx r3, r3, r4
 ; CHECK-P8-BE-NEXT:    blr
 entry:
@@ -4034,18 +3962,18 @@ define dso_local zeroext i16 @ld_disjoint_align32_uint16_t_uint64_t(i64 %ptr) {
 ; CHECK-P8-LE-LABEL: ld_disjoint_align32_uint16_t_uint64_t:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, -15264
-; CHECK-P8-LE-NEXT:    lis r5, 15258
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
-; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-LE-NEXT:    lis r4, 15258
+; CHECK-P8-LE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-LE-NEXT:    lhzx r3, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_align32_uint16_t_uint64_t:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    lis r4, -15264
-; CHECK-P8-BE-NEXT:    lis r5, 15258
 ; CHECK-P8-BE-NEXT:    and r3, r3, r4
-; CHECK-P8-BE-NEXT:    ori r4, r5, 41718
+; CHECK-P8-BE-NEXT:    lis r4, 15258
+; CHECK-P8-BE-NEXT:    ori r4, r4, 41718
 ; CHECK-P8-BE-NEXT:    lhzx r3, r3, r4
 ; CHECK-P8-BE-NEXT:    blr
 entry:
@@ -4444,27 +4372,16 @@ define dso_local zeroext i16 @ld_disjoint_align32_uint16_t_float(i64 %ptr) {
 ; CHECK-P10-NEXT:    mffprwz r3, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint16_t_float:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfsx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint16_t_float:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfsx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint16_t_float:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfsx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpsxws f0, f0
+; CHECK-PREP10-NEXT:    mffprwz r3, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -4770,27 +4687,16 @@ define dso_local zeroext i16 @ld_disjoint_align32_uint16_t_double(i64 %ptr) {
 ; CHECK-P10-NEXT:    mffprwz r3, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint16_t_double:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfdx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint16_t_double:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfdx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint16_t_double:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfdx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpsxws f0, f0
+; CHECK-PREP10-NEXT:    mffprwz r3, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -5072,23 +4978,14 @@ define dso_local void @st_disjoint_align32_uint16_t_uint8_t(i64 %ptr, i16 zeroex
 ; CHECK-P10-NEXT:    pstb r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint16_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stbx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint16_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stbx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint16_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stbx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = trunc i16 %str to i8
@@ -5345,23 +5242,14 @@ define dso_local void @st_disjoint_align32_uint16_t_uint16_t(i64 %ptr, i16 zeroe
 ; CHECK-P10-NEXT:    psth r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint16_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    sthx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint16_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    sthx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint16_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    sthx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -5621,23 +5509,14 @@ define dso_local void @st_disjoint_align32_uint16_t_uint32_t(i64 %ptr, i16 zeroe
 ; CHECK-P10-NEXT:    pstw r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint16_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stwx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint16_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stwx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint16_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stwx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = zext i16 %str to i32
@@ -5903,23 +5782,14 @@ define dso_local void @st_disjoint_align32_uint16_t_uint64_t(i64 %ptr, i16 zeroe
 ; CHECK-P10-NEXT:    pstd r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint16_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stdx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint16_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stdx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint16_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stdx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = zext i16 %str to i64
@@ -6120,8 +5990,8 @@ define dso_local void @st_align64_uint16_t_float(ptr nocapture %ptr, i16 zeroext
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6260,10 +6130,10 @@ define dso_local void @st_disjoint_align32_uint16_t_float(i64 %ptr, i16 zeroext
 ; CHECK-P8-LABEL: st_disjoint_align32_uint16_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -6289,17 +6159,29 @@ define dso_local void @st_not_disjoint64_uint16_t_float(i64 %ptr, i16 zeroext %s
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_uint16_t_float:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprwz f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvuxdsp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfs f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_uint16_t_float:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprwz f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfs f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_uint16_t_float:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprwz f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfs f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = uitofp i16 %str to float
   %or = or i64 %ptr, 1000000000001
@@ -6334,11 +6216,11 @@ define dso_local void @st_disjoint_align64_uint16_t_float(i64 %ptr, i16 zeroext
 ; CHECK-P8-LABEL: st_disjoint_align64_uint16_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6405,8 +6287,8 @@ define dso_local void @st_cst_align64_uint16_t_float(i16 zeroext %str) {
 ; CHECK-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfs f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6504,8 +6386,8 @@ define dso_local void @st_align64_uint16_t_double(ptr nocapture %ptr, i16 zeroex
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6644,10 +6526,10 @@ define dso_local void @st_disjoint_align32_uint16_t_double(i64 %ptr, i16 zeroext
 ; CHECK-P8-LABEL: st_disjoint_align32_uint16_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -6673,17 +6555,29 @@ define dso_local void @st_not_disjoint64_uint16_t_double(i64 %ptr, i16 zeroext %
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_uint16_t_double:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprwz f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvuxddp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfd f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_uint16_t_double:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprwz f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvuxddp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfd f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_uint16_t_double:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprwz f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfd f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = uitofp i16 %str to double
   %or = or i64 %ptr, 1000000000001
@@ -6718,11 +6612,11 @@ define dso_local void @st_disjoint_align64_uint16_t_double(i64 %ptr, i16 zeroext
 ; CHECK-P8-LABEL: st_disjoint_align64_uint16_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6789,8 +6683,8 @@ define dso_local void @st_cst_align64_uint16_t_double(i16 zeroext %str) {
 ; CHECK-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfd f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6953,23 +6847,14 @@ define dso_local void @st_disjoint_align32_int16_t_uint32_t(i64 %ptr, i16 signex
 ; CHECK-P10-NEXT:    pstw r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_int16_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stwx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_int16_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stwx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_int16_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stwx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = sext i16 %str to i32
@@ -7235,23 +7120,14 @@ define dso_local void @st_disjoint_align32_int16_t_uint64_t(i64 %ptr, i16 signex
 ; CHECK-P10-NEXT:    pstd r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_int16_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stdx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_int16_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stdx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_int16_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stdx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = sext i16 %str to i64
@@ -7452,8 +7328,8 @@ define dso_local void @st_align64_int16_t_float(ptr nocapture %ptr, i16 signext
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7592,10 +7468,10 @@ define dso_local void @st_disjoint_align32_int16_t_float(i64 %ptr, i16 signext %
 ; CHECK-P8-LABEL: st_disjoint_align32_int16_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -7621,17 +7497,29 @@ define dso_local void @st_not_disjoint64_int16_t_float(i64 %ptr, i16 signext %st
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_int16_t_float:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprwa f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvsxdsp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfs f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_int16_t_float:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprwa f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfs f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_int16_t_float:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprwa f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfs f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = sitofp i16 %str to float
   %or = or i64 %ptr, 1000000000001
@@ -7667,11 +7555,11 @@ define dso_local void @st_disjoint_align64_int16_t_float(i64 %ptr, i16 signext %
 ; CHECK-P8-LABEL: st_disjoint_align64_int16_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7738,8 +7626,8 @@ define dso_local void @st_cst_align64_int16_t_float(i16 signext %str) {
 ; CHECK-P8-NEXT:    mtfprwa f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfs f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7837,8 +7725,8 @@ define dso_local void @st_align64_int16_t_double(ptr nocapture %ptr, i16 signext
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7977,10 +7865,10 @@ define dso_local void @st_disjoint_align32_int16_t_double(i64 %ptr, i16 signext
 ; CHECK-P8-LABEL: st_disjoint_align32_int16_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -8006,17 +7894,29 @@ define dso_local void @st_not_disjoint64_int16_t_double(i64 %ptr, i16 signext %s
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_int16_t_double:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprwa f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvsxddp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfd f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_int16_t_double:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprwa f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvsxddp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfd f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_int16_t_double:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprwa f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfd f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = sitofp i16 %str to double
   %or = or i64 %ptr, 1000000000001
@@ -8051,11 +7951,11 @@ define dso_local void @st_disjoint_align64_int16_t_double(i64 %ptr, i16 signext
 ; CHECK-P8-LABEL: st_disjoint_align64_int16_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -8122,8 +8022,8 @@ define dso_local void @st_cst_align64_int16_t_double(i16 signext %str) {
 ; CHECK-P8-NEXT:    mtfprwa f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfd f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll
index 73578e57cdc646a..689db32adb4e8b3 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll
@@ -202,25 +202,15 @@ define dso_local signext i32 @ld_disjoint_align32_int32_t_int8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsb r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int32_t_int8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int32_t_int8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int32_t_int8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -511,23 +501,14 @@ define dso_local signext i32 @ld_disjoint_align32_int32_t_int16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plha r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int32_t_int16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhax r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int32_t_int16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhax r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int32_t_int16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhax r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -769,19 +750,12 @@ define dso_local signext i32 @ld_disjoint_unalign16_int32_t_uint32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plwa r3, 6(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_unalign16_int32_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    rldicr r3, r3, 0, 51
-; CHECK-P9-NEXT:    li r4, 6
-; CHECK-P9-NEXT:    lwax r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_unalign16_int32_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r4, 6
-; CHECK-P8-NEXT:    rldicr r3, r3, 0, 51
-; CHECK-P8-NEXT:    lwax r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_unalign16_int32_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    rldicr r3, r3, 0, 51
+; CHECK-PREP10-NEXT:    li r4, 6
+; CHECK-PREP10-NEXT:    lwax r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -4096
   %or = or i64 %and, 6
@@ -829,23 +803,14 @@ define dso_local signext i32 @ld_disjoint_align32_int32_t_uint32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plwa r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int32_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lwax r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int32_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lwax r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int32_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lwax r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1248,15 +1213,15 @@ define dso_local signext i32 @ld_disjoint_unalign16_int32_t_uint64_t(i64 %ptr) {
 ;
 ; CHECK-P8-LE-LABEL: ld_disjoint_unalign16_int32_t_uint64_t:
 ; CHECK-P8-LE:       # %bb.0: # %entry
-; CHECK-P8-LE-NEXT:    li r4, 6
 ; CHECK-P8-LE-NEXT:    rldicr r3, r3, 0, 51
+; CHECK-P8-LE-NEXT:    li r4, 6
 ; CHECK-P8-LE-NEXT:    lwax r3, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_unalign16_int32_t_uint64_t:
 ; CHECK-P8-BE:       # %bb.0: # %entry
-; CHECK-P8-BE-NEXT:    li r4, 10
 ; CHECK-P8-BE-NEXT:    rldicr r3, r3, 0, 51
+; CHECK-P8-BE-NEXT:    li r4, 10
 ; CHECK-P8-BE-NEXT:    lwax r3, r3, r4
 ; CHECK-P8-BE-NEXT:    blr
 entry:
@@ -1350,18 +1315,18 @@ define dso_local signext i32 @ld_disjoint_align32_int32_t_uint64_t(i64 %ptr) {
 ; CHECK-P8-LE-LABEL: ld_disjoint_align32_int32_t_uint64_t:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, -15264
-; CHECK-P8-LE-NEXT:    lis r5, 15258
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
-; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-LE-NEXT:    lis r4, 15258
+; CHECK-P8-LE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-LE-NEXT:    lwax r3, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_align32_int32_t_uint64_t:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    lis r4, -15264
-; CHECK-P8-BE-NEXT:    lis r5, 15258
 ; CHECK-P8-BE-NEXT:    and r3, r3, r4
-; CHECK-P8-BE-NEXT:    ori r4, r5, 41716
+; CHECK-P8-BE-NEXT:    lis r4, 15258
+; CHECK-P8-BE-NEXT:    ori r4, r4, 41716
 ; CHECK-P8-BE-NEXT:    lwax r3, r3, r4
 ; CHECK-P8-BE-NEXT:    blr
 entry:
@@ -1807,29 +1772,17 @@ define dso_local signext i32 @ld_disjoint_align32_int32_t_float(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsw r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int32_t_float:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfsx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    extsw r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int32_t_float:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfsx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    extsw r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int32_t_float:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfsx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpsxws f0, f0
+; CHECK-PREP10-NEXT:    mffprwz r3, f0
+; CHECK-PREP10-NEXT:    extsw r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2175,29 +2128,17 @@ define dso_local signext i32 @ld_disjoint_align32_int32_t_double(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsw r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int32_t_double:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfdx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    extsw r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int32_t_double:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfdx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    extsw r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int32_t_double:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfdx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpsxws f0, f0
+; CHECK-PREP10-NEXT:    mffprwz r3, f0
+; CHECK-PREP10-NEXT:    extsw r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2487,23 +2428,14 @@ define dso_local zeroext i32 @ld_disjoint_align32_uint32_t_uint8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plbz r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint32_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint32_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint32_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2793,27 +2725,16 @@ define dso_local zeroext i32 @ld_disjoint_align32_uint32_t_int8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint32_t_int8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    clrldi r3, r3, 32
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint32_t_int8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    clrldi r3, r3, 32
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint32_t_int8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    clrldi r3, r3, 32
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3095,23 +3016,14 @@ define dso_local zeroext i32 @ld_disjoint_align32_uint32_t_uint16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plhz r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint32_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint32_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint32_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3389,25 +3301,15 @@ define dso_local zeroext i32 @ld_disjoint_align32_uint32_t_int16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint32_t_int16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhax r3, r3, r4
-; CHECK-P9-NEXT:    clrldi r3, r3, 32
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint32_t_int16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhax r3, r3, r4
-; CHECK-P8-NEXT:    clrldi r3, r3, 32
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint32_t_int16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhax r3, r3, r4
+; CHECK-PREP10-NEXT:    clrldi r3, r3, 32
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3672,23 +3574,14 @@ define dso_local zeroext i32 @ld_disjoint_align32_uint32_t_uint32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plwz r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint32_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lwzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint32_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lwzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint32_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lwzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -4077,18 +3970,18 @@ define dso_local zeroext i32 @ld_disjoint_align32_uint32_t_uint64_t(i64 %ptr) {
 ; CHECK-P8-LE-LABEL: ld_disjoint_align32_uint32_t_uint64_t:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, -15264
-; CHECK-P8-LE-NEXT:    lis r5, 15258
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
-; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-LE-NEXT:    lis r4, 15258
+; CHECK-P8-LE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-LE-NEXT:    lwzx r3, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_align32_uint32_t_uint64_t:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    lis r4, -15264
-; CHECK-P8-BE-NEXT:    lis r5, 15258
 ; CHECK-P8-BE-NEXT:    and r3, r3, r4
-; CHECK-P8-BE-NEXT:    ori r4, r5, 41716
+; CHECK-P8-BE-NEXT:    lis r4, 15258
+; CHECK-P8-BE-NEXT:    ori r4, r4, 41716
 ; CHECK-P8-BE-NEXT:    lwzx r3, r3, r4
 ; CHECK-P8-BE-NEXT:    blr
 entry:
@@ -4502,27 +4395,16 @@ define dso_local zeroext i32 @ld_disjoint_align32_uint32_t_float(i64 %ptr) {
 ; CHECK-P10-NEXT:    mffprwz r3, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint32_t_float:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfsx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpuxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint32_t_float:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfsx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpuxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint32_t_float:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfsx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpuxws f0, f0
+; CHECK-PREP10-NEXT:    mffprwz r3, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -4828,27 +4710,16 @@ define dso_local zeroext i32 @ld_disjoint_align32_uint32_t_double(i64 %ptr) {
 ; CHECK-P10-NEXT:    mffprwz r3, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint32_t_double:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfdx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpuxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint32_t_double:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfdx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpuxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint32_t_double:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfdx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpuxws f0, f0
+; CHECK-PREP10-NEXT:    mffprwz r3, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -5130,23 +5001,14 @@ define dso_local void @st_disjoint_align32_uint32_t_uint8_t(i64 %ptr, i32 zeroex
 ; CHECK-P10-NEXT:    pstb r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint32_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stbx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint32_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stbx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint32_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stbx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = trunc i32 %str to i8
@@ -5412,23 +5274,14 @@ define dso_local void @st_disjoint_align32_uint32_t_uint16_t(i64 %ptr, i32 zeroe
 ; CHECK-P10-NEXT:    psth r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint32_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    sthx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint32_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    sthx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint32_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    sthx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = trunc i32 %str to i16
@@ -5685,23 +5538,14 @@ define dso_local void @st_disjoint_align32_uint32_t_uint32_t(i64 %ptr, i32 zeroe
 ; CHECK-P10-NEXT:    pstw r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint32_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stwx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint32_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stwx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint32_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stwx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -5961,23 +5805,14 @@ define dso_local void @st_disjoint_align32_uint32_t_uint64_t(i64 %ptr, i32 zeroe
 ; CHECK-P10-NEXT:    pstd r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint32_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stdx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint32_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stdx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint32_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stdx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = zext i32 %str to i64
@@ -6178,8 +6013,8 @@ define dso_local void @st_align64_uint32_t_float(ptr nocapture %ptr, i32 zeroext
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6318,10 +6153,10 @@ define dso_local void @st_disjoint_align32_uint32_t_float(i64 %ptr, i32 zeroext
 ; CHECK-P8-LABEL: st_disjoint_align32_uint32_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -6347,17 +6182,29 @@ define dso_local void @st_not_disjoint64_uint32_t_float(i64 %ptr, i32 zeroext %s
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_uint32_t_float:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprwz f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvuxdsp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfs f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_uint32_t_float:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprwz f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfs f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_uint32_t_float:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprwz f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfs f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = uitofp i32 %str to float
   %or = or i64 %ptr, 1000000000001
@@ -6392,11 +6239,11 @@ define dso_local void @st_disjoint_align64_uint32_t_float(i64 %ptr, i32 zeroext
 ; CHECK-P8-LABEL: st_disjoint_align64_uint32_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6463,8 +6310,8 @@ define dso_local void @st_cst_align64_uint32_t_float(i32 zeroext %str) {
 ; CHECK-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfs f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6562,8 +6409,8 @@ define dso_local void @st_align64_uint32_t_double(ptr nocapture %ptr, i32 zeroex
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6702,10 +6549,10 @@ define dso_local void @st_disjoint_align32_uint32_t_double(i64 %ptr, i32 zeroext
 ; CHECK-P8-LABEL: st_disjoint_align32_uint32_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -6731,17 +6578,29 @@ define dso_local void @st_not_disjoint64_uint32_t_double(i64 %ptr, i32 zeroext %
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_uint32_t_double:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprwz f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvuxddp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfd f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_uint32_t_double:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprwz f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvuxddp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfd f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_uint32_t_double:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprwz f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfd f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = uitofp i32 %str to double
   %or = or i64 %ptr, 1000000000001
@@ -6776,11 +6635,11 @@ define dso_local void @st_disjoint_align64_uint32_t_double(i64 %ptr, i32 zeroext
 ; CHECK-P8-LABEL: st_disjoint_align64_uint32_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6847,8 +6706,8 @@ define dso_local void @st_cst_align64_uint32_t_double(i32 zeroext %str) {
 ; CHECK-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfd f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7011,23 +6870,14 @@ define dso_local void @st_disjoint_align32_int32_t_uint64_t(i64 %ptr, i32 signex
 ; CHECK-P10-NEXT:    pstd r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_int32_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stdx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_int32_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stdx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_int32_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stdx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = sext i32 %str to i64
@@ -7228,8 +7078,8 @@ define dso_local void @st_align64_int32_t_float(ptr nocapture %ptr, i32 signext
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7368,10 +7218,10 @@ define dso_local void @st_disjoint_align32_int32_t_float(i64 %ptr, i32 signext %
 ; CHECK-P8-LABEL: st_disjoint_align32_int32_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -7397,17 +7247,29 @@ define dso_local void @st_not_disjoint64_int32_t_float(i64 %ptr, i32 signext %st
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_int32_t_float:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprwa f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvsxdsp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfs f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_int32_t_float:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprwa f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfs f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_int32_t_float:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprwa f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfs f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = sitofp i32 %str to float
   %or = or i64 %ptr, 1000000000001
@@ -7442,11 +7304,11 @@ define dso_local void @st_disjoint_align64_int32_t_float(i64 %ptr, i32 signext %
 ; CHECK-P8-LABEL: st_disjoint_align64_int32_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7513,8 +7375,8 @@ define dso_local void @st_cst_align64_int32_t_float(i32 signext %str) {
 ; CHECK-P8-NEXT:    mtfprwa f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfs f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7612,8 +7474,8 @@ define dso_local void @st_align64_int32_t_double(ptr nocapture %ptr, i32 signext
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7752,10 +7614,10 @@ define dso_local void @st_disjoint_align32_int32_t_double(i64 %ptr, i32 signext
 ; CHECK-P8-LABEL: st_disjoint_align32_int32_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -7781,17 +7643,29 @@ define dso_local void @st_not_disjoint64_int32_t_double(i64 %ptr, i32 signext %s
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_int32_t_double:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprwa f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvsxddp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfd f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_int32_t_double:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprwa f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvsxddp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfd f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_int32_t_double:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprwa f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfd f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = sitofp i32 %str to double
   %or = or i64 %ptr, 1000000000001
@@ -7826,11 +7700,11 @@ define dso_local void @st_disjoint_align64_int32_t_double(i64 %ptr, i32 signext
 ; CHECK-P8-LABEL: st_disjoint_align64_int32_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7897,8 +7771,8 @@ define dso_local void @st_cst_align64_int32_t_double(i32 signext %str) {
 ; CHECK-P8-NEXT:    mtfprwa f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfd f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll
index ab0711577f35387..b09f96ab241fd4f 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll
@@ -196,27 +196,16 @@ define dso_local i64 @ld_disjoint_align32_int64_t_float(i64 %ptr) {
 ; CHECK-P10-NEXT:    mffprd r3, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int64_t_float:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfsx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpsxds f0, f0
-; CHECK-P9-NEXT:    mffprd r3, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int64_t_float:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfsx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxds f0, f0
-; CHECK-P8-NEXT:    mffprd r3, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int64_t_float:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfsx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpsxds f0, f0
+; CHECK-PREP10-NEXT:    mffprd r3, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -522,27 +511,16 @@ define dso_local i64 @ld_disjoint_align32_int64_t_double(i64 %ptr) {
 ; CHECK-P10-NEXT:    mffprd r3, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int64_t_double:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfdx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpsxds f0, f0
-; CHECK-P9-NEXT:    mffprd r3, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int64_t_double:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfdx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxds f0, f0
-; CHECK-P8-NEXT:    mffprd r3, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int64_t_double:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfdx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpsxds f0, f0
+; CHECK-PREP10-NEXT:    mffprd r3, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -939,23 +917,14 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_uint8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plbz r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1435,25 +1404,15 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_int8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsb r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_int8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_int8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_int8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -1926,23 +1885,14 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_uint16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plhz r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2401,23 +2351,14 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_int16_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plha r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_int16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lhax r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_int16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lhax r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_int16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lhax r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2876,23 +2817,14 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_uint32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plwz r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lwzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lwzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lwzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3284,19 +3216,12 @@ define dso_local i64 @ld_disjoint_unalign16_uint64_t_int32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plwa r3, 6(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_unalign16_uint64_t_int32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    rldicr r3, r3, 0, 51
-; CHECK-P9-NEXT:    li r4, 6
-; CHECK-P9-NEXT:    lwax r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_unalign16_uint64_t_int32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r4, 6
-; CHECK-P8-NEXT:    rldicr r3, r3, 0, 51
-; CHECK-P8-NEXT:    lwax r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_unalign16_uint64_t_int32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    rldicr r3, r3, 0, 51
+; CHECK-PREP10-NEXT:    li r4, 6
+; CHECK-PREP10-NEXT:    lwax r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -4096
   %or = or i64 %and, 6
@@ -3371,23 +3296,14 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_int32_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plwa r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_int32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lwax r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_int32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lwax r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_int32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lwax r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3776,19 +3692,12 @@ define dso_local i64 @ld_disjoint_unalign16_uint64_t_uint64_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    pld r3, 6(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_unalign16_uint64_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    rldicr r3, r3, 0, 51
-; CHECK-P9-NEXT:    li r4, 6
-; CHECK-P9-NEXT:    ldx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_unalign16_uint64_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r4, 6
-; CHECK-P8-NEXT:    rldicr r3, r3, 0, 51
-; CHECK-P8-NEXT:    ldx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_unalign16_uint64_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    rldicr r3, r3, 0, 51
+; CHECK-PREP10-NEXT:    li r4, 6
+; CHECK-PREP10-NEXT:    ldx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -4096
   %or = or i64 %and, 6
@@ -3859,23 +3768,14 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_uint64_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    pld r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    ldx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    ldx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    ldx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -4389,27 +4289,16 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_float(i64 %ptr) {
 ; CHECK-P10-NEXT:    mffprd r3, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_float:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfsx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpuxds f0, f0
-; CHECK-P9-NEXT:    mffprd r3, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_float:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfsx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpuxds f0, f0
-; CHECK-P8-NEXT:    mffprd r3, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_float:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfsx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpuxds f0, f0
+; CHECK-PREP10-NEXT:    mffprd r3, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -4986,27 +4875,16 @@ define dso_local i64 @ld_disjoint_align32_uint64_t_double(i64 %ptr) {
 ; CHECK-P10-NEXT:    mffprd r3, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint64_t_double:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfdx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpuxds f0, f0
-; CHECK-P9-NEXT:    mffprd r3, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint64_t_double:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfdx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpuxds f0, f0
-; CHECK-P8-NEXT:    mffprd r3, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint64_t_double:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfdx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpuxds f0, f0
+; CHECK-PREP10-NEXT:    mffprd r3, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -5428,23 +5306,14 @@ define dso_local void @st_disjoint_align32_uint64_t_uint8_t(i64 %ptr, i64 %str)
 ; CHECK-P10-NEXT:    pstb r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint64_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stbx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint64_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stbx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint64_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stbx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = trunc i64 %str to i8
@@ -5727,23 +5596,14 @@ define dso_local void @st_disjoint_align32_uint64_t_uint16_t(i64 %ptr, i64 %str)
 ; CHECK-P10-NEXT:    psth r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint64_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    sthx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint64_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    sthx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint64_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    sthx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = trunc i64 %str to i16
@@ -6026,23 +5886,14 @@ define dso_local void @st_disjoint_align32_uint64_t_int16_t(i64 %ptr, i64 %str)
 ; CHECK-P10-NEXT:    psth r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint64_t_int16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    sthx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint64_t_int16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    sthx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint64_t_int16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    sthx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = trunc i64 %str to i16
@@ -6325,23 +6176,14 @@ define dso_local void @st_disjoint_align32_uint64_t_uint32_t(i64 %ptr, i64 %str)
 ; CHECK-P10-NEXT:    pstw r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint64_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stwx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint64_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stwx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint64_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stwx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = trunc i64 %str to i32
@@ -6614,23 +6456,14 @@ define dso_local void @st_disjoint_align32_uint64_t_uint64_t(i64 %ptr, i64 %str)
 ; CHECK-P10-NEXT:    pstd r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint64_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stdx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint64_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stdx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint64_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stdx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -6825,8 +6658,8 @@ define dso_local void @st_align64_uint64_t_float(ptr nocapture %ptr, i64 %str) {
 ; CHECK-P8-NEXT:    mtfprd f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -6984,10 +6817,10 @@ define dso_local void @st_disjoint_align32_uint64_t_float(i64 %ptr, i64 %str) {
 ; CHECK-P8-LABEL: st_disjoint_align32_uint64_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprd f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -7013,17 +6846,29 @@ define dso_local void @st_not_disjoint64_uint64_t_float(i64 %ptr, i64 %str) {
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_uint64_t_float:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprd f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvuxdsp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfs f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_uint64_t_float:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprd f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfs f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_uint64_t_float:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprd f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfs f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = uitofp i64 %str to float
   %or = or i64 %ptr, 1000000000001
@@ -7058,11 +6903,11 @@ define dso_local void @st_disjoint_align64_uint64_t_float(i64 %ptr, i64 %str) {
 ; CHECK-P8-LABEL: st_disjoint_align64_uint64_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprd f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7129,8 +6974,8 @@ define dso_local void @st_cst_align64_uint64_t_float(i64 %str) {
 ; CHECK-P8-NEXT:    mtfprd f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfs f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7228,8 +7073,8 @@ define dso_local void @st_align64_uint64_t_double(ptr nocapture %ptr, i64 %str)
 ; CHECK-P8-NEXT:    mtfprd f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7387,10 +7232,10 @@ define dso_local void @st_disjoint_align32_uint64_t_double(i64 %ptr, i64 %str) {
 ; CHECK-P8-LABEL: st_disjoint_align32_uint64_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprd f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -7416,17 +7261,29 @@ define dso_local void @st_not_disjoint64_uint64_t_double(i64 %ptr, i64 %str) {
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_uint64_t_double:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprd f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvuxddp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfd f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_uint64_t_double:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprd f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvuxddp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfd f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_uint64_t_double:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprd f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfd f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = uitofp i64 %str to double
   %or = or i64 %ptr, 1000000000001
@@ -7461,11 +7318,11 @@ define dso_local void @st_disjoint_align64_uint64_t_double(i64 %ptr, i64 %str) {
 ; CHECK-P8-LABEL: st_disjoint_align64_uint64_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprd f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7532,8 +7389,8 @@ define dso_local void @st_cst_align64_uint64_t_double(i64 %str) {
 ; CHECK-P8-NEXT:    mtfprd f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfd f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7631,8 +7488,8 @@ define dso_local void @st_align64_int64_t_float(ptr nocapture %ptr, i64 %str) {
 ; CHECK-P8-NEXT:    mtfprd f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7790,10 +7647,10 @@ define dso_local void @st_disjoint_align32_int64_t_float(i64 %ptr, i64 %str) {
 ; CHECK-P8-LABEL: st_disjoint_align32_int64_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprd f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -7819,17 +7676,29 @@ define dso_local void @st_not_disjoint64_int64_t_float(i64 %ptr, i64 %str) {
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_int64_t_float:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprd f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvsxdsp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfs f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_int64_t_float:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprd f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfs f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_int64_t_float:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprd f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfs f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = sitofp i64 %str to float
   %or = or i64 %ptr, 1000000000001
@@ -7864,11 +7733,11 @@ define dso_local void @st_disjoint_align64_int64_t_float(i64 %ptr, i64 %str) {
 ; CHECK-P8-LABEL: st_disjoint_align64_int64_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprd f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7935,8 +7804,8 @@ define dso_local void @st_cst_align64_int64_t_float(i64 %str) {
 ; CHECK-P8-NEXT:    mtfprd f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfs f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -8034,8 +7903,8 @@ define dso_local void @st_align64_int64_t_double(ptr nocapture %ptr, i64 %str) {
 ; CHECK-P8-NEXT:    mtfprd f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -8193,10 +8062,10 @@ define dso_local void @st_disjoint_align32_int64_t_double(i64 %ptr, i64 %str) {
 ; CHECK-P8-LABEL: st_disjoint_align32_int64_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprd f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -8222,17 +8091,29 @@ define dso_local void @st_not_disjoint64_int64_t_double(i64 %ptr, i64 %str) {
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_int64_t_double:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprd f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvsxddp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfd f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_int64_t_double:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprd f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvsxddp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfd f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_int64_t_double:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprd f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfd f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = sitofp i64 %str to double
   %or = or i64 %ptr, 1000000000001
@@ -8267,11 +8148,11 @@ define dso_local void @st_disjoint_align64_int64_t_double(i64 %ptr, i64 %str) {
 ; CHECK-P8-LABEL: st_disjoint_align64_int64_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprd f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -8338,8 +8219,8 @@ define dso_local void @st_cst_align64_int64_t_double(i64 %str) {
 ; CHECK-P8-NEXT:    mtfprd f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfd f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll
index 42a2cf6a466476c..59a8455b16fd96a 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll
@@ -175,25 +175,15 @@ define dso_local signext i8 @ld_disjoint_align32_int8_t_uint8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsb r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int8_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    extsb r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int8_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    extsb r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int8_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    extsb r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -620,9 +610,9 @@ define dso_local signext i8 @ld_disjoint_align32_int8_t_uint16_t(i64 %ptr) {
 ; CHECK-P8-LE-LABEL: ld_disjoint_align32_int8_t_uint16_t:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, -15264
-; CHECK-P8-LE-NEXT:    lis r5, 15258
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
-; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-LE-NEXT:    lis r4, 15258
+; CHECK-P8-LE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-LE-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-LE-NEXT:    extsb r3, r3
 ; CHECK-P8-LE-NEXT:    blr
@@ -630,9 +620,9 @@ define dso_local signext i8 @ld_disjoint_align32_int8_t_uint16_t(i64 %ptr) {
 ; CHECK-P8-BE-LABEL: ld_disjoint_align32_int8_t_uint16_t:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    lis r4, -15264
-; CHECK-P8-BE-NEXT:    lis r5, 15258
 ; CHECK-P8-BE-NEXT:    and r3, r3, r4
-; CHECK-P8-BE-NEXT:    ori r4, r5, 41713
+; CHECK-P8-BE-NEXT:    lis r4, 15258
+; CHECK-P8-BE-NEXT:    ori r4, r4, 41713
 ; CHECK-P8-BE-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-BE-NEXT:    extsb r3, r3
 ; CHECK-P8-BE-NEXT:    blr
@@ -1204,9 +1194,9 @@ define dso_local signext i8 @ld_disjoint_align32_int8_t_uint32_t(i64 %ptr) {
 ; CHECK-P8-LE-LABEL: ld_disjoint_align32_int8_t_uint32_t:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, -15264
-; CHECK-P8-LE-NEXT:    lis r5, 15258
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
-; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-LE-NEXT:    lis r4, 15258
+; CHECK-P8-LE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-LE-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-LE-NEXT:    extsb r3, r3
 ; CHECK-P8-LE-NEXT:    blr
@@ -1214,9 +1204,9 @@ define dso_local signext i8 @ld_disjoint_align32_int8_t_uint32_t(i64 %ptr) {
 ; CHECK-P8-BE-LABEL: ld_disjoint_align32_int8_t_uint32_t:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    lis r4, -15264
-; CHECK-P8-BE-NEXT:    lis r5, 15258
 ; CHECK-P8-BE-NEXT:    and r3, r3, r4
-; CHECK-P8-BE-NEXT:    ori r4, r5, 41715
+; CHECK-P8-BE-NEXT:    lis r4, 15258
+; CHECK-P8-BE-NEXT:    ori r4, r4, 41715
 ; CHECK-P8-BE-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-BE-NEXT:    extsb r3, r3
 ; CHECK-P8-BE-NEXT:    blr
@@ -1788,9 +1778,9 @@ define dso_local signext i8 @ld_disjoint_align32_int8_t_uint64_t(i64 %ptr) {
 ; CHECK-P8-LE-LABEL: ld_disjoint_align32_int8_t_uint64_t:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, -15264
-; CHECK-P8-LE-NEXT:    lis r5, 15258
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
-; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-LE-NEXT:    lis r4, 15258
+; CHECK-P8-LE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-LE-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-LE-NEXT:    extsb r3, r3
 ; CHECK-P8-LE-NEXT:    blr
@@ -1798,9 +1788,9 @@ define dso_local signext i8 @ld_disjoint_align32_int8_t_uint64_t(i64 %ptr) {
 ; CHECK-P8-BE-LABEL: ld_disjoint_align32_int8_t_uint64_t:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    lis r4, -15264
-; CHECK-P8-BE-NEXT:    lis r5, 15258
 ; CHECK-P8-BE-NEXT:    and r3, r3, r4
-; CHECK-P8-BE-NEXT:    ori r4, r5, 41719
+; CHECK-P8-BE-NEXT:    lis r4, 15258
+; CHECK-P8-BE-NEXT:    ori r4, r4, 41719
 ; CHECK-P8-BE-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-BE-NEXT:    extsb r3, r3
 ; CHECK-P8-BE-NEXT:    blr
@@ -2249,29 +2239,17 @@ define dso_local signext i8 @ld_disjoint_align32_int8_t_float(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsw r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int8_t_float:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfsx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    extsw r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int8_t_float:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfsx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    extsw r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int8_t_float:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfsx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpsxws f0, f0
+; CHECK-PREP10-NEXT:    mffprwz r3, f0
+; CHECK-PREP10-NEXT:    extsw r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2597,29 +2575,17 @@ define dso_local signext i8 @ld_disjoint_align32_int8_t_double(i64 %ptr) {
 ; CHECK-P10-NEXT:    extsw r3, r3
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_int8_t_double:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfdx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    extsw r3, r3
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_int8_t_double:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfdx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    extsw r3, r3
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_int8_t_double:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfdx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpsxws f0, f0
+; CHECK-PREP10-NEXT:    mffprwz r3, f0
+; CHECK-PREP10-NEXT:    extsw r3, r3
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -2924,23 +2890,14 @@ define dso_local zeroext i8 @ld_disjoint_align32_uint8_t_uint8_t(i64 %ptr) {
 ; CHECK-P10-NEXT:    plbz r3, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint8_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lbzx r3, r3, r4
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint8_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lbzx r3, r3, r4
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint8_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lbzx r3, r3, r4
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -3440,18 +3397,18 @@ define dso_local zeroext i8 @ld_disjoint_align32_uint8_t_uint16_t(i64 %ptr) {
 ; CHECK-P8-LE-LABEL: ld_disjoint_align32_uint8_t_uint16_t:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, -15264
-; CHECK-P8-LE-NEXT:    lis r5, 15258
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
-; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-LE-NEXT:    lis r4, 15258
+; CHECK-P8-LE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-LE-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_align32_uint8_t_uint16_t:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    lis r4, -15264
-; CHECK-P8-BE-NEXT:    lis r5, 15258
 ; CHECK-P8-BE-NEXT:    and r3, r3, r4
-; CHECK-P8-BE-NEXT:    ori r4, r5, 41713
+; CHECK-P8-BE-NEXT:    lis r4, 15258
+; CHECK-P8-BE-NEXT:    ori r4, r4, 41713
 ; CHECK-P8-BE-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-BE-NEXT:    blr
 entry:
@@ -4157,18 +4114,18 @@ define dso_local zeroext i8 @ld_disjoint_align32_uint8_t_uint32_t(i64 %ptr) {
 ; CHECK-P8-LE-LABEL: ld_disjoint_align32_uint8_t_uint32_t:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, -15264
-; CHECK-P8-LE-NEXT:    lis r5, 15258
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
-; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-LE-NEXT:    lis r4, 15258
+; CHECK-P8-LE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-LE-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_align32_uint8_t_uint32_t:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    lis r4, -15264
-; CHECK-P8-BE-NEXT:    lis r5, 15258
 ; CHECK-P8-BE-NEXT:    and r3, r3, r4
-; CHECK-P8-BE-NEXT:    ori r4, r5, 41715
+; CHECK-P8-BE-NEXT:    lis r4, 15258
+; CHECK-P8-BE-NEXT:    ori r4, r4, 41715
 ; CHECK-P8-BE-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-BE-NEXT:    blr
 entry:
@@ -4874,18 +4831,18 @@ define dso_local zeroext i8 @ld_disjoint_align32_uint8_t_uint64_t(i64 %ptr) {
 ; CHECK-P8-LE-LABEL: ld_disjoint_align32_uint8_t_uint64_t:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, -15264
-; CHECK-P8-LE-NEXT:    lis r5, 15258
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
-; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-LE-NEXT:    lis r4, 15258
+; CHECK-P8-LE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-LE-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: ld_disjoint_align32_uint8_t_uint64_t:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    lis r4, -15264
-; CHECK-P8-BE-NEXT:    lis r5, 15258
 ; CHECK-P8-BE-NEXT:    and r3, r3, r4
-; CHECK-P8-BE-NEXT:    ori r4, r5, 41719
+; CHECK-P8-BE-NEXT:    lis r4, 15258
+; CHECK-P8-BE-NEXT:    ori r4, r4, 41719
 ; CHECK-P8-BE-NEXT:    lbzx r3, r3, r4
 ; CHECK-P8-BE-NEXT:    blr
 entry:
@@ -5455,27 +5412,16 @@ define dso_local zeroext i8 @ld_disjoint_align32_uint8_t_float(i64 %ptr) {
 ; CHECK-P10-NEXT:    mffprwz r3, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint8_t_float:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfsx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint8_t_float:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfsx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint8_t_float:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfsx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpsxws f0, f0
+; CHECK-PREP10-NEXT:    mffprwz r3, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -5871,27 +5817,16 @@ define dso_local zeroext i8 @ld_disjoint_align32_uint8_t_double(i64 %ptr) {
 ; CHECK-P10-NEXT:    mffprwz r3, f0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: ld_disjoint_align32_uint8_t_double:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r4, -15264
-; CHECK-P9-NEXT:    and r3, r3, r4
-; CHECK-P9-NEXT:    lis r4, 15258
-; CHECK-P9-NEXT:    ori r4, r4, 41712
-; CHECK-P9-NEXT:    lfdx f0, r3, r4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mffprwz r3, f0
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: ld_disjoint_align32_uint8_t_double:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
-; CHECK-P8-NEXT:    lfdx f0, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: ld_disjoint_align32_uint8_t_double:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r4, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r4
+; CHECK-PREP10-NEXT:    lis r4, 15258
+; CHECK-PREP10-NEXT:    ori r4, r4, 41712
+; CHECK-PREP10-NEXT:    lfdx f0, r3, r4
+; CHECK-PREP10-NEXT:    xscvdpsxws f0, f0
+; CHECK-PREP10-NEXT:    mffprwz r3, f0
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -6225,23 +6160,14 @@ define dso_local void @st_disjoint_align32_uint8_t_uint8_t(i64 %ptr, i8 zeroext
 ; CHECK-P10-NEXT:    pstb r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint8_t_uint8_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stbx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint8_t_uint8_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stbx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint8_t_uint8_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stbx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %or = or i64 %and, 999990000
@@ -6501,23 +6427,14 @@ define dso_local void @st_disjoint_align32_uint8_t_uint16_t(i64 %ptr, i8 zeroext
 ; CHECK-P10-NEXT:    psth r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint8_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    sthx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint8_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    sthx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint8_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    sthx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = zext i8 %str to i16
@@ -6783,23 +6700,14 @@ define dso_local void @st_disjoint_align32_uint8_t_uint32_t(i64 %ptr, i8 zeroext
 ; CHECK-P10-NEXT:    pstw r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint8_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stwx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint8_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stwx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint8_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stwx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = zext i8 %str to i32
@@ -7065,23 +6973,14 @@ define dso_local void @st_disjoint_align32_uint8_t_uint64_t(i64 %ptr, i8 zeroext
 ; CHECK-P10-NEXT:    pstd r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_uint8_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stdx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_uint8_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stdx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_uint8_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stdx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = zext i8 %str to i64
@@ -7282,8 +7181,8 @@ define dso_local void @st_align64_uint8_t_float(ptr nocapture %ptr, i8 zeroext %
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7422,10 +7321,10 @@ define dso_local void @st_disjoint_align32_uint8_t_float(i64 %ptr, i8 zeroext %s
 ; CHECK-P8-LABEL: st_disjoint_align32_uint8_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -7451,17 +7350,29 @@ define dso_local void @st_not_disjoint64_uint8_t_float(i64 %ptr, i8 zeroext %str
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_uint8_t_float:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprwz f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvuxdsp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfs f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_uint8_t_float:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprwz f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfs f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_uint8_t_float:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprwz f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfs f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = uitofp i8 %str to float
   %or = or i64 %ptr, 1000000000001
@@ -7496,11 +7407,11 @@ define dso_local void @st_disjoint_align64_uint8_t_float(i64 %ptr, i8 zeroext %s
 ; CHECK-P8-LABEL: st_disjoint_align64_uint8_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7567,8 +7478,8 @@ define dso_local void @st_cst_align64_uint8_t_float(i8 zeroext %str) {
 ; CHECK-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    stfs f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7666,8 +7577,8 @@ define dso_local void @st_align64_uint8_t_double(ptr nocapture %ptr, i8 zeroext
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7806,10 +7717,10 @@ define dso_local void @st_disjoint_align32_uint8_t_double(i64 %ptr, i8 zeroext %
 ; CHECK-P8-LABEL: st_disjoint_align32_uint8_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -7835,17 +7746,29 @@ define dso_local void @st_not_disjoint64_uint8_t_double(i64 %ptr, i8 zeroext %st
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_uint8_t_double:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprwz f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvuxddp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfd f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_uint8_t_double:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprwz f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvuxddp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfd f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_uint8_t_double:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprwz f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfd f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = uitofp i8 %str to double
   %or = or i64 %ptr, 1000000000001
@@ -7880,11 +7803,11 @@ define dso_local void @st_disjoint_align64_uint8_t_double(i64 %ptr, i8 zeroext %
 ; CHECK-P8-LABEL: st_disjoint_align64_uint8_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwz f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -7951,8 +7874,8 @@ define dso_local void @st_cst_align64_uint8_t_double(i8 zeroext %str) {
 ; CHECK-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvuxddp f0, f0
 ; CHECK-P8-NEXT:    stfd f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -8115,23 +8038,14 @@ define dso_local void @st_disjoint_align32_int8_t_uint16_t(i64 %ptr, i8 signext
 ; CHECK-P10-NEXT:    psth r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_int8_t_uint16_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    sthx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_int8_t_uint16_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    sthx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_int8_t_uint16_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    sthx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = sext i8 %str to i16
@@ -8397,23 +8311,14 @@ define dso_local void @st_disjoint_align32_int8_t_uint32_t(i64 %ptr, i8 signext
 ; CHECK-P10-NEXT:    pstw r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_int8_t_uint32_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stwx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_int8_t_uint32_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stwx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_int8_t_uint32_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stwx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = sext i8 %str to i32
@@ -8679,23 +8584,14 @@ define dso_local void @st_disjoint_align32_int8_t_uint64_t(i64 %ptr, i8 signext
 ; CHECK-P10-NEXT:    pstd r4, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-P9-LABEL: st_disjoint_align32_int8_t_uint64_t:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lis r5, -15264
-; CHECK-P9-NEXT:    and r3, r3, r5
-; CHECK-P9-NEXT:    lis r5, 15258
-; CHECK-P9-NEXT:    ori r5, r5, 41712
-; CHECK-P9-NEXT:    stdx r4, r3, r5
-; CHECK-P9-NEXT:    blr
-;
-; CHECK-P8-LABEL: st_disjoint_align32_int8_t_uint64_t:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    lis r5, -15264
-; CHECK-P8-NEXT:    lis r6, 15258
-; CHECK-P8-NEXT:    and r3, r3, r5
-; CHECK-P8-NEXT:    ori r5, r6, 41712
-; CHECK-P8-NEXT:    stdx r4, r3, r5
-; CHECK-P8-NEXT:    blr
+; CHECK-PREP10-LABEL: st_disjoint_align32_int8_t_uint64_t:
+; CHECK-PREP10:       # %bb.0: # %entry
+; CHECK-PREP10-NEXT:    lis r5, -15264
+; CHECK-PREP10-NEXT:    and r3, r3, r5
+; CHECK-PREP10-NEXT:    lis r5, 15258
+; CHECK-PREP10-NEXT:    ori r5, r5, 41712
+; CHECK-PREP10-NEXT:    stdx r4, r3, r5
+; CHECK-PREP10-NEXT:    blr
 entry:
   %and = and i64 %ptr, -1000341504
   %conv = sext i8 %str to i64
@@ -8896,8 +8792,8 @@ define dso_local void @st_align64_int8_t_float(ptr nocapture %ptr, i8 signext %s
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -9036,10 +8932,10 @@ define dso_local void @st_disjoint_align32_int8_t_float(i64 %ptr, i8 signext %st
 ; CHECK-P8-LABEL: st_disjoint_align32_int8_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -9065,17 +8961,29 @@ define dso_local void @st_not_disjoint64_int8_t_float(i64 %ptr, i8 signext %str)
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_int8_t_float:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprwa f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvsxdsp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfs f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_int8_t_float:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprwa f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfs f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_int8_t_float:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprwa f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfs f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = sitofp i8 %str to float
   %or = or i64 %ptr, 1000000000001
@@ -9110,11 +9018,11 @@ define dso_local void @st_disjoint_align64_int8_t_float(i64 %ptr, i8 signext %st
 ; CHECK-P8-LABEL: st_disjoint_align64_int8_t_float:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfsx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -9181,8 +9089,8 @@ define dso_local void @st_cst_align64_int8_t_float(i8 signext %str) {
 ; CHECK-P8-NEXT:    mtfprwa f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    stfs f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -9280,8 +9188,8 @@ define dso_local void @st_align64_int8_t_double(ptr nocapture %ptr, i8 signext %
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
 ; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    ori r4, r4, 19025
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -9420,10 +9328,10 @@ define dso_local void @st_disjoint_align32_int8_t_double(i64 %ptr, i8 signext %s
 ; CHECK-P8-LABEL: st_disjoint_align32_int8_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
-; CHECK-P8-NEXT:    lis r4, -15264
-; CHECK-P8-NEXT:    lis r5, 15258
-; CHECK-P8-NEXT:    and r3, r3, r4
-; CHECK-P8-NEXT:    ori r4, r5, 41712
+; CHECK-P8-NEXT:    lis r5, -15264
+; CHECK-P8-NEXT:    lis r4, 15258
+; CHECK-P8-NEXT:    and r3, r3, r5
+; CHECK-P8-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
@@ -9449,17 +9357,29 @@ define dso_local void @st_not_disjoint64_int8_t_double(i64 %ptr, i8 signext %str
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
-; CHECK-PREP10-LABEL: st_not_disjoint64_int8_t_double:
-; CHECK-PREP10:       # %bb.0: # %entry
-; CHECK-PREP10-NEXT:    mtfprwa f0, r4
-; CHECK-PREP10-NEXT:    li r4, 29
-; CHECK-PREP10-NEXT:    rldic r4, r4, 35, 24
-; CHECK-PREP10-NEXT:    xscvsxddp f0, f0
-; CHECK-PREP10-NEXT:    oris r4, r4, 54437
-; CHECK-PREP10-NEXT:    ori r4, r4, 4097
-; CHECK-PREP10-NEXT:    or r3, r3, r4
-; CHECK-PREP10-NEXT:    stfd f0, 0(r3)
-; CHECK-PREP10-NEXT:    blr
+; CHECK-P9-LABEL: st_not_disjoint64_int8_t_double:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    mtfprwa f0, r4
+; CHECK-P9-NEXT:    li r4, 29
+; CHECK-P9-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P9-NEXT:    xscvsxddp f0, f0
+; CHECK-P9-NEXT:    oris r4, r4, 54437
+; CHECK-P9-NEXT:    ori r4, r4, 4097
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    stfd f0, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: st_not_disjoint64_int8_t_double:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprwa f0, r4
+; CHECK-P8-NEXT:    li r4, 29
+; CHECK-P8-NEXT:    rldic r4, r4, 35, 24
+; CHECK-P8-NEXT:    oris r4, r4, 54437
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 4097
+; CHECK-P8-NEXT:    or r3, r3, r4
+; CHECK-P8-NEXT:    stfd f0, 0(r3)
+; CHECK-P8-NEXT:    blr
 entry:
   %conv = sitofp i8 %str to double
   %or = or i64 %ptr, 1000000000001
@@ -9494,11 +9414,11 @@ define dso_local void @st_disjoint_align64_int8_t_double(i64 %ptr, i8 signext %s
 ; CHECK-P8-LABEL: st_disjoint_align64_int8_t_double:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mtfprwa f0, r4
-; CHECK-P8-NEXT:    lis r5, 3725
+; CHECK-P8-NEXT:    lis r4, 3725
 ; CHECK-P8-NEXT:    rldicr r3, r3, 0, 23
-; CHECK-P8-NEXT:    ori r4, r5, 19025
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
+; CHECK-P8-NEXT:    ori r4, r4, 19025
 ; CHECK-P8-NEXT:    rldic r4, r4, 12, 24
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfdx f0, r3, r4
 ; CHECK-P8-NEXT:    blr
 entry:
@@ -9565,8 +9485,8 @@ define dso_local void @st_cst_align64_int8_t_double(i8 signext %str) {
 ; CHECK-P8-NEXT:    mtfprwa f0, r3
 ; CHECK-P8-NEXT:    lis r3, 3725
 ; CHECK-P8-NEXT:    ori r3, r3, 19025
-; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    rldic r3, r3, 12, 24
+; CHECK-P8-NEXT:    xscvsxddp f0, f0
 ; CHECK-P8-NEXT:    stfd f0, 0(r3)
 ; CHECK-P8-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll
index 308a6e97a0b4977..aaabd76e163bbeb 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll
@@ -20,10 +20,10 @@
 define float @select_oeq_float(float %a, float %b, float %c, float %d) {
 ; FAST-P8-LABEL: select_oeq_float:
 ; FAST-P8:       # %bb.0: # %entry
-; FAST-P8-NEXT:    xssubsp f0, f1, f2
-; FAST-P8-NEXT:    xssubsp f1, f2, f1
-; FAST-P8-NEXT:    fsel f0, f0, f3, f4
-; FAST-P8-NEXT:    fsel f1, f1, f0, f4
+; FAST-P8-NEXT:    xssubsp f0, f2, f1
+; FAST-P8-NEXT:    xssubsp f1, f1, f2
+; FAST-P8-NEXT:    fsel f1, f1, f3, f4
+; FAST-P8-NEXT:    fsel f1, f0, f1, f4
 ; FAST-P8-NEXT:    blr
 ;
 ; FAST-P9-LABEL: select_oeq_float:
@@ -37,10 +37,11 @@ define float @select_oeq_float(float %a, float %b, float %c, float %d) {
 ; NO-FAST-P8-LABEL: select_oeq_float:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
-; NO-FAST-P8-NEXT:    fmr f1, f3
-; NO-FAST-P8-NEXT:    beqlr cr0
+; NO-FAST-P8-NEXT:    beq cr0, .LBB0_2
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f4
+; NO-FAST-P8-NEXT:    fmr f3, f4
+; NO-FAST-P8-NEXT:  .LBB0_2: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_oeq_float:
@@ -62,9 +63,9 @@ define double @select_oeq_double(double %a, double %b, double %c, double %d) {
 ; FAST-P8-LABEL: select_oeq_double:
 ; FAST-P8:       # %bb.0: # %entry
 ; FAST-P8-NEXT:    xssubdp f0, f1, f2
-; FAST-P8-NEXT:    xsnegdp f1, f0
-; FAST-P8-NEXT:    fsel f0, f0, f3, f4
-; FAST-P8-NEXT:    fsel f1, f1, f0, f4
+; FAST-P8-NEXT:    fsel f1, f0, f3, f4
+; FAST-P8-NEXT:    xsnegdp f0, f0
+; FAST-P8-NEXT:    fsel f1, f0, f1, f4
 ; FAST-P8-NEXT:    blr
 ;
 ; FAST-P9-LABEL: select_oeq_double:
@@ -78,10 +79,11 @@ define double @select_oeq_double(double %a, double %b, double %c, double %d) {
 ; NO-FAST-P8-LABEL: select_oeq_double:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    xscmpudp cr0, f1, f2
-; NO-FAST-P8-NEXT:    fmr f1, f3
-; NO-FAST-P8-NEXT:    beqlr cr0
+; NO-FAST-P8-NEXT:    beq cr0, .LBB1_2
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f4
+; NO-FAST-P8-NEXT:    fmr f3, f4
+; NO-FAST-P8-NEXT:  .LBB1_2: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_oeq_double:
@@ -102,10 +104,10 @@ entry:
 define float @select_fast_oeq_float(float %a, float %b, float %c, float %d) {
 ; FAST-P8-LABEL: select_fast_oeq_float:
 ; FAST-P8:       # %bb.0: # %entry
-; FAST-P8-NEXT:    xssubsp f0, f1, f2
-; FAST-P8-NEXT:    xssubsp f1, f2, f1
-; FAST-P8-NEXT:    fsel f0, f0, f3, f4
-; FAST-P8-NEXT:    fsel f1, f1, f0, f4
+; FAST-P8-NEXT:    xssubsp f0, f2, f1
+; FAST-P8-NEXT:    xssubsp f1, f1, f2
+; FAST-P8-NEXT:    fsel f1, f1, f3, f4
+; FAST-P8-NEXT:    fsel f1, f0, f1, f4
 ; FAST-P8-NEXT:    blr
 ;
 ; FAST-P9-LABEL: select_fast_oeq_float:
@@ -118,10 +120,10 @@ define float @select_fast_oeq_float(float %a, float %b, float %c, float %d) {
 ;
 ; NO-FAST-P8-LABEL: select_fast_oeq_float:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xssubsp f0, f1, f2
-; NO-FAST-P8-NEXT:    xssubsp f1, f2, f1
-; NO-FAST-P8-NEXT:    fsel f0, f0, f3, f4
-; NO-FAST-P8-NEXT:    fsel f1, f1, f0, f4
+; NO-FAST-P8-NEXT:    xssubsp f0, f2, f1
+; NO-FAST-P8-NEXT:    xssubsp f1, f1, f2
+; NO-FAST-P8-NEXT:    fsel f1, f1, f3, f4
+; NO-FAST-P8-NEXT:    fsel f1, f0, f1, f4
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_fast_oeq_float:
@@ -141,9 +143,9 @@ define double @select_fast_oeq_double(double %a, double %b, double %c, double %d
 ; FAST-P8-LABEL: select_fast_oeq_double:
 ; FAST-P8:       # %bb.0: # %entry
 ; FAST-P8-NEXT:    xssubdp f0, f1, f2
-; FAST-P8-NEXT:    xsnegdp f1, f0
-; FAST-P8-NEXT:    fsel f0, f0, f3, f4
-; FAST-P8-NEXT:    fsel f1, f1, f0, f4
+; FAST-P8-NEXT:    fsel f1, f0, f3, f4
+; FAST-P8-NEXT:    xsnegdp f0, f0
+; FAST-P8-NEXT:    fsel f1, f0, f1, f4
 ; FAST-P8-NEXT:    blr
 ;
 ; FAST-P9-LABEL: select_fast_oeq_double:
@@ -157,9 +159,9 @@ define double @select_fast_oeq_double(double %a, double %b, double %c, double %d
 ; NO-FAST-P8-LABEL: select_fast_oeq_double:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    xssubdp f0, f1, f2
-; NO-FAST-P8-NEXT:    xsnegdp f1, f0
-; NO-FAST-P8-NEXT:    fsel f0, f0, f3, f4
-; NO-FAST-P8-NEXT:    fsel f1, f1, f0, f4
+; NO-FAST-P8-NEXT:    fsel f1, f0, f3, f4
+; NO-FAST-P8-NEXT:    xsnegdp f0, f0
+; NO-FAST-P8-NEXT:    fsel f1, f0, f1, f4
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_fast_oeq_double:
@@ -180,10 +182,10 @@ entry:
 define float @select_one_float(float %a, float %b, float %c, float %d) {
 ; FAST-P8-LABEL: select_one_float:
 ; FAST-P8:       # %bb.0: # %entry
-; FAST-P8-NEXT:    xssubsp f0, f1, f2
-; FAST-P8-NEXT:    xssubsp f1, f2, f1
-; FAST-P8-NEXT:    fsel f0, f0, f4, f3
-; FAST-P8-NEXT:    fsel f1, f1, f0, f3
+; FAST-P8-NEXT:    xssubsp f0, f2, f1
+; FAST-P8-NEXT:    xssubsp f1, f1, f2
+; FAST-P8-NEXT:    fsel f1, f1, f4, f3
+; FAST-P8-NEXT:    fsel f1, f0, f1, f3
 ; FAST-P8-NEXT:    blr
 ;
 ; FAST-P9-LABEL: select_one_float:
@@ -197,11 +199,12 @@ define float @select_one_float(float %a, float %b, float %c, float %d) {
 ; NO-FAST-P8-LABEL: select_one_float:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
-; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    crnor 4*cr5+lt, un, eq
-; NO-FAST-P8-NEXT:    bclr 12, 4*cr5+lt, 0
+; NO-FAST-P8-NEXT:    bc 12, 4*cr5+lt, .LBB4_2
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f4
+; NO-FAST-P8-NEXT:    fmr f3, f4
+; NO-FAST-P8-NEXT:  .LBB4_2: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_one_float:
@@ -224,9 +227,9 @@ define double @select_one_double(double %a, double %b, double %c, double %d) {
 ; FAST-P8-LABEL: select_one_double:
 ; FAST-P8:       # %bb.0: # %entry
 ; FAST-P8-NEXT:    xssubdp f0, f1, f2
-; FAST-P8-NEXT:    xsnegdp f1, f0
-; FAST-P8-NEXT:    fsel f0, f0, f4, f3
-; FAST-P8-NEXT:    fsel f1, f1, f0, f3
+; FAST-P8-NEXT:    fsel f1, f0, f4, f3
+; FAST-P8-NEXT:    xsnegdp f0, f0
+; FAST-P8-NEXT:    fsel f1, f0, f1, f3
 ; FAST-P8-NEXT:    blr
 ;
 ; FAST-P9-LABEL: select_one_double:
@@ -240,11 +243,12 @@ define double @select_one_double(double %a, double %b, double %c, double %d) {
 ; NO-FAST-P8-LABEL: select_one_double:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
-; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    crnor 4*cr5+lt, un, eq
-; NO-FAST-P8-NEXT:    bclr 12, 4*cr5+lt, 0
+; NO-FAST-P8-NEXT:    bc 12, 4*cr5+lt, .LBB5_2
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f4
+; NO-FAST-P8-NEXT:    fmr f3, f4
+; NO-FAST-P8-NEXT:  .LBB5_2: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_one_double:
@@ -266,10 +270,10 @@ entry:
 define float @select_fast_one_float(float %a, float %b, float %c, float %d) {
 ; FAST-P8-LABEL: select_fast_one_float:
 ; FAST-P8:       # %bb.0: # %entry
-; FAST-P8-NEXT:    xssubsp f0, f1, f2
-; FAST-P8-NEXT:    xssubsp f1, f2, f1
-; FAST-P8-NEXT:    fsel f0, f0, f4, f3
-; FAST-P8-NEXT:    fsel f1, f1, f0, f3
+; FAST-P8-NEXT:    xssubsp f0, f2, f1
+; FAST-P8-NEXT:    xssubsp f1, f1, f2
+; FAST-P8-NEXT:    fsel f1, f1, f4, f3
+; FAST-P8-NEXT:    fsel f1, f0, f1, f3
 ; FAST-P8-NEXT:    blr
 ;
 ; FAST-P9-LABEL: select_fast_one_float:
@@ -282,10 +286,10 @@ define float @select_fast_one_float(float %a, float %b, float %c, float %d) {
 ;
 ; NO-FAST-P8-LABEL: select_fast_one_float:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xssubsp f0, f1, f2
-; NO-FAST-P8-NEXT:    xssubsp f1, f2, f1
-; NO-FAST-P8-NEXT:    fsel f0, f0, f4, f3
-; NO-FAST-P8-NEXT:    fsel f1, f1, f0, f3
+; NO-FAST-P8-NEXT:    xssubsp f0, f2, f1
+; NO-FAST-P8-NEXT:    xssubsp f1, f1, f2
+; NO-FAST-P8-NEXT:    fsel f1, f1, f4, f3
+; NO-FAST-P8-NEXT:    fsel f1, f0, f1, f3
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_fast_one_float:
@@ -305,9 +309,9 @@ define double @select_fast_one_double(double %a, double %b, double %c, double %d
 ; FAST-P8-LABEL: select_fast_one_double:
 ; FAST-P8:       # %bb.0: # %entry
 ; FAST-P8-NEXT:    xssubdp f0, f1, f2
-; FAST-P8-NEXT:    xsnegdp f1, f0
-; FAST-P8-NEXT:    fsel f0, f0, f4, f3
-; FAST-P8-NEXT:    fsel f1, f1, f0, f3
+; FAST-P8-NEXT:    fsel f1, f0, f4, f3
+; FAST-P8-NEXT:    xsnegdp f0, f0
+; FAST-P8-NEXT:    fsel f1, f0, f1, f3
 ; FAST-P8-NEXT:    blr
 ;
 ; FAST-P9-LABEL: select_fast_one_double:
@@ -321,9 +325,9 @@ define double @select_fast_one_double(double %a, double %b, double %c, double %d
 ; NO-FAST-P8-LABEL: select_fast_one_double:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    xssubdp f0, f1, f2
-; NO-FAST-P8-NEXT:    xsnegdp f1, f0
-; NO-FAST-P8-NEXT:    fsel f0, f0, f4, f3
-; NO-FAST-P8-NEXT:    fsel f1, f1, f0, f3
+; NO-FAST-P8-NEXT:    fsel f1, f0, f4, f3
+; NO-FAST-P8-NEXT:    xsnegdp f0, f0
+; NO-FAST-P8-NEXT:    fsel f1, f0, f1, f3
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_fast_one_double:
@@ -357,11 +361,12 @@ define float @select_oge_float(float %a, float %b, float %c, float %d) {
 ; NO-FAST-P8-LABEL: select_oge_float:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
-; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    crnor 4*cr5+lt, un, lt
-; NO-FAST-P8-NEXT:    bclr 12, 4*cr5+lt, 0
+; NO-FAST-P8-NEXT:    bc 12, 4*cr5+lt, .LBB8_2
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f4
+; NO-FAST-P8-NEXT:    fmr f3, f4
+; NO-FAST-P8-NEXT:  .LBB8_2: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_oge_float:
@@ -396,11 +401,12 @@ define double @select_oge_double(double %a, double %b, double %c, double %d) {
 ; NO-FAST-P8-LABEL: select_oge_double:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
-; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    crnor 4*cr5+lt, un, lt
-; NO-FAST-P8-NEXT:    bclr 12, 4*cr5+lt, 0
+; NO-FAST-P8-NEXT:    bc 12, 4*cr5+lt, .LBB9_2
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f4
+; NO-FAST-P8-NEXT:    fmr f3, f4
+; NO-FAST-P8-NEXT:  .LBB9_2: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_oge_double:
@@ -497,10 +503,11 @@ define float @select_olt_float(float %a, float %b, float %c, float %d) {
 ; NO-FAST-P8-LABEL: select_olt_float:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
-; NO-FAST-P8-NEXT:    fmr f1, f3
-; NO-FAST-P8-NEXT:    bltlr cr0
+; NO-FAST-P8-NEXT:    blt cr0, .LBB12_2
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f4
+; NO-FAST-P8-NEXT:    fmr f3, f4
+; NO-FAST-P8-NEXT:  .LBB12_2: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_olt_float:
@@ -534,10 +541,11 @@ define double @select_olt_double(double %a, double %b, double %c, double %d) {
 ; NO-FAST-P8-LABEL: select_olt_double:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    xscmpudp cr0, f1, f2
-; NO-FAST-P8-NEXT:    fmr f1, f3
-; NO-FAST-P8-NEXT:    bltlr cr0
+; NO-FAST-P8-NEXT:    blt cr0, .LBB13_2
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f4
+; NO-FAST-P8-NEXT:    fmr f3, f4
+; NO-FAST-P8-NEXT:  .LBB13_2: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_olt_double:
@@ -633,10 +641,11 @@ define float @select_ogt_float(float %a, float %b, float %c, float %d) {
 ; NO-FAST-P8-LABEL: select_ogt_float:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
-; NO-FAST-P8-NEXT:    fmr f1, f3
-; NO-FAST-P8-NEXT:    bgtlr cr0
+; NO-FAST-P8-NEXT:    bgt cr0, .LBB16_2
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f4
+; NO-FAST-P8-NEXT:    fmr f3, f4
+; NO-FAST-P8-NEXT:  .LBB16_2: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_ogt_float:
@@ -670,10 +679,11 @@ define double @select_ogt_double(double %a, double %b, double %c, double %d) {
 ; NO-FAST-P8-LABEL: select_ogt_double:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    xscmpudp cr0, f1, f2
-; NO-FAST-P8-NEXT:    fmr f1, f3
-; NO-FAST-P8-NEXT:    bgtlr cr0
+; NO-FAST-P8-NEXT:    bgt cr0, .LBB17_2
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f4
+; NO-FAST-P8-NEXT:    fmr f3, f4
+; NO-FAST-P8-NEXT:  .LBB17_2: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_ogt_double:
@@ -769,11 +779,12 @@ define float @select_ole_float(float %a, float %b, float %c, float %d) {
 ; NO-FAST-P8-LABEL: select_ole_float:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
-; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    crnor 4*cr5+lt, un, gt
-; NO-FAST-P8-NEXT:    bclr 12, 4*cr5+lt, 0
+; NO-FAST-P8-NEXT:    bc 12, 4*cr5+lt, .LBB20_2
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f4
+; NO-FAST-P8-NEXT:    fmr f3, f4
+; NO-FAST-P8-NEXT:  .LBB20_2: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_ole_float:
@@ -808,11 +819,12 @@ define double @select_ole_double(double %a, double %b, double %c, double %d) {
 ; NO-FAST-P8-LABEL: select_ole_double:
 ; NO-FAST-P8:       # %bb.0: # %entry
 ; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
-; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    crnor 4*cr5+lt, un, gt
-; NO-FAST-P8-NEXT:    bclr 12, 4*cr5+lt, 0
+; NO-FAST-P8-NEXT:    bc 12, 4*cr5+lt, .LBB21_2
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f4
+; NO-FAST-P8-NEXT:    fmr f3, f4
+; NO-FAST-P8-NEXT:  .LBB21_2: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f3
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: select_ole_double:
@@ -966,10 +978,11 @@ define double @onecmp2(double %a, double %y, double %z) {
 ; NO-FAST-P8-NEXT:    vspltisw v2, 1
 ; NO-FAST-P8-NEXT:    xvcvsxwdp vs0, vs34
 ; NO-FAST-P8-NEXT:    xscmpudp cr0, f1, f0
-; NO-FAST-P8-NEXT:    fmr f1, f2
-; NO-FAST-P8-NEXT:    bgtlr cr0
+; NO-FAST-P8-NEXT:    bgt cr0, .LBB25_2
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f3
+; NO-FAST-P8-NEXT:    fmr f2, f3
+; NO-FAST-P8-NEXT:  .LBB25_2: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f2
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: onecmp2:
@@ -995,9 +1008,9 @@ define double @onecmp3(double %a, double %y, double %z) {
 ; FAST-P8-NEXT:    vspltisw v2, -1
 ; FAST-P8-NEXT:    xvcvsxwdp vs0, vs34
 ; FAST-P8-NEXT:    xsadddp f0, f1, f0
-; FAST-P8-NEXT:    xsnegdp f1, f0
-; FAST-P8-NEXT:    fsel f0, f0, f2, f3
-; FAST-P8-NEXT:    fsel f1, f1, f0, f3
+; FAST-P8-NEXT:    fsel f1, f0, f2, f3
+; FAST-P8-NEXT:    xsnegdp f0, f0
+; FAST-P8-NEXT:    fsel f1, f0, f1, f3
 ; FAST-P8-NEXT:    blr
 ;
 ; FAST-P9-LABEL: onecmp3:
@@ -1015,10 +1028,11 @@ define double @onecmp3(double %a, double %y, double %z) {
 ; NO-FAST-P8-NEXT:    vspltisw v2, 1
 ; NO-FAST-P8-NEXT:    xvcvsxwdp vs0, vs34
 ; NO-FAST-P8-NEXT:    xscmpudp cr0, f1, f0
-; NO-FAST-P8-NEXT:    fmr f1, f2
-; NO-FAST-P8-NEXT:    beqlr cr0
+; NO-FAST-P8-NEXT:    beq cr0, .LBB26_2
 ; NO-FAST-P8-NEXT:  # %bb.1: # %entry
-; NO-FAST-P8-NEXT:    fmr f1, f3
+; NO-FAST-P8-NEXT:    fmr f2, f3
+; NO-FAST-P8-NEXT:  .LBB26_2: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f2
 ; NO-FAST-P8-NEXT:    blr
 ;
 ; NO-FAST-P9-LABEL: onecmp3:

diff  --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
index d74dc81722e00d5..25e1baa28f7ef3a 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
@@ -40,11 +40,11 @@ define <4 x i32> @s2v_test1(ptr nocapture readonly %int32, <4 x i32> %vec)  {
 ; P8LE-LABEL: s2v_test1:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
-; P8LE-NEXT:    lxsiwzx v3, 0, r3
+; P8LE-NEXT:    lxsiwzx v4, 0, r3
 ; P8LE-NEXT:    addi r4, r4, .LCPI0_0@toc@l
 ; P8LE-NEXT:    lxvd2x vs0, 0, r4
-; P8LE-NEXT:    xxswapd v4, vs0
-; P8LE-NEXT:    vperm v2, v2, v3, v4
+; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    vperm v2, v2, v4, v3
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test1:
@@ -66,18 +66,18 @@ define <4 x i32> @s2v_test1(ptr nocapture readonly %int32, <4 x i32> %vec)  {
 ; P8-AIX-64-LABEL: s2v_test1:
 ; P8-AIX-64:       # %bb.0: # %entry
 ; P8-AIX-64-NEXT:    ld r4, L..C0(r2) # %const.0
-; P8-AIX-64-NEXT:    lxsiwzx v3, 0, r3
-; P8-AIX-64-NEXT:    lxvw4x v4, 0, r4
-; P8-AIX-64-NEXT:    vperm v2, v3, v2, v4
+; P8-AIX-64-NEXT:    lxsiwzx v4, 0, r3
+; P8-AIX-64-NEXT:    lxvw4x v3, 0, r4
+; P8-AIX-64-NEXT:    vperm v2, v4, v2, v3
 ; P8-AIX-64-NEXT:    blr
 ;
 ; P8-AIX-32-LABEL: s2v_test1:
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    lwz r3, 0(r3)
-; P8-AIX-32-NEXT:    lwz r4, L..C0(r2) # %const.0
 ; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    lwz r3, L..C0(r2) # %const.0
+; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
 ; P8-AIX-32-NEXT:    addi r3, r1, -16
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r4
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
 ; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
 ; P8-AIX-32-NEXT:    blr
@@ -108,10 +108,10 @@ define <4 x i32> @s2v_test2(ptr nocapture readonly %int32, <4 x i32> %vec)  {
 ; P8LE-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
 ; P8LE-NEXT:    addi r3, r3, 4
 ; P8LE-NEXT:    addi r4, r4, .LCPI1_0@toc@l
-; P8LE-NEXT:    lxsiwzx v3, 0, r3
+; P8LE-NEXT:    lxsiwzx v4, 0, r3
 ; P8LE-NEXT:    lxvd2x vs0, 0, r4
-; P8LE-NEXT:    xxswapd v4, vs0
-; P8LE-NEXT:    vperm v2, v2, v3, v4
+; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    vperm v2, v2, v4, v3
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test2:
@@ -135,18 +135,18 @@ define <4 x i32> @s2v_test2(ptr nocapture readonly %int32, <4 x i32> %vec)  {
 ; P8-AIX-64:       # %bb.0: # %entry
 ; P8-AIX-64-NEXT:    ld r4, L..C1(r2) # %const.0
 ; P8-AIX-64-NEXT:    addi r3, r3, 4
-; P8-AIX-64-NEXT:    lxsiwzx v3, 0, r3
-; P8-AIX-64-NEXT:    lxvw4x v4, 0, r4
-; P8-AIX-64-NEXT:    vperm v2, v3, v2, v4
+; P8-AIX-64-NEXT:    lxsiwzx v4, 0, r3
+; P8-AIX-64-NEXT:    lxvw4x v3, 0, r4
+; P8-AIX-64-NEXT:    vperm v2, v4, v2, v3
 ; P8-AIX-64-NEXT:    blr
 ;
 ; P8-AIX-32-LABEL: s2v_test2:
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    lwz r3, 4(r3)
-; P8-AIX-32-NEXT:    lwz r4, L..C1(r2) # %const.0
 ; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    lwz r3, L..C1(r2) # %const.0
+; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
 ; P8-AIX-32-NEXT:    addi r3, r1, -16
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r4
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
 ; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
 ; P8-AIX-32-NEXT:    blr
@@ -181,18 +181,18 @@ define <4 x i32> @s2v_test3(ptr nocapture readonly %int32, <4 x i32> %vec, i32 s
 ; P8LE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
 ; P8LE-NEXT:    lxvd2x vs0, 0, r4
 ; P8LE-NEXT:    sldi r4, r7, 2
-; P8LE-NEXT:    lxsiwzx v3, r3, r4
-; P8LE-NEXT:    xxswapd v4, vs0
-; P8LE-NEXT:    vperm v2, v2, v3, v4
+; P8LE-NEXT:    lxsiwzx v4, r3, r4
+; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    vperm v2, v2, v4, v3
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test3:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
-; P8BE-NEXT:    sldi r5, r7, 2
-; P8BE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
-; P8BE-NEXT:    lxsiwzx v3, r3, r5
-; P8BE-NEXT:    lxvw4x v4, 0, r4
+; P8BE-NEXT:    sldi r4, r7, 2
+; P8BE-NEXT:    lxsiwzx v3, r3, r4
+; P8BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; P8BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; P8BE-NEXT:    lxvw4x v4, 0, r3
 ; P8BE-NEXT:    vperm v2, v3, v2, v4
 ; P8BE-NEXT:    blr
 ;
@@ -214,10 +214,10 @@ define <4 x i32> @s2v_test3(ptr nocapture readonly %int32, <4 x i32> %vec, i32 s
 ;
 ; P8-AIX-64-LABEL: s2v_test3:
 ; P8-AIX-64:       # %bb.0: # %entry
-; P8-AIX-64-NEXT:    ld r5, L..C2(r2) # %const.0
 ; P8-AIX-64-NEXT:    sldi r4, r4, 2
 ; P8-AIX-64-NEXT:    lxsiwzx v3, r3, r4
-; P8-AIX-64-NEXT:    lxvw4x v4, 0, r5
+; P8-AIX-64-NEXT:    ld r3, L..C2(r2) # %const.0
+; P8-AIX-64-NEXT:    lxvw4x v4, 0, r3
 ; P8-AIX-64-NEXT:    vperm v2, v3, v2, v4
 ; P8-AIX-64-NEXT:    blr
 ;
@@ -225,10 +225,10 @@ define <4 x i32> @s2v_test3(ptr nocapture readonly %int32, <4 x i32> %vec, i32 s
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    slwi r4, r4, 2
 ; P8-AIX-32-NEXT:    lwzx r3, r3, r4
-; P8-AIX-32-NEXT:    lwz r4, L..C2(r2) # %const.0
 ; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    lwz r3, L..C2(r2) # %const.0
+; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
 ; P8-AIX-32-NEXT:    addi r3, r1, -16
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r4
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
 ; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
 ; P8-AIX-32-NEXT:    blr
@@ -261,10 +261,10 @@ define <4 x i32> @s2v_test4(ptr nocapture readonly %int32, <4 x i32> %vec)  {
 ; P8LE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
 ; P8LE-NEXT:    addi r3, r3, 4
 ; P8LE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
-; P8LE-NEXT:    lxsiwzx v3, 0, r3
+; P8LE-NEXT:    lxsiwzx v4, 0, r3
 ; P8LE-NEXT:    lxvd2x vs0, 0, r4
-; P8LE-NEXT:    xxswapd v4, vs0
-; P8LE-NEXT:    vperm v2, v2, v3, v4
+; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    vperm v2, v2, v4, v3
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test4:
@@ -288,18 +288,18 @@ define <4 x i32> @s2v_test4(ptr nocapture readonly %int32, <4 x i32> %vec)  {
 ; P8-AIX-64:       # %bb.0: # %entry
 ; P8-AIX-64-NEXT:    ld r4, L..C3(r2) # %const.0
 ; P8-AIX-64-NEXT:    addi r3, r3, 4
-; P8-AIX-64-NEXT:    lxsiwzx v3, 0, r3
-; P8-AIX-64-NEXT:    lxvw4x v4, 0, r4
-; P8-AIX-64-NEXT:    vperm v2, v3, v2, v4
+; P8-AIX-64-NEXT:    lxsiwzx v4, 0, r3
+; P8-AIX-64-NEXT:    lxvw4x v3, 0, r4
+; P8-AIX-64-NEXT:    vperm v2, v4, v2, v3
 ; P8-AIX-64-NEXT:    blr
 ;
 ; P8-AIX-32-LABEL: s2v_test4:
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    lwz r3, 4(r3)
-; P8-AIX-32-NEXT:    lwz r4, L..C3(r2) # %const.0
 ; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    lwz r3, L..C3(r2) # %const.0
+; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
 ; P8-AIX-32-NEXT:    addi r3, r1, -16
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r4
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
 ; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
 ; P8-AIX-32-NEXT:    blr
@@ -329,11 +329,11 @@ define <4 x i32> @s2v_test5(<4 x i32> %vec, ptr nocapture readonly %ptr1)  {
 ; P8LE-LABEL: s2v_test5:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; P8LE-NEXT:    lxsiwzx v3, 0, r5
+; P8LE-NEXT:    lxsiwzx v4, 0, r5
 ; P8LE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
 ; P8LE-NEXT:    lxvd2x vs0, 0, r3
-; P8LE-NEXT:    xxswapd v4, vs0
-; P8LE-NEXT:    vperm v2, v2, v3, v4
+; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    vperm v2, v2, v4, v3
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test5:
@@ -355,18 +355,18 @@ define <4 x i32> @s2v_test5(<4 x i32> %vec, ptr nocapture readonly %ptr1)  {
 ; P8-AIX-64-LABEL: s2v_test5:
 ; P8-AIX-64:       # %bb.0: # %entry
 ; P8-AIX-64-NEXT:    ld r4, L..C4(r2) # %const.0
-; P8-AIX-64-NEXT:    lxsiwzx v3, 0, r3
-; P8-AIX-64-NEXT:    lxvw4x v4, 0, r4
-; P8-AIX-64-NEXT:    vperm v2, v3, v2, v4
+; P8-AIX-64-NEXT:    lxsiwzx v4, 0, r3
+; P8-AIX-64-NEXT:    lxvw4x v3, 0, r4
+; P8-AIX-64-NEXT:    vperm v2, v4, v2, v3
 ; P8-AIX-64-NEXT:    blr
 ;
 ; P8-AIX-32-LABEL: s2v_test5:
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    lwz r3, 0(r3)
-; P8-AIX-32-NEXT:    lwz r4, L..C4(r2) # %const.0
 ; P8-AIX-32-NEXT:    stw r3, -16(r1)
+; P8-AIX-32-NEXT:    lwz r3, L..C4(r2) # %const.0
+; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
 ; P8-AIX-32-NEXT:    addi r3, r1, -16
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r4
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r3
 ; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
 ; P8-AIX-32-NEXT:    blr
@@ -395,11 +395,11 @@ define <4 x float> @s2v_test_f1(ptr nocapture readonly %f64, <4 x float> %vec)
 ; P8LE-LABEL: s2v_test_f1:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
-; P8LE-NEXT:    lxsiwzx v3, 0, r3
+; P8LE-NEXT:    lxsiwzx v4, 0, r3
 ; P8LE-NEXT:    addi r4, r4, .LCPI5_0@toc@l
 ; P8LE-NEXT:    lxvd2x vs0, 0, r4
-; P8LE-NEXT:    xxswapd v4, vs0
-; P8LE-NEXT:    vperm v2, v2, v3, v4
+; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    vperm v2, v2, v4, v3
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test_f1:
@@ -421,17 +421,17 @@ define <4 x float> @s2v_test_f1(ptr nocapture readonly %f64, <4 x float> %vec)
 ; P8-AIX-64-LABEL: s2v_test_f1:
 ; P8-AIX-64:       # %bb.0: # %entry
 ; P8-AIX-64-NEXT:    ld r4, L..C5(r2) # %const.0
-; P8-AIX-64-NEXT:    lxsiwzx v3, 0, r3
-; P8-AIX-64-NEXT:    lxvw4x v4, 0, r4
-; P8-AIX-64-NEXT:    vperm v2, v3, v2, v4
+; P8-AIX-64-NEXT:    lxsiwzx v4, 0, r3
+; P8-AIX-64-NEXT:    lxvw4x v3, 0, r4
+; P8-AIX-64-NEXT:    vperm v2, v4, v2, v3
 ; P8-AIX-64-NEXT:    blr
 ;
 ; P8-AIX-32-LABEL: s2v_test_f1:
 ; P8-AIX-32:       # %bb.0: # %entry
 ; P8-AIX-32-NEXT:    lwz r4, L..C5(r2) # %const.0
-; P8-AIX-32-NEXT:    lxsiwzx v3, 0, r3
-; P8-AIX-32-NEXT:    lxvw4x v4, 0, r4
-; P8-AIX-32-NEXT:    vperm v2, v3, v2, v4
+; P8-AIX-32-NEXT:    lxsiwzx v4, 0, r3
+; P8-AIX-32-NEXT:    lxvw4x v3, 0, r4
+; P8-AIX-32-NEXT:    vperm v2, v4, v2, v3
 ; P8-AIX-32-NEXT:    blr
 entry:
   %0 = load float, ptr %f64, align 4
@@ -459,9 +459,9 @@ define <2 x float> @s2v_test_f2(ptr nocapture readonly %f64, <2 x float> %vec)
 ; P8LE-LABEL: s2v_test_f2:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addi r3, r3, 4
-; P8LE-NEXT:    xxmrglw vs1, v2, v2
-; P8LE-NEXT:    lfiwzx f0, 0, r3
-; P8LE-NEXT:    xxmrghw v2, vs1, vs0
+; P8LE-NEXT:    xxmrglw vs0, v2, v2
+; P8LE-NEXT:    lfiwzx f1, 0, r3
+; P8LE-NEXT:    xxmrghw v2, vs0, vs1
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test_f2:
@@ -504,9 +504,9 @@ define <2 x float> @s2v_test_f3(ptr nocapture readonly %f64, <2 x float> %vec, i
 ; P8LE-LABEL: s2v_test_f3:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    sldi r4, r7, 2
-; P8LE-NEXT:    xxmrglw vs1, v2, v2
-; P8LE-NEXT:    lfiwzx f0, r3, r4
-; P8LE-NEXT:    xxmrghw v2, vs1, vs0
+; P8LE-NEXT:    xxmrglw vs0, v2, v2
+; P8LE-NEXT:    lfiwzx f1, r3, r4
+; P8LE-NEXT:    xxmrghw v2, vs0, vs1
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test_f3:
@@ -571,9 +571,9 @@ define <2 x float> @s2v_test_f4(ptr nocapture readonly %f64, <2 x float> %vec)
 ; P8LE-LABEL: s2v_test_f4:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addi r3, r3, 4
-; P8LE-NEXT:    xxmrglw vs1, v2, v2
-; P8LE-NEXT:    lfiwzx f0, 0, r3
-; P8LE-NEXT:    xxmrghw v2, vs1, vs0
+; P8LE-NEXT:    xxmrglw vs0, v2, v2
+; P8LE-NEXT:    lfiwzx f1, 0, r3
+; P8LE-NEXT:    xxmrghw v2, vs0, vs1
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test_f4:
@@ -613,9 +613,9 @@ define <2 x float> @s2v_test_f5(<2 x float> %vec, ptr nocapture readonly %ptr1)
 ;
 ; P8LE-LABEL: s2v_test_f5:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    lfiwzx f0, 0, r5
-; P8LE-NEXT:    xxmrglw vs1, v2, v2
-; P8LE-NEXT:    xxmrghw v2, vs1, vs0
+; P8LE-NEXT:    lfiwzx f1, 0, r5
+; P8LE-NEXT:    xxmrglw vs0, v2, v2
+; P8LE-NEXT:    xxmrghw v2, vs0, vs1
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: s2v_test_f5:

diff  --git a/llvm/test/CodeGen/PowerPC/scalars-in-altivec-regs.ll b/llvm/test/CodeGen/PowerPC/scalars-in-altivec-regs.ll
index 06164b7ff5b9b60..bed1e32109a9a35 100644
--- a/llvm/test/CodeGen/PowerPC/scalars-in-altivec-regs.ll
+++ b/llvm/test/CodeGen/PowerPC/scalars-in-altivec-regs.ll
@@ -52,9 +52,9 @@ define dso_local void @test2(ptr %v, i32 signext %a) local_unnamed_addr #0 {
 ;
 ; AIX32-LABEL: test2:
 ; AIX32:       # %bb.0: # %entry
-; AIX32-NEXT:    li r5, 0
 ; AIX32-NEXT:    stw r4, -4(r1)
-; AIX32-NEXT:    stw r5, -8(r1)
+; AIX32-NEXT:    li r4, 0
+; AIX32-NEXT:    stw r4, -8(r1)
 ; AIX32-NEXT:    lfd f0, -8(r1)
 ; AIX32-NEXT:    xxlor vs34, f0, f0
 ; AIX32-NEXT:    #APP
@@ -158,9 +158,9 @@ define dso_local void @test6(ptr %v, i32 zeroext %a) local_unnamed_addr #0 {
 ;
 ; AIX32-LABEL: test6:
 ; AIX32:       # %bb.0: # %entry
-; AIX32-NEXT:    li r5, 0
 ; AIX32-NEXT:    stw r4, -4(r1)
-; AIX32-NEXT:    stw r5, -8(r1)
+; AIX32-NEXT:    li r4, 0
+; AIX32-NEXT:    stw r4, -8(r1)
 ; AIX32-NEXT:    lfd f0, -8(r1)
 ; AIX32-NEXT:    xxlor vs34, f0, f0
 ; AIX32-NEXT:    #APP
@@ -191,9 +191,9 @@ define dso_local void @test7(ptr %v, i16 zeroext %a) local_unnamed_addr #0 {
 ;
 ; AIX32-LABEL: test7:
 ; AIX32:       # %bb.0: # %entry
-; AIX32-NEXT:    li r5, 0
 ; AIX32-NEXT:    stw r4, -4(r1)
-; AIX32-NEXT:    stw r5, -8(r1)
+; AIX32-NEXT:    li r4, 0
+; AIX32-NEXT:    stw r4, -8(r1)
 ; AIX32-NEXT:    lfd f0, -8(r1)
 ; AIX32-NEXT:    xxlor vs34, f0, f0
 ; AIX32-NEXT:    #APP
@@ -224,9 +224,9 @@ define dso_local void @test8(ptr %v, i8 zeroext %a) local_unnamed_addr #0 {
 ;
 ; AIX32-LABEL: test8:
 ; AIX32:       # %bb.0: # %entry
-; AIX32-NEXT:    li r5, 0
 ; AIX32-NEXT:    stw r4, -4(r1)
-; AIX32-NEXT:    stw r5, -8(r1)
+; AIX32-NEXT:    li r4, 0
+; AIX32-NEXT:    stw r4, -8(r1)
 ; AIX32-NEXT:    lfd f0, -8(r1)
 ; AIX32-NEXT:    xxlor vs34, f0, f0
 ; AIX32-NEXT:    #APP

diff  --git a/llvm/test/CodeGen/PowerPC/scheduling-mem-dependency.ll b/llvm/test/CodeGen/PowerPC/scheduling-mem-dependency.ll
index 4e3403587c9f3e3..4b3affcbe2d8e26 100644
--- a/llvm/test/CodeGen/PowerPC/scheduling-mem-dependency.ll
+++ b/llvm/test/CodeGen/PowerPC/scheduling-mem-dependency.ll
@@ -7,10 +7,12 @@ entry:
 ; CHECK: ********** MI Scheduling **********
 ; CHECK-LABEL: store_disjoint_memory:%bb.0
 ; CHECK:SU([[REG2:[0-9]+]]):   STD renamable $x{{[0-9]+}}, 24, renamable $x[[REG5:[0-9]+]]
-; CHECK-NOT: Successors:
+; CHECK-NOT: Predecessors:
+; CHECK: Successors:
 ; CHECK-NOT:    SU([[REG3]]): Ord  Latency=0 Memory
 ; CHECK:SU([[REG3:[0-9]+]]):   STD renamable $x{{[0-9]+}}, 16, renamable $x[[REG5]]
-; CHECK: Predecessors:
+; CHECK-NOT: Predecessors:
+; CHECK: Successors:
 ; CHECK-NOT:    SU([[REG2]]): Ord  Latency=0 Memory
   %arrayidx = getelementptr inbounds i64, ptr %P, i64 3
   store i64 %v, ptr %arrayidx

diff  --git a/llvm/test/CodeGen/PowerPC/select-constant-xor.ll b/llvm/test/CodeGen/PowerPC/select-constant-xor.ll
index 10418e805999531..b40a21b82e836be 100644
--- a/llvm/test/CodeGen/PowerPC/select-constant-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/select-constant-xor.ll
@@ -125,11 +125,11 @@ define i32 @icmpasrne(i32 %input, i32 %a, i32 %b) {
 define i32 @oneusecmp(i32 %a, i32 %b, i32 %d) {
 ; CHECK-LABEL: oneusecmp:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    srawi 6, 3, 31
 ; CHECK-NEXT:    cmpwi 3, 0
-; CHECK-NEXT:    xori 3, 6, 127
-; CHECK-NEXT:    isellt 4, 5, 4
-; CHECK-NEXT:    add 3, 3, 4
+; CHECK-NEXT:    srawi 6, 3, 31
+; CHECK-NEXT:    xori 6, 6, 127
+; CHECK-NEXT:    isellt 3, 5, 4
+; CHECK-NEXT:    add 3, 6, 3
 ; CHECK-NEXT:    blr
   %c = icmp sle i32 %a, -1
   %s = select i1 %c, i32 -128, i32 127

diff  --git a/llvm/test/CodeGen/PowerPC/select.ll b/llvm/test/CodeGen/PowerPC/select.ll
index bbbffe4d62cde1b..49d55c7df524af6 100644
--- a/llvm/test/CodeGen/PowerPC/select.ll
+++ b/llvm/test/CodeGen/PowerPC/select.ll
@@ -9,10 +9,10 @@
 define i64 @f0(i64 %x) {
 ; CHECK-LE-LABEL: f0:
 ; CHECK-LE:       # %bb.0:
-; CHECK-LE-NEXT:    li r4, 125
 ; CHECK-LE-NEXT:    cmpdi r3, 0
-; CHECK-LE-NEXT:    li r3, -3
-; CHECK-LE-NEXT:    isellt r3, r3, r4
+; CHECK-LE-NEXT:    li r3, 125
+; CHECK-LE-NEXT:    li r4, -3
+; CHECK-LE-NEXT:    isellt r3, r4, r3
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-32-LABEL: f0:
@@ -35,10 +35,10 @@ define i64 @f0(i64 %x) {
 define i64 @f1(i64 %x) {
 ; CHECK-LE-LABEL: f1:
 ; CHECK-LE:       # %bb.0:
-; CHECK-LE-NEXT:    li r4, 512
 ; CHECK-LE-NEXT:    cmpdi r3, 0
-; CHECK-LE-NEXT:    li r3, 64
-; CHECK-LE-NEXT:    isellt r3, r3, r4
+; CHECK-LE-NEXT:    li r3, 512
+; CHECK-LE-NEXT:    li r4, 64
+; CHECK-LE-NEXT:    isellt r3, r4, r3
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-32-LABEL: f1:
@@ -61,9 +61,9 @@ define i64 @f1(i64 %x) {
 define i64 @f2(i64 %x) {
 ; CHECK-LE-LABEL: f2:
 ; CHECK-LE:       # %bb.0:
-; CHECK-LE-NEXT:    li r4, 1024
 ; CHECK-LE-NEXT:    cmpdi r3, 0
-; CHECK-LE-NEXT:    iseleq r3, 0, r4
+; CHECK-LE-NEXT:    li r3, 1024
+; CHECK-LE-NEXT:    iseleq r3, 0, r3
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-32-LABEL: f2:
@@ -133,8 +133,8 @@ define i64 @f4(i64 %x) {
 define i64 @f4_sge_0(i64 %x) {
 ; CHECK-LE-LABEL: f4_sge_0:
 ; CHECK-LE:       # %bb.0:
-; CHECK-LE-NEXT:    neg r4, r3
 ; CHECK-LE-NEXT:    cmpdi r3, -1
+; CHECK-LE-NEXT:    neg r4, r3
 ; CHECK-LE-NEXT:    iselgt r3, r4, r3
 ; CHECK-LE-NEXT:    blr
 ;
@@ -180,8 +180,8 @@ define i64 @f4_slt_0(i64 %x) {
 define i64 @f4_sle_0(i64 %x) {
 ; CHECK-LE-LABEL: f4_sle_0:
 ; CHECK-LE:       # %bb.0:
-; CHECK-LE-NEXT:    neg r4, r3
 ; CHECK-LE-NEXT:    cmpdi r3, 1
+; CHECK-LE-NEXT:    neg r4, r3
 ; CHECK-LE-NEXT:    isellt r3, r3, r4
 ; CHECK-LE-NEXT:    blr
 ;
@@ -231,9 +231,9 @@ define i64 @f4_sgt_m1(i64 %x) {
 define i64 @f5(i64 %x, i64 %y) {
 ; CHECK-LE-LABEL: f5:
 ; CHECK-LE:       # %bb.0:
-; CHECK-LE-NEXT:    li r5, 0
 ; CHECK-LE-NEXT:    cmpldi r3, 0
-; CHECK-LE-NEXT:    iseleq r3, r4, r5
+; CHECK-LE-NEXT:    li r3, 0
+; CHECK-LE-NEXT:    iseleq r3, r4, r3
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-32-LABEL: f5:
@@ -257,9 +257,9 @@ define i64 @f5(i64 %x, i64 %y) {
 define i32 @f5_i32(i32 %x, i32 %y) {
 ; CHECK-LE-LABEL: f5_i32:
 ; CHECK-LE:       # %bb.0:
-; CHECK-LE-NEXT:    li r5, 0
 ; CHECK-LE-NEXT:    cmplwi r3, 0
-; CHECK-LE-NEXT:    iseleq r3, r4, r5
+; CHECK-LE-NEXT:    li r3, 0
+; CHECK-LE-NEXT:    iseleq r3, r4, r3
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-32-LABEL: f5_i32:

diff  --git a/llvm/test/CodeGen/PowerPC/select_const.ll b/llvm/test/CodeGen/PowerPC/select_const.ll
index da2f73d6576dccb..606cfe22887802e 100644
--- a/llvm/test/CodeGen/PowerPC/select_const.ll
+++ b/llvm/test/CodeGen/PowerPC/select_const.ll
@@ -190,16 +190,16 @@ define i32 @select_C1_C2(i1 %cond) {
 ; ISEL-LABEL: select_C1_C2:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    li 4, 421
 ; ISEL-NEXT:    li 3, 42
+; ISEL-NEXT:    li 4, 421
 ; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: select_C1_C2:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    li 4, 421
 ; NO_ISEL-NEXT:    li 3, 42
+; NO_ISEL-NEXT:    li 4, 421
 ; NO_ISEL-NEXT:    bc 12, 1, .LBB18_1
 ; NO_ISEL-NEXT:    blr
 ; NO_ISEL-NEXT:  .LBB18_1:
@@ -213,16 +213,16 @@ define i32 @select_C1_C2_zeroext(i1 zeroext %cond) {
 ; ISEL-LABEL: select_C1_C2_zeroext:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    li 4, 421
 ; ISEL-NEXT:    li 3, 42
+; ISEL-NEXT:    li 4, 421
 ; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: select_C1_C2_zeroext:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    li 4, 421
 ; NO_ISEL-NEXT:    li 3, 42
+; NO_ISEL-NEXT:    li 4, 421
 ; NO_ISEL-NEXT:    bc 12, 1, .LBB19_1
 ; NO_ISEL-NEXT:    blr
 ; NO_ISEL-NEXT:  .LBB19_1:
@@ -236,16 +236,16 @@ define i32 @select_C1_C2_signext(i1 signext %cond) {
 ; ISEL-LABEL: select_C1_C2_signext:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    li 4, 421
 ; ISEL-NEXT:    li 3, 42
+; ISEL-NEXT:    li 4, 421
 ; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: select_C1_C2_signext:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    li 4, 421
 ; NO_ISEL-NEXT:    li 3, 42
+; NO_ISEL-NEXT:    li 4, 421
 ; NO_ISEL-NEXT:    bc 12, 1, .LBB20_1
 ; NO_ISEL-NEXT:    blr
 ; NO_ISEL-NEXT:  .LBB20_1:
@@ -261,16 +261,16 @@ define i8 @sel_constants_add_constant(i1 %cond) {
 ; ISEL-LABEL: sel_constants_add_constant:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    li 4, 1
 ; ISEL-NEXT:    li 3, 28
+; ISEL-NEXT:    li 4, 1
 ; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: sel_constants_add_constant:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    li 4, 1
 ; NO_ISEL-NEXT:    li 3, 28
+; NO_ISEL-NEXT:    li 4, 1
 ; NO_ISEL-NEXT:    bc 12, 1, .LBB21_1
 ; NO_ISEL-NEXT:    blr
 ; NO_ISEL-NEXT:  .LBB21_1:
@@ -285,16 +285,16 @@ define i8 @sel_constants_sub_constant(i1 %cond) {
 ; ISEL-LABEL: sel_constants_sub_constant:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    li 4, -9
 ; ISEL-NEXT:    li 3, 18
+; ISEL-NEXT:    li 4, -9
 ; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: sel_constants_sub_constant:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    li 4, -9
 ; NO_ISEL-NEXT:    li 3, 18
+; NO_ISEL-NEXT:    li 4, -9
 ; NO_ISEL-NEXT:    bc 12, 1, .LBB22_1
 ; NO_ISEL-NEXT:    blr
 ; NO_ISEL-NEXT:  .LBB22_1:
@@ -309,16 +309,16 @@ define i8 @sel_constants_sub_constant_sel_constants(i1 %cond) {
 ; ISEL-LABEL: sel_constants_sub_constant_sel_constants:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    li 4, 9
 ; ISEL-NEXT:    li 3, 2
+; ISEL-NEXT:    li 4, 9
 ; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: sel_constants_sub_constant_sel_constants:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    li 4, 9
 ; NO_ISEL-NEXT:    li 3, 2
+; NO_ISEL-NEXT:    li 4, 9
 ; NO_ISEL-NEXT:    bc 12, 1, .LBB23_1
 ; NO_ISEL-NEXT:    blr
 ; NO_ISEL-NEXT:  .LBB23_1:
@@ -333,16 +333,16 @@ define i8 @sel_constants_mul_constant(i1 %cond) {
 ; ISEL-LABEL: sel_constants_mul_constant:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    li 4, -20
 ; ISEL-NEXT:    li 3, 115
+; ISEL-NEXT:    li 4, -20
 ; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: sel_constants_mul_constant:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    li 4, -20
 ; NO_ISEL-NEXT:    li 3, 115
+; NO_ISEL-NEXT:    li 4, -20
 ; NO_ISEL-NEXT:    bc 12, 1, .LBB24_1
 ; NO_ISEL-NEXT:    blr
 ; NO_ISEL-NEXT:  .LBB24_1:
@@ -401,16 +401,16 @@ define i8 @sel_constants_udiv_constant(i1 %cond) {
 ; ISEL-LABEL: sel_constants_udiv_constant:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    li 4, 50
 ; ISEL-NEXT:    li 3, 4
+; ISEL-NEXT:    li 4, 50
 ; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: sel_constants_udiv_constant:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    li 4, 50
 ; NO_ISEL-NEXT:    li 3, 4
+; NO_ISEL-NEXT:    li 4, 50
 ; NO_ISEL-NEXT:    bc 12, 1, .LBB27_1
 ; NO_ISEL-NEXT:    blr
 ; NO_ISEL-NEXT:  .LBB27_1:
@@ -447,16 +447,16 @@ define i8 @sel_constants_srem_constant(i1 %cond) {
 ; ISEL-LABEL: sel_constants_srem_constant:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    li 4, -4
 ; ISEL-NEXT:    li 3, 3
+; ISEL-NEXT:    li 4, -4
 ; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: sel_constants_srem_constant:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    li 4, -4
 ; NO_ISEL-NEXT:    li 3, 3
+; NO_ISEL-NEXT:    li 4, -4
 ; NO_ISEL-NEXT:    bc 12, 1, .LBB29_1
 ; NO_ISEL-NEXT:    blr
 ; NO_ISEL-NEXT:  .LBB29_1:
@@ -471,16 +471,16 @@ define i8 @srem_constant_sel_constants(i1 %cond) {
 ; ISEL-LABEL: srem_constant_sel_constants:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    li 4, 120
 ; ISEL-NEXT:    li 3, 5
+; ISEL-NEXT:    li 4, 120
 ; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: srem_constant_sel_constants:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    li 4, 120
 ; NO_ISEL-NEXT:    li 3, 5
+; NO_ISEL-NEXT:    li 4, 120
 ; NO_ISEL-NEXT:    bc 12, 1, .LBB30_1
 ; NO_ISEL-NEXT:    blr
 ; NO_ISEL-NEXT:  .LBB30_1:
@@ -506,16 +506,16 @@ define i8 @urem_constant_sel_constants(i1 %cond) {
 ; ISEL-LABEL: urem_constant_sel_constants:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    li 4, 120
 ; ISEL-NEXT:    li 3, 5
+; ISEL-NEXT:    li 4, 120
 ; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: urem_constant_sel_constants:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    li 4, 120
 ; NO_ISEL-NEXT:    li 3, 5
+; NO_ISEL-NEXT:    li 4, 120
 ; NO_ISEL-NEXT:    bc 12, 1, .LBB32_1
 ; NO_ISEL-NEXT:    blr
 ; NO_ISEL-NEXT:  .LBB32_1:
@@ -541,16 +541,16 @@ define i8 @sel_constants_or_constant(i1 %cond) {
 ; ISEL-LABEL: sel_constants_or_constant:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    li 4, -3
 ; ISEL-NEXT:    li 3, 23
+; ISEL-NEXT:    li 4, -3
 ; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: sel_constants_or_constant:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    li 4, -3
 ; NO_ISEL-NEXT:    li 3, 23
+; NO_ISEL-NEXT:    li 4, -3
 ; NO_ISEL-NEXT:    bc 12, 1, .LBB34_1
 ; NO_ISEL-NEXT:    blr
 ; NO_ISEL-NEXT:  .LBB34_1:
@@ -565,16 +565,16 @@ define i8 @sel_constants_xor_constant(i1 %cond) {
 ; ISEL-LABEL: sel_constants_xor_constant:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    li 4, -7
 ; ISEL-NEXT:    li 3, 18
+; ISEL-NEXT:    li 4, -7
 ; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: sel_constants_xor_constant:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    li 4, -7
 ; NO_ISEL-NEXT:    li 3, 18
+; NO_ISEL-NEXT:    li 4, -7
 ; NO_ISEL-NEXT:    bc 12, 1, .LBB35_1
 ; NO_ISEL-NEXT:    blr
 ; NO_ISEL-NEXT:  .LBB35_1:
@@ -589,16 +589,16 @@ define i8 @sel_constants_shl_constant(i1 %cond) {
 ; ISEL-LABEL: sel_constants_shl_constant:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    li 4, -128
 ; ISEL-NEXT:    li 3, -32
+; ISEL-NEXT:    li 4, -128
 ; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: sel_constants_shl_constant:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    li 4, -128
 ; NO_ISEL-NEXT:    li 3, -32
+; NO_ISEL-NEXT:    li 4, -128
 ; NO_ISEL-NEXT:    bc 12, 1, .LBB36_1
 ; NO_ISEL-NEXT:    blr
 ; NO_ISEL-NEXT:  .LBB36_1:
@@ -626,16 +626,16 @@ define i8 @sel_constants_lshr_constant(i1 %cond) {
 ; ISEL-LABEL: sel_constants_lshr_constant:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    li 4, 7
 ; ISEL-NEXT:    li 3, 0
+; ISEL-NEXT:    li 4, 7
 ; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: sel_constants_lshr_constant:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    li 4, 7
 ; NO_ISEL-NEXT:    li 3, 0
+; NO_ISEL-NEXT:    li 4, 7
 ; NO_ISEL-NEXT:    bc 12, 1, .LBB38_1
 ; NO_ISEL-NEXT:    blr
 ; NO_ISEL-NEXT:  .LBB38_1:
@@ -688,25 +688,25 @@ define double @sel_constants_fadd_constant(i1 %cond) {
 ; ISEL-LABEL: sel_constants_fadd_constant:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    addis 4, 2, .LCPI42_0@toc@ha
-; ISEL-NEXT:    addis 3, 2, .LCPI42_1@toc@ha
-; ISEL-NEXT:    addi 4, 4, .LCPI42_0@toc@l
-; ISEL-NEXT:    addi 3, 3, .LCPI42_1@toc@l
-; ISEL-NEXT:    iselgt 3, 3, 4
+; ISEL-NEXT:    addis 3, 2, .LCPI42_0@toc@ha
+; ISEL-NEXT:    addis 4, 2, .LCPI42_1@toc@ha
+; ISEL-NEXT:    addi 3, 3, .LCPI42_0@toc@l
+; ISEL-NEXT:    addi 4, 4, .LCPI42_1@toc@l
+; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    lfd 1, 0(3)
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: sel_constants_fadd_constant:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    addis 4, 2, .LCPI42_0@toc@ha
-; NO_ISEL-NEXT:    addis 3, 2, .LCPI42_1@toc@ha
-; NO_ISEL-NEXT:    addi 4, 4, .LCPI42_0@toc@l
-; NO_ISEL-NEXT:    addi 3, 3, .LCPI42_1@toc@l
-; NO_ISEL-NEXT:    bc 12, 1, .LBB42_2
-; NO_ISEL-NEXT:  # %bb.1:
-; NO_ISEL-NEXT:    ori 3, 4, 0
+; NO_ISEL-NEXT:    addis 3, 2, .LCPI42_0@toc@ha
+; NO_ISEL-NEXT:    addis 4, 2, .LCPI42_1@toc@ha
+; NO_ISEL-NEXT:    addi 3, 3, .LCPI42_0@toc@l
+; NO_ISEL-NEXT:    addi 4, 4, .LCPI42_1@toc@l
+; NO_ISEL-NEXT:    bc 12, 1, .LBB42_1
 ; NO_ISEL-NEXT:    b .LBB42_2
+; NO_ISEL-NEXT:  .LBB42_1:
+; NO_ISEL-NEXT:    addi 3, 4, 0
 ; NO_ISEL-NEXT:  .LBB42_2:
 ; NO_ISEL-NEXT:    lfd 1, 0(3)
 ; NO_ISEL-NEXT:    blr
@@ -719,25 +719,25 @@ define double @sel_constants_fsub_constant(i1 %cond) {
 ; ISEL-LABEL: sel_constants_fsub_constant:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    addis 4, 2, .LCPI43_0@toc@ha
-; ISEL-NEXT:    addis 3, 2, .LCPI43_1@toc@ha
-; ISEL-NEXT:    addi 4, 4, .LCPI43_0@toc@l
-; ISEL-NEXT:    addi 3, 3, .LCPI43_1@toc@l
-; ISEL-NEXT:    iselgt 3, 3, 4
+; ISEL-NEXT:    addis 3, 2, .LCPI43_0@toc@ha
+; ISEL-NEXT:    addis 4, 2, .LCPI43_1@toc@ha
+; ISEL-NEXT:    addi 3, 3, .LCPI43_0@toc@l
+; ISEL-NEXT:    addi 4, 4, .LCPI43_1@toc@l
+; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    lfd 1, 0(3)
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: sel_constants_fsub_constant:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    addis 4, 2, .LCPI43_0@toc@ha
-; NO_ISEL-NEXT:    addis 3, 2, .LCPI43_1@toc@ha
-; NO_ISEL-NEXT:    addi 4, 4, .LCPI43_0@toc@l
-; NO_ISEL-NEXT:    addi 3, 3, .LCPI43_1@toc@l
-; NO_ISEL-NEXT:    bc 12, 1, .LBB43_2
-; NO_ISEL-NEXT:  # %bb.1:
-; NO_ISEL-NEXT:    ori 3, 4, 0
+; NO_ISEL-NEXT:    addis 3, 2, .LCPI43_0@toc@ha
+; NO_ISEL-NEXT:    addis 4, 2, .LCPI43_1@toc@ha
+; NO_ISEL-NEXT:    addi 3, 3, .LCPI43_0@toc@l
+; NO_ISEL-NEXT:    addi 4, 4, .LCPI43_1@toc@l
+; NO_ISEL-NEXT:    bc 12, 1, .LBB43_1
 ; NO_ISEL-NEXT:    b .LBB43_2
+; NO_ISEL-NEXT:  .LBB43_1:
+; NO_ISEL-NEXT:    addi 3, 4, 0
 ; NO_ISEL-NEXT:  .LBB43_2:
 ; NO_ISEL-NEXT:    lfd 1, 0(3)
 ; NO_ISEL-NEXT:    blr
@@ -750,25 +750,25 @@ define double @fsub_constant_sel_constants(i1 %cond) {
 ; ISEL-LABEL: fsub_constant_sel_constants:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    addis 4, 2, .LCPI44_0@toc@ha
-; ISEL-NEXT:    addis 3, 2, .LCPI44_1@toc@ha
-; ISEL-NEXT:    addi 4, 4, .LCPI44_0@toc@l
-; ISEL-NEXT:    addi 3, 3, .LCPI44_1@toc@l
-; ISEL-NEXT:    iselgt 3, 3, 4
+; ISEL-NEXT:    addis 3, 2, .LCPI44_0@toc@ha
+; ISEL-NEXT:    addis 4, 2, .LCPI44_1@toc@ha
+; ISEL-NEXT:    addi 3, 3, .LCPI44_0@toc@l
+; ISEL-NEXT:    addi 4, 4, .LCPI44_1@toc@l
+; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    lfd 1, 0(3)
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: fsub_constant_sel_constants:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    addis 4, 2, .LCPI44_0@toc@ha
-; NO_ISEL-NEXT:    addis 3, 2, .LCPI44_1@toc@ha
-; NO_ISEL-NEXT:    addi 4, 4, .LCPI44_0@toc@l
-; NO_ISEL-NEXT:    addi 3, 3, .LCPI44_1@toc@l
-; NO_ISEL-NEXT:    bc 12, 1, .LBB44_2
-; NO_ISEL-NEXT:  # %bb.1:
-; NO_ISEL-NEXT:    ori 3, 4, 0
+; NO_ISEL-NEXT:    addis 3, 2, .LCPI44_0@toc@ha
+; NO_ISEL-NEXT:    addis 4, 2, .LCPI44_1@toc@ha
+; NO_ISEL-NEXT:    addi 3, 3, .LCPI44_0@toc@l
+; NO_ISEL-NEXT:    addi 4, 4, .LCPI44_1@toc@l
+; NO_ISEL-NEXT:    bc 12, 1, .LBB44_1
 ; NO_ISEL-NEXT:    b .LBB44_2
+; NO_ISEL-NEXT:  .LBB44_1:
+; NO_ISEL-NEXT:    addi 3, 4, 0
 ; NO_ISEL-NEXT:  .LBB44_2:
 ; NO_ISEL-NEXT:    lfd 1, 0(3)
 ; NO_ISEL-NEXT:    blr
@@ -781,25 +781,25 @@ define double @sel_constants_fmul_constant(i1 %cond) {
 ; ISEL-LABEL: sel_constants_fmul_constant:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    addis 4, 2, .LCPI45_0@toc@ha
-; ISEL-NEXT:    addis 3, 2, .LCPI45_1@toc@ha
-; ISEL-NEXT:    addi 4, 4, .LCPI45_0@toc@l
-; ISEL-NEXT:    addi 3, 3, .LCPI45_1@toc@l
-; ISEL-NEXT:    iselgt 3, 3, 4
+; ISEL-NEXT:    addis 3, 2, .LCPI45_0@toc@ha
+; ISEL-NEXT:    addis 4, 2, .LCPI45_1@toc@ha
+; ISEL-NEXT:    addi 3, 3, .LCPI45_0@toc@l
+; ISEL-NEXT:    addi 4, 4, .LCPI45_1@toc@l
+; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    lfd 1, 0(3)
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: sel_constants_fmul_constant:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    addis 4, 2, .LCPI45_0@toc@ha
-; NO_ISEL-NEXT:    addis 3, 2, .LCPI45_1@toc@ha
-; NO_ISEL-NEXT:    addi 4, 4, .LCPI45_0@toc@l
-; NO_ISEL-NEXT:    addi 3, 3, .LCPI45_1@toc@l
-; NO_ISEL-NEXT:    bc 12, 1, .LBB45_2
-; NO_ISEL-NEXT:  # %bb.1:
-; NO_ISEL-NEXT:    ori 3, 4, 0
+; NO_ISEL-NEXT:    addis 3, 2, .LCPI45_0@toc@ha
+; NO_ISEL-NEXT:    addis 4, 2, .LCPI45_1@toc@ha
+; NO_ISEL-NEXT:    addi 3, 3, .LCPI45_0@toc@l
+; NO_ISEL-NEXT:    addi 4, 4, .LCPI45_1@toc@l
+; NO_ISEL-NEXT:    bc 12, 1, .LBB45_1
 ; NO_ISEL-NEXT:    b .LBB45_2
+; NO_ISEL-NEXT:  .LBB45_1:
+; NO_ISEL-NEXT:    addi 3, 4, 0
 ; NO_ISEL-NEXT:  .LBB45_2:
 ; NO_ISEL-NEXT:    lfd 1, 0(3)
 ; NO_ISEL-NEXT:    blr
@@ -812,25 +812,25 @@ define double @sel_constants_fdiv_constant(i1 %cond) {
 ; ISEL-LABEL: sel_constants_fdiv_constant:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    addis 4, 2, .LCPI46_0@toc@ha
-; ISEL-NEXT:    addis 3, 2, .LCPI46_1@toc@ha
-; ISEL-NEXT:    addi 4, 4, .LCPI46_0@toc@l
-; ISEL-NEXT:    addi 3, 3, .LCPI46_1@toc@l
-; ISEL-NEXT:    iselgt 3, 3, 4
+; ISEL-NEXT:    addis 3, 2, .LCPI46_0@toc@ha
+; ISEL-NEXT:    addis 4, 2, .LCPI46_1@toc@ha
+; ISEL-NEXT:    addi 3, 3, .LCPI46_0@toc@l
+; ISEL-NEXT:    addi 4, 4, .LCPI46_1@toc@l
+; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    lfd 1, 0(3)
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: sel_constants_fdiv_constant:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    addis 4, 2, .LCPI46_0@toc@ha
-; NO_ISEL-NEXT:    addis 3, 2, .LCPI46_1@toc@ha
-; NO_ISEL-NEXT:    addi 4, 4, .LCPI46_0@toc@l
-; NO_ISEL-NEXT:    addi 3, 3, .LCPI46_1@toc@l
-; NO_ISEL-NEXT:    bc 12, 1, .LBB46_2
-; NO_ISEL-NEXT:  # %bb.1:
-; NO_ISEL-NEXT:    ori 3, 4, 0
+; NO_ISEL-NEXT:    addis 3, 2, .LCPI46_0@toc@ha
+; NO_ISEL-NEXT:    addis 4, 2, .LCPI46_1@toc@ha
+; NO_ISEL-NEXT:    addi 3, 3, .LCPI46_0@toc@l
+; NO_ISEL-NEXT:    addi 4, 4, .LCPI46_1@toc@l
+; NO_ISEL-NEXT:    bc 12, 1, .LBB46_1
 ; NO_ISEL-NEXT:    b .LBB46_2
+; NO_ISEL-NEXT:  .LBB46_1:
+; NO_ISEL-NEXT:    addi 3, 4, 0
 ; NO_ISEL-NEXT:  .LBB46_2:
 ; NO_ISEL-NEXT:    lfd 1, 0(3)
 ; NO_ISEL-NEXT:    blr
@@ -843,25 +843,25 @@ define double @fdiv_constant_sel_constants(i1 %cond) {
 ; ISEL-LABEL: fdiv_constant_sel_constants:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    addis 4, 2, .LCPI47_0@toc@ha
-; ISEL-NEXT:    addis 3, 2, .LCPI47_1@toc@ha
-; ISEL-NEXT:    addi 4, 4, .LCPI47_0@toc@l
-; ISEL-NEXT:    addi 3, 3, .LCPI47_1@toc@l
-; ISEL-NEXT:    iselgt 3, 3, 4
+; ISEL-NEXT:    addis 3, 2, .LCPI47_0@toc@ha
+; ISEL-NEXT:    addis 4, 2, .LCPI47_1@toc@ha
+; ISEL-NEXT:    addi 3, 3, .LCPI47_0@toc@l
+; ISEL-NEXT:    addi 4, 4, .LCPI47_1@toc@l
+; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    lfd 1, 0(3)
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: fdiv_constant_sel_constants:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    addis 4, 2, .LCPI47_0@toc@ha
-; NO_ISEL-NEXT:    addis 3, 2, .LCPI47_1@toc@ha
-; NO_ISEL-NEXT:    addi 4, 4, .LCPI47_0@toc@l
-; NO_ISEL-NEXT:    addi 3, 3, .LCPI47_1@toc@l
-; NO_ISEL-NEXT:    bc 12, 1, .LBB47_2
-; NO_ISEL-NEXT:  # %bb.1:
-; NO_ISEL-NEXT:    ori 3, 4, 0
+; NO_ISEL-NEXT:    addis 3, 2, .LCPI47_0@toc@ha
+; NO_ISEL-NEXT:    addis 4, 2, .LCPI47_1@toc@ha
+; NO_ISEL-NEXT:    addi 3, 3, .LCPI47_0@toc@l
+; NO_ISEL-NEXT:    addi 4, 4, .LCPI47_1@toc@l
+; NO_ISEL-NEXT:    bc 12, 1, .LBB47_1
 ; NO_ISEL-NEXT:    b .LBB47_2
+; NO_ISEL-NEXT:  .LBB47_1:
+; NO_ISEL-NEXT:    addi 3, 4, 0
 ; NO_ISEL-NEXT:  .LBB47_2:
 ; NO_ISEL-NEXT:    lfd 1, 0(3)
 ; NO_ISEL-NEXT:    blr
@@ -894,25 +894,25 @@ define double @frem_constant_sel_constants(i1 %cond) {
 ; ISEL-LABEL: frem_constant_sel_constants:
 ; ISEL:       # %bb.0:
 ; ISEL-NEXT:    andi. 3, 3, 1
-; ISEL-NEXT:    addis 4, 2, .LCPI49_0@toc@ha
-; ISEL-NEXT:    addis 3, 2, .LCPI49_1@toc@ha
-; ISEL-NEXT:    addi 4, 4, .LCPI49_0@toc@l
-; ISEL-NEXT:    addi 3, 3, .LCPI49_1@toc@l
-; ISEL-NEXT:    iselgt 3, 3, 4
+; ISEL-NEXT:    addis 3, 2, .LCPI49_0@toc@ha
+; ISEL-NEXT:    addis 4, 2, .LCPI49_1@toc@ha
+; ISEL-NEXT:    addi 3, 3, .LCPI49_0@toc@l
+; ISEL-NEXT:    addi 4, 4, .LCPI49_1@toc@l
+; ISEL-NEXT:    iselgt 3, 4, 3
 ; ISEL-NEXT:    lfd 1, 0(3)
 ; ISEL-NEXT:    blr
 ;
 ; NO_ISEL-LABEL: frem_constant_sel_constants:
 ; NO_ISEL:       # %bb.0:
 ; NO_ISEL-NEXT:    andi. 3, 3, 1
-; NO_ISEL-NEXT:    addis 4, 2, .LCPI49_0@toc@ha
-; NO_ISEL-NEXT:    addis 3, 2, .LCPI49_1@toc@ha
-; NO_ISEL-NEXT:    addi 4, 4, .LCPI49_0@toc@l
-; NO_ISEL-NEXT:    addi 3, 3, .LCPI49_1@toc@l
-; NO_ISEL-NEXT:    bc 12, 1, .LBB49_2
-; NO_ISEL-NEXT:  # %bb.1:
-; NO_ISEL-NEXT:    ori 3, 4, 0
+; NO_ISEL-NEXT:    addis 3, 2, .LCPI49_0@toc@ha
+; NO_ISEL-NEXT:    addis 4, 2, .LCPI49_1@toc@ha
+; NO_ISEL-NEXT:    addi 3, 3, .LCPI49_0@toc@l
+; NO_ISEL-NEXT:    addi 4, 4, .LCPI49_1@toc@l
+; NO_ISEL-NEXT:    bc 12, 1, .LBB49_1
 ; NO_ISEL-NEXT:    b .LBB49_2
+; NO_ISEL-NEXT:  .LBB49_1:
+; NO_ISEL-NEXT:    addi 3, 4, 0
 ; NO_ISEL-NEXT:  .LBB49_2:
 ; NO_ISEL-NEXT:    lfd 1, 0(3)
 ; NO_ISEL-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
index 7dca47128a5b481..1c3ac17666e26c8 100644
--- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll
+++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
@@ -30,9 +30,9 @@ define zeroext i1 @all_sign_bits_clear(i32 %P, i32 %Q)  {
 define zeroext i1 @all_bits_set(i32 %P, i32 %Q)  {
 ; CHECK-LABEL: all_bits_set:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li 5, -1
 ; CHECK-NEXT:    and 3, 3, 4
-; CHECK-NEXT:    xor 3, 3, 5
+; CHECK-NEXT:    li 4, -1
+; CHECK-NEXT:    xor 3, 3, 4
 ; CHECK-NEXT:    cntlzw 3, 3
 ; CHECK-NEXT:    srwi 3, 3, 5
 ; CHECK-NEXT:    blr
@@ -83,9 +83,9 @@ define zeroext i1 @any_sign_bits_set(i32 %P, i32 %Q)  {
 define zeroext i1 @any_bits_clear(i32 %P, i32 %Q)  {
 ; CHECK-LABEL: any_bits_clear:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li 5, -1
 ; CHECK-NEXT:    and 3, 3, 4
-; CHECK-NEXT:    xor 3, 3, 5
+; CHECK-NEXT:    li 4, -1
+; CHECK-NEXT:    xor 3, 3, 4
 ; CHECK-NEXT:    cntlzw 3, 3
 ; CHECK-NEXT:    srwi 3, 3, 5
 ; CHECK-NEXT:    xori 3, 3, 1
@@ -312,9 +312,9 @@ return:
 define <4 x i1> @all_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
 ; CHECK-LABEL: all_bits_clear_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xxlxor 36, 36, 36
 ; CHECK-NEXT:    xxlor 34, 34, 35
-; CHECK-NEXT:    vcmpequw 2, 2, 4
+; CHECK-NEXT:    xxlxor 35, 35, 35
+; CHECK-NEXT:    vcmpequw 2, 2, 3
 ; CHECK-NEXT:    blr
   %a = icmp eq <4 x i32> %P, zeroinitializer
   %b = icmp eq <4 x i32> %Q, zeroinitializer
@@ -325,9 +325,9 @@ define <4 x i1> @all_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
 define <4 x i1> @all_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
 ; CHECK-LABEL: all_sign_bits_clear_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xxleqv 36, 36, 36
 ; CHECK-NEXT:    xxlor 34, 34, 35
-; CHECK-NEXT:    vcmpgtsw 2, 2, 4
+; CHECK-NEXT:    xxleqv 35, 35, 35
+; CHECK-NEXT:    vcmpgtsw 2, 2, 3
 ; CHECK-NEXT:    blr
   %a = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -338,9 +338,9 @@ define <4 x i1> @all_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
 define <4 x i1> @all_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
 ; CHECK-LABEL: all_bits_set_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xxleqv 36, 36, 36
 ; CHECK-NEXT:    xxland 34, 34, 35
-; CHECK-NEXT:    vcmpequw 2, 2, 4
+; CHECK-NEXT:    xxleqv 35, 35, 35
+; CHECK-NEXT:    vcmpequw 2, 2, 3
 ; CHECK-NEXT:    blr
   %a = icmp eq <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = icmp eq <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -351,9 +351,9 @@ define <4 x i1> @all_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
 define <4 x i1> @all_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
 ; CHECK-LABEL: all_sign_bits_set_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xxlxor 36, 36, 36
 ; CHECK-NEXT:    xxland 34, 34, 35
-; CHECK-NEXT:    vcmpgtsw 2, 4, 2
+; CHECK-NEXT:    xxlxor 35, 35, 35
+; CHECK-NEXT:    vcmpgtsw 2, 3, 2
 ; CHECK-NEXT:    blr
   %a = icmp slt <4 x i32> %P, zeroinitializer
   %b = icmp slt <4 x i32> %Q, zeroinitializer
@@ -364,9 +364,9 @@ define <4 x i1> @all_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
 define <4 x i1> @any_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
 ; CHECK-LABEL: any_bits_set_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xxlxor 36, 36, 36
 ; CHECK-NEXT:    xxlor 34, 34, 35
-; CHECK-NEXT:    vcmpequw 2, 2, 4
+; CHECK-NEXT:    xxlxor 35, 35, 35
+; CHECK-NEXT:    vcmpequw 2, 2, 3
 ; CHECK-NEXT:    xxlnor 34, 34, 34
 ; CHECK-NEXT:    blr
   %a = icmp ne <4 x i32> %P, zeroinitializer
@@ -378,9 +378,9 @@ define <4 x i1> @any_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
 define <4 x i1> @any_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
 ; CHECK-LABEL: any_sign_bits_set_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xxlxor 36, 36, 36
 ; CHECK-NEXT:    xxlor 34, 34, 35
-; CHECK-NEXT:    vcmpgtsw 2, 4, 2
+; CHECK-NEXT:    xxlxor 35, 35, 35
+; CHECK-NEXT:    vcmpgtsw 2, 3, 2
 ; CHECK-NEXT:    blr
   %a = icmp slt <4 x i32> %P, zeroinitializer
   %b = icmp slt <4 x i32> %Q, zeroinitializer
@@ -391,9 +391,9 @@ define <4 x i1> @any_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
 define <4 x i1> @any_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
 ; CHECK-LABEL: any_bits_clear_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xxleqv 36, 36, 36
 ; CHECK-NEXT:    xxland 34, 34, 35
-; CHECK-NEXT:    vcmpequw 2, 2, 4
+; CHECK-NEXT:    xxleqv 35, 35, 35
+; CHECK-NEXT:    vcmpequw 2, 2, 3
 ; CHECK-NEXT:    xxlnor 34, 34, 34
 ; CHECK-NEXT:    blr
   %a = icmp ne <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -405,9 +405,9 @@ define <4 x i1> @any_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
 define <4 x i1> @any_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
 ; CHECK-LABEL: any_sign_bits_clear_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xxleqv 36, 36, 36
 ; CHECK-NEXT:    xxland 34, 34, 35
-; CHECK-NEXT:    vcmpgtsw 2, 2, 4
+; CHECK-NEXT:    xxleqv 35, 35, 35
+; CHECK-NEXT:    vcmpgtsw 2, 2, 3
 ; CHECK-NEXT:    blr
   %a = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>

diff  --git a/llvm/test/CodeGen/PowerPC/setcc-vector.ll b/llvm/test/CodeGen/PowerPC/setcc-vector.ll
index 5917ccabf84ed24..c9775fbef3ae85d 100644
--- a/llvm/test/CodeGen/PowerPC/setcc-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/setcc-vector.ll
@@ -20,12 +20,12 @@ define <1 x i64> @setcc_v1i128(<1 x i128> %a) {
 ;
 ; CHECK-PWR8-LABEL: setcc_v1i128:
 ; CHECK-PWR8:       # %bb.0: # %entry
-; CHECK-PWR8-NEXT:    xxswapd vs0, vs34
 ; CHECK-PWR8-NEXT:    mfvsrd r3, vs34
+; CHECK-PWR8-NEXT:    xxswapd vs0, vs34
 ; CHECK-PWR8-NEXT:    cmpdi r3, 0
+; CHECK-PWR8-NEXT:    mffprd r3, f0
+; CHECK-PWR8-NEXT:    cmpldi cr1, r3, 35708
 ; CHECK-PWR8-NEXT:    li r3, 1
-; CHECK-PWR8-NEXT:    mffprd r4, f0
-; CHECK-PWR8-NEXT:    cmpldi cr1, r4, 35708
 ; CHECK-PWR8-NEXT:    crnand 4*cr5+lt, eq, 4*cr1+lt
 ; CHECK-PWR8-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; CHECK-PWR8-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/sext-vector-inreg.ll b/llvm/test/CodeGen/PowerPC/sext-vector-inreg.ll
index 11d4d15293d4ade..0725eb27b57b024 100644
--- a/llvm/test/CodeGen/PowerPC/sext-vector-inreg.ll
+++ b/llvm/test/CodeGen/PowerPC/sext-vector-inreg.ll
@@ -10,8 +10,8 @@ define <4 x i32> @test_signext_vector_inreg(<4 x i16> %n) {
 ;
 ; CHECK-P8-LABEL: test_signext_vector_inreg:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    vmrglh 2, 2, 2
 ; CHECK-P8-NEXT:    vspltisw 3, 8
+; CHECK-P8-NEXT:    vmrglh 2, 2, 2
 ; CHECK-P8-NEXT:    vadduwm 3, 3, 3
 ; CHECK-P8-NEXT:    vslw 2, 2, 3
 ; CHECK-P8-NEXT:    vsraw 2, 2, 3

diff  --git a/llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll b/llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll
index 8a2927d0f451d89..128d546d176f8b7 100644
--- a/llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll
@@ -68,8 +68,8 @@ top:
 define i16 @noSEXTLoad(ptr %p) #0 {
 ; CHECK-LABEL: noSEXTLoad:
 ; CHECK:       # %bb.0: # %top
-; CHECK-NEXT:    lha 3, 0(3)
 ; CHECK-NEXT:    li 4, 0
+; CHECK-NEXT:    lha 3, 0(3)
 ; CHECK-NEXT:    sth 4, -4(1)
 ; CHECK-NEXT:    addi 4, 1, -4
 ; CHECK-NEXT:    lwsync

diff  --git a/llvm/test/CodeGen/PowerPC/signbit-shift.ll b/llvm/test/CodeGen/PowerPC/signbit-shift.ll
index 4f1009c00edde06..e8cedd47d812db6 100644
--- a/llvm/test/CodeGen/PowerPC/signbit-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/signbit-shift.ll
@@ -29,10 +29,10 @@ define i32 @add_zext_ifpos(i32 %x) {
 define <4 x i32> @add_zext_ifpos_vec_splat(<4 x i32> %x) {
 ; CHECK-LABEL: add_zext_ifpos_vec_splat:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xxleqv 35, 35, 35
 ; CHECK-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI2_0@toc@l
+; CHECK-NEXT:    xxleqv 35, 35, 35
 ; CHECK-NEXT:    vcmpgtsw 2, 2, 3
+; CHECK-NEXT:    addi 3, 3, .LCPI2_0@toc@l
 ; CHECK-NEXT:    lxvd2x 35, 0, 3
 ; CHECK-NEXT:    vsubuwm 2, 3, 2
 ; CHECK-NEXT:    blr
@@ -80,10 +80,10 @@ define i32 @add_sext_ifpos(i32 %x) {
 define <4 x i32> @add_sext_ifpos_vec_splat(<4 x i32> %x) {
 ; CHECK-LABEL: add_sext_ifpos_vec_splat:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xxleqv 35, 35, 35
 ; CHECK-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI6_0@toc@l
+; CHECK-NEXT:    xxleqv 35, 35, 35
 ; CHECK-NEXT:    vcmpgtsw 2, 2, 3
+; CHECK-NEXT:    addi 3, 3, .LCPI6_0@toc@l
 ; CHECK-NEXT:    lxvd2x 35, 0, 3
 ; CHECK-NEXT:    vadduwm 2, 2, 3
 ; CHECK-NEXT:    blr
@@ -191,8 +191,8 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
 ; CHECK-NEXT:    vspltisw 3, -16
 ; CHECK-NEXT:    vspltisw 4, 15
 ; CHECK-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI15_0@toc@l
 ; CHECK-NEXT:    vsubuwm 3, 4, 3
+; CHECK-NEXT:    addi 3, 3, .LCPI15_0@toc@l
 ; CHECK-NEXT:    vsraw 2, 2, 3
 ; CHECK-NEXT:    lxvd2x 35, 0, 3
 ; CHECK-NEXT:    vadduwm 2, 2, 3
@@ -221,8 +221,8 @@ define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) {
 ; CHECK-NEXT:    vspltisw 3, -16
 ; CHECK-NEXT:    vspltisw 4, 15
 ; CHECK-NEXT:    addis 3, 2, .LCPI17_0@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI17_0@toc@l
 ; CHECK-NEXT:    vsubuwm 3, 4, 3
+; CHECK-NEXT:    addi 3, 3, .LCPI17_0@toc@l
 ; CHECK-NEXT:    vsrw 2, 2, 3
 ; CHECK-NEXT:    lxvd2x 35, 0, 3
 ; CHECK-NEXT:    vadduwm 2, 2, 3
@@ -275,8 +275,8 @@ define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) {
 ; CHECK-NEXT:    vspltisw 3, -16
 ; CHECK-NEXT:    vspltisw 4, 15
 ; CHECK-NEXT:    addis 3, 2, .LCPI21_0@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI21_0@toc@l
 ; CHECK-NEXT:    vsubuwm 3, 4, 3
+; CHECK-NEXT:    addi 3, 3, .LCPI21_0@toc@l
 ; CHECK-NEXT:    vsraw 2, 2, 3
 ; CHECK-NEXT:    lxvd2x 35, 0, 3
 ; CHECK-NEXT:    vadduwm 2, 2, 3

diff  --git a/llvm/test/CodeGen/PowerPC/sms-remark.ll b/llvm/test/CodeGen/PowerPC/sms-remark.ll
index 11e324b4f1ba346..7fb0ffd6dc4d222 100644
--- a/llvm/test/CodeGen/PowerPC/sms-remark.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-remark.ll
@@ -5,6 +5,10 @@
 ; RUN: llc < %s -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:       -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr8 --ppc-enable-pipeliner \
 ; RUN:       -pass-remarks-analysis=pipeliner -pass-remarks=pipeliner -o /dev/null 2>&1 \
+; RUN:       | FileCheck %s --check-prefix=ENABLED
+; RUN: llc < %s -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:       -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr7 --ppc-enable-pipeliner \
+; RUN:       -pass-remarks-analysis=pipeliner -pass-remarks=pipeliner -o /dev/null 2>&1 \
 ; RUN:       | FileCheck %s --allow-empty --check-prefix=DISABLED
 
 @x = dso_local local_unnamed_addr global <{ i32, i32, i32, i32, [1020 x i32] }> <{ i32 1, i32 2, i32 3, i32 4, [1020 x i32] zeroinitializer }>, align 4

diff  --git a/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
index e7a0f149ac976d9..65068d14e160055 100644
--- a/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
@@ -64,20 +64,20 @@ define i1 @test_srem_even(i4 %X) nounwind {
 ;
 ; PPC64LE-LABEL: test_srem_even:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    slwi 5, 3, 28
-; PPC64LE-NEXT:    li 4, 0
-; PPC64LE-NEXT:    srawi 5, 5, 28
-; PPC64LE-NEXT:    slwi 6, 5, 1
-; PPC64LE-NEXT:    add 5, 5, 6
-; PPC64LE-NEXT:    rlwinm 6, 5, 25, 31, 31
-; PPC64LE-NEXT:    srwi 5, 5, 4
-; PPC64LE-NEXT:    add 5, 5, 6
-; PPC64LE-NEXT:    mulli 5, 5, 6
-; PPC64LE-NEXT:    sub 3, 3, 5
+; PPC64LE-NEXT:    slwi 4, 3, 28
+; PPC64LE-NEXT:    srawi 4, 4, 28
+; PPC64LE-NEXT:    slwi 5, 4, 1
+; PPC64LE-NEXT:    add 4, 4, 5
+; PPC64LE-NEXT:    rlwinm 5, 4, 25, 31, 31
+; PPC64LE-NEXT:    srwi 4, 4, 4
+; PPC64LE-NEXT:    add 4, 4, 5
+; PPC64LE-NEXT:    mulli 4, 4, 6
+; PPC64LE-NEXT:    sub 3, 3, 4
+; PPC64LE-NEXT:    li 4, 1
 ; PPC64LE-NEXT:    clrlwi 3, 3, 28
 ; PPC64LE-NEXT:    cmpwi 3, 1
-; PPC64LE-NEXT:    li 3, 1
-; PPC64LE-NEXT:    iseleq 3, 3, 4
+; PPC64LE-NEXT:    li 3, 0
+; PPC64LE-NEXT:    iseleq 3, 4, 3
 ; PPC64LE-NEXT:    blr
   %srem = srem i4 %X, 6
   %cmp = icmp eq i4 %srem, 1
@@ -187,60 +187,60 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lis 6, 1820
 ; PPC64LE-NEXT:    sldi 3, 3, 31
-; PPC64LE-NEXT:    ori 6, 6, 29127
 ; PPC64LE-NEXT:    sldi 4, 4, 31
-; PPC64LE-NEXT:    rldic 6, 6, 34, 3
+; PPC64LE-NEXT:    sldi 5, 5, 31
+; PPC64LE-NEXT:    ori 6, 6, 29127
 ; PPC64LE-NEXT:    sradi 3, 3, 31
-; PPC64LE-NEXT:    oris 6, 6, 29127
 ; PPC64LE-NEXT:    sradi 4, 4, 31
+; PPC64LE-NEXT:    sradi 5, 5, 31
+; PPC64LE-NEXT:    rldic 6, 6, 34, 3
+; PPC64LE-NEXT:    oris 6, 6, 29127
 ; PPC64LE-NEXT:    ori 7, 6, 7282
-; PPC64LE-NEXT:    sldi 5, 5, 31
-; PPC64LE-NEXT:    ori 6, 6, 7281
 ; PPC64LE-NEXT:    mulhd 8, 3, 7
-; PPC64LE-NEXT:    mulhd 7, 4, 7
-; PPC64LE-NEXT:    sradi 5, 5, 31
-; PPC64LE-NEXT:    mulhd 6, 5, 6
 ; PPC64LE-NEXT:    rldicl 9, 8, 1, 63
-; PPC64LE-NEXT:    rldicl 10, 7, 1, 63
 ; PPC64LE-NEXT:    add 8, 8, 9
-; PPC64LE-NEXT:    add 7, 7, 10
-; PPC64LE-NEXT:    sldi 10, 8, 3
-; PPC64LE-NEXT:    sub 6, 6, 5
-; PPC64LE-NEXT:    add 8, 8, 10
-; PPC64LE-NEXT:    sldi 10, 7, 3
-; PPC64LE-NEXT:    rldicl 9, 6, 1, 63
-; PPC64LE-NEXT:    add 7, 7, 10
+; PPC64LE-NEXT:    sldi 9, 8, 3
+; PPC64LE-NEXT:    add 8, 8, 9
 ; PPC64LE-NEXT:    sub 3, 3, 8
-; PPC64LE-NEXT:    addis 8, 2, .LCPI3_1@toc@ha
-; PPC64LE-NEXT:    sradi 6, 6, 3
-; PPC64LE-NEXT:    sub 4, 4, 7
 ; PPC64LE-NEXT:    mtfprd 0, 3
-; PPC64LE-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
-; PPC64LE-NEXT:    addi 7, 8, .LCPI3_1@toc@l
-; PPC64LE-NEXT:    add 6, 6, 9
-; PPC64LE-NEXT:    mtfprd 1, 4
-; PPC64LE-NEXT:    addi 3, 3, .LCPI3_0@toc@l
-; PPC64LE-NEXT:    lxvd2x 2, 0, 7
-; PPC64LE-NEXT:    sldi 8, 6, 3
-; PPC64LE-NEXT:    lxvd2x 3, 0, 3
-; PPC64LE-NEXT:    add 4, 6, 8
-; PPC64LE-NEXT:    addis 6, 2, .LCPI3_2@toc@ha
+; PPC64LE-NEXT:    mulhd 3, 4, 7
+; PPC64LE-NEXT:    rldicl 7, 3, 1, 63
+; PPC64LE-NEXT:    add 3, 3, 7
+; PPC64LE-NEXT:    sldi 7, 3, 3
+; PPC64LE-NEXT:    add 3, 3, 7
+; PPC64LE-NEXT:    sub 3, 4, 3
+; PPC64LE-NEXT:    mtfprd 1, 3
+; PPC64LE-NEXT:    ori 3, 6, 7281
+; PPC64LE-NEXT:    mulhd 3, 5, 3
+; PPC64LE-NEXT:    sub 3, 3, 5
+; PPC64LE-NEXT:    rldicl 4, 3, 1, 63
+; PPC64LE-NEXT:    sradi 3, 3, 3
+; PPC64LE-NEXT:    add 3, 3, 4
+; PPC64LE-NEXT:    sldi 4, 3, 3
+; PPC64LE-NEXT:    add 3, 3, 4
+; PPC64LE-NEXT:    add 3, 5, 3
 ; PPC64LE-NEXT:    xxmrghd 34, 1, 0
-; PPC64LE-NEXT:    add 3, 5, 4
-; PPC64LE-NEXT:    addi 4, 6, .LCPI3_2@toc@l
-; PPC64LE-NEXT:    xxswapd 35, 2
 ; PPC64LE-NEXT:    mtfprd 0, 3
-; PPC64LE-NEXT:    lxvd2x 1, 0, 4
-; PPC64LE-NEXT:    xxland 34, 34, 3
+; PPC64LE-NEXT:    addis 3, 2, .LCPI3_1@toc@ha
+; PPC64LE-NEXT:    addi 3, 3, .LCPI3_1@toc@l
+; PPC64LE-NEXT:    xxswapd 35, 0
+; PPC64LE-NEXT:    lxvd2x 0, 0, 3
+; PPC64LE-NEXT:    addis 3, 2, .LCPI3_2@toc@ha
+; PPC64LE-NEXT:    addi 3, 3, .LCPI3_2@toc@l
 ; PPC64LE-NEXT:    xxswapd 36, 0
-; PPC64LE-NEXT:    vcmpequd 2, 2, 3
-; PPC64LE-NEXT:    xxswapd 35, 1
-; PPC64LE-NEXT:    xxland 36, 36, 3
-; PPC64LE-NEXT:    vcmpequd 3, 4, 3
+; PPC64LE-NEXT:    lxvd2x 0, 0, 3
+; PPC64LE-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
+; PPC64LE-NEXT:    addi 3, 3, .LCPI3_0@toc@l
+; PPC64LE-NEXT:    xxswapd 37, 0
+; PPC64LE-NEXT:    lxvd2x 0, 0, 3
+; PPC64LE-NEXT:    xxland 34, 34, 0
+; PPC64LE-NEXT:    xxland 35, 35, 0
+; PPC64LE-NEXT:    vcmpequd 2, 2, 4
 ; PPC64LE-NEXT:    xxlnor 0, 34, 34
-; PPC64LE-NEXT:    xxswapd 1, 0
+; PPC64LE-NEXT:    vcmpequd 2, 3, 5
+; PPC64LE-NEXT:    xxlnor 34, 34, 34
 ; PPC64LE-NEXT:    mffprwz 4, 0
-; PPC64LE-NEXT:    xxlnor 34, 35, 35
+; PPC64LE-NEXT:    xxswapd 1, 0
 ; PPC64LE-NEXT:    mffprwz 3, 1
 ; PPC64LE-NEXT:    xxswapd 2, 34
 ; PPC64LE-NEXT:    mffprwz 5, 2

diff  --git a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
index 558ab57e1ecb053..a263b56ce70ceb9 100644
--- a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll
@@ -129,113 +129,113 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
 ; P8LE-LABEL: fold_srem_vec_1:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r3, 21399
-; P8LE-NEXT:    lis r8, -16728
-; P8LE-NEXT:    lis r9, -21386
-; P8LE-NEXT:    lis r10, 31710
-; P8LE-NEXT:    ori r3, r3, 33437
-; P8LE-NEXT:    ori r8, r8, 63249
-; P8LE-NEXT:    ori r9, r9, 37253
-; P8LE-NEXT:    ori r10, r10, 63421
-; P8LE-NEXT:    mffprd r4, f0
-; P8LE-NEXT:    rldicl r5, r4, 32, 48
-; P8LE-NEXT:    rldicl r6, r4, 16, 48
-; P8LE-NEXT:    clrldi r7, r4, 48
-; P8LE-NEXT:    extsh r5, r5
-; P8LE-NEXT:    extsh r6, r6
-; P8LE-NEXT:    rldicl r4, r4, 48, 48
+; P8LE-NEXT:    lis r4, 21399
+; P8LE-NEXT:    lis r5, -16728
+; P8LE-NEXT:    lis r6, -21386
+; P8LE-NEXT:    mffprd r3, f0
+; P8LE-NEXT:    ori r4, r4, 33437
+; P8LE-NEXT:    ori r5, r5, 63249
+; P8LE-NEXT:    ori r6, r6, 37253
+; P8LE-NEXT:    rldicl r7, r3, 32, 48
+; P8LE-NEXT:    rldicl r8, r3, 16, 48
+; P8LE-NEXT:    clrldi r9, r3, 48
+; P8LE-NEXT:    rldicl r3, r3, 48, 48
 ; P8LE-NEXT:    extsh r7, r7
-; P8LE-NEXT:    mulhw r3, r5, r3
-; P8LE-NEXT:    extsh r4, r4
-; P8LE-NEXT:    mulhw r8, r6, r8
-; P8LE-NEXT:    mulhw r9, r7, r9
-; P8LE-NEXT:    mulhw r10, r4, r10
-; P8LE-NEXT:    srwi r11, r3, 31
-; P8LE-NEXT:    srawi r3, r3, 5
-; P8LE-NEXT:    add r3, r3, r11
-; P8LE-NEXT:    srwi r11, r8, 31
-; P8LE-NEXT:    add r9, r9, r7
-; P8LE-NEXT:    srawi r8, r8, 8
-; P8LE-NEXT:    sub r10, r10, r4
-; P8LE-NEXT:    add r8, r8, r11
-; P8LE-NEXT:    srwi r11, r9, 31
-; P8LE-NEXT:    srawi r9, r9, 6
-; P8LE-NEXT:    mulli r3, r3, 98
-; P8LE-NEXT:    add r9, r9, r11
-; P8LE-NEXT:    srwi r11, r10, 31
-; P8LE-NEXT:    srawi r10, r10, 6
-; P8LE-NEXT:    mulli r8, r8, -1003
-; P8LE-NEXT:    add r10, r10, r11
-; P8LE-NEXT:    mulli r9, r9, 95
-; P8LE-NEXT:    mulli r10, r10, -124
-; P8LE-NEXT:    sub r3, r5, r3
-; P8LE-NEXT:    mtvsrd v2, r3
-; P8LE-NEXT:    sub r5, r6, r8
-; P8LE-NEXT:    sub r3, r7, r9
+; P8LE-NEXT:    extsh r8, r8
+; P8LE-NEXT:    extsh r9, r9
+; P8LE-NEXT:    extsh r3, r3
+; P8LE-NEXT:    mulhw r4, r7, r4
+; P8LE-NEXT:    mulhw r5, r8, r5
+; P8LE-NEXT:    mulhw r6, r9, r6
+; P8LE-NEXT:    srwi r10, r4, 31
+; P8LE-NEXT:    srawi r4, r4, 5
+; P8LE-NEXT:    add r6, r6, r9
+; P8LE-NEXT:    add r4, r4, r10
+; P8LE-NEXT:    srwi r10, r5, 31
+; P8LE-NEXT:    srawi r5, r5, 8
+; P8LE-NEXT:    mulli r4, r4, 98
+; P8LE-NEXT:    add r5, r5, r10
+; P8LE-NEXT:    srwi r10, r6, 31
+; P8LE-NEXT:    srawi r6, r6, 6
+; P8LE-NEXT:    add r6, r6, r10
+; P8LE-NEXT:    mulli r5, r5, -1003
+; P8LE-NEXT:    sub r4, r7, r4
+; P8LE-NEXT:    mtvsrd v2, r4
+; P8LE-NEXT:    mulli r4, r6, 95
+; P8LE-NEXT:    sub r5, r8, r5
 ; P8LE-NEXT:    mtvsrd v3, r5
-; P8LE-NEXT:    sub r4, r4, r10
-; P8LE-NEXT:    mtvsrd v4, r3
-; P8LE-NEXT:    mtvsrd v5, r4
+; P8LE-NEXT:    sub r4, r9, r4
+; P8LE-NEXT:    mtvsrd v4, r4
+; P8LE-NEXT:    lis r4, 31710
+; P8LE-NEXT:    ori r4, r4, 63421
+; P8LE-NEXT:    mulhw r4, r3, r4
+; P8LE-NEXT:    sub r4, r4, r3
+; P8LE-NEXT:    srwi r5, r4, 31
+; P8LE-NEXT:    srawi r4, r4, 6
+; P8LE-NEXT:    add r4, r4, r5
+; P8LE-NEXT:    mulli r4, r4, -124
+; P8LE-NEXT:    sub r3, r3, r4
 ; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    vmrghh v3, v5, v4
+; P8LE-NEXT:    mtvsrd v3, r3
+; P8LE-NEXT:    vmrghh v3, v3, v4
 ; P8LE-NEXT:    xxmrglw v2, v2, v3
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fold_srem_vec_1:
 ; P8BE:       # %bb.0:
-; P8BE-NEXT:    mfvsrd r4, v2
-; P8BE-NEXT:    lis r3, -16728
-; P8BE-NEXT:    lis r8, 21399
-; P8BE-NEXT:    lis r9, 31710
-; P8BE-NEXT:    lis r10, -21386
-; P8BE-NEXT:    ori r3, r3, 63249
-; P8BE-NEXT:    ori r8, r8, 33437
-; P8BE-NEXT:    ori r9, r9, 63421
-; P8BE-NEXT:    ori r10, r10, 37253
-; P8BE-NEXT:    clrldi r5, r4, 48
-; P8BE-NEXT:    rldicl r6, r4, 48, 48
-; P8BE-NEXT:    rldicl r7, r4, 32, 48
-; P8BE-NEXT:    extsh r5, r5
+; P8BE-NEXT:    mfvsrd r3, v2
+; P8BE-NEXT:    addis r6, r2, .LCPI0_0@toc@ha
+; P8BE-NEXT:    lis r4, -16728
+; P8BE-NEXT:    lis r5, 21399
+; P8BE-NEXT:    lis r7, 31710
+; P8BE-NEXT:    addi r6, r6, .LCPI0_0@toc@l
+; P8BE-NEXT:    ori r4, r4, 63249
+; P8BE-NEXT:    ori r5, r5, 33437
+; P8BE-NEXT:    ori r7, r7, 63421
+; P8BE-NEXT:    lxvw4x v2, 0, r6
+; P8BE-NEXT:    clrldi r6, r3, 48
+; P8BE-NEXT:    rldicl r8, r3, 48, 48
+; P8BE-NEXT:    rldicl r9, r3, 32, 48
+; P8BE-NEXT:    rldicl r3, r3, 16, 48
 ; P8BE-NEXT:    extsh r6, r6
-; P8BE-NEXT:    rldicl r4, r4, 16, 48
-; P8BE-NEXT:    extsh r7, r7
-; P8BE-NEXT:    mulhw r3, r5, r3
-; P8BE-NEXT:    extsh r4, r4
-; P8BE-NEXT:    mulhw r8, r6, r8
-; P8BE-NEXT:    mulhw r9, r7, r9
-; P8BE-NEXT:    mulhw r10, r4, r10
-; P8BE-NEXT:    srwi r11, r3, 31
-; P8BE-NEXT:    srawi r3, r3, 8
-; P8BE-NEXT:    add r3, r3, r11
-; P8BE-NEXT:    srwi r11, r8, 31
-; P8BE-NEXT:    sub r9, r9, r7
-; P8BE-NEXT:    srawi r8, r8, 5
-; P8BE-NEXT:    add r10, r10, r4
-; P8BE-NEXT:    add r8, r8, r11
-; P8BE-NEXT:    srwi r11, r9, 31
-; P8BE-NEXT:    srawi r9, r9, 6
-; P8BE-NEXT:    mulli r3, r3, -1003
-; P8BE-NEXT:    add r9, r9, r11
-; P8BE-NEXT:    srwi r11, r10, 31
-; P8BE-NEXT:    srawi r10, r10, 6
-; P8BE-NEXT:    mulli r8, r8, 98
-; P8BE-NEXT:    add r10, r10, r11
-; P8BE-NEXT:    mulli r9, r9, -124
-; P8BE-NEXT:    mulli r10, r10, 95
-; P8BE-NEXT:    sub r3, r5, r3
-; P8BE-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
-; P8BE-NEXT:    mtvsrwz v2, r3
-; P8BE-NEXT:    addi r3, r5, .LCPI0_0@toc@l
-; P8BE-NEXT:    sub r6, r6, r8
-; P8BE-NEXT:    lxvw4x v3, 0, r3
-; P8BE-NEXT:    sub r3, r7, r9
-; P8BE-NEXT:    mtvsrwz v4, r6
-; P8BE-NEXT:    sub r4, r4, r10
-; P8BE-NEXT:    mtvsrwz v5, r3
-; P8BE-NEXT:    mtvsrwz v0, r4
-; P8BE-NEXT:    vperm v2, v4, v2, v3
-; P8BE-NEXT:    vperm v3, v0, v5, v3
-; P8BE-NEXT:    xxmrghw v2, v3, v2
+; P8BE-NEXT:    extsh r8, r8
+; P8BE-NEXT:    extsh r9, r9
+; P8BE-NEXT:    extsh r3, r3
+; P8BE-NEXT:    mulhw r4, r6, r4
+; P8BE-NEXT:    mulhw r5, r8, r5
+; P8BE-NEXT:    mulhw r7, r9, r7
+; P8BE-NEXT:    srwi r10, r4, 31
+; P8BE-NEXT:    srawi r4, r4, 8
+; P8BE-NEXT:    sub r7, r7, r9
+; P8BE-NEXT:    add r4, r4, r10
+; P8BE-NEXT:    srwi r10, r5, 31
+; P8BE-NEXT:    srawi r5, r5, 5
+; P8BE-NEXT:    mulli r4, r4, -1003
+; P8BE-NEXT:    add r5, r5, r10
+; P8BE-NEXT:    srwi r10, r7, 31
+; P8BE-NEXT:    srawi r7, r7, 6
+; P8BE-NEXT:    add r7, r7, r10
+; P8BE-NEXT:    mulli r5, r5, 98
+; P8BE-NEXT:    sub r4, r6, r4
+; P8BE-NEXT:    mtvsrwz v3, r4
+; P8BE-NEXT:    mulli r4, r7, -124
+; P8BE-NEXT:    sub r5, r8, r5
+; P8BE-NEXT:    mtvsrwz v4, r5
+; P8BE-NEXT:    sub r4, r9, r4
+; P8BE-NEXT:    mtvsrwz v5, r4
+; P8BE-NEXT:    lis r4, -21386
+; P8BE-NEXT:    ori r4, r4, 37253
+; P8BE-NEXT:    mulhw r4, r3, r4
+; P8BE-NEXT:    add r4, r4, r3
+; P8BE-NEXT:    srwi r5, r4, 31
+; P8BE-NEXT:    srawi r4, r4, 6
+; P8BE-NEXT:    add r4, r4, r5
+; P8BE-NEXT:    mulli r4, r4, 95
+; P8BE-NEXT:    sub r3, r3, r4
+; P8BE-NEXT:    vperm v3, v4, v3, v2
+; P8BE-NEXT:    mtvsrwz v4, r3
+; P8BE-NEXT:    vperm v2, v4, v5, v2
+; P8BE-NEXT:    xxmrghw v2, v2, v3
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
   ret <4 x i16> %1
@@ -354,27 +354,27 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P8LE-LABEL: fold_srem_vec_2:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r3, -21386
-; P8LE-NEXT:    ori r3, r3, 37253
-; P8LE-NEXT:    mffprd r4, f0
-; P8LE-NEXT:    clrldi r5, r4, 48
-; P8LE-NEXT:    rldicl r6, r4, 48, 48
+; P8LE-NEXT:    lis r4, -21386
+; P8LE-NEXT:    mffprd r3, f0
+; P8LE-NEXT:    ori r4, r4, 37253
+; P8LE-NEXT:    clrldi r5, r3, 48
+; P8LE-NEXT:    rldicl r6, r3, 48, 48
+; P8LE-NEXT:    rldicl r7, r3, 32, 48
+; P8LE-NEXT:    rldicl r3, r3, 16, 48
 ; P8LE-NEXT:    extsh r5, r5
-; P8LE-NEXT:    rldicl r7, r4, 32, 48
 ; P8LE-NEXT:    extsh r6, r6
-; P8LE-NEXT:    mulhw r8, r5, r3
-; P8LE-NEXT:    rldicl r4, r4, 16, 48
 ; P8LE-NEXT:    extsh r7, r7
-; P8LE-NEXT:    mulhw r9, r6, r3
-; P8LE-NEXT:    extsh r4, r4
-; P8LE-NEXT:    mulhw r10, r7, r3
-; P8LE-NEXT:    mulhw r3, r4, r3
+; P8LE-NEXT:    extsh r3, r3
+; P8LE-NEXT:    mulhw r8, r5, r4
+; P8LE-NEXT:    mulhw r9, r6, r4
+; P8LE-NEXT:    mulhw r10, r7, r4
+; P8LE-NEXT:    mulhw r4, r3, r4
 ; P8LE-NEXT:    add r8, r8, r5
 ; P8LE-NEXT:    add r9, r9, r6
+; P8LE-NEXT:    add r10, r10, r7
+; P8LE-NEXT:    add r4, r4, r3
 ; P8LE-NEXT:    srwi r11, r8, 31
 ; P8LE-NEXT:    srawi r8, r8, 6
-; P8LE-NEXT:    add r10, r10, r7
-; P8LE-NEXT:    add r3, r3, r4
 ; P8LE-NEXT:    add r8, r8, r11
 ; P8LE-NEXT:    srwi r11, r9, 31
 ; P8LE-NEXT:    srawi r9, r9, 6
@@ -382,49 +382,49 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P8LE-NEXT:    add r9, r9, r11
 ; P8LE-NEXT:    srwi r11, r10, 31
 ; P8LE-NEXT:    srawi r10, r10, 6
-; P8LE-NEXT:    mulli r9, r9, 95
 ; P8LE-NEXT:    add r10, r10, r11
-; P8LE-NEXT:    srwi r11, r3, 31
-; P8LE-NEXT:    srawi r3, r3, 6
-; P8LE-NEXT:    mulli r10, r10, 95
+; P8LE-NEXT:    srwi r11, r4, 31
+; P8LE-NEXT:    srawi r4, r4, 6
+; P8LE-NEXT:    add r4, r4, r11
 ; P8LE-NEXT:    sub r5, r5, r8
-; P8LE-NEXT:    add r3, r3, r11
+; P8LE-NEXT:    mulli r8, r9, 95
+; P8LE-NEXT:    mulli r4, r4, 95
 ; P8LE-NEXT:    mtvsrd v2, r5
-; P8LE-NEXT:    mulli r3, r3, 95
-; P8LE-NEXT:    sub r6, r6, r9
+; P8LE-NEXT:    sub r6, r6, r8
+; P8LE-NEXT:    mulli r8, r10, 95
+; P8LE-NEXT:    sub r3, r3, r4
 ; P8LE-NEXT:    mtvsrd v3, r6
-; P8LE-NEXT:    sub r5, r7, r10
-; P8LE-NEXT:    mtvsrd v4, r5
-; P8LE-NEXT:    sub r3, r4, r3
+; P8LE-NEXT:    sub r7, r7, r8
+; P8LE-NEXT:    mtvsrd v4, r7
 ; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    mtvsrd v5, r3
-; P8LE-NEXT:    vmrghh v3, v5, v4
+; P8LE-NEXT:    mtvsrd v3, r3
+; P8LE-NEXT:    vmrghh v3, v3, v4
 ; P8LE-NEXT:    xxmrglw v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fold_srem_vec_2:
 ; P8BE:       # %bb.0:
-; P8BE-NEXT:    mfvsrd r4, v2
-; P8BE-NEXT:    lis r3, -21386
-; P8BE-NEXT:    ori r3, r3, 37253
-; P8BE-NEXT:    clrldi r5, r4, 48
-; P8BE-NEXT:    rldicl r6, r4, 48, 48
+; P8BE-NEXT:    mfvsrd r3, v2
+; P8BE-NEXT:    lis r4, -21386
+; P8BE-NEXT:    ori r4, r4, 37253
+; P8BE-NEXT:    clrldi r5, r3, 48
+; P8BE-NEXT:    rldicl r6, r3, 48, 48
+; P8BE-NEXT:    rldicl r7, r3, 32, 48
+; P8BE-NEXT:    rldicl r3, r3, 16, 48
 ; P8BE-NEXT:    extsh r5, r5
-; P8BE-NEXT:    rldicl r7, r4, 32, 48
 ; P8BE-NEXT:    extsh r6, r6
-; P8BE-NEXT:    mulhw r8, r5, r3
-; P8BE-NEXT:    rldicl r4, r4, 16, 48
 ; P8BE-NEXT:    extsh r7, r7
-; P8BE-NEXT:    mulhw r9, r6, r3
-; P8BE-NEXT:    extsh r4, r4
-; P8BE-NEXT:    mulhw r10, r7, r3
-; P8BE-NEXT:    mulhw r3, r4, r3
+; P8BE-NEXT:    extsh r3, r3
+; P8BE-NEXT:    mulhw r8, r5, r4
+; P8BE-NEXT:    mulhw r9, r6, r4
+; P8BE-NEXT:    mulhw r10, r7, r4
+; P8BE-NEXT:    mulhw r4, r3, r4
 ; P8BE-NEXT:    add r8, r8, r5
 ; P8BE-NEXT:    add r9, r9, r6
+; P8BE-NEXT:    add r10, r10, r7
+; P8BE-NEXT:    add r4, r4, r3
 ; P8BE-NEXT:    srwi r11, r8, 31
 ; P8BE-NEXT:    srawi r8, r8, 6
-; P8BE-NEXT:    add r10, r10, r7
-; P8BE-NEXT:    add r3, r3, r4
 ; P8BE-NEXT:    add r8, r8, r11
 ; P8BE-NEXT:    srwi r11, r9, 31
 ; P8BE-NEXT:    srawi r9, r9, 6
@@ -432,27 +432,27 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
 ; P8BE-NEXT:    add r9, r9, r11
 ; P8BE-NEXT:    srwi r11, r10, 31
 ; P8BE-NEXT:    srawi r10, r10, 6
-; P8BE-NEXT:    mulli r9, r9, 95
 ; P8BE-NEXT:    add r10, r10, r11
-; P8BE-NEXT:    srwi r11, r3, 31
-; P8BE-NEXT:    srawi r3, r3, 6
-; P8BE-NEXT:    mulli r10, r10, 95
+; P8BE-NEXT:    srwi r11, r4, 31
+; P8BE-NEXT:    srawi r4, r4, 6
+; P8BE-NEXT:    add r4, r4, r11
+; P8BE-NEXT:    addis r11, r2, .LCPI1_0@toc@ha
 ; P8BE-NEXT:    sub r5, r5, r8
-; P8BE-NEXT:    addis r8, r2, .LCPI1_0@toc@ha
-; P8BE-NEXT:    add r3, r3, r11
-; P8BE-NEXT:    mtvsrwz v2, r5
-; P8BE-NEXT:    addi r5, r8, .LCPI1_0@toc@l
-; P8BE-NEXT:    mulli r3, r3, 95
-; P8BE-NEXT:    sub r6, r6, r9
-; P8BE-NEXT:    lxvw4x v3, 0, r5
+; P8BE-NEXT:    mulli r8, r9, 95
+; P8BE-NEXT:    addi r11, r11, .LCPI1_0@toc@l
+; P8BE-NEXT:    mulli r4, r4, 95
+; P8BE-NEXT:    mtvsrwz v3, r5
+; P8BE-NEXT:    lxvw4x v2, 0, r11
+; P8BE-NEXT:    sub r6, r6, r8
+; P8BE-NEXT:    mulli r8, r10, 95
+; P8BE-NEXT:    sub r3, r3, r4
 ; P8BE-NEXT:    mtvsrwz v4, r6
-; P8BE-NEXT:    sub r5, r7, r10
-; P8BE-NEXT:    mtvsrwz v5, r5
-; P8BE-NEXT:    sub r3, r4, r3
-; P8BE-NEXT:    vperm v2, v4, v2, v3
-; P8BE-NEXT:    mtvsrwz v0, r3
-; P8BE-NEXT:    vperm v3, v0, v5, v3
-; P8BE-NEXT:    xxmrghw v2, v3, v2
+; P8BE-NEXT:    sub r7, r7, r8
+; P8BE-NEXT:    mtvsrwz v5, r7
+; P8BE-NEXT:    vperm v3, v4, v3, v2
+; P8BE-NEXT:    mtvsrwz v4, r3
+; P8BE-NEXT:    vperm v2, v4, v5, v2
+; P8BE-NEXT:    xxmrghw v2, v2, v3
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
   ret <4 x i16> %1
@@ -589,121 +589,119 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
 ; P8LE-LABEL: combine_srem_sdiv:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r3, -21386
-; P8LE-NEXT:    ori r3, r3, 37253
-; P8LE-NEXT:    mffprd r4, f0
-; P8LE-NEXT:    clrldi r5, r4, 48
-; P8LE-NEXT:    rldicl r6, r4, 48, 48
-; P8LE-NEXT:    rldicl r7, r4, 32, 48
-; P8LE-NEXT:    extsh r5, r5
+; P8LE-NEXT:    lis r4, -21386
+; P8LE-NEXT:    mffprd r3, f0
+; P8LE-NEXT:    ori r4, r4, 37253
+; P8LE-NEXT:    rldicl r6, r3, 48, 48
+; P8LE-NEXT:    rldicl r7, r3, 32, 48
+; P8LE-NEXT:    clrldi r5, r3, 48
+; P8LE-NEXT:    rldicl r3, r3, 16, 48
 ; P8LE-NEXT:    extsh r8, r6
 ; P8LE-NEXT:    extsh r9, r7
-; P8LE-NEXT:    mulhw r10, r5, r3
-; P8LE-NEXT:    mulhw r11, r8, r3
-; P8LE-NEXT:    rldicl r4, r4, 16, 48
-; P8LE-NEXT:    mulhw r12, r9, r3
-; P8LE-NEXT:    extsh r0, r4
-; P8LE-NEXT:    mulhw r3, r0, r3
-; P8LE-NEXT:    add r10, r10, r5
+; P8LE-NEXT:    extsh r5, r5
+; P8LE-NEXT:    extsh r10, r3
+; P8LE-NEXT:    mulhw r11, r8, r4
 ; P8LE-NEXT:    add r8, r11, r8
-; P8LE-NEXT:    srwi r11, r10, 31
-; P8LE-NEXT:    add r9, r12, r9
-; P8LE-NEXT:    srawi r10, r10, 6
-; P8LE-NEXT:    srawi r12, r8, 6
-; P8LE-NEXT:    srwi r8, r8, 31
-; P8LE-NEXT:    add r10, r10, r11
-; P8LE-NEXT:    add r3, r3, r0
-; P8LE-NEXT:    srawi r11, r9, 6
-; P8LE-NEXT:    srwi r9, r9, 31
-; P8LE-NEXT:    add r8, r12, r8
-; P8LE-NEXT:    mtvsrd v2, r10
-; P8LE-NEXT:    mulli r12, r10, 95
+; P8LE-NEXT:    mulhw r11, r9, r4
 ; P8LE-NEXT:    add r9, r11, r9
-; P8LE-NEXT:    srwi r11, r3, 31
+; P8LE-NEXT:    mulhw r11, r5, r4
+; P8LE-NEXT:    mulhw r4, r10, r4
+; P8LE-NEXT:    add r11, r11, r5
+; P8LE-NEXT:    add r4, r4, r10
+; P8LE-NEXT:    srwi r10, r11, 31
+; P8LE-NEXT:    srawi r11, r11, 6
+; P8LE-NEXT:    add r10, r11, r10
+; P8LE-NEXT:    srwi r11, r8, 31
+; P8LE-NEXT:    srawi r8, r8, 6
+; P8LE-NEXT:    add r8, r8, r11
+; P8LE-NEXT:    srwi r11, r9, 31
+; P8LE-NEXT:    srawi r9, r9, 6
+; P8LE-NEXT:    mtvsrd v2, r10
+; P8LE-NEXT:    add r9, r9, r11
+; P8LE-NEXT:    srwi r11, r4, 31
+; P8LE-NEXT:    srawi r4, r4, 6
 ; P8LE-NEXT:    mtvsrd v3, r8
-; P8LE-NEXT:    srawi r3, r3, 6
-; P8LE-NEXT:    mulli r10, r8, 95
-; P8LE-NEXT:    mtvsrd v4, r9
-; P8LE-NEXT:    add r3, r3, r11
-; P8LE-NEXT:    mulli r8, r9, 95
-; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    mulli r9, r3, 95
-; P8LE-NEXT:    sub r5, r5, r12
-; P8LE-NEXT:    sub r6, r6, r10
-; P8LE-NEXT:    mtvsrd v3, r5
+; P8LE-NEXT:    add r4, r4, r11
+; P8LE-NEXT:    mulli r11, r10, 95
+; P8LE-NEXT:    sub r5, r5, r11
+; P8LE-NEXT:    mulli r11, r8, 95
+; P8LE-NEXT:    mtvsrd v4, r5
+; P8LE-NEXT:    sub r6, r6, r11
+; P8LE-NEXT:    mulli r11, r9, 95
 ; P8LE-NEXT:    mtvsrd v5, r6
-; P8LE-NEXT:    sub r5, r7, r8
-; P8LE-NEXT:    sub r4, r4, r9
-; P8LE-NEXT:    mtvsrd v0, r5
-; P8LE-NEXT:    mtvsrd v1, r4
-; P8LE-NEXT:    vmrghh v3, v5, v3
-; P8LE-NEXT:    mtvsrd v5, r3
-; P8LE-NEXT:    vmrghh v0, v1, v0
+; P8LE-NEXT:    sub r7, r7, r11
+; P8LE-NEXT:    mulli r11, r4, 95
+; P8LE-NEXT:    mtvsrd v0, r7
+; P8LE-NEXT:    sub r3, r3, r11
+; P8LE-NEXT:    vmrghh v2, v3, v2
+; P8LE-NEXT:    mtvsrd v3, r9
 ; P8LE-NEXT:    vmrghh v4, v5, v4
-; P8LE-NEXT:    xxmrglw v3, v0, v3
-; P8LE-NEXT:    xxmrglw v2, v4, v2
-; P8LE-NEXT:    vadduhm v2, v3, v2
+; P8LE-NEXT:    mtvsrd v5, r3
+; P8LE-NEXT:    vmrghh v5, v5, v0
+; P8LE-NEXT:    mtvsrd v0, r4
+; P8LE-NEXT:    xxmrglw v4, v5, v4
+; P8LE-NEXT:    vmrghh v3, v0, v3
+; P8LE-NEXT:    xxmrglw v2, v3, v2
+; P8LE-NEXT:    vadduhm v2, v4, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: combine_srem_sdiv:
 ; P8BE:       # %bb.0:
-; P8BE-NEXT:    mfvsrd r5, v2
+; P8BE-NEXT:    mfvsrd r3, v2
 ; P8BE-NEXT:    lis r4, -21386
-; P8BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; P8BE-NEXT:    addis r30, r2, .LCPI2_0@toc@ha
 ; P8BE-NEXT:    ori r4, r4, 37253
-; P8BE-NEXT:    clrldi r3, r5, 48
-; P8BE-NEXT:    rldicl r6, r5, 48, 48
-; P8BE-NEXT:    rldicl r7, r5, 32, 48
-; P8BE-NEXT:    extsh r8, r3
+; P8BE-NEXT:    clrldi r5, r3, 48
+; P8BE-NEXT:    rldicl r6, r3, 48, 48
+; P8BE-NEXT:    rldicl r7, r3, 32, 48
+; P8BE-NEXT:    rldicl r3, r3, 16, 48
+; P8BE-NEXT:    extsh r8, r5
 ; P8BE-NEXT:    extsh r9, r6
 ; P8BE-NEXT:    extsh r10, r7
+; P8BE-NEXT:    extsh r3, r3
 ; P8BE-NEXT:    mulhw r11, r8, r4
-; P8BE-NEXT:    mulhw r12, r9, r4
-; P8BE-NEXT:    rldicl r5, r5, 16, 48
-; P8BE-NEXT:    mulhw r0, r10, r4
-; P8BE-NEXT:    extsh r5, r5
-; P8BE-NEXT:    mulhw r4, r5, r4
 ; P8BE-NEXT:    add r8, r11, r8
-; P8BE-NEXT:    add r9, r12, r9
+; P8BE-NEXT:    mulhw r11, r9, r4
+; P8BE-NEXT:    add r9, r11, r9
+; P8BE-NEXT:    mulhw r11, r10, r4
+; P8BE-NEXT:    mulhw r4, r3, r4
+; P8BE-NEXT:    add r10, r11, r10
 ; P8BE-NEXT:    srwi r11, r8, 31
-; P8BE-NEXT:    add r10, r0, r10
 ; P8BE-NEXT:    srawi r8, r8, 6
-; P8BE-NEXT:    addi r0, r30, .LCPI2_0@toc@l
-; P8BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; P8BE-NEXT:    srawi r12, r9, 6
-; P8BE-NEXT:    srwi r9, r9, 31
+; P8BE-NEXT:    add r4, r4, r3
 ; P8BE-NEXT:    add r8, r8, r11
-; P8BE-NEXT:    add r4, r4, r5
-; P8BE-NEXT:    lxvw4x v2, 0, r0
-; P8BE-NEXT:    srawi r11, r10, 6
-; P8BE-NEXT:    srwi r10, r10, 31
-; P8BE-NEXT:    add r9, r12, r9
+; P8BE-NEXT:    srwi r11, r9, 31
+; P8BE-NEXT:    srawi r9, r9, 6
+; P8BE-NEXT:    add r9, r9, r11
+; P8BE-NEXT:    srwi r11, r10, 31
+; P8BE-NEXT:    srawi r10, r10, 6
 ; P8BE-NEXT:    mtvsrwz v3, r8
-; P8BE-NEXT:    mulli r12, r8, 95
-; P8BE-NEXT:    add r10, r11, r10
+; P8BE-NEXT:    add r10, r10, r11
 ; P8BE-NEXT:    srwi r11, r4, 31
-; P8BE-NEXT:    mtvsrwz v4, r9
 ; P8BE-NEXT:    srawi r4, r4, 6
-; P8BE-NEXT:    mulli r8, r9, 95
-; P8BE-NEXT:    mtvsrwz v5, r10
+; P8BE-NEXT:    mtvsrwz v4, r9
 ; P8BE-NEXT:    add r4, r4, r11
-; P8BE-NEXT:    mulli r9, r10, 95
-; P8BE-NEXT:    vperm v3, v4, v3, v2
-; P8BE-NEXT:    mulli r10, r4, 95
-; P8BE-NEXT:    sub r3, r3, r12
-; P8BE-NEXT:    sub r6, r6, r8
-; P8BE-NEXT:    mtvsrwz v4, r3
+; P8BE-NEXT:    mulli r11, r8, 95
+; P8BE-NEXT:    sub r5, r5, r11
+; P8BE-NEXT:    mulli r11, r9, 95
+; P8BE-NEXT:    mtvsrwz v5, r5
+; P8BE-NEXT:    sub r6, r6, r11
+; P8BE-NEXT:    mulli r11, r10, 95
 ; P8BE-NEXT:    mtvsrwz v0, r6
-; P8BE-NEXT:    sub r3, r7, r9
-; P8BE-NEXT:    sub r5, r5, r10
-; P8BE-NEXT:    mtvsrwz v1, r3
-; P8BE-NEXT:    mtvsrwz v6, r5
-; P8BE-NEXT:    vperm v4, v0, v4, v2
-; P8BE-NEXT:    mtvsrwz v0, r4
-; P8BE-NEXT:    vperm v1, v6, v1, v2
-; P8BE-NEXT:    vperm v2, v0, v5, v2
-; P8BE-NEXT:    xxmrghw v4, v1, v4
+; P8BE-NEXT:    sub r7, r7, r11
+; P8BE-NEXT:    mulli r11, r4, 95
+; P8BE-NEXT:    mtvsrwz v1, r7
+; P8BE-NEXT:    sub r3, r3, r11
+; P8BE-NEXT:    addis r11, r2, .LCPI2_0@toc@ha
+; P8BE-NEXT:    addi r11, r11, .LCPI2_0@toc@l
+; P8BE-NEXT:    lxvw4x v2, 0, r11
+; P8BE-NEXT:    vperm v5, v0, v5, v2
+; P8BE-NEXT:    mtvsrwz v0, r3
+; P8BE-NEXT:    vperm v3, v4, v3, v2
+; P8BE-NEXT:    mtvsrwz v4, r10
+; P8BE-NEXT:    vperm v0, v0, v1, v2
+; P8BE-NEXT:    mtvsrwz v1, r4
+; P8BE-NEXT:    vperm v2, v1, v4, v2
+; P8BE-NEXT:    xxmrghw v4, v0, v5
 ; P8BE-NEXT:    xxmrghw v2, v2, v3
 ; P8BE-NEXT:    vadduhm v2, v4, v2
 ; P8BE-NEXT:    blr
@@ -809,86 +807,86 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
 ; P8LE-LABEL: dont_fold_srem_power_of_two:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r3, -21386
-; P8LE-NEXT:    ori r3, r3, 37253
-; P8LE-NEXT:    mffprd r4, f0
-; P8LE-NEXT:    rldicl r5, r4, 16, 48
-; P8LE-NEXT:    clrldi r6, r4, 48
-; P8LE-NEXT:    extsh r5, r5
-; P8LE-NEXT:    extsh r6, r6
-; P8LE-NEXT:    mulhw r3, r5, r3
-; P8LE-NEXT:    rldicl r7, r4, 48, 48
-; P8LE-NEXT:    srawi r8, r6, 6
-; P8LE-NEXT:    extsh r7, r7
-; P8LE-NEXT:    addze r8, r8
-; P8LE-NEXT:    rldicl r4, r4, 32, 48
-; P8LE-NEXT:    srawi r9, r7, 5
+; P8LE-NEXT:    mffprd r3, f0
+; P8LE-NEXT:    clrldi r4, r3, 48
 ; P8LE-NEXT:    extsh r4, r4
-; P8LE-NEXT:    slwi r8, r8, 6
-; P8LE-NEXT:    add r3, r3, r5
-; P8LE-NEXT:    addze r9, r9
-; P8LE-NEXT:    sub r6, r6, r8
-; P8LE-NEXT:    srwi r10, r3, 31
-; P8LE-NEXT:    srawi r3, r3, 6
-; P8LE-NEXT:    slwi r8, r9, 5
-; P8LE-NEXT:    mtvsrd v2, r6
-; P8LE-NEXT:    add r3, r3, r10
-; P8LE-NEXT:    srawi r9, r4, 3
-; P8LE-NEXT:    sub r6, r7, r8
-; P8LE-NEXT:    mulli r3, r3, 95
-; P8LE-NEXT:    addze r7, r9
-; P8LE-NEXT:    mtvsrd v3, r6
-; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    sub r3, r5, r3
-; P8LE-NEXT:    slwi r5, r7, 3
+; P8LE-NEXT:    srawi r5, r4, 6
+; P8LE-NEXT:    addze r5, r5
+; P8LE-NEXT:    slwi r5, r5, 6
+; P8LE-NEXT:    sub r4, r4, r5
+; P8LE-NEXT:    mtvsrd v2, r4
+; P8LE-NEXT:    rldicl r4, r3, 48, 48
+; P8LE-NEXT:    extsh r4, r4
+; P8LE-NEXT:    srawi r5, r4, 5
+; P8LE-NEXT:    addze r5, r5
+; P8LE-NEXT:    slwi r5, r5, 5
+; P8LE-NEXT:    sub r4, r4, r5
+; P8LE-NEXT:    lis r5, -21386
+; P8LE-NEXT:    mtvsrd v3, r4
+; P8LE-NEXT:    rldicl r4, r3, 16, 48
+; P8LE-NEXT:    ori r5, r5, 37253
+; P8LE-NEXT:    rldicl r3, r3, 32, 48
+; P8LE-NEXT:    extsh r4, r4
+; P8LE-NEXT:    extsh r3, r3
+; P8LE-NEXT:    mulhw r5, r4, r5
+; P8LE-NEXT:    add r5, r5, r4
+; P8LE-NEXT:    srwi r6, r5, 31
+; P8LE-NEXT:    srawi r5, r5, 6
+; P8LE-NEXT:    add r5, r5, r6
+; P8LE-NEXT:    mulli r5, r5, 95
 ; P8LE-NEXT:    sub r4, r4, r5
+; P8LE-NEXT:    vmrghh v2, v3, v2
+; P8LE-NEXT:    mtvsrd v3, r4
+; P8LE-NEXT:    srawi r4, r3, 3
+; P8LE-NEXT:    addze r4, r4
+; P8LE-NEXT:    slwi r4, r4, 3
+; P8LE-NEXT:    sub r3, r3, r4
 ; P8LE-NEXT:    mtvsrd v4, r3
-; P8LE-NEXT:    mtvsrd v5, r4
-; P8LE-NEXT:    vmrghh v3, v4, v5
+; P8LE-NEXT:    vmrghh v3, v3, v4
 ; P8LE-NEXT:    xxmrglw v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_srem_power_of_two:
 ; P8BE:       # %bb.0:
-; P8BE-NEXT:    mfvsrd r4, v2
-; P8BE-NEXT:    lis r3, -21386
-; P8BE-NEXT:    ori r3, r3, 37253
-; P8BE-NEXT:    clrldi r5, r4, 48
-; P8BE-NEXT:    rldicl r6, r4, 32, 48
-; P8BE-NEXT:    extsh r5, r5
-; P8BE-NEXT:    extsh r6, r6
-; P8BE-NEXT:    mulhw r3, r5, r3
-; P8BE-NEXT:    rldicl r7, r4, 16, 48
-; P8BE-NEXT:    srawi r8, r6, 5
-; P8BE-NEXT:    extsh r7, r7
-; P8BE-NEXT:    addze r8, r8
-; P8BE-NEXT:    rldicl r4, r4, 48, 48
-; P8BE-NEXT:    srawi r9, r7, 6
+; P8BE-NEXT:    mfvsrd r3, v2
+; P8BE-NEXT:    rldicl r4, r3, 32, 48
 ; P8BE-NEXT:    extsh r4, r4
-; P8BE-NEXT:    slwi r8, r8, 5
-; P8BE-NEXT:    add r3, r3, r5
-; P8BE-NEXT:    addze r9, r9
-; P8BE-NEXT:    sub r6, r6, r8
-; P8BE-NEXT:    srwi r10, r3, 31
-; P8BE-NEXT:    srawi r3, r3, 6
-; P8BE-NEXT:    slwi r8, r9, 6
-; P8BE-NEXT:    mtvsrwz v2, r6
-; P8BE-NEXT:    add r3, r3, r10
-; P8BE-NEXT:    srawi r9, r4, 3
-; P8BE-NEXT:    addis r10, r2, .LCPI3_0@toc@ha
-; P8BE-NEXT:    sub r6, r7, r8
-; P8BE-NEXT:    mulli r3, r3, 95
-; P8BE-NEXT:    addze r8, r9
-; P8BE-NEXT:    addi r7, r10, .LCPI3_0@toc@l
-; P8BE-NEXT:    mtvsrwz v4, r6
-; P8BE-NEXT:    lxvw4x v3, 0, r7
-; P8BE-NEXT:    sub r3, r5, r3
-; P8BE-NEXT:    slwi r5, r8, 3
-; P8BE-NEXT:    vperm v2, v4, v2, v3
+; P8BE-NEXT:    srawi r5, r4, 5
+; P8BE-NEXT:    addze r5, r5
+; P8BE-NEXT:    slwi r5, r5, 5
+; P8BE-NEXT:    sub r4, r4, r5
+; P8BE-NEXT:    mtvsrwz v2, r4
+; P8BE-NEXT:    rldicl r4, r3, 16, 48
+; P8BE-NEXT:    extsh r4, r4
+; P8BE-NEXT:    srawi r5, r4, 6
+; P8BE-NEXT:    addze r5, r5
+; P8BE-NEXT:    slwi r5, r5, 6
+; P8BE-NEXT:    sub r4, r4, r5
+; P8BE-NEXT:    lis r5, -21386
+; P8BE-NEXT:    mtvsrwz v3, r4
+; P8BE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
+; P8BE-NEXT:    ori r5, r5, 37253
+; P8BE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
+; P8BE-NEXT:    lxvw4x v4, 0, r4
+; P8BE-NEXT:    clrldi r4, r3, 48
+; P8BE-NEXT:    rldicl r3, r3, 48, 48
+; P8BE-NEXT:    extsh r4, r4
+; P8BE-NEXT:    extsh r3, r3
+; P8BE-NEXT:    mulhw r5, r4, r5
+; P8BE-NEXT:    add r5, r5, r4
+; P8BE-NEXT:    srwi r6, r5, 31
+; P8BE-NEXT:    srawi r5, r5, 6
+; P8BE-NEXT:    add r5, r5, r6
+; P8BE-NEXT:    mulli r5, r5, 95
 ; P8BE-NEXT:    sub r4, r4, r5
+; P8BE-NEXT:    vperm v2, v3, v2, v4
+; P8BE-NEXT:    mtvsrwz v3, r4
+; P8BE-NEXT:    srawi r4, r3, 3
+; P8BE-NEXT:    addze r4, r4
+; P8BE-NEXT:    slwi r4, r4, 3
+; P8BE-NEXT:    sub r3, r3, r4
 ; P8BE-NEXT:    mtvsrwz v5, r3
-; P8BE-NEXT:    mtvsrwz v0, r4
-; P8BE-NEXT:    vperm v3, v0, v5, v3
+; P8BE-NEXT:    vperm v3, v5, v3, v4
 ; P8BE-NEXT:    xxmrghw v2, v2, v3
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
@@ -997,95 +995,95 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
 ; P8LE-LABEL: dont_fold_srem_one:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r5, 24749
-; P8LE-NEXT:    lis r6, -19946
-; P8LE-NEXT:    lis r8, -14230
-; P8LE-NEXT:    ori r5, r5, 47143
-; P8LE-NEXT:    ori r6, r6, 17097
-; P8LE-NEXT:    ori r8, r8, 30865
+; P8LE-NEXT:    lis r8, 24749
+; P8LE-NEXT:    lis r4, -19946
+; P8LE-NEXT:    lis r5, -14230
 ; P8LE-NEXT:    mffprd r3, f0
-; P8LE-NEXT:    rldicl r4, r3, 16, 48
-; P8LE-NEXT:    rldicl r7, r3, 32, 48
-; P8LE-NEXT:    rldicl r3, r3, 48, 48
-; P8LE-NEXT:    extsh r4, r4
-; P8LE-NEXT:    extsh r7, r7
+; P8LE-NEXT:    ori r8, r8, 47143
+; P8LE-NEXT:    ori r4, r4, 17097
+; P8LE-NEXT:    ori r5, r5, 30865
+; P8LE-NEXT:    rldicl r6, r3, 32, 48
+; P8LE-NEXT:    rldicl r7, r3, 48, 48
+; P8LE-NEXT:    rldicl r3, r3, 16, 48
 ; P8LE-NEXT:    extsh r3, r3
-; P8LE-NEXT:    mulhw r5, r4, r5
-; P8LE-NEXT:    mulhw r6, r7, r6
+; P8LE-NEXT:    extsh r6, r6
+; P8LE-NEXT:    extsh r7, r7
 ; P8LE-NEXT:    mulhw r8, r3, r8
-; P8LE-NEXT:    srwi r9, r5, 31
-; P8LE-NEXT:    srawi r5, r5, 11
-; P8LE-NEXT:    add r6, r6, r7
-; P8LE-NEXT:    add r8, r8, r3
-; P8LE-NEXT:    add r5, r5, r9
-; P8LE-NEXT:    srwi r9, r6, 31
-; P8LE-NEXT:    srawi r6, r6, 4
-; P8LE-NEXT:    add r6, r6, r9
+; P8LE-NEXT:    mulhw r4, r6, r4
+; P8LE-NEXT:    mulhw r5, r7, r5
 ; P8LE-NEXT:    srwi r9, r8, 31
-; P8LE-NEXT:    srawi r8, r8, 9
-; P8LE-NEXT:    mulli r5, r5, 5423
+; P8LE-NEXT:    srawi r8, r8, 11
+; P8LE-NEXT:    add r4, r4, r6
+; P8LE-NEXT:    add r5, r5, r7
 ; P8LE-NEXT:    add r8, r8, r9
-; P8LE-NEXT:    mulli r6, r6, 23
-; P8LE-NEXT:    li r9, 0
-; P8LE-NEXT:    mulli r8, r8, 654
-; P8LE-NEXT:    mtvsrd v2, r9
-; P8LE-NEXT:    sub r4, r4, r5
-; P8LE-NEXT:    sub r5, r7, r6
-; P8LE-NEXT:    mtvsrd v3, r4
+; P8LE-NEXT:    srwi r9, r4, 31
+; P8LE-NEXT:    srawi r4, r4, 4
+; P8LE-NEXT:    mulli r8, r8, 5423
+; P8LE-NEXT:    add r4, r4, r9
+; P8LE-NEXT:    srwi r9, r5, 31
+; P8LE-NEXT:    srawi r5, r5, 9
+; P8LE-NEXT:    add r5, r5, r9
 ; P8LE-NEXT:    sub r3, r3, r8
-; P8LE-NEXT:    mtvsrd v4, r5
-; P8LE-NEXT:    mtvsrd v5, r3
+; P8LE-NEXT:    mtvsrd v2, r3
+; P8LE-NEXT:    mulli r3, r4, 23
+; P8LE-NEXT:    mulli r4, r5, 654
+; P8LE-NEXT:    sub r3, r6, r3
+; P8LE-NEXT:    sub r4, r7, r4
+; P8LE-NEXT:    mtvsrd v3, r3
+; P8LE-NEXT:    li r3, 0
+; P8LE-NEXT:    mtvsrd v4, r3
+; P8LE-NEXT:    vmrghh v2, v2, v3
+; P8LE-NEXT:    mtvsrd v3, r4
 ; P8LE-NEXT:    vmrghh v3, v3, v4
-; P8LE-NEXT:    vmrghh v2, v5, v2
-; P8LE-NEXT:    xxmrglw v2, v3, v2
+; P8LE-NEXT:    xxmrglw v2, v2, v3
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_srem_one:
 ; P8BE:       # %bb.0:
-; P8BE-NEXT:    mfvsrd r4, v2
-; P8BE-NEXT:    lis r3, 24749
-; P8BE-NEXT:    lis r7, -19946
-; P8BE-NEXT:    lis r8, -14230
-; P8BE-NEXT:    ori r3, r3, 47143
-; P8BE-NEXT:    ori r7, r7, 17097
-; P8BE-NEXT:    ori r8, r8, 30865
-; P8BE-NEXT:    clrldi r5, r4, 48
-; P8BE-NEXT:    rldicl r6, r4, 48, 48
-; P8BE-NEXT:    rldicl r4, r4, 32, 48
-; P8BE-NEXT:    extsh r5, r5
+; P8BE-NEXT:    mfvsrd r3, v2
+; P8BE-NEXT:    lis r4, -19946
+; P8BE-NEXT:    lis r8, 24749
+; P8BE-NEXT:    lis r5, -14230
+; P8BE-NEXT:    ori r4, r4, 17097
+; P8BE-NEXT:    ori r8, r8, 47143
+; P8BE-NEXT:    ori r5, r5, 30865
+; P8BE-NEXT:    rldicl r6, r3, 48, 48
+; P8BE-NEXT:    rldicl r7, r3, 32, 48
+; P8BE-NEXT:    clrldi r3, r3, 48
 ; P8BE-NEXT:    extsh r6, r6
-; P8BE-NEXT:    extsh r4, r4
-; P8BE-NEXT:    mulhw r3, r5, r3
-; P8BE-NEXT:    mulhw r7, r6, r7
-; P8BE-NEXT:    mulhw r8, r4, r8
-; P8BE-NEXT:    srawi r9, r3, 11
-; P8BE-NEXT:    srwi r3, r3, 31
-; P8BE-NEXT:    add r7, r7, r6
-; P8BE-NEXT:    add r8, r8, r4
-; P8BE-NEXT:    add r3, r9, r3
-; P8BE-NEXT:    srwi r9, r7, 31
-; P8BE-NEXT:    srawi r7, r7, 4
-; P8BE-NEXT:    srawi r10, r8, 9
-; P8BE-NEXT:    srwi r8, r8, 31
-; P8BE-NEXT:    add r7, r7, r9
+; P8BE-NEXT:    extsh r3, r3
+; P8BE-NEXT:    extsh r7, r7
+; P8BE-NEXT:    mulhw r4, r6, r4
+; P8BE-NEXT:    mulhw r8, r3, r8
+; P8BE-NEXT:    mulhw r5, r7, r5
+; P8BE-NEXT:    add r4, r4, r6
+; P8BE-NEXT:    srwi r9, r8, 31
+; P8BE-NEXT:    srawi r8, r8, 11
+; P8BE-NEXT:    add r5, r5, r7
+; P8BE-NEXT:    add r8, r8, r9
+; P8BE-NEXT:    srwi r9, r4, 31
+; P8BE-NEXT:    srawi r4, r4, 4
+; P8BE-NEXT:    add r4, r4, r9
+; P8BE-NEXT:    mulli r8, r8, 5423
+; P8BE-NEXT:    srwi r9, r5, 31
+; P8BE-NEXT:    srawi r5, r5, 9
+; P8BE-NEXT:    mulli r4, r4, 23
+; P8BE-NEXT:    add r5, r5, r9
 ; P8BE-NEXT:    addis r9, r2, .LCPI4_0@toc@ha
-; P8BE-NEXT:    mulli r3, r3, 5423
-; P8BE-NEXT:    add r8, r10, r8
-; P8BE-NEXT:    li r10, 0
-; P8BE-NEXT:    mulli r7, r7, 23
-; P8BE-NEXT:    mulli r8, r8, 654
-; P8BE-NEXT:    mtvsrwz v2, r10
-; P8BE-NEXT:    sub r3, r5, r3
-; P8BE-NEXT:    addi r5, r9, .LCPI4_0@toc@l
-; P8BE-NEXT:    lxvw4x v3, 0, r5
-; P8BE-NEXT:    sub r5, r6, r7
+; P8BE-NEXT:    addi r9, r9, .LCPI4_0@toc@l
+; P8BE-NEXT:    mulli r5, r5, 654
+; P8BE-NEXT:    sub r3, r3, r8
+; P8BE-NEXT:    lxvw4x v2, 0, r9
+; P8BE-NEXT:    sub r4, r6, r4
+; P8BE-NEXT:    mtvsrwz v3, r3
+; P8BE-NEXT:    mtvsrwz v4, r4
+; P8BE-NEXT:    sub r3, r7, r5
+; P8BE-NEXT:    vperm v3, v4, v3, v2
 ; P8BE-NEXT:    mtvsrwz v4, r3
-; P8BE-NEXT:    sub r3, r4, r8
-; P8BE-NEXT:    mtvsrwz v5, r5
-; P8BE-NEXT:    mtvsrwz v0, r3
-; P8BE-NEXT:    vperm v4, v5, v4, v3
-; P8BE-NEXT:    vperm v2, v2, v0, v3
-; P8BE-NEXT:    xxmrghw v2, v2, v4
+; P8BE-NEXT:    li r3, 0
+; P8BE-NEXT:    mtvsrwz v5, r3
+; P8BE-NEXT:    vperm v2, v5, v4, v2
+; P8BE-NEXT:    xxmrghw v2, v2, v3
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
   ret <4 x i16> %1
@@ -1183,85 +1181,85 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
 ; P8LE-LABEL: dont_fold_urem_i16_smax:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r4, 24749
-; P8LE-NEXT:    lis r5, -19946
-; P8LE-NEXT:    ori r4, r4, 47143
-; P8LE-NEXT:    ori r5, r5, 17097
+; P8LE-NEXT:    lis r4, -19946
+; P8LE-NEXT:    lis r6, 24749
+; P8LE-NEXT:    li r8, 0
 ; P8LE-NEXT:    mffprd r3, f0
-; P8LE-NEXT:    rldicl r6, r3, 16, 48
-; P8LE-NEXT:    rldicl r7, r3, 32, 48
-; P8LE-NEXT:    extsh r6, r6
-; P8LE-NEXT:    extsh r7, r7
-; P8LE-NEXT:    mulhw r4, r6, r4
-; P8LE-NEXT:    mulhw r5, r7, r5
+; P8LE-NEXT:    ori r4, r4, 17097
+; P8LE-NEXT:    ori r6, r6, 47143
+; P8LE-NEXT:    mtvsrd v2, r8
+; P8LE-NEXT:    rldicl r5, r3, 32, 48
+; P8LE-NEXT:    rldicl r7, r3, 16, 48
 ; P8LE-NEXT:    rldicl r3, r3, 48, 48
+; P8LE-NEXT:    extsh r5, r5
+; P8LE-NEXT:    extsh r7, r7
 ; P8LE-NEXT:    extsh r3, r3
+; P8LE-NEXT:    mulhw r4, r5, r4
+; P8LE-NEXT:    mulhw r6, r7, r6
+; P8LE-NEXT:    add r4, r4, r5
+; P8LE-NEXT:    srwi r8, r6, 31
+; P8LE-NEXT:    srawi r6, r6, 11
+; P8LE-NEXT:    add r6, r6, r8
 ; P8LE-NEXT:    srwi r8, r4, 31
-; P8LE-NEXT:    srawi r4, r4, 11
-; P8LE-NEXT:    add r5, r5, r7
+; P8LE-NEXT:    srawi r4, r4, 4
 ; P8LE-NEXT:    add r4, r4, r8
-; P8LE-NEXT:    srwi r8, r5, 31
-; P8LE-NEXT:    srawi r5, r5, 4
-; P8LE-NEXT:    mulli r4, r4, 5423
-; P8LE-NEXT:    add r5, r5, r8
-; P8LE-NEXT:    srawi r9, r3, 15
-; P8LE-NEXT:    li r8, 0
-; P8LE-NEXT:    mulli r5, r5, 23
-; P8LE-NEXT:    mtvsrd v2, r8
-; P8LE-NEXT:    sub r4, r6, r4
-; P8LE-NEXT:    addze r6, r9
-; P8LE-NEXT:    slwi r6, r6, 15
-; P8LE-NEXT:    mtvsrd v3, r4
-; P8LE-NEXT:    sub r5, r7, r5
-; P8LE-NEXT:    sub r3, r3, r6
-; P8LE-NEXT:    mtvsrd v4, r5
-; P8LE-NEXT:    mtvsrd v5, r3
+; P8LE-NEXT:    mulli r6, r6, 5423
+; P8LE-NEXT:    mulli r4, r4, 23
+; P8LE-NEXT:    sub r6, r7, r6
+; P8LE-NEXT:    sub r4, r5, r4
+; P8LE-NEXT:    srawi r5, r3, 15
+; P8LE-NEXT:    mtvsrd v3, r6
+; P8LE-NEXT:    addze r5, r5
+; P8LE-NEXT:    mtvsrd v4, r4
+; P8LE-NEXT:    slwi r4, r5, 15
+; P8LE-NEXT:    sub r3, r3, r4
 ; P8LE-NEXT:    vmrghh v3, v3, v4
-; P8LE-NEXT:    vmrghh v2, v5, v2
+; P8LE-NEXT:    mtvsrd v4, r3
+; P8LE-NEXT:    vmrghh v2, v4, v2
 ; P8LE-NEXT:    xxmrglw v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_urem_i16_smax:
 ; P8BE:       # %bb.0:
 ; P8BE-NEXT:    mfvsrd r3, v2
-; P8BE-NEXT:    lis r4, 24749
-; P8BE-NEXT:    lis r5, -19946
-; P8BE-NEXT:    li r9, 0
-; P8BE-NEXT:    ori r4, r4, 47143
-; P8BE-NEXT:    ori r5, r5, 17097
-; P8BE-NEXT:    mtvsrwz v2, r9
-; P8BE-NEXT:    clrldi r6, r3, 48
-; P8BE-NEXT:    rldicl r7, r3, 48, 48
-; P8BE-NEXT:    extsh r6, r6
-; P8BE-NEXT:    extsh r7, r7
-; P8BE-NEXT:    mulhw r4, r6, r4
-; P8BE-NEXT:    mulhw r5, r7, r5
+; P8BE-NEXT:    lis r4, -19946
+; P8BE-NEXT:    lis r6, 24749
+; P8BE-NEXT:    ori r4, r4, 17097
+; P8BE-NEXT:    ori r6, r6, 47143
+; P8BE-NEXT:    rldicl r5, r3, 48, 48
+; P8BE-NEXT:    clrldi r7, r3, 48
 ; P8BE-NEXT:    rldicl r3, r3, 32, 48
+; P8BE-NEXT:    extsh r5, r5
+; P8BE-NEXT:    extsh r7, r7
 ; P8BE-NEXT:    extsh r3, r3
+; P8BE-NEXT:    mulhw r4, r5, r4
+; P8BE-NEXT:    mulhw r6, r7, r6
+; P8BE-NEXT:    add r4, r4, r5
+; P8BE-NEXT:    srwi r8, r6, 31
+; P8BE-NEXT:    srawi r6, r6, 11
+; P8BE-NEXT:    add r6, r6, r8
 ; P8BE-NEXT:    srwi r8, r4, 31
-; P8BE-NEXT:    srawi r4, r4, 11
-; P8BE-NEXT:    add r5, r5, r7
+; P8BE-NEXT:    srawi r4, r4, 4
 ; P8BE-NEXT:    add r4, r4, r8
-; P8BE-NEXT:    srwi r8, r5, 31
-; P8BE-NEXT:    srawi r5, r5, 4
-; P8BE-NEXT:    mulli r4, r4, 5423
-; P8BE-NEXT:    add r5, r5, r8
 ; P8BE-NEXT:    addis r8, r2, .LCPI5_0@toc@ha
-; P8BE-NEXT:    srawi r10, r3, 15
-; P8BE-NEXT:    mulli r5, r5, 23
-; P8BE-NEXT:    sub r4, r6, r4
-; P8BE-NEXT:    addi r6, r8, .LCPI5_0@toc@l
-; P8BE-NEXT:    addze r8, r10
-; P8BE-NEXT:    lxvw4x v3, 0, r6
-; P8BE-NEXT:    slwi r6, r8, 15
+; P8BE-NEXT:    mulli r6, r6, 5423
+; P8BE-NEXT:    mulli r4, r4, 23
+; P8BE-NEXT:    addi r8, r8, .LCPI5_0@toc@l
+; P8BE-NEXT:    lxvw4x v2, 0, r8
+; P8BE-NEXT:    srawi r8, r3, 15
+; P8BE-NEXT:    sub r6, r7, r6
+; P8BE-NEXT:    addze r8, r8
+; P8BE-NEXT:    sub r4, r5, r4
+; P8BE-NEXT:    mtvsrwz v3, r6
+; P8BE-NEXT:    slwi r8, r8, 15
 ; P8BE-NEXT:    mtvsrwz v4, r4
-; P8BE-NEXT:    sub r5, r7, r5
-; P8BE-NEXT:    sub r3, r3, r6
-; P8BE-NEXT:    mtvsrwz v5, r5
-; P8BE-NEXT:    mtvsrwz v0, r3
-; P8BE-NEXT:    vperm v4, v5, v4, v3
-; P8BE-NEXT:    vperm v2, v2, v0, v3
-; P8BE-NEXT:    xxmrghw v2, v2, v4
+; P8BE-NEXT:    sub r3, r3, r8
+; P8BE-NEXT:    vperm v3, v4, v3, v2
+; P8BE-NEXT:    mtvsrwz v4, r3
+; P8BE-NEXT:    li r3, 0
+; P8BE-NEXT:    mtvsrwz v5, r3
+; P8BE-NEXT:    vperm v2, v5, v4, v2
+; P8BE-NEXT:    xxmrghw v2, v2, v3
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
   ret <4 x i16> %1
@@ -1358,101 +1356,101 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) {
 ;
 ; P8LE-LABEL: dont_fold_srem_i64:
 ; P8LE:       # %bb.0:
-; P8LE-NEXT:    lis r3, 12374
-; P8LE-NEXT:    lis r4, 5698
+; P8LE-NEXT:    lis r4, 12374
+; P8LE-NEXT:    lis r3, 5698
 ; P8LE-NEXT:    lis r5, 3206
 ; P8LE-NEXT:    xxswapd vs0, v3
-; P8LE-NEXT:    mfvsrd r6, v3
-; P8LE-NEXT:    ori r3, r3, 56339
-; P8LE-NEXT:    ori r4, r4, 51289
+; P8LE-NEXT:    mfvsrd r7, v3
+; P8LE-NEXT:    mfvsrd r8, v2
+; P8LE-NEXT:    ori r4, r4, 56339
+; P8LE-NEXT:    ori r3, r3, 51289
 ; P8LE-NEXT:    ori r5, r5, 42889
-; P8LE-NEXT:    mfvsrd r7, v2
-; P8LE-NEXT:    rldic r3, r3, 33, 1
-; P8LE-NEXT:    rldic r4, r4, 35, 0
+; P8LE-NEXT:    mffprd r6, f0
+; P8LE-NEXT:    rldic r4, r4, 33, 1
+; P8LE-NEXT:    rldic r3, r3, 35, 0
 ; P8LE-NEXT:    rldic r5, r5, 35, 1
-; P8LE-NEXT:    oris r3, r3, 58853
-; P8LE-NEXT:    oris r4, r4, 22795
-; P8LE-NEXT:    mffprd r8, f0
+; P8LE-NEXT:    oris r4, r4, 58853
+; P8LE-NEXT:    oris r3, r3, 22795
 ; P8LE-NEXT:    oris r5, r5, 1603
-; P8LE-NEXT:    ori r3, r3, 6055
-; P8LE-NEXT:    ori r4, r4, 8549
+; P8LE-NEXT:    ori r4, r4, 6055
+; P8LE-NEXT:    ori r3, r3, 8549
 ; P8LE-NEXT:    ori r5, r5, 21445
+; P8LE-NEXT:    mulhd r4, r7, r4
 ; P8LE-NEXT:    mulhd r3, r6, r3
-; P8LE-NEXT:    mulhd r5, r7, r5
-; P8LE-NEXT:    mulhd r4, r8, r4
-; P8LE-NEXT:    rldicl r9, r3, 1, 63
-; P8LE-NEXT:    sradi r3, r3, 11
-; P8LE-NEXT:    add r3, r3, r9
+; P8LE-NEXT:    mulhd r5, r8, r5
+; P8LE-NEXT:    rldicl r9, r4, 1, 63
+; P8LE-NEXT:    sradi r4, r4, 11
+; P8LE-NEXT:    add r3, r3, r6
+; P8LE-NEXT:    add r4, r4, r9
 ; P8LE-NEXT:    rldicl r9, r5, 1, 63
-; P8LE-NEXT:    add r4, r4, r8
 ; P8LE-NEXT:    sradi r5, r5, 8
-; P8LE-NEXT:    mulli r3, r3, 5423
 ; P8LE-NEXT:    add r5, r5, r9
-; P8LE-NEXT:    rldicl r9, r4, 1, 63
-; P8LE-NEXT:    sradi r4, r4, 4
+; P8LE-NEXT:    rldicl r9, r3, 1, 63
+; P8LE-NEXT:    sradi r3, r3, 4
+; P8LE-NEXT:    mulli r4, r4, 5423
+; P8LE-NEXT:    add r3, r3, r9
 ; P8LE-NEXT:    mulli r5, r5, 654
-; P8LE-NEXT:    add r4, r4, r9
-; P8LE-NEXT:    mulli r4, r4, 23
+; P8LE-NEXT:    mulli r3, r3, 23
+; P8LE-NEXT:    sub r4, r7, r4
+; P8LE-NEXT:    mtfprd f0, r4
+; P8LE-NEXT:    sub r5, r8, r5
 ; P8LE-NEXT:    sub r3, r6, r3
-; P8LE-NEXT:    mtfprd f0, r3
-; P8LE-NEXT:    sub r5, r7, r5
-; P8LE-NEXT:    mtfprd f1, r5
-; P8LE-NEXT:    sub r3, r8, r4
-; P8LE-NEXT:    li r4, 0
-; P8LE-NEXT:    mtfprd f2, r3
-; P8LE-NEXT:    mtfprd f3, r4
-; P8LE-NEXT:    xxmrghd v3, vs0, vs2
-; P8LE-NEXT:    xxmrghd v2, vs1, vs3
+; P8LE-NEXT:    mtfprd f1, r3
+; P8LE-NEXT:    li r3, 0
+; P8LE-NEXT:    xxmrghd v3, vs0, vs1
+; P8LE-NEXT:    mtfprd f0, r5
+; P8LE-NEXT:    mtfprd f1, r3
+; P8LE-NEXT:    xxmrghd v2, vs0, vs1
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_srem_i64:
 ; P8BE:       # %bb.0:
-; P8BE-NEXT:    lis r4, 5698
-; P8BE-NEXT:    lis r3, 12374
-; P8BE-NEXT:    xxswapd vs0, v3
+; P8BE-NEXT:    lis r4, 12374
+; P8BE-NEXT:    lis r3, 5698
 ; P8BE-NEXT:    lis r5, 3206
+; P8BE-NEXT:    xxswapd vs0, v3
 ; P8BE-NEXT:    xxswapd vs1, v2
-; P8BE-NEXT:    ori r4, r4, 51289
-; P8BE-NEXT:    ori r3, r3, 56339
-; P8BE-NEXT:    ori r5, r5, 42889
 ; P8BE-NEXT:    mfvsrd r6, v3
-; P8BE-NEXT:    rldic r4, r4, 35, 0
-; P8BE-NEXT:    rldic r3, r3, 33, 1
-; P8BE-NEXT:    oris r4, r4, 22795
-; P8BE-NEXT:    rldic r5, r5, 35, 1
-; P8BE-NEXT:    oris r3, r3, 58853
+; P8BE-NEXT:    ori r4, r4, 56339
+; P8BE-NEXT:    ori r3, r3, 51289
+; P8BE-NEXT:    ori r5, r5, 42889
 ; P8BE-NEXT:    mffprd r7, f0
-; P8BE-NEXT:    ori r4, r4, 8549
-; P8BE-NEXT:    ori r3, r3, 6055
-; P8BE-NEXT:    oris r5, r5, 1603
 ; P8BE-NEXT:    mffprd r8, f1
-; P8BE-NEXT:    mulhd r4, r6, r4
-; P8BE-NEXT:    mulhd r3, r7, r3
+; P8BE-NEXT:    rldic r4, r4, 33, 1
+; P8BE-NEXT:    rldic r3, r3, 35, 0
+; P8BE-NEXT:    rldic r5, r5, 35, 1
+; P8BE-NEXT:    oris r4, r4, 58853
+; P8BE-NEXT:    oris r3, r3, 22795
+; P8BE-NEXT:    oris r5, r5, 1603
+; P8BE-NEXT:    ori r4, r4, 6055
+; P8BE-NEXT:    ori r3, r3, 8549
 ; P8BE-NEXT:    ori r5, r5, 21445
+; P8BE-NEXT:    mulhd r4, r7, r4
+; P8BE-NEXT:    mulhd r3, r6, r3
 ; P8BE-NEXT:    mulhd r5, r8, r5
-; P8BE-NEXT:    add r4, r4, r6
-; P8BE-NEXT:    rldicl r9, r3, 1, 63
-; P8BE-NEXT:    sradi r3, r3, 11
-; P8BE-NEXT:    rldicl r10, r4, 1, 63
-; P8BE-NEXT:    sradi r4, r4, 4
-; P8BE-NEXT:    add r3, r3, r9
+; P8BE-NEXT:    rldicl r9, r4, 1, 63
+; P8BE-NEXT:    sradi r4, r4, 11
+; P8BE-NEXT:    add r3, r3, r6
+; P8BE-NEXT:    add r4, r4, r9
 ; P8BE-NEXT:    rldicl r9, r5, 1, 63
-; P8BE-NEXT:    add r4, r4, r10
 ; P8BE-NEXT:    sradi r5, r5, 8
-; P8BE-NEXT:    mulli r3, r3, 5423
 ; P8BE-NEXT:    add r5, r5, r9
-; P8BE-NEXT:    mulli r4, r4, 23
+; P8BE-NEXT:    rldicl r9, r3, 1, 63
+; P8BE-NEXT:    sradi r3, r3, 4
+; P8BE-NEXT:    mulli r4, r4, 5423
+; P8BE-NEXT:    add r3, r3, r9
 ; P8BE-NEXT:    mulli r5, r5, 654
-; P8BE-NEXT:    sub r3, r7, r3
-; P8BE-NEXT:    sub r4, r6, r4
-; P8BE-NEXT:    mtfprd f0, r3
-; P8BE-NEXT:    sub r3, r8, r5
-; P8BE-NEXT:    mtfprd f1, r4
-; P8BE-NEXT:    li r4, 0
-; P8BE-NEXT:    mtfprd f2, r3
-; P8BE-NEXT:    mtfprd f3, r4
+; P8BE-NEXT:    mulli r3, r3, 23
+; P8BE-NEXT:    sub r4, r7, r4
+; P8BE-NEXT:    mtfprd f0, r4
+; P8BE-NEXT:    sub r5, r8, r5
+; P8BE-NEXT:    sub r3, r6, r3
+; P8BE-NEXT:    mtfprd f1, r3
+; P8BE-NEXT:    li r3, 0
 ; P8BE-NEXT:    xxmrghd v3, vs1, vs0
-; P8BE-NEXT:    xxmrghd v2, vs3, vs2
+; P8BE-NEXT:    mtfprd f0, r5
+; P8BE-NEXT:    mtfprd f1, r3
+; P8BE-NEXT:    xxmrghd v2, vs1, vs0
 ; P8BE-NEXT:    blr
   %1 = srem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
   ret <4 x i64> %1

diff  --git a/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll
index 5f605e93c93e8ac..bd7b3b8603f7ed5 100644
--- a/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll
@@ -18,17 +18,17 @@ define i32 @foo(i32 %n) local_unnamed_addr #0 "stack-probe-size"="32768" nounwin
 ; CHECK-LE-NEXT:    std r31, -8(r1)
 ; CHECK-LE-NEXT:    stdu r1, -48(r1)
 ; CHECK-LE-NEXT:    rldic r3, r3, 2, 30
-; CHECK-LE-NEXT:    li r5, -32768
+; CHECK-LE-NEXT:    li r6, -32768
 ; CHECK-LE-NEXT:    mr r31, r1
 ; CHECK-LE-NEXT:    addi r3, r3, 15
 ; CHECK-LE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-LE-NEXT:    rldicl r3, r3, 4, 29
-; CHECK-LE-NEXT:    neg r4, r3
+; CHECK-LE-NEXT:    neg r5, r3
 ; CHECK-LE-NEXT:    addi r3, r31, 48
-; CHECK-LE-NEXT:    divd r6, r4, r5
-; CHECK-LE-NEXT:    mulld r5, r6, r5
-; CHECK-LE-NEXT:    sub r5, r4, r5
-; CHECK-LE-NEXT:    add r4, r1, r4
+; CHECK-LE-NEXT:    divd r7, r5, r6
+; CHECK-LE-NEXT:    add r4, r1, r5
+; CHECK-LE-NEXT:    mulld r6, r7, r6
+; CHECK-LE-NEXT:    sub r5, r5, r6
 ; CHECK-LE-NEXT:    stdux r3, r1, r5
 ; CHECK-LE-NEXT:    cmpd r1, r4
 ; CHECK-LE-NEXT:    beq cr0, .LBB0_2
@@ -154,17 +154,17 @@ define i32 @bar(i32 %n) local_unnamed_addr #0 nounwind {
 ; CHECK-LE-NEXT:    std r31, -8(r1)
 ; CHECK-LE-NEXT:    stdu r1, -48(r1)
 ; CHECK-LE-NEXT:    rldic r4, r3, 2, 30
-; CHECK-LE-NEXT:    li r6, -4096
+; CHECK-LE-NEXT:    li r7, -4096
 ; CHECK-LE-NEXT:    mr r31, r1
 ; CHECK-LE-NEXT:    addi r4, r4, 15
 ; CHECK-LE-NEXT:    rldicl r4, r4, 60, 4
 ; CHECK-LE-NEXT:    rldicl r4, r4, 4, 29
-; CHECK-LE-NEXT:    neg r5, r4
+; CHECK-LE-NEXT:    neg r6, r4
 ; CHECK-LE-NEXT:    addi r4, r31, 48
-; CHECK-LE-NEXT:    divd r7, r5, r6
-; CHECK-LE-NEXT:    mulld r6, r7, r6
-; CHECK-LE-NEXT:    sub r6, r5, r6
-; CHECK-LE-NEXT:    add r5, r1, r5
+; CHECK-LE-NEXT:    divd r8, r6, r7
+; CHECK-LE-NEXT:    add r5, r1, r6
+; CHECK-LE-NEXT:    mulld r7, r8, r7
+; CHECK-LE-NEXT:    sub r6, r6, r7
 ; CHECK-LE-NEXT:    stdux r4, r1, r6
 ; CHECK-LE-NEXT:    cmpd r1, r5
 ; CHECK-LE-NEXT:    beq cr0, .LBB1_2
@@ -300,24 +300,24 @@ define i32 @f(i32 %n) local_unnamed_addr #0 "stack-probe-size"="65536" nounwind
 ; CHECK-LE-NEXT:    std r31, -8(r1)
 ; CHECK-LE-NEXT:    stdu r1, -48(r1)
 ; CHECK-LE-NEXT:    rldic r3, r3, 2, 30
-; CHECK-LE-NEXT:    lis r4, -1
+; CHECK-LE-NEXT:    lis r5, -1
 ; CHECK-LE-NEXT:    mr r31, r1
 ; CHECK-LE-NEXT:    addi r3, r3, 15
-; CHECK-LE-NEXT:    ori r4, r4, 0
+; CHECK-LE-NEXT:    ori r5, r5, 0
 ; CHECK-LE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-LE-NEXT:    rldicl r3, r3, 4, 29
-; CHECK-LE-NEXT:    neg r5, r3
+; CHECK-LE-NEXT:    neg r6, r3
 ; CHECK-LE-NEXT:    addi r3, r31, 48
-; CHECK-LE-NEXT:    divd r6, r5, r4
-; CHECK-LE-NEXT:    mulld r6, r6, r4
-; CHECK-LE-NEXT:    sub r6, r5, r6
-; CHECK-LE-NEXT:    add r5, r1, r5
+; CHECK-LE-NEXT:    divd r7, r6, r5
+; CHECK-LE-NEXT:    add r4, r1, r6
+; CHECK-LE-NEXT:    mulld r7, r7, r5
+; CHECK-LE-NEXT:    sub r6, r6, r7
 ; CHECK-LE-NEXT:    stdux r3, r1, r6
-; CHECK-LE-NEXT:    cmpd r1, r5
+; CHECK-LE-NEXT:    cmpd r1, r4
 ; CHECK-LE-NEXT:    beq cr0, .LBB2_2
 ; CHECK-LE-NEXT:  .LBB2_1:
-; CHECK-LE-NEXT:    stdux r3, r1, r4
-; CHECK-LE-NEXT:    cmpd r1, r5
+; CHECK-LE-NEXT:    stdux r3, r1, r5
+; CHECK-LE-NEXT:    cmpd r1, r4
 ; CHECK-LE-NEXT:    bne cr0, .LBB2_1
 ; CHECK-LE-NEXT:  .LBB2_2:
 ; CHECK-LE-NEXT:    li r4, 1

diff  --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
index 58d7e7d40e7c7fa..4b19cb30aba2a90 100644
--- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
@@ -539,8 +539,8 @@ define i32 @f8(i64 %i) local_unnamed_addr #0 {
 ; CHECK-LE-NEXT:    stdux r1, r1, r0
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-LE-NEXT:    .cfi_offset r30, -16
-; CHECK-LE-NEXT:    addi r4, r1, 64
 ; CHECK-LE-NEXT:    sldi r3, r3, 2
+; CHECK-LE-NEXT:    addi r4, r1, 64
 ; CHECK-LE-NEXT:    li r5, 1
 ; CHECK-LE-NEXT:    stwx r5, r4, r3
 ; CHECK-LE-NEXT:    lwz r3, 64(r1)
@@ -619,8 +619,8 @@ define i32 @f9(i64 %i) local_unnamed_addr #0 {
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r0
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-LE-NEXT:    .cfi_offset r30, -16
-; CHECK-LE-NEXT:    addi r4, r1, 2048
 ; CHECK-LE-NEXT:    sldi r3, r3, 2
+; CHECK-LE-NEXT:    addi r4, r1, 2048
 ; CHECK-LE-NEXT:    li r5, 1
 ; CHECK-LE-NEXT:    stwx r5, r4, r3
 ; CHECK-LE-NEXT:    lwz r3, 2048(r1)
@@ -726,8 +726,8 @@ define i32 @f10(i64 %i) local_unnamed_addr #0 {
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r0
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-LE-NEXT:    .cfi_offset r30, -16
-; CHECK-LE-NEXT:    addi r4, r1, 1024
 ; CHECK-LE-NEXT:    sldi r3, r3, 2
+; CHECK-LE-NEXT:    addi r4, r1, 1024
 ; CHECK-LE-NEXT:    li r5, 1
 ; CHECK-LE-NEXT:    stwx r5, r4, r3
 ; CHECK-LE-NEXT:    lwz r3, 1024(r1)
@@ -839,23 +839,23 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-LE-NEXT:    lis r5, 1
 ; CHECK-LE-NEXT:    mr r31, r1
 ; CHECK-LE-NEXT:    li r6, 1
+; CHECK-LE-NEXT:    sldi r4, r4, 2
 ; CHECK-LE-NEXT:    addi r3, r3, 15
 ; CHECK-LE-NEXT:    ori r5, r5, 0
 ; CHECK-LE-NEXT:    rldicl r3, r3, 60, 4
-; CHECK-LE-NEXT:    sldi r4, r4, 2
 ; CHECK-LE-NEXT:    add r5, r31, r5
 ; CHECK-LE-NEXT:    rldicl r3, r3, 4, 31
 ; CHECK-LE-NEXT:    stwx r6, r5, r4
 ; CHECK-LE-NEXT:    li r4, -32768
-; CHECK-LE-NEXT:    neg r7, r3
+; CHECK-LE-NEXT:    li r6, -4096
+; CHECK-LE-NEXT:    neg r5, r3
 ; CHECK-LE-NEXT:    ld r3, 0(r1)
-; CHECK-LE-NEXT:    and r4, r7, r4
-; CHECK-LE-NEXT:    mr r7, r4
-; CHECK-LE-NEXT:    li r4, -4096
-; CHECK-LE-NEXT:    divd r5, r7, r4
-; CHECK-LE-NEXT:    mulld r4, r5, r4
-; CHECK-LE-NEXT:    sub r5, r7, r4
-; CHECK-LE-NEXT:    add r4, r1, r7
+; CHECK-LE-NEXT:    and r4, r5, r4
+; CHECK-LE-NEXT:    mr r5, r4
+; CHECK-LE-NEXT:    divd r7, r5, r6
+; CHECK-LE-NEXT:    add r4, r1, r5
+; CHECK-LE-NEXT:    mulld r6, r7, r6
+; CHECK-LE-NEXT:    sub r5, r5, r6
 ; CHECK-LE-NEXT:    stdux r3, r1, r5
 ; CHECK-LE-NEXT:    cmpd r1, r4
 ; CHECK-LE-NEXT:    beq cr0, .LBB11_4

diff  --git a/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
index 2e50940f927280d..c8278e58ad064cb 100644
--- a/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
@@ -12,23 +12,24 @@ define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar
 ; CHECK-LABEL: main:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mfocrf 12, 32
-; CHECK-NEXT:    mflr 0
 ; CHECK-NEXT:    std 31, -8(1)
 ; CHECK-NEXT:    stw 12, 8(1)
+; CHECK-NEXT:    mflr 0
 ; CHECK-NEXT:    stdu 1, -784(1)
 ; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $x3
 ; CHECK-NEXT:    cmpwi 2, 3, 2
+; CHECK-NEXT:    li 4, 0
 ; CHECK-NEXT:    std 0, 800(1)
 ; CHECK-NEXT:    mr 31, 1
-; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    mr 3, 4
 ; CHECK-NEXT:    blt 2, .LBB0_3
 ; CHECK-NEXT:  # %bb.1: # %if.end
 ; CHECK-NEXT:    addi 3, 31, 112
 ; CHECK-NEXT:    bl _setjmp
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    crmove 20, 10
 ; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $x3
 ; CHECK-NEXT:    cmpwi 3, 0
+; CHECK-NEXT:    crmove 20, 10
 ; CHECK-NEXT:    crorc 20, 10, 2
 ; CHECK-NEXT:    crmove 21, 2
 ; CHECK-NEXT:    bc 4, 20, .LBB0_4
@@ -44,8 +45,8 @@ define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar
 ; CHECK-NEXT:    ld 0, 16(1)
 ; CHECK-NEXT:    lwz 12, 8(1)
 ; CHECK-NEXT:    ld 31, -8(1)
-; CHECK-NEXT:    mtocrf 32, 12
 ; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    mtocrf 32, 12
 ; CHECK-NEXT:    blr
 ; CHECK-NEXT:  .LBB0_4: # %if.then3
 ; CHECK-NEXT:    ld 4, 0(1)

diff  --git a/llvm/test/CodeGen/PowerPC/store-constant.ll b/llvm/test/CodeGen/PowerPC/store-constant.ll
index 3c72cf58d467024..6c55b818adaa8e0 100644
--- a/llvm/test/CodeGen/PowerPC/store-constant.ll
+++ b/llvm/test/CodeGen/PowerPC/store-constant.ll
@@ -17,8 +17,8 @@ define void @foo(ptr %p) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li 4, 0
-; CHECK-NEXT:    stb 4, 8(3)
 ; CHECK-NEXT:    std 4, 0(3)
+; CHECK-NEXT:    stb 4, 8(3)
 ; CHECK-NEXT:    sth 4, 10(3)
 ; CHECK-NEXT:    stw 4, 12(3)
 ; CHECK-NEXT:    blr
@@ -39,8 +39,8 @@ define void @bar(ptr %p) {
 ; CHECK-NEXT:    li 4, 2
 ; CHECK-NEXT:    stw 4, 12(3)
 ; CHECK-NEXT:    sth 4, 10(3)
-; CHECK-NEXT:    std 4, 0(3)
 ; CHECK-NEXT:    stb 4, 8(3)
+; CHECK-NEXT:    std 4, 0(3)
 ; CHECK-NEXT:    blr
   %i = getelementptr %struct.S, ptr %p, i64 0, i32 3
   store i32 2, ptr %i, align 4
@@ -58,18 +58,18 @@ define void @setSmallNeg() {
 ; CHECK-LABEL: setSmallNeg:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
-; CHECK-NEXT:    addis 4, 2, .LC1@toc@ha
-; CHECK-NEXT:    addis 5, 2, .LC2@toc@ha
-; CHECK-NEXT:    addis 6, 2, .LC3@toc@ha
-; CHECK-NEXT:    li 7, -7
+; CHECK-NEXT:    li 4, -7
 ; CHECK-NEXT:    ld 3, .LC0@toc@l(3)
-; CHECK-NEXT:    ld 4, .LC1@toc@l(4)
-; CHECK-NEXT:    ld 5, .LC2@toc@l(5)
-; CHECK-NEXT:    ld 6, .LC3@toc@l(6)
-; CHECK-NEXT:    stb 7, 0(3)
-; CHECK-NEXT:    sth 7, 0(4)
-; CHECK-NEXT:    std 7, 0(6)
-; CHECK-NEXT:    stw 7, 0(5)
+; CHECK-NEXT:    stb 4, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC1@toc@ha
+; CHECK-NEXT:    ld 3, .LC1@toc@l(3)
+; CHECK-NEXT:    sth 4, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC2@toc@ha
+; CHECK-NEXT:    ld 3, .LC2@toc@l(3)
+; CHECK-NEXT:    stw 4, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC3@toc@ha
+; CHECK-NEXT:    ld 3, .LC3@toc@l(3)
+; CHECK-NEXT:    std 4, 0(3)
 ; CHECK-NEXT:    blr
 entry:
   store i8 -7, ptr @CVal, align 1
@@ -84,18 +84,18 @@ define void @setSmallPos() {
 ; CHECK-LABEL: setSmallPos:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
-; CHECK-NEXT:    addis 4, 2, .LC1@toc@ha
-; CHECK-NEXT:    addis 5, 2, .LC2@toc@ha
-; CHECK-NEXT:    addis 6, 2, .LC3@toc@ha
-; CHECK-NEXT:    li 7, 8
+; CHECK-NEXT:    li 4, 8
 ; CHECK-NEXT:    ld 3, .LC0@toc@l(3)
-; CHECK-NEXT:    ld 4, .LC1@toc@l(4)
-; CHECK-NEXT:    ld 5, .LC2@toc@l(5)
-; CHECK-NEXT:    ld 6, .LC3@toc@l(6)
-; CHECK-NEXT:    stb 7, 0(3)
-; CHECK-NEXT:    sth 7, 0(4)
-; CHECK-NEXT:    std 7, 0(6)
-; CHECK-NEXT:    stw 7, 0(5)
+; CHECK-NEXT:    stb 4, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC1@toc@ha
+; CHECK-NEXT:    ld 3, .LC1@toc@l(3)
+; CHECK-NEXT:    sth 4, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC2@toc@ha
+; CHECK-NEXT:    ld 3, .LC2@toc@l(3)
+; CHECK-NEXT:    stw 4, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC3@toc@ha
+; CHECK-NEXT:    ld 3, .LC3@toc@l(3)
+; CHECK-NEXT:    std 4, 0(3)
 ; CHECK-NEXT:    blr
 entry:
   store i8 8, ptr @CVal, align 1
@@ -110,15 +110,15 @@ define void @setMaxNeg() {
 ; CHECK-LABEL: setMaxNeg:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LC1@toc@ha
-; CHECK-NEXT:    addis 4, 2, .LC2@toc@ha
-; CHECK-NEXT:    addis 5, 2, .LC3@toc@ha
-; CHECK-NEXT:    li 6, -32768
+; CHECK-NEXT:    li 4, -32768
 ; CHECK-NEXT:    ld 3, .LC1@toc@l(3)
-; CHECK-NEXT:    ld 4, .LC2@toc@l(4)
-; CHECK-NEXT:    ld 5, .LC3@toc@l(5)
-; CHECK-NEXT:    sth 6, 0(3)
-; CHECK-NEXT:    stw 6, 0(4)
-; CHECK-NEXT:    std 6, 0(5)
+; CHECK-NEXT:    sth 4, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC2@toc@ha
+; CHECK-NEXT:    ld 3, .LC2@toc@l(3)
+; CHECK-NEXT:    stw 4, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC3@toc@ha
+; CHECK-NEXT:    ld 3, .LC3@toc@l(3)
+; CHECK-NEXT:    std 4, 0(3)
 ; CHECK-NEXT:    blr
 entry:
   store i16 -32768, ptr @SVal, align 2
@@ -132,15 +132,15 @@ define void @setMaxPos() {
 ; CHECK-LABEL: setMaxPos:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LC1@toc@ha
-; CHECK-NEXT:    addis 4, 2, .LC2@toc@ha
-; CHECK-NEXT:    addis 5, 2, .LC3@toc@ha
-; CHECK-NEXT:    li 6, 32767
+; CHECK-NEXT:    li 4, 32767
 ; CHECK-NEXT:    ld 3, .LC1@toc@l(3)
-; CHECK-NEXT:    ld 4, .LC2@toc@l(4)
-; CHECK-NEXT:    ld 5, .LC3@toc@l(5)
-; CHECK-NEXT:    sth 6, 0(3)
-; CHECK-NEXT:    stw 6, 0(4)
-; CHECK-NEXT:    std 6, 0(5)
+; CHECK-NEXT:    sth 4, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC2@toc@ha
+; CHECK-NEXT:    ld 3, .LC2@toc@l(3)
+; CHECK-NEXT:    stw 4, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC3@toc@ha
+; CHECK-NEXT:    ld 3, .LC3@toc@l(3)
+; CHECK-NEXT:    std 4, 0(3)
 ; CHECK-NEXT:    blr
 entry:
   store i16 32767, ptr @SVal, align 2
@@ -154,13 +154,13 @@ define void @setExcessiveNeg() {
 ; CHECK-LABEL: setExcessiveNeg:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LC2@toc@ha
-; CHECK-NEXT:    addis 4, 2, .LC3@toc@ha
-; CHECK-NEXT:    lis 5, -1
+; CHECK-NEXT:    lis 4, -1
 ; CHECK-NEXT:    ld 3, .LC2@toc@l(3)
-; CHECK-NEXT:    ld 4, .LC3@toc@l(4)
-; CHECK-NEXT:    ori 5, 5, 32767
-; CHECK-NEXT:    stw 5, 0(3)
-; CHECK-NEXT:    std 5, 0(4)
+; CHECK-NEXT:    ori 4, 4, 32767
+; CHECK-NEXT:    stw 4, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC3@toc@ha
+; CHECK-NEXT:    ld 3, .LC3@toc@l(3)
+; CHECK-NEXT:    std 4, 0(3)
 ; CHECK-NEXT:    blr
 entry:
   store i32 -32769, ptr @IVal, align 4
@@ -173,16 +173,16 @@ define void @setExcessivePos() {
 ; CHECK-LABEL: setExcessivePos:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LC4@toc@ha
-; CHECK-NEXT:    addis 4, 2, .LC2@toc@ha
-; CHECK-NEXT:    addis 5, 2, .LC3@toc@ha
-; CHECK-NEXT:    li 6, 0
+; CHECK-NEXT:    li 4, 0
 ; CHECK-NEXT:    ld 3, .LC4@toc@l(3)
-; CHECK-NEXT:    ld 4, .LC2@toc@l(4)
-; CHECK-NEXT:    ld 5, .LC3@toc@l(5)
-; CHECK-NEXT:    ori 6, 6, 32768
-; CHECK-NEXT:    sth 6, 0(3)
-; CHECK-NEXT:    stw 6, 0(4)
-; CHECK-NEXT:    std 6, 0(5)
+; CHECK-NEXT:    ori 4, 4, 32768
+; CHECK-NEXT:    sth 4, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC2@toc@ha
+; CHECK-NEXT:    ld 3, .LC2@toc@l(3)
+; CHECK-NEXT:    stw 4, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC3@toc@ha
+; CHECK-NEXT:    ld 3, .LC3@toc@l(3)
+; CHECK-NEXT:    std 4, 0(3)
 ; CHECK-NEXT:    blr
 entry:
   store i16 -32768, ptr @USVal, align 2
@@ -244,12 +244,12 @@ define void @setSameValDiffSizeCI() {
 ; CHECK-LABEL: setSameValDiffSizeCI:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LC2@toc@ha
-; CHECK-NEXT:    addis 4, 2, .LC0@toc@ha
-; CHECK-NEXT:    li 5, 255
+; CHECK-NEXT:    li 4, 255
 ; CHECK-NEXT:    ld 3, .LC2@toc@l(3)
-; CHECK-NEXT:    ld 4, .LC0@toc@l(4)
-; CHECK-NEXT:    stw 5, 0(3)
-; CHECK-NEXT:    stb 5, 0(4)
+; CHECK-NEXT:    stw 4, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:    ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:    stb 4, 0(3)
 ; CHECK-NEXT:    blr
 entry:
   store i32 255, ptr @IVal, align 4
@@ -261,13 +261,13 @@ define void @setSameValDiffSizeSI() {
 ; CHECK-LABEL: setSameValDiffSizeSI:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LC2@toc@ha
-; CHECK-NEXT:    addis 4, 2, .LC1@toc@ha
-; CHECK-NEXT:    li 5, 0
+; CHECK-NEXT:    li 4, 0
 ; CHECK-NEXT:    ld 3, .LC2@toc@l(3)
-; CHECK-NEXT:    ld 4, .LC1@toc@l(4)
-; CHECK-NEXT:    ori 5, 5, 65535
-; CHECK-NEXT:    stw 5, 0(3)
-; CHECK-NEXT:    sth 5, 0(4)
+; CHECK-NEXT:    ori 4, 4, 65535
+; CHECK-NEXT:    stw 4, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC1@toc@ha
+; CHECK-NEXT:    ld 3, .LC1@toc@l(3)
+; CHECK-NEXT:    sth 4, 0(3)
 ; CHECK-NEXT:    blr
 entry:
   store i32 65535, ptr @IVal, align 4

diff  --git a/llvm/test/CodeGen/PowerPC/stwu-sched.ll b/llvm/test/CodeGen/PowerPC/stwu-sched.ll
index 1f4a9246e77c71b..4cf07c396d48c76 100644
--- a/llvm/test/CodeGen/PowerPC/stwu-sched.ll
+++ b/llvm/test/CodeGen/PowerPC/stwu-sched.ll
@@ -15,8 +15,8 @@ define void @initCombList(ptr nocapture, i32 signext) local_unnamed_addr #0 {
 ; CHECK: stwu [[REG:[0-9]+]], 64(3)
 
 ; CHECK-ITIN-LABEL: initCombList:
-; CHECK-ITIN: stwu [[REG:[0-9]+]], 64(3)
-; CHECK-ITIN-NEXT:   addi [[REG2:[0-9]+]], [[REG2]], 8
+; CHECK-ITIN:   addi [[REG2:[0-9]+]], [[REG2]], 8
+; CHECK-ITIN-NEXT: stwu [[REG:[0-9]+]], 64(3)
 
 
   %3 = zext i32 %1 to i64

diff  --git a/llvm/test/CodeGen/PowerPC/swap-reduction.ll b/llvm/test/CodeGen/PowerPC/swap-reduction.ll
index 1152594e37e68d2..fbd18d940921b53 100644
--- a/llvm/test/CodeGen/PowerPC/swap-reduction.ll
+++ b/llvm/test/CodeGen/PowerPC/swap-reduction.ll
@@ -4,14 +4,14 @@
 define i64 @test1(ptr %a, ptr %b) {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mr 5, 3
-; CHECK-NEXT:    ld 3, 0(3)
-; CHECK-NEXT:    ld 4, 0(4)
-; CHECK-NEXT:    mtvsrd 34, 3
-; CHECK-NEXT:    add 3, 3, 4
-; CHECK-NEXT:    mtvsrd 35, 4
+; CHECK-NEXT:    ld 5, 0(3)
+; CHECK-NEXT:    ld 6, 0(4)
+; CHECK-NEXT:    mtvsrd 34, 5
+; CHECK-NEXT:    mtvsrd 35, 6
+; CHECK-NEXT:    add 4, 5, 6
 ; CHECK-NEXT:    vavgsb 2, 2, 3
-; CHECK-NEXT:    stxsdx 34, 0, 5
+; CHECK-NEXT:    stxsdx 34, 0, 3
+; CHECK-NEXT:    mr 3, 4
 ; CHECK-NEXT:    blr
 entry:
   %lhs = load i64, ptr %a, align 8
@@ -31,14 +31,14 @@ entry:
 define i64 @test2(ptr %a, ptr %b) {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mr 5, 3
-; CHECK-NEXT:    ld 3, 0(3)
-; CHECK-NEXT:    ld 4, 0(4)
-; CHECK-NEXT:    mtvsrd 34, 3
-; CHECK-NEXT:    add 3, 3, 4
-; CHECK-NEXT:    mtvsrd 35, 4
+; CHECK-NEXT:    ld 5, 0(3)
+; CHECK-NEXT:    ld 6, 0(4)
+; CHECK-NEXT:    mtvsrd 34, 5
+; CHECK-NEXT:    mtvsrd 35, 6
+; CHECK-NEXT:    add 4, 5, 6
 ; CHECK-NEXT:    vadduhm 2, 2, 3
-; CHECK-NEXT:    stxsdx 34, 0, 5
+; CHECK-NEXT:    stxsdx 34, 0, 3
+; CHECK-NEXT:    mr 3, 4
 ; CHECK-NEXT:    blr
 entry:
   %lhs = load i64, ptr %a, align 8
@@ -60,10 +60,10 @@ define signext i16 @vecop_uses(ptr %addr) {
 ; CHECK-LABEL: vecop_uses:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    li 4, 16
-; CHECK-NEXT:    lxvd2x 1, 0, 3
 ; CHECK-NEXT:    lxvd2x 0, 3, 4
-; CHECK-NEXT:    xxswapd 35, 1
 ; CHECK-NEXT:    xxswapd 34, 0
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vminsh 2, 3, 2
 ; CHECK-NEXT:    xxswapd 35, 34
 ; CHECK-NEXT:    vminsh 2, 2, 3
@@ -86,15 +86,15 @@ define signext i32 @vecop_uses2(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: vecop_uses2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lxvd2x 0, 0, 3
-; CHECK-NEXT:    lxvd2x 1, 0, 4
 ; CHECK-NEXT:    xxswapd 34, 0
-; CHECK-NEXT:    xxswapd 35, 1
+; CHECK-NEXT:    lxvd2x 0, 0, 4
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    xxsldwi 0, 34, 34, 3
 ; CHECK-NEXT:    vmuluwm 2, 3, 2
 ; CHECK-NEXT:    mffprwz 3, 0
-; CHECK-NEXT:    xxswapd 0, 34
 ; CHECK-NEXT:    extsw 3, 3
-; CHECK-NEXT:    stxvd2x 0, 0, 5
+; CHECK-NEXT:    xxswapd 1, 34
+; CHECK-NEXT:    stxvd2x 1, 0, 5
 ; CHECK-NEXT:    blr
 entry:
   %0 = load <4 x i32>, ptr %a, align 4

diff  --git a/llvm/test/CodeGen/PowerPC/swaps-le-5.ll b/llvm/test/CodeGen/PowerPC/swaps-le-5.ll
index 4e650015070085b..98d9a57f0995251 100644
--- a/llvm/test/CodeGen/PowerPC/swaps-le-5.ll
+++ b/llvm/test/CodeGen/PowerPC/swaps-le-5.ll
@@ -15,9 +15,9 @@ entry:
 }
 
 ; CHECK-LABEL: @bar0
-; CHECK-DAG: xxswapd 1, 1
+; CHECK-DAG: xxswapd 0, 1
 ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
-; CHECK: xxmrgld [[REG2:[0-9]+]], 1, [[REG1]]
+; CHECK: xxmrgld [[REG2:[0-9]+]], 0, [[REG1]]
 ; CHECK: stxvd2x [[REG2]]
 ; CHECK-NOT: xxswapd
 
@@ -30,9 +30,9 @@ entry:
 }
 
 ; CHECK-LABEL: @bar1
-; CHECK-DAG: xxswapd 1, 1
+; CHECK-DAG: xxswapd 0, 1
 ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
-; CHECK: xxpermdi [[REG2:[0-9]+]], [[REG1]], 1, 1
+; CHECK: xxpermdi [[REG2:[0-9]+]], [[REG1]], 0, 1
 ; CHECK: stxvd2x [[REG2]]
 ; CHECK-NOT: xxswapd
 

diff  --git a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
index 5092b4ac78abac1..9bbebd32718b56f 100644
--- a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
+++ b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
@@ -24,14 +24,14 @@ define void @bar0() {
 ; CHECK-LABEL: bar0:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
-; CHECK-NEXT:    addis r4, r2, .LC1@toc@ha
 ; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
 ; CHECK-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-NEXT:    ld r3, .LC1@toc@l(r4)
-; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    addis r3, r2, .LC1@toc@ha
+; CHECK-NEXT:    ld r3, .LC1@toc@l(r3)
 ; CHECK-NEXT:    lfd f1, 0(r3)
 ; CHECK-NEXT:    addis r3, r2, .LC2@toc@ha
 ; CHECK-NEXT:    ld r3, .LC2@toc@l(r3)
+; CHECK-NEXT:    xxswapd vs0, vs0
 ; CHECK-NEXT:    xxmrghd vs0, vs0, vs1
 ; CHECK-NEXT:    xxswapd vs0, vs0
 ; CHECK-NEXT:    stxvd2x vs0, 0, r3
@@ -78,14 +78,14 @@ define void @bar1() {
 ; CHECK-LABEL: bar1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
-; CHECK-NEXT:    addis r4, r2, .LC1@toc@ha
 ; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
 ; CHECK-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-NEXT:    ld r3, .LC1@toc@l(r4)
-; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    addis r3, r2, .LC1@toc@ha
+; CHECK-NEXT:    ld r3, .LC1@toc@l(r3)
 ; CHECK-NEXT:    lfd f1, 0(r3)
 ; CHECK-NEXT:    addis r3, r2, .LC2@toc@ha
 ; CHECK-NEXT:    ld r3, .LC2@toc@l(r3)
+; CHECK-NEXT:    xxswapd vs0, vs0
 ; CHECK-NEXT:    xxpermdi vs0, vs1, vs0, 1
 ; CHECK-NEXT:    xxswapd vs0, vs0
 ; CHECK-NEXT:    stxvd2x vs0, 0, r3

diff  --git a/llvm/test/CodeGen/PowerPC/swaps-le-7.ll b/llvm/test/CodeGen/PowerPC/swaps-le-7.ll
index c5d16f1f08e5016..da282463b066343 100644
--- a/llvm/test/CodeGen/PowerPC/swaps-le-7.ll
+++ b/llvm/test/CodeGen/PowerPC/swaps-le-7.ll
@@ -10,12 +10,12 @@
 
 ; CHECK-LABEL: @zg
 ; CHECK: lxvdsx
-; CHECK-NEXT: lxvdsx
+; CHECK: xxswapd
 ; CHECK-NEXT: xvmuldp
+; CHECK-NEXT: lxvdsx
 ; CHECK-DAG: xvmuldp
 ; CHECK-DAG: xvsubdp
 ; CHECK-DAG: xvadddp
-; CHECK-DAG: xxswapd
 ; CHECK-DAG: xxpermdi
 ; CHECK-DAG: xvsubdp
 ; CHECK: xxswapd

diff  --git a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
index 288894b8f29c1ab..187626c8cee3aa3 100644
--- a/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
+++ b/llvm/test/CodeGen/PowerPC/test-vector-insert.ll
@@ -41,11 +41,11 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
 ; CHECK-LE-P8-LABEL: test:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-LE-P8-NEXT:    xscvdpsxws v3, f1
+; CHECK-LE-P8-NEXT:    xscvdpsxws v4, f1
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI0_0@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test:
@@ -71,11 +71,11 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
 ;
 ; CHECK-BE-P8-LABEL: test:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    xscvdpsxws v3, f1
 ; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-BE-P8-NEXT:    xscvdpsxws v4, f1
 ; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test:
@@ -110,11 +110,11 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
 ; CHECK-LE-P8-LABEL: test2:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-LE-P8-NEXT:    xscvdpsxws v3, f1
+; CHECK-LE-P8-NEXT:    xscvdpsxws v4, f1
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI1_0@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test2:
@@ -140,11 +140,11 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
 ;
 ; CHECK-BE-P8-LABEL: test2:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    xscvdpsxws v3, f1
 ; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-BE-P8-NEXT:    xscvdpsxws v4, f1
 ; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test2:
@@ -179,11 +179,11 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
 ; CHECK-LE-P8-LABEL: test3:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-LE-P8-NEXT:    xscvdpuxws v3, f1
+; CHECK-LE-P8-NEXT:    xscvdpuxws v4, f1
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI2_0@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test3:
@@ -209,11 +209,11 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
 ;
 ; CHECK-BE-P8-LABEL: test3:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    xscvdpuxws v3, f1
 ; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-BE-P8-NEXT:    xscvdpuxws v4, f1
 ; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test3:
@@ -248,11 +248,11 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
 ; CHECK-LE-P8-LABEL: test4:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-LE-P8-NEXT:    xscvdpuxws v3, f1
+; CHECK-LE-P8-NEXT:    xscvdpuxws v4, f1
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI3_0@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test4:
@@ -278,11 +278,11 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
 ;
 ; CHECK-BE-P8-LABEL: test4:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    xscvdpuxws v3, f1
 ; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-BE-P8-NEXT:    xscvdpuxws v4, f1
 ; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
-; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test4:

diff  --git a/llvm/test/CodeGen/PowerPC/testBitReverse.ll b/llvm/test/CodeGen/PowerPC/testBitReverse.ll
index a7c71cd136bbcaf..b3d5a5273717a10 100644
--- a/llvm/test/CodeGen/PowerPC/testBitReverse.ll
+++ b/llvm/test/CodeGen/PowerPC/testBitReverse.ll
@@ -41,30 +41,30 @@ define i32 @testBitReverseIntrinsicI32(i32 %arg) {
 ; CHECK-LABEL: testBitReverseIntrinsicI32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    lis 4, -21846
-; CHECK-NEXT:    lis 5, 21845
-; CHECK-NEXT:    slwi 6, 3, 1
+; CHECK-NEXT:    slwi 5, 3, 1
 ; CHECK-NEXT:    srwi 3, 3, 1
 ; CHECK-NEXT:    ori 4, 4, 43690
+; CHECK-NEXT:    and 4, 5, 4
+; CHECK-NEXT:    lis 5, 21845
 ; CHECK-NEXT:    ori 5, 5, 21845
-; CHECK-NEXT:    and 4, 6, 4
 ; CHECK-NEXT:    and 3, 3, 5
-; CHECK-NEXT:    lis 5, 13107
+; CHECK-NEXT:    lis 5, -13108
 ; CHECK-NEXT:    or 3, 3, 4
-; CHECK-NEXT:    lis 4, -13108
-; CHECK-NEXT:    ori 5, 5, 13107
-; CHECK-NEXT:    slwi 6, 3, 2
-; CHECK-NEXT:    ori 4, 4, 52428
+; CHECK-NEXT:    ori 5, 5, 52428
+; CHECK-NEXT:    slwi 4, 3, 2
 ; CHECK-NEXT:    srwi 3, 3, 2
-; CHECK-NEXT:    and 4, 6, 4
+; CHECK-NEXT:    and 4, 4, 5
+; CHECK-NEXT:    lis 5, 13107
+; CHECK-NEXT:    ori 5, 5, 13107
 ; CHECK-NEXT:    and 3, 3, 5
-; CHECK-NEXT:    lis 5, 3855
+; CHECK-NEXT:    lis 5, -3856
 ; CHECK-NEXT:    or 3, 3, 4
-; CHECK-NEXT:    lis 4, -3856
-; CHECK-NEXT:    ori 5, 5, 3855
-; CHECK-NEXT:    slwi 6, 3, 4
-; CHECK-NEXT:    ori 4, 4, 61680
+; CHECK-NEXT:    ori 5, 5, 61680
+; CHECK-NEXT:    slwi 4, 3, 4
 ; CHECK-NEXT:    srwi 3, 3, 4
-; CHECK-NEXT:    and 4, 6, 4
+; CHECK-NEXT:    and 4, 4, 5
+; CHECK-NEXT:    lis 5, 3855
+; CHECK-NEXT:    ori 5, 5, 3855
 ; CHECK-NEXT:    and 3, 3, 5
 ; CHECK-NEXT:    or 3, 3, 4
 ; CHECK-NEXT:    rotlwi 4, 3, 24
@@ -133,59 +133,59 @@ define i64 @testBitReverseIntrinsicI64(i64 %arg) {
 ; CHECK-LABEL: testBitReverseIntrinsicI64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    lis 4, -21846
-; CHECK-NEXT:    lis 5, 21845
-; CHECK-NEXT:    lis 7, -13108
-; CHECK-NEXT:    lis 8, 13107
+; CHECK-NEXT:    sldi 5, 3, 1
+; CHECK-NEXT:    rldicl 3, 3, 63, 1
 ; CHECK-NEXT:    ori 4, 4, 43690
-; CHECK-NEXT:    ori 5, 5, 21845
-; CHECK-NEXT:    ori 7, 7, 52428
-; CHECK-NEXT:    ori 8, 8, 13107
 ; CHECK-NEXT:    sldi 4, 4, 32
-; CHECK-NEXT:    sldi 5, 5, 32
 ; CHECK-NEXT:    oris 4, 4, 43690
-; CHECK-NEXT:    oris 5, 5, 21845
-; CHECK-NEXT:    sldi 6, 3, 1
-; CHECK-NEXT:    rldicl 3, 3, 63, 1
 ; CHECK-NEXT:    ori 4, 4, 43690
+; CHECK-NEXT:    and 4, 5, 4
+; CHECK-NEXT:    lis 5, 21845
+; CHECK-NEXT:    ori 5, 5, 21845
+; CHECK-NEXT:    sldi 5, 5, 32
+; CHECK-NEXT:    oris 5, 5, 21845
 ; CHECK-NEXT:    ori 5, 5, 21845
-; CHECK-NEXT:    sldi 7, 7, 32
-; CHECK-NEXT:    sldi 8, 8, 32
-; CHECK-NEXT:    and 4, 6, 4
 ; CHECK-NEXT:    and 3, 3, 5
-; CHECK-NEXT:    lis 5, -3856
-; CHECK-NEXT:    oris 6, 7, 52428
-; CHECK-NEXT:    oris 7, 8, 13107
+; CHECK-NEXT:    lis 5, -13108
+; CHECK-NEXT:    ori 5, 5, 52428
 ; CHECK-NEXT:    or 3, 3, 4
-; CHECK-NEXT:    lis 4, 3855
-; CHECK-NEXT:    ori 5, 5, 61680
-; CHECK-NEXT:    ori 6, 6, 52428
-; CHECK-NEXT:    ori 7, 7, 13107
-; CHECK-NEXT:    ori 4, 4, 3855
-; CHECK-NEXT:    sldi 8, 3, 2
+; CHECK-NEXT:    sldi 5, 5, 32
+; CHECK-NEXT:    sldi 4, 3, 2
 ; CHECK-NEXT:    rldicl 3, 3, 62, 2
-; CHECK-NEXT:    and 6, 8, 6
-; CHECK-NEXT:    and 3, 3, 7
+; CHECK-NEXT:    oris 5, 5, 52428
+; CHECK-NEXT:    ori 5, 5, 52428
+; CHECK-NEXT:    and 4, 4, 5
+; CHECK-NEXT:    lis 5, 13107
+; CHECK-NEXT:    ori 5, 5, 13107
 ; CHECK-NEXT:    sldi 5, 5, 32
-; CHECK-NEXT:    sldi 4, 4, 32
-; CHECK-NEXT:    or 3, 3, 6
-; CHECK-NEXT:    oris 5, 5, 61680
-; CHECK-NEXT:    oris 4, 4, 3855
-; CHECK-NEXT:    sldi 6, 3, 4
+; CHECK-NEXT:    oris 5, 5, 13107
+; CHECK-NEXT:    ori 5, 5, 13107
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    lis 5, -3856
 ; CHECK-NEXT:    ori 5, 5, 61680
-; CHECK-NEXT:    ori 4, 4, 3855
+; CHECK-NEXT:    or 3, 3, 4
+; CHECK-NEXT:    sldi 5, 5, 32
+; CHECK-NEXT:    sldi 4, 3, 4
 ; CHECK-NEXT:    rldicl 3, 3, 60, 4
-; CHECK-NEXT:    and 5, 6, 5
-; CHECK-NEXT:    and 3, 3, 4
-; CHECK-NEXT:    or 3, 3, 5
+; CHECK-NEXT:    oris 5, 5, 61680
+; CHECK-NEXT:    ori 5, 5, 61680
+; CHECK-NEXT:    and 4, 4, 5
+; CHECK-NEXT:    lis 5, 3855
+; CHECK-NEXT:    ori 5, 5, 3855
+; CHECK-NEXT:    sldi 5, 5, 32
+; CHECK-NEXT:    oris 5, 5, 3855
+; CHECK-NEXT:    ori 5, 5, 3855
+; CHECK-NEXT:    and 3, 3, 5
+; CHECK-NEXT:    or 3, 3, 4
 ; CHECK-NEXT:    rldicl 4, 3, 32, 32
-; CHECK-NEXT:    rotlwi 5, 3, 24
-; CHECK-NEXT:    rotlwi 6, 4, 24
-; CHECK-NEXT:    rlwimi 5, 3, 8, 8, 15
-; CHECK-NEXT:    rlwimi 5, 3, 8, 24, 31
-; CHECK-NEXT:    rlwimi 6, 4, 8, 8, 15
-; CHECK-NEXT:    rlwimi 6, 4, 8, 24, 31
-; CHECK-NEXT:    sldi 3, 5, 32
-; CHECK-NEXT:    or 3, 3, 6
+; CHECK-NEXT:    rotlwi 5, 4, 24
+; CHECK-NEXT:    rlwimi 5, 4, 8, 8, 15
+; CHECK-NEXT:    rlwimi 5, 4, 8, 24, 31
+; CHECK-NEXT:    rotlwi 4, 3, 24
+; CHECK-NEXT:    rlwimi 4, 3, 8, 8, 15
+; CHECK-NEXT:    rlwimi 4, 3, 8, 24, 31
+; CHECK-NEXT:    sldi 3, 4, 32
+; CHECK-NEXT:    or 3, 3, 5
 ; CHECK-NEXT:    blr
   %res = call i64 @llvm.bitreverse.i64(i64 %arg)
   ret i64 %res

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
index 14c5eaa5532398e..9d62a1c0b3d90c5 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
@@ -31,14 +31,14 @@ define dso_local i32 @testCompare1(ptr nocapture readonly %arg1) nounwind {
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -112(r1)
-; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    addis r4, r2, testCompare1@toc@ha
-; BE-NEXT:    lbz r4, testCompare1@toc@l(r4)
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    lbz r3, 0(r3)
-; BE-NEXT:    clrlwi r4, r4, 31
+; BE-NEXT:    lbz r4, testCompare1@toc@l(r4)
 ; BE-NEXT:    clrlwi r3, r3, 31
-; BE-NEXT:    clrldi r4, r4, 32
+; BE-NEXT:    clrlwi r4, r4, 31
 ; BE-NEXT:    clrldi r3, r3, 32
+; BE-NEXT:    clrldi r4, r4, 32
 ; BE-NEXT:    sub r3, r3, r4
 ; BE-NEXT:    rldicl r3, r3, 1, 63
 ; BE-NEXT:    bl fn2
@@ -52,14 +52,14 @@ define dso_local i32 @testCompare1(ptr nocapture readonly %arg1) nounwind {
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    mflr r0
 ; LE-NEXT:    stdu r1, -32(r1)
-; LE-NEXT:    std r0, 48(r1)
 ; LE-NEXT:    addis r4, r2, testCompare1@toc@ha
-; LE-NEXT:    lbz r4, testCompare1@toc@l(r4)
+; LE-NEXT:    std r0, 48(r1)
 ; LE-NEXT:    lbz r3, 0(r3)
-; LE-NEXT:    clrlwi r4, r4, 31
+; LE-NEXT:    lbz r4, testCompare1@toc@l(r4)
 ; LE-NEXT:    clrlwi r3, r3, 31
-; LE-NEXT:    clrldi r4, r4, 32
+; LE-NEXT:    clrlwi r4, r4, 31
 ; LE-NEXT:    clrldi r3, r3, 32
+; LE-NEXT:    clrldi r4, r4, 32
 ; LE-NEXT:    sub r3, r3, r4
 ; LE-NEXT:    rldicl r3, r3, 1, 63
 ; LE-NEXT:    bl fn2

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
index ea0997674630daa..6b864ecad1ade6e 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
@@ -33,14 +33,14 @@ define dso_local i32 @testCompare1(ptr nocapture readonly %arg1) nounwind {
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -112(r1)
-; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    addis r4, r2, testCompare1@toc@ha
-; BE-NEXT:    lbz r4, testCompare1@toc@l(r4)
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    lbz r3, 0(r3)
-; BE-NEXT:    clrlwi r4, r4, 31
+; BE-NEXT:    lbz r4, testCompare1@toc@l(r4)
 ; BE-NEXT:    clrlwi r3, r3, 31
-; BE-NEXT:    clrldi r4, r4, 32
+; BE-NEXT:    clrlwi r4, r4, 31
 ; BE-NEXT:    clrldi r3, r3, 32
+; BE-NEXT:    clrldi r4, r4, 32
 ; BE-NEXT:    sub r3, r4, r3
 ; BE-NEXT:    rldicl r3, r3, 1, 63
 ; BE-NEXT:    bl fn2
@@ -54,14 +54,14 @@ define dso_local i32 @testCompare1(ptr nocapture readonly %arg1) nounwind {
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    mflr r0
 ; LE-NEXT:    stdu r1, -32(r1)
-; LE-NEXT:    std r0, 48(r1)
 ; LE-NEXT:    addis r4, r2, testCompare1@toc@ha
-; LE-NEXT:    lbz r4, testCompare1@toc@l(r4)
+; LE-NEXT:    std r0, 48(r1)
 ; LE-NEXT:    lbz r3, 0(r3)
-; LE-NEXT:    clrlwi r4, r4, 31
+; LE-NEXT:    lbz r4, testCompare1@toc@l(r4)
 ; LE-NEXT:    clrlwi r3, r3, 31
-; LE-NEXT:    clrldi r4, r4, 32
+; LE-NEXT:    clrlwi r4, r4, 31
 ; LE-NEXT:    clrldi r3, r3, 32
+; LE-NEXT:    clrldi r4, r4, 32
 ; LE-NEXT:    sub r3, r4, r3
 ; LE-NEXT:    rldicl r3, r3, 1, 63
 ; LE-NEXT:    bl fn2

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesieqsc.ll b/llvm/test/CodeGen/PowerPC/testComparesieqsc.ll
index cabaf81e87bd422..4b11638fdd1c13d 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesieqsc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesieqsc.ll
@@ -130,19 +130,19 @@ define dso_local void @test_ieqsc_store(i8 signext %a, i8 signext %b) {
 ; CHECK-BE-LABEL: test_ieqsc_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
-; CHECK-BE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ieqsc_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
-; CHECK-LE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i8 %a, %b
@@ -165,21 +165,21 @@ define dso_local void @test_ieqsc_sext_store(i8 signext %a, i8 signext %b) {
 ; CHECK-BE-LABEL: test_ieqsc_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ieqsc_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesieqsi.ll b/llvm/test/CodeGen/PowerPC/testComparesieqsi.ll
index d8cccd39f7f8dbb..79b85475f9c2e88 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesieqsi.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesieqsi.ll
@@ -130,19 +130,19 @@ define dso_local void @test_ieqsi_store(i32 signext %a, i32 signext %b) {
 ; CHECK-BE-LABEL: test_ieqsi_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
-; CHECK-BE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ieqsi_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
-; CHECK-LE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i32 %a, %b
@@ -165,21 +165,21 @@ define dso_local void @test_ieqsi_sext_store(i32 signext %a, i32 signext %b) {
 ; CHECK-BE-LABEL: test_ieqsi_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ieqsi_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesieqsll.ll b/llvm/test/CodeGen/PowerPC/testComparesieqsll.ll
index 2999231e7510519..414ff5e5e421317 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesieqsll.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesieqsll.ll
@@ -124,19 +124,19 @@ define dso_local void @test_ieqsll_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_ieqsll_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzd r3, r3
 ; CHECK-BE-NEXT:    rldicl r3, r3, 58, 63
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ieqsll_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzd r3, r3
 ; CHECK-LE-NEXT:    rldicl r3, r3, 58, 63
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i64 %a, %b
@@ -158,19 +158,19 @@ define dso_local void @test_ieqsll_sext_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_ieqsll_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    addic r3, r3, -1
 ; CHECK-BE-NEXT:    subfe r3, r3, r3
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ieqsll_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    addic r3, r3, -1
 ; CHECK-LE-NEXT:    subfe r3, r3, r3
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i64 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesieqss.ll b/llvm/test/CodeGen/PowerPC/testComparesieqss.ll
index d1ff694594f89c1..4d565b5a539d274 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesieqss.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesieqss.ll
@@ -130,19 +130,19 @@ define dso_local void @test_ieqss_store(i16 signext %a, i16 signext %b) {
 ; CHECK-BE-LABEL: test_ieqss_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
-; CHECK-BE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ieqss_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
-; CHECK-LE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i16 %a, %b
@@ -165,21 +165,21 @@ define dso_local void @test_ieqss_sext_store(i16 signext %a, i16 signext %b) {
 ; CHECK-BE-LABEL: test_ieqss_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ieqss_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesiequc.ll b/llvm/test/CodeGen/PowerPC/testComparesiequc.ll
index e8c53242234ab63..6e799716fe44dcb 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesiequc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesiequc.ll
@@ -130,19 +130,19 @@ define dso_local void @test_iequc_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-BE-LABEL: test_iequc_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
-; CHECK-BE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_iequc_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
-; CHECK-LE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i8 %a, %b
@@ -165,21 +165,21 @@ define dso_local void @test_iequc_sext_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-BE-LABEL: test_iequc_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_iequc_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesiequi.ll b/llvm/test/CodeGen/PowerPC/testComparesiequi.ll
index 4a0ac17a2be55cf..a8ea8b5d942b2b5 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesiequi.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesiequi.ll
@@ -130,19 +130,19 @@ define dso_local void @test_iequi_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-BE-LABEL: test_iequi_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
-; CHECK-BE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_iequi_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
-; CHECK-LE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i32 %a, %b
@@ -165,21 +165,21 @@ define dso_local void @test_iequi_sext_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-BE-LABEL: test_iequi_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_iequi_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesiequll.ll b/llvm/test/CodeGen/PowerPC/testComparesiequll.ll
index 9e21682b969ee28..9cf07e9c51cf105 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesiequll.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesiequll.ll
@@ -124,19 +124,19 @@ define dso_local void @test_iequll_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_iequll_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    cntlzd r3, r3
 ; CHECK-BE-NEXT:    rldicl r3, r3, 58, 63
-; CHECK-BE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_iequll_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    cntlzd r3, r3
 ; CHECK-LE-NEXT:    rldicl r3, r3, 58, 63
-; CHECK-LE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i64 %a, %b
@@ -158,19 +158,19 @@ define dso_local void @test_iequll_sext_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_iequll_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    addic r3, r3, -1
 ; CHECK-BE-NEXT:    subfe r3, r3, r3
-; CHECK-BE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_iequll_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    addic r3, r3, -1
 ; CHECK-LE-NEXT:    subfe r3, r3, r3
-; CHECK-LE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i64 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesiequs.ll b/llvm/test/CodeGen/PowerPC/testComparesiequs.ll
index 26df66cb73d6a91..c8025880eb0ec33 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesiequs.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesiequs.ll
@@ -130,19 +130,19 @@ define dso_local void @test_iequs_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-BE-LABEL: test_iequs_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
-; CHECK-BE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_iequs_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
-; CHECK-LE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i16 %a, %b
@@ -165,21 +165,21 @@ define dso_local void @test_iequs_sext_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-BE-LABEL: test_iequs_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_iequs_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesigesc.ll b/llvm/test/CodeGen/PowerPC/testComparesigesc.ll
index af7402e0073cb1e..816fcf157f4c9d8 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigesc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigesc.ll
@@ -71,19 +71,19 @@ define dso_local void @test_igesc_store(i8 signext %a, i8 signext %b) {
 ; CHECK-BE-LABEL: test_igesc_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_igesc_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i8 %a, %b
@@ -104,19 +104,19 @@ define dso_local void @test_igesc_sext_store(i8 signext %a, i8 signext %b) {
 ; CHECK-BE-LABEL: test_igesc_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    addi r3, r3, -1
-; CHECK-BE-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_igesc_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    addi r3, r3, -1
-; CHECK-LE-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesigesi.ll b/llvm/test/CodeGen/PowerPC/testComparesigesi.ll
index 53535f905f1d9c0..0b74e76fc759613 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigesi.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigesi.ll
@@ -71,19 +71,19 @@ define dso_local void @test_igesi_store(i32 signext %a, i32 signext %b) {
 ; CHECK-BE-LABEL: test_igesi_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_igesi_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i32 %a, %b
@@ -104,19 +104,19 @@ define dso_local void @test_igesi_sext_store(i32 signext %a, i32 signext %b) {
 ; CHECK-BE-LABEL: test_igesi_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    addi r3, r3, -1
-; CHECK-BE-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_igesi_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    addi r3, r3, -1
-; CHECK-LE-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesigesll.ll b/llvm/test/CodeGen/PowerPC/testComparesigesll.ll
index 331a94543657539..4e423a2a3ccd9bd 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigesll.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigesll.ll
@@ -17,18 +17,18 @@ define dso_local signext i32 @test_igesll(i64 %a, i64 %b) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_igesll:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    sradi r5, r3, 63
-; CHECK-BE-NEXT:    rldicl r6, r4, 1, 63
+; CHECK-BE-NEXT:    rldicl r5, r4, 1, 63
+; CHECK-BE-NEXT:    sradi r6, r3, 63
 ; CHECK-BE-NEXT:    subc r3, r3, r4
-; CHECK-BE-NEXT:    adde r3, r5, r6
+; CHECK-BE-NEXT:    adde r3, r6, r5
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_igesll:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    sradi r5, r3, 63
-; CHECK-LE-NEXT:    rldicl r6, r4, 1, 63
+; CHECK-LE-NEXT:    rldicl r5, r4, 1, 63
+; CHECK-LE-NEXT:    sradi r6, r3, 63
 ; CHECK-LE-NEXT:    subc r3, r3, r4
-; CHECK-LE-NEXT:    adde r3, r5, r6
+; CHECK-LE-NEXT:    adde r3, r6, r5
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i64 %a, %b
@@ -47,19 +47,19 @@ define dso_local signext i32 @test_igesll_sext(i64 %a, i64 %b) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_igesll_sext:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    sradi r5, r3, 63
-; CHECK-BE-NEXT:    rldicl r6, r4, 1, 63
+; CHECK-BE-NEXT:    rldicl r5, r4, 1, 63
+; CHECK-BE-NEXT:    sradi r6, r3, 63
 ; CHECK-BE-NEXT:    subc r3, r3, r4
-; CHECK-BE-NEXT:    adde r3, r5, r6
+; CHECK-BE-NEXT:    adde r3, r6, r5
 ; CHECK-BE-NEXT:    neg r3, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_igesll_sext:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    sradi r5, r3, 63
-; CHECK-LE-NEXT:    rldicl r6, r4, 1, 63
+; CHECK-LE-NEXT:    rldicl r5, r4, 1, 63
+; CHECK-LE-NEXT:    sradi r6, r3, 63
 ; CHECK-LE-NEXT:    subc r3, r3, r4
-; CHECK-LE-NEXT:    adde r3, r5, r6
+; CHECK-LE-NEXT:    adde r3, r6, r5
 ; CHECK-LE-NEXT:    neg r3, r3
 ; CHECK-LE-NEXT:    blr
 entry:
@@ -126,22 +126,22 @@ define dso_local void @test_igesll_store(i64 %a, i64 %b) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_igesll_store:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    rldicl r5, r4, 1, 63
 ; CHECK-BE-NEXT:    sradi r6, r3, 63
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    subc r3, r3, r4
-; CHECK-BE-NEXT:    rldicl r3, r4, 1, 63
-; CHECK-BE-NEXT:    adde r3, r6, r3
-; CHECK-BE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
+; CHECK-BE-NEXT:    adde r3, r6, r5
+; CHECK-BE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_igesll_store:
 ; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    rldicl r5, r4, 1, 63
 ; CHECK-LE-NEXT:    sradi r6, r3, 63
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    subc r3, r3, r4
-; CHECK-LE-NEXT:    rldicl r3, r4, 1, 63
-; CHECK-LE-NEXT:    adde r3, r6, r3
-; CHECK-LE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
+; CHECK-LE-NEXT:    adde r3, r6, r5
+; CHECK-LE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i64 %a, %b
@@ -163,24 +163,24 @@ define dso_local void @test_igesll_sext_store(i64 %a, i64 %b) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_igesll_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    rldicl r5, r4, 1, 63
 ; CHECK-BE-NEXT:    sradi r6, r3, 63
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    subc r3, r3, r4
-; CHECK-BE-NEXT:    rldicl r3, r4, 1, 63
-; CHECK-BE-NEXT:    adde r3, r6, r3
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
+; CHECK-BE-NEXT:    adde r3, r6, r5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_igesll_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    rldicl r5, r4, 1, 63
 ; CHECK-LE-NEXT:    sradi r6, r3, 63
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    subc r3, r3, r4
-; CHECK-LE-NEXT:    rldicl r3, r4, 1, 63
-; CHECK-LE-NEXT:    adde r3, r6, r3
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
+; CHECK-LE-NEXT:    adde r3, r6, r5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i64 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesigess.ll b/llvm/test/CodeGen/PowerPC/testComparesigess.ll
index d7bf7d782be2d7a..66847ab36a5ef1b 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigess.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigess.ll
@@ -71,19 +71,19 @@ define dso_local void @test_igess_store(i16 signext %a, i16 signext %b) {
 ; CHECK-BE-LABEL: test_igess_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_igess_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i16 %a, %b
@@ -104,19 +104,19 @@ define dso_local void @test_igess_sext_store(i16 signext %a, i16 signext %b) {
 ; CHECK-BE-LABEL: test_igess_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    addi r3, r3, -1
-; CHECK-BE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_igess_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    addi r3, r3, -1
-; CHECK-LE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesigeuc.ll b/llvm/test/CodeGen/PowerPC/testComparesigeuc.ll
index 14f053baa929c2d..37e589711f2dbb3 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigeuc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigeuc.ll
@@ -66,10 +66,10 @@ define dso_local void @test_igeuc_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_igeuc_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    not r3, r3
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i8 %a, %b
@@ -83,10 +83,10 @@ define dso_local void @test_igeuc_sext_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_igeuc_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-NEXT:    addi r3, r3, -1
-; CHECK-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesigeui.ll b/llvm/test/CodeGen/PowerPC/testComparesigeui.ll
index 6cd8a3d0111d082..fc951124cf2a365 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigeui.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigeui.ll
@@ -65,10 +65,10 @@ define dso_local void @test_igeui_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_igeui_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    not r3, r3
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i32 %a, %b
@@ -82,10 +82,10 @@ define dso_local void @test_igeui_sext_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_igeui_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-NEXT:    addi r3, r3, -1
-; CHECK-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesigeull.ll b/llvm/test/CodeGen/PowerPC/testComparesigeull.ll
index 2fe51d2f15f1f34..e5c6e7974d6e17a 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigeull.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigeull.ll
@@ -65,10 +65,10 @@ define dso_local void @test_igeull_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_igeull_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    subc r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
 ; CHECK-NEXT:    subfe r3, r4, r4
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    addi r3, r3, 1
-; CHECK-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i64 %a, %b
@@ -82,10 +82,10 @@ define dso_local void @test_igeull_sext_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_igeull_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    subc r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
 ; CHECK-NEXT:    subfe r3, r4, r4
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    not r3, r3
-; CHECK-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i64 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesigeus.ll b/llvm/test/CodeGen/PowerPC/testComparesigeus.ll
index 364b40254bb141f..522de14e4798abe 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigeus.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigeus.ll
@@ -65,10 +65,10 @@ define dso_local void @test_igeus_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_igeus_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    not r3, r3
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i16 %a, %b
@@ -82,10 +82,10 @@ define dso_local void @test_igeus_sext_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_igeus_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-NEXT:    addi r3, r3, -1
-; CHECK-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesigtsc.ll b/llvm/test/CodeGen/PowerPC/testComparesigtsc.ll
index 6d7abcebf607c43..9bfffccf63049c5 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigtsc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigtsc.ll
@@ -65,11 +65,11 @@ entry:
 define void @test_igtsc_store(i8 signext %a, i8 signext %b) {
 ; CHECK-LABEL: test_igtsc_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stb r3, 0(r5)
+; CHECK-NEXT:    stb r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp sgt i8 %a, %b
@@ -82,11 +82,11 @@ entry:
 define void @test_igtsc_sext_store(i8 signext %a, i8 signext %b) {
 ; CHECK-LABEL: test_igtsc_sext_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    stb r3, 0(r5)
+; CHECK-NEXT:    stb r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp sgt i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesigtsi.ll b/llvm/test/CodeGen/PowerPC/testComparesigtsi.ll
index 6a900f1dc8e9d4c..8c33c0a84cf1695 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigtsi.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigtsi.ll
@@ -65,11 +65,11 @@ entry:
 define void @test_igtsi_store(i32 signext %a, i32 signext %b) {
 ; CHECK-LABEL: test_igtsi_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stw r3, 0(r5)
+; CHECK-NEXT:    stw r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp sgt i32 %a, %b
@@ -82,11 +82,11 @@ entry:
 define void @test_igtsi_sext_store(i32 signext %a, i32 signext %b) {
 ; CHECK-LABEL: test_igtsi_sext_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    stw r3, 0(r5)
+; CHECK-NEXT:    stw r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp sgt i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesigtsll.ll b/llvm/test/CodeGen/PowerPC/testComparesigtsll.ll
index 27524a8ba4c2002..0af0d86fbed6ece 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigtsll.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigtsll.ll
@@ -74,12 +74,12 @@ entry:
 define void @test_igtsll_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_igtsll_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sradi r6, r4, 63
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
-; CHECK-NEXT:    subc r4, r4, r3
-; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    ld r4, .LC0 at toc@l(r5)
-; CHECK-NEXT:    adde r3, r3, r6
+; CHECK-NEXT:    sradi r5, r4, 63
+; CHECK-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-NEXT:    subc r3, r4, r3
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    adde r3, r6, r5
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    xori r3, r3, 1
 ; CHECK-NEXT:    std r3, 0(r4)
 ; CHECK-NEXT:    blr
@@ -94,12 +94,12 @@ entry:
 define void @test_igtsll_sext_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_igtsll_sext_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sradi r6, r4, 63
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
-; CHECK-NEXT:    subc r4, r4, r3
-; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    adde r3, r3, r6
-; CHECK-NEXT:    ld r4, .LC0 at toc@l(r5)
+; CHECK-NEXT:    sradi r5, r4, 63
+; CHECK-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-NEXT:    subc r3, r4, r3
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    adde r3, r6, r5
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    xori r3, r3, 1
 ; CHECK-NEXT:    neg r3, r3
 ; CHECK-NEXT:    std r3, 0(r4)
@@ -116,10 +116,10 @@ entry:
 define void @test_igtsll_z_store(i64 %a) {
 ; CHECK-LABEL: test_igtsll_z_store:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi r4, r3, -1
+; CHECK-NEXT:    nor r3, r4, r3
 ; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
-; CHECK-NEXT:    addi r5, r3, -1
 ; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
-; CHECK-NEXT:    nor r3, r5, r3
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-NEXT:    std r3, 0(r4)
 ; CHECK-NEXT:    blr
@@ -134,10 +134,10 @@ entry:
 define void @test_igtsll_sext_z_store(i64 %a) {
 ; CHECK-LABEL: test_igtsll_sext_z_store:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi r4, r3, -1
+; CHECK-NEXT:    nor r3, r4, r3
 ; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
-; CHECK-NEXT:    addi r5, r3, -1
 ; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
-; CHECK-NEXT:    nor r3, r5, r3
 ; CHECK-NEXT:    sradi r3, r3, 63
 ; CHECK-NEXT:    std r3, 0(r4)
 ; CHECK-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesigtss.ll b/llvm/test/CodeGen/PowerPC/testComparesigtss.ll
index ffef0f8710f98ca..725fa7004d38727 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigtss.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigtss.ll
@@ -65,11 +65,11 @@ entry:
 define void @test_igtss_store(i16 signext %a, i16 signext %b) {
 ; CHECK-LABEL: test_igtss_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    sth r3, 0(r5)
+; CHECK-NEXT:    sth r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp sgt i16 %a, %b
@@ -82,11 +82,11 @@ entry:
 define void @test_igtss_sext_store(i16 signext %a, i16 signext %b) {
 ; CHECK-LABEL: test_igtss_sext_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    sth r3, 0(r5)
+; CHECK-NEXT:    sth r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp sgt i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesigtuc.ll b/llvm/test/CodeGen/PowerPC/testComparesigtuc.ll
index 56bb6a1a972fad0..75ce15575a6aeff 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigtuc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigtuc.ll
@@ -67,11 +67,11 @@ entry:
 define void @test_igtuc_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_igtuc_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stb r3, 0(r5)
+; CHECK-NEXT:    stb r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ugt i8 %a, %b
@@ -84,11 +84,11 @@ entry:
 define void @test_igtuc_sext_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_igtuc_sext_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    stb r3, 0(r5)
+; CHECK-NEXT:    stb r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ugt i8 %a, %b
@@ -101,8 +101,8 @@ entry:
 define void @test_igtuc_z_store(i8 zeroext %a) {
 ; CHECK-LABEL: test_igtuc_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    srwi r3, r3, 5
 ; CHECK-NEXT:    xori r3, r3, 1
@@ -119,10 +119,10 @@ entry:
 define void @test_igtuc_sext_z_store(i8 zeroext %a) {
 ; CHECK-LABEL: test_igtuc_sext_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    cntlzw r3, r3
-; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
+; CHECK-NEXT:    srwi r3, r3, 5
 ; CHECK-NEXT:    xori r3, r3, 1
 ; CHECK-NEXT:    neg r3, r3
 ; CHECK-NEXT:    stb r3, 0(r4)

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesigtui.ll b/llvm/test/CodeGen/PowerPC/testComparesigtui.ll
index 9cb5dd5a26f32f4..5238420d5445aba 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigtui.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigtui.ll
@@ -67,11 +67,11 @@ entry:
 define void @test_igtui_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_igtui_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stw r3, 0(r5)
+; CHECK-NEXT:    stw r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ugt i32 %a, %b
@@ -84,11 +84,11 @@ entry:
 define void @test_igtui_sext_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_igtui_sext_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    stw r3, 0(r5)
+; CHECK-NEXT:    stw r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ugt i32 %a, %b
@@ -101,8 +101,8 @@ entry:
 define void @test_igtui_z_store(i32 zeroext %a) {
 ; CHECK-LABEL: test_igtui_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    srwi r3, r3, 5
 ; CHECK-NEXT:    xori r3, r3, 1
@@ -119,10 +119,10 @@ entry:
 define void @test_igtui_sext_z_store(i32 zeroext %a) {
 ; CHECK-LABEL: test_igtui_sext_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
 ; CHECK-NEXT:    cntlzw r3, r3
-; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
 ; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    srwi r3, r3, 5
 ; CHECK-NEXT:    xori r3, r3, 1
 ; CHECK-NEXT:    neg r3, r3
 ; CHECK-NEXT:    stw r3, 0(r4)

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesigtus.ll b/llvm/test/CodeGen/PowerPC/testComparesigtus.ll
index 1db4a854d09dd35..b9263b26bc0d790 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesigtus.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesigtus.ll
@@ -67,11 +67,11 @@ entry:
 define void @test_igtus_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_igtus_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0@toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    sth r3, 0(r5)
+; CHECK-NEXT:    sth r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ugt i16 %a, %b
@@ -84,11 +84,11 @@ entry:
 define void @test_igtus_sext_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_igtus_sext_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0@toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    sth r3, 0(r5)
+; CHECK-NEXT:    sth r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ugt i16 %a, %b
@@ -101,8 +101,8 @@ entry:
 define void @test_igtus_z_store(i16 zeroext %a) {
 ; CHECK-LABEL: test_igtus_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
 ; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
 ; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-NEXT:    srwi r3, r3, 5
 ; CHECK-NEXT:    xori r3, r3, 1
@@ -119,10 +119,10 @@ entry:
 define void @test_igtus_sext_z_store(i16 zeroext %a) {
 ; CHECK-LABEL: test_igtus_sext_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
 ; CHECK-NEXT:    cntlzw r3, r3
-; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
 ; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    srwi r3, r3, 5
 ; CHECK-NEXT:    xori r3, r3, 1
 ; CHECK-NEXT:    neg r3, r3
 ; CHECK-NEXT:    sth r3, 0(r4)

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesilesc.ll b/llvm/test/CodeGen/PowerPC/testComparesilesc.ll
index 6e4fd87778136da..da55ace5a86bc66 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesilesc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesilesc.ll
@@ -71,19 +71,19 @@ define dso_local void @test_ilesc_store(i8 signext %a, i8 signext %b) {
 ; CHECK-BE-LABEL: test_ilesc_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r4, r3
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ilesc_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r4, r3
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i8 %a, %b
@@ -104,19 +104,19 @@ define dso_local void @test_ilesc_sext_store(i8 signext %a, i8 signext %b) {
 ; CHECK-BE-LABEL: test_ilesc_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r4, r3
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    addi r3, r3, -1
-; CHECK-BE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ilesc_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r4, r3
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    addi r3, r3, -1
-; CHECK-LE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesilesi.ll b/llvm/test/CodeGen/PowerPC/testComparesilesi.ll
index 400d2486c0f2a64..3bab7553049518d 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesilesi.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesilesi.ll
@@ -71,19 +71,19 @@ define dso_local void @test_ilesi_store(i32 signext %a, i32 signext %b) {
 ; CHECK-BE-LABEL: test_ilesi_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r4, r3
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ilesi_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r4, r3
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i32 %a, %b
@@ -104,19 +104,19 @@ define dso_local void @test_ilesi_sext_store(i32 signext %a, i32 signext %b) {
 ; CHECK-BE-LABEL: test_ilesi_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r4, r3
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    addi r3, r3, -1
-; CHECK-BE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ilesi_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r4, r3
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    addi r3, r3, -1
-; CHECK-LE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesilesll.ll b/llvm/test/CodeGen/PowerPC/testComparesilesll.ll
index 02d580b74b55716..3c60f67b8a40da9 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesilesll.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesilesll.ll
@@ -17,18 +17,18 @@ define dso_local signext i32 @test_ilesll(i64 %a, i64 %b) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_ilesll:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    sradi r5, r4, 63
-; CHECK-BE-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-BE-NEXT:    rldicl r5, r3, 1, 63
+; CHECK-BE-NEXT:    sradi r6, r4, 63
 ; CHECK-BE-NEXT:    subc r3, r4, r3
-; CHECK-BE-NEXT:    adde r3, r5, r6
+; CHECK-BE-NEXT:    adde r3, r6, r5
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ilesll:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    sradi r5, r4, 63
-; CHECK-LE-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-LE-NEXT:    rldicl r5, r3, 1, 63
+; CHECK-LE-NEXT:    sradi r6, r4, 63
 ; CHECK-LE-NEXT:    subc r3, r4, r3
-; CHECK-LE-NEXT:    adde r3, r5, r6
+; CHECK-LE-NEXT:    adde r3, r6, r5
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i64 %a, %b
@@ -47,19 +47,19 @@ define dso_local signext i32 @test_ilesll_sext(i64 %a, i64 %b) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_ilesll_sext:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    sradi r5, r4, 63
-; CHECK-BE-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-BE-NEXT:    rldicl r5, r3, 1, 63
+; CHECK-BE-NEXT:    sradi r6, r4, 63
 ; CHECK-BE-NEXT:    subc r3, r4, r3
-; CHECK-BE-NEXT:    adde r3, r5, r6
+; CHECK-BE-NEXT:    adde r3, r6, r5
 ; CHECK-BE-NEXT:    neg r3, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ilesll_sext:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    sradi r5, r4, 63
-; CHECK-LE-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-LE-NEXT:    rldicl r5, r3, 1, 63
+; CHECK-LE-NEXT:    sradi r6, r4, 63
 ; CHECK-LE-NEXT:    subc r3, r4, r3
-; CHECK-LE-NEXT:    adde r3, r5, r6
+; CHECK-LE-NEXT:    adde r3, r6, r5
 ; CHECK-LE-NEXT:    neg r3, r3
 ; CHECK-LE-NEXT:    blr
 entry:
@@ -132,22 +132,22 @@ define dso_local void @test_ilesll_store(i64 %a, i64 %b) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_ilesll_store:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    rldicl r5, r3, 1, 63
 ; CHECK-BE-NEXT:    sradi r6, r4, 63
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
-; CHECK-BE-NEXT:    subc r4, r4, r3
-; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-BE-NEXT:    adde r3, r6, r3
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    subc r3, r4, r3
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-BE-NEXT:    adde r3, r6, r5
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ilesll_store:
 ; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    rldicl r5, r3, 1, 63
 ; CHECK-LE-NEXT:    sradi r6, r4, 63
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
-; CHECK-LE-NEXT:    subc r4, r4, r3
-; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-LE-NEXT:    adde r3, r6, r3
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    subc r3, r4, r3
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-LE-NEXT:    adde r3, r6, r5
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i64 %a, %b
@@ -169,24 +169,24 @@ define dso_local void @test_ilesll_sext_store(i64 %a, i64 %b) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_ilesll_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    rldicl r5, r3, 1, 63
 ; CHECK-BE-NEXT:    sradi r6, r4, 63
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
-; CHECK-BE-NEXT:    subc r4, r4, r3
-; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-BE-NEXT:    adde r3, r6, r3
+; CHECK-BE-NEXT:    subc r3, r4, r3
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-BE-NEXT:    adde r3, r6, r5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ilesll_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    rldicl r5, r3, 1, 63
 ; CHECK-LE-NEXT:    sradi r6, r4, 63
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
-; CHECK-LE-NEXT:    subc r4, r4, r3
-; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-LE-NEXT:    adde r3, r6, r3
+; CHECK-LE-NEXT:    subc r3, r4, r3
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-LE-NEXT:    adde r3, r6, r5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i64 %a, %b
@@ -206,18 +206,18 @@ define dso_local void @test_ilesll_z_store(i64 %a) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_ilesll_z_store:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addi r5, r3, -1
+; CHECK-BE-NEXT:    addi r4, r3, -1
+; CHECK-BE-NEXT:    or r3, r4, r3
 ; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
-; CHECK-BE-NEXT:    or r3, r5, r3
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ilesll_z_store:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    addi r5, r3, -1
+; CHECK-LE-NEXT:    addi r4, r3, -1
+; CHECK-LE-NEXT:    or r3, r4, r3
 ; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
-; CHECK-LE-NEXT:    or r3, r5, r3
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
@@ -239,18 +239,18 @@ define dso_local void @test_ilesll_sext_z_store(i64 %a) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_ilesll_sext_z_store:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addi r5, r3, -1
+; CHECK-BE-NEXT:    addi r4, r3, -1
+; CHECK-BE-NEXT:    or r3, r4, r3
 ; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
-; CHECK-BE-NEXT:    or r3, r5, r3
 ; CHECK-BE-NEXT:    sradi r3, r3, 63
 ; CHECK-BE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ilesll_sext_z_store:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    addi r5, r3, -1
+; CHECK-LE-NEXT:    addi r4, r3, -1
+; CHECK-LE-NEXT:    or r3, r4, r3
 ; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
-; CHECK-LE-NEXT:    or r3, r5, r3
 ; CHECK-LE-NEXT:    sradi r3, r3, 63
 ; CHECK-LE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesiless.ll b/llvm/test/CodeGen/PowerPC/testComparesiless.ll
index 5bdf9099f0be59c..581fe0d5333b96d 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesiless.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesiless.ll
@@ -71,19 +71,19 @@ define dso_local void @test_iless_store(i16 signext %a, i16 signext %b) {
 ; CHECK-BE-LABEL: test_iless_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r4, r3
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_iless_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r4, r3
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i16 %a, %b
@@ -104,19 +104,19 @@ define dso_local void @test_iless_sext_store(i16 signext %a, i16 signext %b) {
 ; CHECK-BE-LABEL: test_iless_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r4, r3
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    addi r3, r3, -1
-; CHECK-BE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_iless_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r4, r3
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    addi r3, r3, -1
-; CHECK-LE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesileuc.ll b/llvm/test/CodeGen/PowerPC/testComparesileuc.ll
index fcbf82731d84ff4..1b6d1ae17c9b019 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesileuc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesileuc.ll
@@ -68,10 +68,10 @@ define dso_local void @test_ileuc_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_ileuc_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    not r3, r3
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i8 %a, %b
@@ -85,10 +85,10 @@ define dso_local void @test_ileuc_sext_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_ileuc_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-NEXT:    addi r3, r3, -1
-; CHECK-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesileui.ll b/llvm/test/CodeGen/PowerPC/testComparesileui.ll
index 61fa814d957acf5..4c1efde742db963 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesileui.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesileui.ll
@@ -68,10 +68,10 @@ define dso_local void @test_ileui_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_ileui_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    not r3, r3
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i32 %a, %b
@@ -85,10 +85,10 @@ define dso_local void @test_ileui_sext_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_ileui_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-NEXT:    addi r3, r3, -1
-; CHECK-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesileull.ll b/llvm/test/CodeGen/PowerPC/testComparesileull.ll
index 77d87b303469d2d..ad87de9f081ce6d 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesileull.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesileull.ll
@@ -67,10 +67,10 @@ define dso_local void @test_ileull_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_ileull_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    subc r4, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    subfe r3, r3, r3
 ; CHECK-NEXT:    addi r3, r3, 1
-; CHECK-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i64 %a, %b
@@ -84,10 +84,10 @@ define dso_local void @test_ileull_sext_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_ileull_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    subc r4, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    subfe r3, r3, r3
 ; CHECK-NEXT:    not r3, r3
-; CHECK-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i64 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesileus.ll b/llvm/test/CodeGen/PowerPC/testComparesileus.ll
index df8303acc0d3628..31952081f3db9d1 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesileus.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesileus.ll
@@ -68,10 +68,10 @@ define dso_local void @test_ileus_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_ileus_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    not r3, r3
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i16 %a, %b
@@ -85,10 +85,10 @@ define dso_local void @test_ileus_sext_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_ileus_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-NEXT:    addi r3, r3, -1
-; CHECK-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesiltsc.ll b/llvm/test/CodeGen/PowerPC/testComparesiltsc.ll
index 9d8a8374d28d2ed..97284bca4c4a973 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesiltsc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesiltsc.ll
@@ -51,9 +51,9 @@ define dso_local void @test_iltsc_store(i8 signext %a, i8 signext %b) {
 ; CHECK-LABEL: test_iltsc_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp slt i8 %a, %b
@@ -67,9 +67,9 @@ define dso_local void @test_iltsc_sext_store(i8 signext %a, i8 signext %b) {
 ; CHECK-LABEL: test_iltsc_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp slt i8 %a, %b
@@ -82,8 +82,8 @@ entry:
 define dso_local void @test_iltsc_sext_z_store(i8 signext %a) {
 ; CHECK-LABEL: test_iltsc_sext_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    srwi r3, r3, 7
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesiltsi.ll b/llvm/test/CodeGen/PowerPC/testComparesiltsi.ll
index c71827ad5610855..03419268774e0ce 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesiltsi.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesiltsi.ll
@@ -51,9 +51,9 @@ define dso_local void @test_iltsi_store(i32 signext %a, i32 signext %b) {
 ; CHECK-LABEL: test_iltsi_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp slt i32 %a, %b
@@ -67,9 +67,9 @@ define dso_local void @test_iltsi_sext_store(i32 signext %a, i32 signext %b) {
 ; CHECK-LABEL: test_iltsi_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp slt i32 %a, %b
@@ -82,8 +82,8 @@ entry:
 define dso_local void @test_iltsi_sext_z_store(i32 signext %a) {
 ; CHECK-LABEL: test_iltsi_sext_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    srawi r3, r3, 31
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesiltsll.ll b/llvm/test/CodeGen/PowerPC/testComparesiltsll.ll
index eb989717460105a..ee8f995df4f84cb 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesiltsll.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesiltsll.ll
@@ -57,13 +57,13 @@ entry:
 define dso_local void @test_iltsll_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_iltsll_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sradi r6, r3, 63
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    sradi r5, r3, 63
+; CHECK-NEXT:    rldicl r6, r4, 1, 63
 ; CHECK-NEXT:    subc r3, r3, r4
-; CHECK-NEXT:    rldicl r3, r4, 1, 63
-; CHECK-NEXT:    adde r3, r3, r6
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
+; CHECK-NEXT:    adde r3, r6, r5
 ; CHECK-NEXT:    xori r3, r3, 1
-; CHECK-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp slt i64 %a, %b
@@ -76,14 +76,14 @@ entry:
 define dso_local void @test_iltsll_sext_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_iltsll_sext_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sradi r6, r3, 63
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    sradi r5, r3, 63
+; CHECK-NEXT:    rldicl r6, r4, 1, 63
 ; CHECK-NEXT:    subc r3, r3, r4
-; CHECK-NEXT:    rldicl r3, r4, 1, 63
-; CHECK-NEXT:    adde r3, r3, r6
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
+; CHECK-NEXT:    adde r3, r6, r5
 ; CHECK-NEXT:    xori r3, r3, 1
 ; CHECK-NEXT:    neg r3, r3
-; CHECK-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp slt i64 %a, %b
@@ -96,8 +96,8 @@ entry:
 define dso_local void @test_iltsll_sext_z_store(i64 %a) {
 ; CHECK-LABEL: test_iltsll_sext_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    sradi r3, r3, 63
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesiltss.ll b/llvm/test/CodeGen/PowerPC/testComparesiltss.ll
index aa07cd6b87e44e5..958d58b695807f5 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesiltss.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesiltss.ll
@@ -51,9 +51,9 @@ define dso_local void @test_iltss_store(i16 signext %a, i16 signext %b) {
 ; CHECK-LABEL: test_iltss_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp slt i16 %a, %b
@@ -67,9 +67,9 @@ define dso_local void @test_iltss_sext_store(i16 signext %a, i16 signext %b) {
 ; CHECK-LABEL: test_iltss_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp slt i16 %a, %b
@@ -82,8 +82,8 @@ entry:
 define dso_local void @test_iltss_sext_z_store(i16 signext %a) {
 ; CHECK-LABEL: test_iltss_sext_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    srwi r3, r3, 15
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesiltuc.ll b/llvm/test/CodeGen/PowerPC/testComparesiltuc.ll
index f84a9a003068277..bcead8d97d6d2a0 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesiltuc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesiltuc.ll
@@ -39,9 +39,9 @@ define dso_local void @test_iltuc_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_iltuc_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ult i8 %a, %b
@@ -55,9 +55,9 @@ define dso_local void @test_iltuc_sext_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_iltuc_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ult i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesiltui.ll b/llvm/test/CodeGen/PowerPC/testComparesiltui.ll
index 388b94a743f8670..424ae39a8ee6450 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesiltui.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesiltui.ll
@@ -39,9 +39,9 @@ define dso_local void @test_iltui_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_iltui_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ult i32 %a, %b
@@ -55,9 +55,9 @@ define dso_local void @test_iltui_sext_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_iltui_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ult i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesiltus.ll b/llvm/test/CodeGen/PowerPC/testComparesiltus.ll
index 086be821e896530..38cc3dec6c444ef 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesiltus.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesiltus.ll
@@ -39,9 +39,9 @@ define dso_local void @test_iltus_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_iltus_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ult i16 %a, %b
@@ -55,9 +55,9 @@ define dso_local void @test_iltus_sext_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_iltus_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ult i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesinesc.ll b/llvm/test/CodeGen/PowerPC/testComparesinesc.ll
index da3d138bfbad9c9..1097f21e809ec17 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesinesc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesinesc.ll
@@ -137,21 +137,21 @@ define dso_local void @test_inesc_store(i8 signext %a, i8 signext %b) {
 ; CHECK-BE-LABEL: test_inesc_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_inesc_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i8 %a, %b
@@ -174,23 +174,23 @@ define dso_local void @test_inesc_sext_store(i8 signext %a, i8 signext %b) {
 ; CHECK-BE-LABEL: test_inesc_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    xori r3, r3, 1
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_inesc_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    xori r3, r3, 1
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesinesi.ll b/llvm/test/CodeGen/PowerPC/testComparesinesi.ll
index 6a6fbb829707724..3cd7895c5392b8f 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesinesi.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesinesi.ll
@@ -137,21 +137,21 @@ define dso_local void @test_inesi_store(i32 signext %a, i32 signext %b) {
 ; CHECK-BE-LABEL: test_inesi_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_inesi_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i32 %a, %b
@@ -174,23 +174,23 @@ define dso_local void @test_inesi_sext_store(i32 signext %a, i32 signext %b) {
 ; CHECK-BE-LABEL: test_inesi_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    xori r3, r3, 1
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_inesi_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    xori r3, r3, 1
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesinesll.ll b/llvm/test/CodeGen/PowerPC/testComparesinesll.ll
index 63475110d7c3a79..c1b72308eb82f81 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesinesll.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesinesll.ll
@@ -118,19 +118,19 @@ define dso_local void @test_inesll_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_inesll_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    addic r4, r3, -1
 ; CHECK-BE-NEXT:    subfe r3, r4, r3
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_inesll_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    addic r4, r3, -1
 ; CHECK-LE-NEXT:    subfe r3, r4, r3
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i64 %a, %b
@@ -151,19 +151,19 @@ define dso_local void @test_inesll_sext_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_inesll_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    subfic r3, r3, 0
 ; CHECK-BE-NEXT:    subfe r3, r3, r3
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_inesll_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    subfic r3, r3, 0
 ; CHECK-LE-NEXT:    subfe r3, r3, r3
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i64 %a, %b
@@ -182,17 +182,17 @@ define dso_local void @test_inesll_z_store(i64 %a) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_inesll_z_store:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addic r5, r3, -1
+; CHECK-BE-NEXT:    addic r4, r3, -1
+; CHECK-BE-NEXT:    subfe r3, r4, r3
 ; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
-; CHECK-BE-NEXT:    subfe r3, r5, r3
 ; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_inesll_z_store:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    addic r5, r3, -1
+; CHECK-LE-NEXT:    addic r4, r3, -1
+; CHECK-LE-NEXT:    subfe r3, r4, r3
 ; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
-; CHECK-LE-NEXT:    subfe r3, r5, r3
 ; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesiness.ll b/llvm/test/CodeGen/PowerPC/testComparesiness.ll
index c982278a174ec7e..06452870ed2680b 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesiness.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesiness.ll
@@ -137,21 +137,21 @@ define dso_local void @test_iness_store(i16 signext %a, i16 signext %b) {
 ; CHECK-BE-LABEL: test_iness_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_iness_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i16 %a, %b
@@ -174,23 +174,23 @@ define dso_local void @test_iness_sext_store(i16 signext %a, i16 signext %b) {
 ; CHECK-BE-LABEL: test_iness_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    xori r3, r3, 1
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_iness_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    xori r3, r3, 1
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesineuc.ll b/llvm/test/CodeGen/PowerPC/testComparesineuc.ll
index 61040d356556ad6..7cc5bc2ccabea3c 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesineuc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesineuc.ll
@@ -136,21 +136,21 @@ define dso_local void @test_ineuc_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-BE-LABEL: test_ineuc_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ineuc_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i8 %a, %b
@@ -173,23 +173,23 @@ define dso_local void @test_ineuc_sext_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-BE-LABEL: test_ineuc_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    xori r3, r3, 1
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ineuc_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    xori r3, r3, 1
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesineui.ll b/llvm/test/CodeGen/PowerPC/testComparesineui.ll
index 43cfc2e05c7a5c5..30492192b1e3070 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesineui.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesineui.ll
@@ -137,21 +137,21 @@ define dso_local void @test_ineui_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-BE-LABEL: test_ineui_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ineui_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i32 %a, %b
@@ -174,23 +174,23 @@ define dso_local void @test_ineui_sext_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-BE-LABEL: test_ineui_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    xori r3, r3, 1
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ineui_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    xori r3, r3, 1
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesineull.ll b/llvm/test/CodeGen/PowerPC/testComparesineull.ll
index 0c2eac4098e7013..562747597e3dd4f 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesineull.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesineull.ll
@@ -118,19 +118,19 @@ define dso_local void @test_ineull_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_ineull_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    addic r4, r3, -1
 ; CHECK-BE-NEXT:    subfe r3, r4, r3
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ineull_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    addic r4, r3, -1
 ; CHECK-LE-NEXT:    subfe r3, r4, r3
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i64 %a, %b
@@ -151,19 +151,19 @@ define dso_local void @test_ineull_sext_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_ineull_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    subfic r3, r3, 0
 ; CHECK-BE-NEXT:    subfe r3, r3, r3
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ineull_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    subfic r3, r3, 0
 ; CHECK-LE-NEXT:    subfe r3, r3, r3
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i64 %a, %b
@@ -182,17 +182,17 @@ define dso_local void @test_ineull_z_store(i64 %a) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_ineull_z_store:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addic r5, r3, -1
+; CHECK-BE-NEXT:    addic r4, r3, -1
+; CHECK-BE-NEXT:    subfe r3, r4, r3
 ; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
-; CHECK-BE-NEXT:    subfe r3, r5, r3
 ; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ineull_z_store:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    addic r5, r3, -1
+; CHECK-LE-NEXT:    addic r4, r3, -1
+; CHECK-LE-NEXT:    subfe r3, r4, r3
 ; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
-; CHECK-LE-NEXT:    subfe r3, r5, r3
 ; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesineus.ll b/llvm/test/CodeGen/PowerPC/testComparesineus.ll
index e96bb94bd13d456..22077f59ca1d2d4 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesineus.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesineus.ll
@@ -137,21 +137,21 @@ define dso_local void @test_ineus_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-BE-LABEL: test_ineus_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ineus_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i16 %a, %b
@@ -174,23 +174,23 @@ define dso_local void @test_ineus_sext_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-BE-LABEL: test_ineus_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    xori r3, r3, 1
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_ineus_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    xori r3, r3, 1
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testCompareslleqsc.ll b/llvm/test/CodeGen/PowerPC/testCompareslleqsc.ll
index 718921d281a707e..6184fe6845b5055 100644
--- a/llvm/test/CodeGen/PowerPC/testCompareslleqsc.ll
+++ b/llvm/test/CodeGen/PowerPC/testCompareslleqsc.ll
@@ -130,19 +130,19 @@ define dso_local void @test_lleqsc_store(i8 signext %a, i8 signext %b) {
 ; CHECK-BE-LABEL: test_lleqsc_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
-; CHECK-BE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lleqsc_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
-; CHECK-LE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i8 %a, %b
@@ -165,21 +165,21 @@ define dso_local void @test_lleqsc_sext_store(i8 signext %a, i8 signext %b) {
 ; CHECK-BE-LABEL: test_lleqsc_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lleqsc_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testCompareslleqsi.ll b/llvm/test/CodeGen/PowerPC/testCompareslleqsi.ll
index 368c0231242be13..c1d15c56bf9760f 100644
--- a/llvm/test/CodeGen/PowerPC/testCompareslleqsi.ll
+++ b/llvm/test/CodeGen/PowerPC/testCompareslleqsi.ll
@@ -129,19 +129,19 @@ define dso_local void @test_lleqsi_store(i32 signext %a, i32 signext %b) {
 ; CHECK-BE-LABEL: test_lleqsi_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
-; CHECK-BE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lleqsi_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
-; CHECK-LE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i32 %a, %b
@@ -164,21 +164,21 @@ define dso_local void @test_lleqsi_sext_store(i32 signext %a, i32 signext %b) {
 ; CHECK-BE-LABEL: test_lleqsi_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lleqsi_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testCompareslleqsll.ll b/llvm/test/CodeGen/PowerPC/testCompareslleqsll.ll
index 23216b95e0ef560..fd5f421ca50ef14 100644
--- a/llvm/test/CodeGen/PowerPC/testCompareslleqsll.ll
+++ b/llvm/test/CodeGen/PowerPC/testCompareslleqsll.ll
@@ -123,19 +123,19 @@ define dso_local void @test_lleqsll_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_lleqsll_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzd r3, r3
 ; CHECK-BE-NEXT:    rldicl r3, r3, 58, 63
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lleqsll_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzd r3, r3
 ; CHECK-LE-NEXT:    rldicl r3, r3, 58, 63
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i64 %a, %b
@@ -157,19 +157,19 @@ define dso_local void @test_lleqsll_sext_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_lleqsll_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    addic r3, r3, -1
 ; CHECK-BE-NEXT:    subfe r3, r3, r3
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lleqsll_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    addic r3, r3, -1
 ; CHECK-LE-NEXT:    subfe r3, r3, r3
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i64 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testCompareslleqss.ll b/llvm/test/CodeGen/PowerPC/testCompareslleqss.ll
index 9ede3bfb3f03956..d4d38232adb0146 100644
--- a/llvm/test/CodeGen/PowerPC/testCompareslleqss.ll
+++ b/llvm/test/CodeGen/PowerPC/testCompareslleqss.ll
@@ -129,19 +129,19 @@ define dso_local void @test_lleqss_store(i16 signext %a, i16 signext %b) {
 ; CHECK-BE-LABEL: test_lleqss_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
-; CHECK-BE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lleqss_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
-; CHECK-LE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i16 %a, %b
@@ -164,21 +164,21 @@ define dso_local void @test_lleqss_sext_store(i16 signext %a, i16 signext %b) {
 ; CHECK-BE-LABEL: test_lleqss_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lleqss_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllequc.ll b/llvm/test/CodeGen/PowerPC/testComparesllequc.ll
index b984283172dc464..68dc0bde819428b 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllequc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllequc.ll
@@ -129,19 +129,19 @@ define dso_local void @test_llequc_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-BE-LABEL: test_llequc_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
-; CHECK-BE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llequc_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
-; CHECK-LE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i8 %a, %b
@@ -164,21 +164,21 @@ define dso_local void @test_llequc_sext_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-BE-LABEL: test_llequc_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llequc_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllequi.ll b/llvm/test/CodeGen/PowerPC/testComparesllequi.ll
index 46d7d40c7d6ef94..70e9e62d8424943 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllequi.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllequi.ll
@@ -129,19 +129,19 @@ define dso_local void @test_llequi_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-BE-LABEL: test_llequi_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
-; CHECK-BE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llequi_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
-; CHECK-LE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i32 %a, %b
@@ -164,21 +164,21 @@ define dso_local void @test_llequi_sext_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-BE-LABEL: test_llequi_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llequi_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllequll.ll b/llvm/test/CodeGen/PowerPC/testComparesllequll.ll
index 367f3200ad1db13..081b1b0d6c390a2 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllequll.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllequll.ll
@@ -123,19 +123,19 @@ define dso_local void @test_llequll_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_llequll_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    cntlzd r3, r3
 ; CHECK-BE-NEXT:    rldicl r3, r3, 58, 63
-; CHECK-BE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llequll_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    cntlzd r3, r3
 ; CHECK-LE-NEXT:    rldicl r3, r3, 58, 63
-; CHECK-LE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i64 %a, %b
@@ -157,19 +157,19 @@ define dso_local void @test_llequll_sext_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_llequll_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    addic r3, r3, -1
 ; CHECK-BE-NEXT:    subfe r3, r3, r3
-; CHECK-BE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llequll_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    addic r3, r3, -1
 ; CHECK-LE-NEXT:    subfe r3, r3, r3
-; CHECK-LE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i64 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllequs.ll b/llvm/test/CodeGen/PowerPC/testComparesllequs.ll
index abc8b623e9b633e..37816e9b2524308 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllequs.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllequs.ll
@@ -129,19 +129,19 @@ define dso_local void @test_llequs_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-BE-LABEL: test_llequs_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
-; CHECK-BE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llequs_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
-; CHECK-LE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i16 %a, %b
@@ -164,21 +164,21 @@ define dso_local void @test_llequs_sext_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-BE-LABEL: test_llequs_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    cntlzw r3, r3
 ; CHECK-BE-NEXT:    srwi r3, r3, 5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llequs_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    cntlzw r3, r3
 ; CHECK-LE-NEXT:    srwi r3, r3, 5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp eq i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllgesc.ll b/llvm/test/CodeGen/PowerPC/testComparesllgesc.ll
index faebfadf2f5d106..6802a5d3e823a18 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllgesc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllgesc.ll
@@ -71,19 +71,19 @@ define dso_local void @test_llgesc_store(i8 signext %a, i8 signext %b) {
 ; CHECK-BE-LABEL: test_llgesc_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llgesc_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i8 %a, %b
@@ -104,19 +104,19 @@ define dso_local void @test_llgesc_sext_store(i8 signext %a, i8 signext %b) {
 ; CHECK-BE-LABEL: test_llgesc_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    addi r3, r3, -1
-; CHECK-BE-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llgesc_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    addi r3, r3, -1
-; CHECK-LE-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllgesi.ll b/llvm/test/CodeGen/PowerPC/testComparesllgesi.ll
index c2f34c403fcc145..d500fb39c1c4909 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllgesi.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllgesi.ll
@@ -71,19 +71,19 @@ define dso_local void @test_llgesi_store(i32 signext %a, i32 signext %b) {
 ; CHECK-BE-LABEL: test_llgesi_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llgesi_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i32 %a, %b
@@ -104,19 +104,19 @@ define dso_local void @test_llgesi_sext_store(i32 signext %a, i32 signext %b) {
 ; CHECK-BE-LABEL: test_llgesi_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    addi r3, r3, -1
-; CHECK-BE-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llgesi_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    addi r3, r3, -1
-; CHECK-LE-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllgesll.ll b/llvm/test/CodeGen/PowerPC/testComparesllgesll.ll
index 2a96ceaf32e86be..abc2d05e51cf705 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllgesll.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllgesll.ll
@@ -17,18 +17,18 @@ define i64 @test_llgesll(i64 %a, i64 %b) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_llgesll:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    sradi r5, r3, 63
-; CHECK-BE-NEXT:    rldicl r6, r4, 1, 63
+; CHECK-BE-NEXT:    rldicl r5, r4, 1, 63
+; CHECK-BE-NEXT:    sradi r6, r3, 63
 ; CHECK-BE-NEXT:    subc r3, r3, r4
-; CHECK-BE-NEXT:    adde r3, r5, r6
+; CHECK-BE-NEXT:    adde r3, r6, r5
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llgesll:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    sradi r5, r3, 63
-; CHECK-LE-NEXT:    rldicl r6, r4, 1, 63
+; CHECK-LE-NEXT:    rldicl r5, r4, 1, 63
+; CHECK-LE-NEXT:    sradi r6, r3, 63
 ; CHECK-LE-NEXT:    subc r3, r3, r4
-; CHECK-LE-NEXT:    adde r3, r5, r6
+; CHECK-LE-NEXT:    adde r3, r6, r5
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i64 %a, %b
@@ -47,19 +47,19 @@ define i64 @test_llgesll_sext(i64 %a, i64 %b) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_llgesll_sext:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    sradi r5, r3, 63
-; CHECK-BE-NEXT:    rldicl r6, r4, 1, 63
+; CHECK-BE-NEXT:    rldicl r5, r4, 1, 63
+; CHECK-BE-NEXT:    sradi r6, r3, 63
 ; CHECK-BE-NEXT:    subc r3, r3, r4
-; CHECK-BE-NEXT:    adde r3, r5, r6
+; CHECK-BE-NEXT:    adde r3, r6, r5
 ; CHECK-BE-NEXT:    neg r3, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llgesll_sext:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    sradi r5, r3, 63
-; CHECK-LE-NEXT:    rldicl r6, r4, 1, 63
+; CHECK-LE-NEXT:    rldicl r5, r4, 1, 63
+; CHECK-LE-NEXT:    sradi r6, r3, 63
 ; CHECK-LE-NEXT:    subc r3, r3, r4
-; CHECK-LE-NEXT:    adde r3, r5, r6
+; CHECK-LE-NEXT:    adde r3, r6, r5
 ; CHECK-LE-NEXT:    neg r3, r3
 ; CHECK-LE-NEXT:    blr
 entry:
@@ -126,22 +126,22 @@ define dso_local void @test_llgesll_store(i64 %a, i64 %b) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_llgesll_store:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    rldicl r5, r4, 1, 63
 ; CHECK-BE-NEXT:    sradi r6, r3, 63
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    subc r3, r3, r4
-; CHECK-BE-NEXT:    rldicl r3, r4, 1, 63
-; CHECK-BE-NEXT:    adde r3, r6, r3
-; CHECK-BE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
+; CHECK-BE-NEXT:    adde r3, r6, r5
+; CHECK-BE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llgesll_store:
 ; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    rldicl r5, r4, 1, 63
 ; CHECK-LE-NEXT:    sradi r6, r3, 63
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    subc r3, r3, r4
-; CHECK-LE-NEXT:    rldicl r3, r4, 1, 63
-; CHECK-LE-NEXT:    adde r3, r6, r3
-; CHECK-LE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
+; CHECK-LE-NEXT:    adde r3, r6, r5
+; CHECK-LE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i64 %a, %b
@@ -163,24 +163,24 @@ define dso_local void @test_llgesll_sext_store(i64 %a, i64 %b) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_llgesll_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    rldicl r5, r4, 1, 63
 ; CHECK-BE-NEXT:    sradi r6, r3, 63
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    subc r3, r3, r4
-; CHECK-BE-NEXT:    rldicl r3, r4, 1, 63
-; CHECK-BE-NEXT:    adde r3, r6, r3
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
+; CHECK-BE-NEXT:    adde r3, r6, r5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llgesll_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    rldicl r5, r4, 1, 63
 ; CHECK-LE-NEXT:    sradi r6, r3, 63
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    subc r3, r3, r4
-; CHECK-LE-NEXT:    rldicl r3, r4, 1, 63
-; CHECK-LE-NEXT:    adde r3, r6, r3
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
+; CHECK-LE-NEXT:    adde r3, r6, r5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i64 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllgess.ll b/llvm/test/CodeGen/PowerPC/testComparesllgess.ll
index 2205d1a4ac8488c..8cf3d809d917367 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllgess.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllgess.ll
@@ -71,19 +71,19 @@ define dso_local void @test_llgess_store(i16 signext %a, i16 signext %b) {
 ; CHECK-BE-LABEL: test_llgess_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llgess_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i16 %a, %b
@@ -104,19 +104,19 @@ define dso_local void @test_llgess_sext_store(i16 signext %a, i16 signext %b) {
 ; CHECK-BE-LABEL: test_llgess_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    addi r3, r3, -1
-; CHECK-BE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llgess_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    addi r3, r3, -1
-; CHECK-LE-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sge i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllgeuc.ll b/llvm/test/CodeGen/PowerPC/testComparesllgeuc.ll
index 509104931f0190e..14519472ad01d9e 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllgeuc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllgeuc.ll
@@ -65,10 +65,10 @@ define dso_local void @test_llgeuc_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_llgeuc_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    not r3, r3
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i8 %a, %b
@@ -82,10 +82,10 @@ define dso_local void @test_llgeuc_sext_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_llgeuc_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-NEXT:    addi r3, r3, -1
-; CHECK-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllgeui.ll b/llvm/test/CodeGen/PowerPC/testComparesllgeui.ll
index b6b07e478f81b1d..6161109dbf92348 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllgeui.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllgeui.ll
@@ -65,10 +65,10 @@ define dso_local void @test_llgeui_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_llgeui_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    not r3, r3
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i32 %a, %b
@@ -82,10 +82,10 @@ define dso_local void @test_llgeui_sext_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_llgeui_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-NEXT:    addi r3, r3, -1
-; CHECK-NEXT:    stw r3, glob at toc@l(r5)
+; CHECK-NEXT:    stw r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllgeull.ll b/llvm/test/CodeGen/PowerPC/testComparesllgeull.ll
index 1bd5509fc1ef09e..b1b24f89eb826e2 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllgeull.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllgeull.ll
@@ -65,10 +65,10 @@ define dso_local void @test_llgeull_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_llgeull_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    subc r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
 ; CHECK-NEXT:    subfe r3, r4, r4
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    addi r3, r3, 1
-; CHECK-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i64 %a, %b
@@ -82,10 +82,10 @@ define dso_local void @test_llgeull_sext_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_llgeull_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    subc r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
 ; CHECK-NEXT:    subfe r3, r4, r4
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    not r3, r3
-; CHECK-NEXT:    std r3, glob at toc@l(r5)
+; CHECK-NEXT:    std r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i64 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllgeus.ll b/llvm/test/CodeGen/PowerPC/testComparesllgeus.ll
index 011df13e2b4e868..a22def4beaf709d 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllgeus.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllgeus.ll
@@ -65,10 +65,10 @@ define dso_local void @test_llgeus_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_llgeus_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    not r3, r3
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i16 %a, %b
@@ -82,10 +82,10 @@ define dso_local void @test_llgeus_sext_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_llgeus_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-NEXT:    addi r3, r3, -1
-; CHECK-NEXT:    sth r3, glob at toc@l(r5)
+; CHECK-NEXT:    sth r3, glob at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp uge i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllgtsll.ll b/llvm/test/CodeGen/PowerPC/testComparesllgtsll.ll
index 105692ab717aa64..e712ce32d06bcc2 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllgtsll.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllgtsll.ll
@@ -74,12 +74,12 @@ entry:
 define void @test_llgtsll_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_llgtsll_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sradi r6, r4, 63
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
-; CHECK-NEXT:    subc r4, r4, r3
-; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    ld r4, .LC0 at toc@l(r5)
-; CHECK-NEXT:    adde r3, r3, r6
+; CHECK-NEXT:    sradi r5, r4, 63
+; CHECK-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-NEXT:    subc r3, r4, r3
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    adde r3, r6, r5
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    xori r3, r3, 1
 ; CHECK-NEXT:    std r3, 0(r4)
 ; CHECK-NEXT:    blr
@@ -94,12 +94,12 @@ entry:
 define void @test_llgtsll_sext_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_llgtsll_sext_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sradi r6, r4, 63
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
-; CHECK-NEXT:    subc r4, r4, r3
-; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    adde r3, r3, r6
-; CHECK-NEXT:    ld r4, .LC0 at toc@l(r5)
+; CHECK-NEXT:    sradi r5, r4, 63
+; CHECK-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-NEXT:    subc r3, r4, r3
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    adde r3, r6, r5
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    xori r3, r3, 1
 ; CHECK-NEXT:    neg r3, r3
 ; CHECK-NEXT:    std r3, 0(r4)
@@ -116,10 +116,10 @@ entry:
 define void @test_llgtsll_z_store(i64 %a) {
 ; CHECK-LABEL: test_llgtsll_z_store:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi r4, r3, -1
+; CHECK-NEXT:    nor r3, r4, r3
 ; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
-; CHECK-NEXT:    addi r5, r3, -1
 ; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
-; CHECK-NEXT:    nor r3, r5, r3
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-NEXT:    std r3, 0(r4)
 ; CHECK-NEXT:    blr
@@ -134,10 +134,10 @@ entry:
 define void @test_llgtsll_sext_z_store(i64 %a) {
 ; CHECK-LABEL: test_llgtsll_sext_z_store:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi r4, r3, -1
+; CHECK-NEXT:    nor r3, r4, r3
 ; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
-; CHECK-NEXT:    addi r5, r3, -1
 ; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
-; CHECK-NEXT:    nor r3, r5, r3
 ; CHECK-NEXT:    sradi r3, r3, 63
 ; CHECK-NEXT:    std r3, 0(r4)
 ; CHECK-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllgtuc.ll b/llvm/test/CodeGen/PowerPC/testComparesllgtuc.ll
index 919d8fdf501c1c8..8109db523b975a9 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllgtuc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllgtuc.ll
@@ -67,11 +67,11 @@ entry:
 define void @test_llgtuc_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_llgtuc_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stb r3, 0(r5)
+; CHECK-NEXT:    stb r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ugt i8 %a, %b
@@ -84,11 +84,11 @@ entry:
 define void @test_llgtuc_sext_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_llgtuc_sext_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    stb r3, 0(r5)
+; CHECK-NEXT:    stb r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ugt i8 %a, %b
@@ -101,8 +101,8 @@ entry:
 define void @test_llgtuc_z_store(i8 zeroext %a) {
 ; CHECK-LABEL: test_llgtuc_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    srwi r3, r3, 5
 ; CHECK-NEXT:    xori r3, r3, 1
@@ -119,10 +119,10 @@ entry:
 define void @test_llgtuc_sext_z_store(i8 zeroext %a) {
 ; CHECK-LABEL: test_llgtuc_sext_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    cntlzw r3, r3
-; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
+; CHECK-NEXT:    srwi r3, r3, 5
 ; CHECK-NEXT:    xori r3, r3, 1
 ; CHECK-NEXT:    neg r3, r3
 ; CHECK-NEXT:    stb r3, 0(r4)

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllgtui.ll b/llvm/test/CodeGen/PowerPC/testComparesllgtui.ll
index d1c3a232c98dc53..0fa52c4fb400450 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllgtui.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllgtui.ll
@@ -67,11 +67,11 @@ entry:
 define void @test_llgtui_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_llgtui_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stw r3, 0(r5)
+; CHECK-NEXT:    stw r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ugt i32 %a, %b
@@ -84,11 +84,11 @@ entry:
 define void @test_llgtui_sext_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_llgtui_sext_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    stw r3, 0(r5)
+; CHECK-NEXT:    stw r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ugt i32 %a, %b
@@ -101,8 +101,8 @@ entry:
 define void @test_llgtui_z_store(i32 zeroext %a) {
 ; CHECK-LABEL: test_llgtui_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    srwi r3, r3, 5
 ; CHECK-NEXT:    xori r3, r3, 1
@@ -119,10 +119,10 @@ entry:
 define void @test_llgtui_sext_z_store(i32 zeroext %a) {
 ; CHECK-LABEL: test_llgtui_sext_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    cntlzw r3, r3
-; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
+; CHECK-NEXT:    srwi r3, r3, 5
 ; CHECK-NEXT:    xori r3, r3, 1
 ; CHECK-NEXT:    neg r3, r3
 ; CHECK-NEXT:    stw r3, 0(r4)

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllgtus.ll b/llvm/test/CodeGen/PowerPC/testComparesllgtus.ll
index 88338d51ee2ad55..543d82100437242 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllgtus.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllgtus.ll
@@ -67,11 +67,11 @@ entry:
 define void @test_llgtus_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_llgtus_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    sth r3, 0(r5)
+; CHECK-NEXT:    sth r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ugt i16 %a, %b
@@ -84,11 +84,11 @@ entry:
 define void @test_llgtus_sext_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_llgtus_sext_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    ld r5, .LC0 at toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
+; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    sth r3, 0(r5)
+; CHECK-NEXT:    sth r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ugt i16 %a, %b
@@ -101,8 +101,8 @@ entry:
 define void @test_llgtus_z_store(i16 zeroext %a) {
 ; CHECK-LABEL: test_llgtus_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
 ; CHECK-NEXT:    srwi r3, r3, 5
 ; CHECK-NEXT:    xori r3, r3, 1
@@ -119,10 +119,10 @@ entry:
 define void @test_llgtus_sext_z_store(i16 zeroext %a) {
 ; CHECK-LABEL: test_llgtus_sext_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    cntlzw r3, r3
-; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    addis r4, r2, .LC0 at toc@ha
 ; CHECK-NEXT:    ld r4, .LC0 at toc@l(r4)
+; CHECK-NEXT:    srwi r3, r3, 5
 ; CHECK-NEXT:    xori r3, r3, 1
 ; CHECK-NEXT:    neg r3, r3
 ; CHECK-NEXT:    sth r3, 0(r4)

diff  --git a/llvm/test/CodeGen/PowerPC/testCompareslllesc.ll b/llvm/test/CodeGen/PowerPC/testCompareslllesc.ll
index 2fa4933658f4a5d..4799d44893445e3 100644
--- a/llvm/test/CodeGen/PowerPC/testCompareslllesc.ll
+++ b/llvm/test/CodeGen/PowerPC/testCompareslllesc.ll
@@ -72,19 +72,19 @@ define dso_local void @test_lllesc_store(i8 signext %a, i8 signext %b) {
 ; CHECK-BE-LABEL: test_lllesc_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r4, r3
-; CHECK-BE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lllesc_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r4, r3
-; CHECK-LE-NEXT:    addis r5, r2, glob at toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob at toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    stb r3, glob at toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob at toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i8 %a, %b
@@ -105,19 +105,19 @@ define dso_local void @test_lllesc_sext_store(i8 signext %a, i8 signext %b) {
 ; CHECK-BE-LABEL: test_lllesc_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r4, r3
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    addi r3, r3, -1
-; CHECK-BE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lllesc_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r4, r3
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    addi r3, r3, -1
-; CHECK-LE-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testCompareslllesi.ll b/llvm/test/CodeGen/PowerPC/testCompareslllesi.ll
index fe4b6ffa4b4bb6c..d016c6ae1691971 100644
--- a/llvm/test/CodeGen/PowerPC/testCompareslllesi.ll
+++ b/llvm/test/CodeGen/PowerPC/testCompareslllesi.ll
@@ -72,19 +72,19 @@ define dso_local void @test_lllesi_store(i32 signext %a, i32 signext %b) {
 ; CHECK-BE-LABEL: test_lllesi_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r4, r3
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lllesi_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r4, r3
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i32 %a, %b
@@ -105,19 +105,19 @@ define dso_local void @test_lllesi_sext_store(i32 signext %a, i32 signext %b) {
 ; CHECK-BE-LABEL: test_lllesi_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r4, r3
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    addi r3, r3, -1
-; CHECK-BE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lllesi_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r4, r3
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    addi r3, r3, -1
-; CHECK-LE-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testCompareslllesll.ll b/llvm/test/CodeGen/PowerPC/testCompareslllesll.ll
index 065253d0d985168..7e877a3bb04e9a7 100644
--- a/llvm/test/CodeGen/PowerPC/testCompareslllesll.ll
+++ b/llvm/test/CodeGen/PowerPC/testCompareslllesll.ll
@@ -18,18 +18,18 @@ define i64 @test_lllesll(i64 %a, i64 %b)  {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_lllesll:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    sradi r5, r4, 63
-; CHECK-BE-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-BE-NEXT:    rldicl r5, r3, 1, 63
+; CHECK-BE-NEXT:    sradi r6, r4, 63
 ; CHECK-BE-NEXT:    subc r3, r4, r3
-; CHECK-BE-NEXT:    adde r3, r5, r6
+; CHECK-BE-NEXT:    adde r3, r6, r5
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lllesll:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    sradi r5, r4, 63
-; CHECK-LE-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-LE-NEXT:    rldicl r5, r3, 1, 63
+; CHECK-LE-NEXT:    sradi r6, r4, 63
 ; CHECK-LE-NEXT:    subc r3, r4, r3
-; CHECK-LE-NEXT:    adde r3, r5, r6
+; CHECK-LE-NEXT:    adde r3, r6, r5
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i64 %a, %b
@@ -49,19 +49,19 @@ define i64 @test_lllesll_sext(i64 %a, i64 %b)  {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_lllesll_sext:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    sradi r5, r4, 63
-; CHECK-BE-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-BE-NEXT:    rldicl r5, r3, 1, 63
+; CHECK-BE-NEXT:    sradi r6, r4, 63
 ; CHECK-BE-NEXT:    subc r3, r4, r3
-; CHECK-BE-NEXT:    adde r3, r5, r6
+; CHECK-BE-NEXT:    adde r3, r6, r5
 ; CHECK-BE-NEXT:    neg r3, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lllesll_sext:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    sradi r5, r4, 63
-; CHECK-LE-NEXT:    rldicl r6, r3, 1, 63
+; CHECK-LE-NEXT:    rldicl r5, r3, 1, 63
+; CHECK-LE-NEXT:    sradi r6, r4, 63
 ; CHECK-LE-NEXT:    subc r3, r4, r3
-; CHECK-LE-NEXT:    adde r3, r5, r6
+; CHECK-LE-NEXT:    adde r3, r6, r5
 ; CHECK-LE-NEXT:    neg r3, r3
 ; CHECK-LE-NEXT:    blr
 entry:
@@ -137,22 +137,22 @@ define dso_local void @test_lllesll_store(i64 %a, i64 %b) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_lllesll_store:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    rldicl r5, r3, 1, 63
 ; CHECK-BE-NEXT:    sradi r6, r4, 63
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
-; CHECK-BE-NEXT:    subc r4, r4, r3
-; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-BE-NEXT:    adde r3, r6, r3
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    subc r3, r4, r3
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-BE-NEXT:    adde r3, r6, r5
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lllesll_store:
 ; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    rldicl r5, r3, 1, 63
 ; CHECK-LE-NEXT:    sradi r6, r4, 63
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
-; CHECK-LE-NEXT:    subc r4, r4, r3
-; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-LE-NEXT:    adde r3, r6, r3
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    subc r3, r4, r3
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-LE-NEXT:    adde r3, r6, r5
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i64 %a, %b
@@ -175,24 +175,24 @@ define dso_local void @test_lllesll_sext_store(i64 %a, i64 %b) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_lllesll_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    rldicl r5, r3, 1, 63
 ; CHECK-BE-NEXT:    sradi r6, r4, 63
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
-; CHECK-BE-NEXT:    subc r4, r4, r3
-; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-BE-NEXT:    adde r3, r6, r3
+; CHECK-BE-NEXT:    subc r3, r4, r3
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-BE-NEXT:    adde r3, r6, r5
 ; CHECK-BE-NEXT:    neg r3, r3
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lllesll_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    rldicl r5, r3, 1, 63
 ; CHECK-LE-NEXT:    sradi r6, r4, 63
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
-; CHECK-LE-NEXT:    subc r4, r4, r3
-; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-LE-NEXT:    adde r3, r6, r3
+; CHECK-LE-NEXT:    subc r3, r4, r3
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-LE-NEXT:    adde r3, r6, r5
 ; CHECK-LE-NEXT:    neg r3, r3
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i64 %a, %b
@@ -213,18 +213,18 @@ define dso_local void @test_lllesll_z_store(i64 %a) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_lllesll_z_store:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addi r5, r3, -1
+; CHECK-BE-NEXT:    addi r4, r3, -1
+; CHECK-BE-NEXT:    or r3, r4, r3
 ; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
-; CHECK-BE-NEXT:    or r3, r5, r3
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lllesll_z_store:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    addi r5, r3, -1
+; CHECK-LE-NEXT:    addi r4, r3, -1
+; CHECK-LE-NEXT:    or r3, r4, r3
 ; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
-; CHECK-LE-NEXT:    or r3, r5, r3
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
@@ -247,18 +247,18 @@ define dso_local void @test_lllesll_sext_z_store(i64 %a) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_lllesll_sext_z_store:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addi r5, r3, -1
+; CHECK-BE-NEXT:    addi r4, r3, -1
+; CHECK-BE-NEXT:    or r3, r4, r3
 ; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
-; CHECK-BE-NEXT:    or r3, r5, r3
 ; CHECK-BE-NEXT:    sradi r3, r3, 63
 ; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_lllesll_sext_z_store:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    addi r5, r3, -1
+; CHECK-LE-NEXT:    addi r4, r3, -1
+; CHECK-LE-NEXT:    or r3, r4, r3
 ; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
-; CHECK-LE-NEXT:    or r3, r5, r3
 ; CHECK-LE-NEXT:    sradi r3, r3, 63
 ; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllless.ll b/llvm/test/CodeGen/PowerPC/testComparesllless.ll
index f8db96c8bb3b0b8..22d3d32b2880e78 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllless.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllless.ll
@@ -72,19 +72,19 @@ define dso_local void @test_llless_store(i16 signext %a, i16 signext %b) {
 ; CHECK-BE-LABEL: test_llless_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r4, r3
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    xori r3, r3, 1
-; CHECK-BE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llless_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r4, r3
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    xori r3, r3, 1
-; CHECK-LE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i16 %a, %b
@@ -105,19 +105,19 @@ define dso_local void @test_llless_sext_store(i16 signext %a, i16 signext %b) {
 ; CHECK-BE-LABEL: test_llless_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    sub r3, r4, r3
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-BE-NEXT:    addi r3, r3, -1
-; CHECK-BE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llless_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    sub r3, r4, r3
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-LE-NEXT:    addi r3, r3, -1
-; CHECK-LE-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp sle i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllleuc.ll b/llvm/test/CodeGen/PowerPC/testComparesllleuc.ll
index 6e9ce7c7c5ef299..cfaa6e8fe1d8302 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllleuc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllleuc.ll
@@ -68,10 +68,10 @@ define dso_local void @test_llleuc_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_llleuc_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-NEXT:    not r3, r3
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i8 %a, %b
@@ -85,10 +85,10 @@ define dso_local void @test_llleuc_sext_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_llleuc_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-NEXT:    addi r3, r3, -1
-; CHECK-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllleui.ll b/llvm/test/CodeGen/PowerPC/testComparesllleui.ll
index bf94ab723e4340f..e438797cde772f0 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllleui.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllleui.ll
@@ -68,10 +68,10 @@ define dso_local void @test_llleui_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_llleui_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-NEXT:    not r3, r3
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i32 %a, %b
@@ -85,10 +85,10 @@ define dso_local void @test_llleui_sext_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_llleui_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-NEXT:    addi r3, r3, -1
-; CHECK-NEXT:    stw r3, glob@toc@l(r5)
+; CHECK-NEXT:    stw r3, glob@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllleull.ll b/llvm/test/CodeGen/PowerPC/testComparesllleull.ll
index 881043b5ee1f6c0..bf000912435ac1d 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllleull.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllleull.ll
@@ -67,10 +67,10 @@ define dso_local void @test_llleull_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_llleull_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    subc r4, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-NEXT:    subfe r3, r3, r3
 ; CHECK-NEXT:    addi r3, r3, 1
-; CHECK-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i64 %a, %b
@@ -84,10 +84,10 @@ define dso_local void @test_llleull_sext_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_llleull_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    subc r4, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-NEXT:    subfe r3, r3, r3
 ; CHECK-NEXT:    not r3, r3
-; CHECK-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i64 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllleus.ll b/llvm/test/CodeGen/PowerPC/testComparesllleus.ll
index 3b3ec08bf9d5182..53b9b8a16d503b4 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllleus.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllleus.ll
@@ -68,10 +68,10 @@ define dso_local void @test_llleus_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_llleus_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-NEXT:    not r3, r3
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i16 %a, %b
@@ -85,10 +85,10 @@ define dso_local void @test_llleus_sext_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_llleus_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r4, r3
-; CHECK-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-NEXT:    addi r3, r3, -1
-; CHECK-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ule i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllltsll.ll b/llvm/test/CodeGen/PowerPC/testComparesllltsll.ll
index a1be92ec371b6ea..50a4945bbe09dc8 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllltsll.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllltsll.ll
@@ -57,13 +57,13 @@ entry:
 define dso_local void @test_llltsll_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_llltsll_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sradi r6, r3, 63
-; CHECK-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-NEXT:    sradi r5, r3, 63
+; CHECK-NEXT:    rldicl r6, r4, 1, 63
 ; CHECK-NEXT:    subc r3, r3, r4
-; CHECK-NEXT:    rldicl r3, r4, 1, 63
-; CHECK-NEXT:    adde r3, r3, r6
+; CHECK-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-NEXT:    adde r3, r6, r5
 ; CHECK-NEXT:    xori r3, r3, 1
-; CHECK-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp slt i64 %a, %b
@@ -76,14 +76,14 @@ entry:
 define dso_local void @test_llltsll_sext_store(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_llltsll_sext_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sradi r6, r3, 63
-; CHECK-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-NEXT:    sradi r5, r3, 63
+; CHECK-NEXT:    rldicl r6, r4, 1, 63
 ; CHECK-NEXT:    subc r3, r3, r4
-; CHECK-NEXT:    rldicl r3, r4, 1, 63
-; CHECK-NEXT:    adde r3, r3, r6
+; CHECK-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-NEXT:    adde r3, r6, r5
 ; CHECK-NEXT:    xori r3, r3, 1
 ; CHECK-NEXT:    neg r3, r3
-; CHECK-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp slt i64 %a, %b
@@ -96,8 +96,8 @@ entry:
 define dso_local void @test_llltsll_sext_z_store(i64 %a) {
 ; CHECK-LABEL: test_llltsll_sext_z_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-NEXT:    sradi r3, r3, 63
+; CHECK-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllltuc.ll b/llvm/test/CodeGen/PowerPC/testComparesllltuc.ll
index 5b068f9e5c46be7..0b8de6c3fc95371 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllltuc.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllltuc.ll
@@ -39,9 +39,9 @@ define dso_local void @test_llltuc_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_llltuc_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ult i8 %a, %b
@@ -55,9 +55,9 @@ define dso_local void @test_llltuc_sext_store(i8 zeroext %a, i8 zeroext %b) {
 ; CHECK-LABEL: test_llltuc_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    stb r3, glob@toc@l(r5)
+; CHECK-NEXT:    stb r3, glob@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ult i8 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllltui.ll b/llvm/test/CodeGen/PowerPC/testComparesllltui.ll
index ebe05589bde2afe..ba48500f9200d97 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllltui.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllltui.ll
@@ -58,11 +58,11 @@ entry:
 define void @test_llltui_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_llltui_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    ld r5, .LC0@toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    stw r3, 0(r5)
+; CHECK-NEXT:    stw r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ult i32 %a, %b
@@ -75,11 +75,11 @@ entry:
 define void @test_llltui_sext_store(i32 zeroext %a, i32 zeroext %b) {
 ; CHECK-LABEL: test_llltui_sext_store:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    ld r5, .LC0@toc@l(r5)
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    stw r3, 0(r5)
+; CHECK-NEXT:    stw r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ult i32 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllltus.ll b/llvm/test/CodeGen/PowerPC/testComparesllltus.ll
index 96b9f463cfc7cd0..d3fc88e5cd9e089 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllltus.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllltus.ll
@@ -39,9 +39,9 @@ define dso_local void @test_llltus_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_llltus_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ult i16 %a, %b
@@ -55,9 +55,9 @@ define dso_local void @test_llltus_sext_store(i16 zeroext %a, i16 zeroext %b) {
 ; CHECK-LABEL: test_llltus_sext_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    sub r3, r3, r4
-; CHECK-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-NEXT:    sradi r3, r3, 63
-; CHECK-NEXT:    sth r3, glob@toc@l(r5)
+; CHECK-NEXT:    sth r3, glob@toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp ult i16 %a, %b

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllnesll.ll b/llvm/test/CodeGen/PowerPC/testComparesllnesll.ll
index 47f051adf4986dd..0db36395c4c2a42 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllnesll.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllnesll.ll
@@ -118,19 +118,19 @@ define dso_local void @test_llnesll_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_llnesll_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    addic r4, r3, -1
 ; CHECK-BE-NEXT:    subfe r3, r4, r3
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llnesll_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    addic r4, r3, -1
 ; CHECK-LE-NEXT:    subfe r3, r4, r3
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i64 %a, %b
@@ -151,19 +151,19 @@ define dso_local void @test_llnesll_sext_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_llnesll_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    subfic r3, r3, 0
 ; CHECK-BE-NEXT:    subfe r3, r3, r3
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llnesll_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    subfic r3, r3, 0
 ; CHECK-LE-NEXT:    subfe r3, r3, r3
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i64 %a, %b
@@ -182,17 +182,17 @@ define dso_local void @test_llnesll_z_store(i64 %a) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_llnesll_z_store:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addic r5, r3, -1
+; CHECK-BE-NEXT:    addic r4, r3, -1
+; CHECK-BE-NEXT:    subfe r3, r4, r3
 ; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
-; CHECK-BE-NEXT:    subfe r3, r5, r3
 ; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llnesll_z_store:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    addic r5, r3, -1
+; CHECK-LE-NEXT:    addic r4, r3, -1
+; CHECK-LE-NEXT:    subfe r3, r4, r3
 ; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
-; CHECK-LE-NEXT:    subfe r3, r5, r3
 ; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesllneull.ll b/llvm/test/CodeGen/PowerPC/testComparesllneull.ll
index bc2b2e0b237e7ff..928902f6e48938f 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesllneull.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesllneull.ll
@@ -118,19 +118,19 @@ define dso_local void @test_llneull_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_llneull_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    addic r4, r3, -1
 ; CHECK-BE-NEXT:    subfe r3, r4, r3
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llneull_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    addic r4, r3, -1
 ; CHECK-LE-NEXT:    subfe r3, r4, r3
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i64 %a, %b
@@ -151,19 +151,19 @@ define dso_local void @test_llneull_sext_store(i64 %a, i64 %b) {
 ; CHECK-BE-LABEL: test_llneull_sext_store:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xor r3, r3, r4
-; CHECK-BE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-BE-NEXT:    subfic r3, r3, 0
 ; CHECK-BE-NEXT:    subfe r3, r3, r3
-; CHECK-BE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llneull_sext_store:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xor r3, r3, r4
-; CHECK-LE-NEXT:    addis r5, r2, glob@toc@ha
+; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
 ; CHECK-LE-NEXT:    subfic r3, r3, 0
 ; CHECK-LE-NEXT:    subfe r3, r3, r3
-; CHECK-LE-NEXT:    std r3, glob@toc@l(r5)
+; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:
   %cmp = icmp ne i64 %a, %b
@@ -182,17 +182,17 @@ define dso_local void @test_llneull_z_store(i64 %a) {
 ; CHECK-NEXT:    blr
 ; CHECK-BE-LABEL: test_llneull_z_store:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    addic r5, r3, -1
+; CHECK-BE-NEXT:    addic r4, r3, -1
+; CHECK-BE-NEXT:    subfe r3, r4, r3
 ; CHECK-BE-NEXT:    addis r4, r2, glob@toc@ha
-; CHECK-BE-NEXT:    subfe r3, r5, r3
 ; CHECK-BE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test_llneull_z_store:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    addic r5, r3, -1
+; CHECK-LE-NEXT:    addic r4, r3, -1
+; CHECK-LE-NEXT:    subfe r3, r4, r3
 ; CHECK-LE-NEXT:    addis r4, r2, glob@toc@ha
-; CHECK-LE-NEXT:    subfe r3, r5, r3
 ; CHECK-LE-NEXT:    std r3, glob@toc@l(r4)
 ; CHECK-LE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll b/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll
index e787bb4a682fe1f..945e0df72a6f898 100644
--- a/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll
+++ b/llvm/test/CodeGen/PowerPC/tls-pie-xform.ll
@@ -39,9 +39,9 @@ define dso_local zeroext i8 @test_char_three(i8 zeroext %a) {
 ; CHECK-NEXT:    addis 4, 2, var_char@got@tprel@ha
 ; CHECK-NEXT:    ld 4, var_char@got@tprel@l(4)
 ; CHECK-NEXT:    lbzx 5, 4, var_char@tls
-; CHECK-NEXT:    add 5, 5, 3
-; CHECK-NEXT:    clrldi 3, 5, 56
-; CHECK-NEXT:    stbx 5, 4, var_char@tls
+; CHECK-NEXT:    add 3, 5, 3
+; CHECK-NEXT:    stbx 3, 4, var_char@tls
+; CHECK-NEXT:    clrldi 3, 3, 56
 ; CHECK-NEXT:    blr
 entry:
   %0 = load i8, ptr @var_char, align 1, !tbaa !4
@@ -93,9 +93,9 @@ define dso_local signext i16 @test_short_three(i16 signext %a) {
 ; CHECK-NEXT:    addis 4, 2, var_short@got@tprel@ha
 ; CHECK-NEXT:    ld 4, var_short@got@tprel@l(4)
 ; CHECK-NEXT:    lhzx 5, 4, var_short@tls
-; CHECK-NEXT:    add 5, 5, 3
-; CHECK-NEXT:    extsh 3, 5
-; CHECK-NEXT:    sthx 5, 4, var_short@tls
+; CHECK-NEXT:    add 3, 5, 3
+; CHECK-NEXT:    sthx 3, 4, var_short@tls
+; CHECK-NEXT:    extsh 3, 3
 ; CHECK-NEXT:    blr
 entry:
   %0 = load i16, ptr @var_short, align 2, !tbaa !7
@@ -146,9 +146,9 @@ define dso_local signext i32 @test_int_three(i32 signext %a) {
 ; CHECK-NEXT:    addis 4, 2, var_int@got@tprel@ha
 ; CHECK-NEXT:    ld 4, var_int@got@tprel@l(4)
 ; CHECK-NEXT:    lwzx 5, 4, var_int@tls
-; CHECK-NEXT:    add 5, 5, 3
-; CHECK-NEXT:    extsw 3, 5
-; CHECK-NEXT:    stwx 5, 4, var_int@tls
+; CHECK-NEXT:    add 3, 5, 3
+; CHECK-NEXT:    stwx 3, 4, var_int@tls
+; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    blr
 entry:
   %0 = load i32, ptr @var_int, align 4, !tbaa !9

diff  --git a/llvm/test/CodeGen/PowerPC/toc-float.ll b/llvm/test/CodeGen/PowerPC/toc-float.ll
index d77105a9eadb58e..1d6f1f71a2383db 100644
--- a/llvm/test/CodeGen/PowerPC/toc-float.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-float.ll
@@ -53,9 +53,9 @@ define float @floatConstantArray() local_unnamed_addr  {
 ; CHECK-P8-LABEL: floatConstantArray:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis 3, 2, FArr@toc@ha+12
-; CHECK-P8-NEXT:    addis 4, 2, .LCPI2_0@toc@ha
 ; CHECK-P8-NEXT:    lfs 0, FArr@toc@l+12(3)
-; CHECK-P8-NEXT:    lfs 1, .LCPI2_0@toc@l(4)
+; CHECK-P8-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; CHECK-P8-NEXT:    lfs 1, .LCPI2_0@toc@l(3)
 ; CHECK-P8-NEXT:    xsaddsp 1, 0, 1
 ; CHECK-P8-NEXT:    blr
   %1 = load float, ptr getelementptr inbounds ([10 x float], ptr @FArr, i64 0, i64 3), align 4
@@ -95,9 +95,9 @@ define double @doubleConstantArray()  {
 ; CHECK-P8-LABEL: doubleConstantArray:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis 3, 2, d@toc@ha+24
-; CHECK-P8-NEXT:    addis 4, 2, .LCPI4_0@toc@ha
 ; CHECK-P8-NEXT:    lfd 0, d@toc@l+24(3)
-; CHECK-P8-NEXT:    lfd 1, .LCPI4_0@toc@l(4)
+; CHECK-P8-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; CHECK-P8-NEXT:    lfd 1, .LCPI4_0@toc@l(3)
 ; CHECK-P8-NEXT:    xsadddp 1, 0, 1
 ; CHECK-P8-NEXT:    blr
   %1 = load double, ptr getelementptr inbounds ([200 x double], ptr @d, i64 0, i64 3), align 8
@@ -125,11 +125,11 @@ define double @doubleLargeConstantArray()  {
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    addis 3, 2, arr@toc@ha
 ; CHECK-P8-NEXT:    li 4, 0
-; CHECK-P8-NEXT:    addis 5, 2, .LCPI5_0@toc@ha
 ; CHECK-P8-NEXT:    addi 3, 3, arr@toc@l
 ; CHECK-P8-NEXT:    ori 4, 4, 32768
-; CHECK-P8-NEXT:    lfd 1, .LCPI5_0@toc@l(5)
 ; CHECK-P8-NEXT:    lfdx 0, 3, 4
+; CHECK-P8-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
+; CHECK-P8-NEXT:    lfd 1, .LCPI5_0@toc@l(3)
 ; CHECK-P8-NEXT:    xsadddp 1, 0, 1
 ; CHECK-P8-NEXT:    blr
   %1 = load double, ptr getelementptr inbounds ([20000 x double], ptr @arr, i64 0, i64 4096), align 8

diff  --git a/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll b/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll
index 039933c87101429..abda0c897a7cf99 100644
--- a/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll
+++ b/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll
@@ -14,11 +14,11 @@ define dso_local void @test(ptr nocapture %fp, i32 signext %Arg, i32 signext %Le
 ; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stdu r1, -64(r1)
-; CHECK-NEXT:    mr r30, r4
 ; CHECK-NEXT:    cmpwi r5, 1
-; CHECK-NEXT:    mr r29, r3
+; CHECK-NEXT:    mr r30, r4
 ; CHECK-NEXT:    std r2, 24(r1)
 ; CHECK-NEXT:    std r0, 80(r1)
+; CHECK-NEXT:    mr r29, r3
 ; CHECK-NEXT:    bc 12, lt, .LBB0_4
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    cmpwi r30, 11

diff  --git a/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll b/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
index cc218d15987fefc..b7590530b6652cc 100644
--- a/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
+++ b/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
@@ -90,12 +90,12 @@ define dso_local <2 x double> @test2(<4 x i32> %a, <4 x i32> %b) {
 ; P8BE-LABEL: test2:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    xxsldwi vs0, v2, v2, 3
-; P8BE-NEXT:    mfvsrwz r4, v3
-; P8BE-NEXT:    mtfprwz f1, r4
 ; P8BE-NEXT:    mffprwz r3, f0
-; P8BE-NEXT:    xscvuxddp f1, f1
 ; P8BE-NEXT:    mtfprwz f0, r3
+; P8BE-NEXT:    mfvsrwz r3, v3
+; P8BE-NEXT:    mtfprwz f1, r3
 ; P8BE-NEXT:    xscvuxddp f0, f0
+; P8BE-NEXT:    xscvuxddp f1, f1
 ; P8BE-NEXT:    xxmrghd v2, vs0, vs1
 ; P8BE-NEXT:    blr
 ;
@@ -104,9 +104,9 @@ define dso_local <2 x double> @test2(<4 x i32> %a, <4 x i32> %b) {
 ; P8LE-NEXT:    xxswapd vs0, v2
 ; P8LE-NEXT:    xxsldwi vs1, v3, v3, 1
 ; P8LE-NEXT:    mffprwz r3, f0
-; P8LE-NEXT:    mffprwz r4, f1
 ; P8LE-NEXT:    mtfprwz f0, r3
-; P8LE-NEXT:    mtfprwz f1, r4
+; P8LE-NEXT:    mffprwz r3, f1
+; P8LE-NEXT:    mtfprwz f1, r3
 ; P8LE-NEXT:    xscvuxddp f0, f0
 ; P8LE-NEXT:    xscvuxddp f1, f1
 ; P8LE-NEXT:    xxmrghd v2, vs1, vs0

diff  --git a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
index 54e9221d46fc2fb..f708da86444b2ba 100644
--- a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
@@ -18,11 +18,11 @@ define i1 @test_urem_odd(i13 %X) nounwind {
 ; PPC64LE-LABEL: test_urem_odd:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    mulli 3, 3, 3277
-; PPC64LE-NEXT:    li 4, 0
+; PPC64LE-NEXT:    li 4, 1
 ; PPC64LE-NEXT:    clrlwi 3, 3, 19
 ; PPC64LE-NEXT:    cmplwi 3, 1639
-; PPC64LE-NEXT:    li 3, 1
-; PPC64LE-NEXT:    isellt 3, 3, 4
+; PPC64LE-NEXT:    li 3, 0
+; PPC64LE-NEXT:    isellt 3, 4, 3
 ; PPC64LE-NEXT:    blr
   %urem = urem i13 %X, 5
   %cmp = icmp eq i13 %urem, 0
@@ -53,13 +53,13 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; PPC64LE-NEXT:    lis 4, 1755
 ; PPC64LE-NEXT:    ori 4, 4, 28087
 ; PPC64LE-NEXT:    mullw 3, 3, 4
-; PPC64LE-NEXT:    lis 4, 146
-; PPC64LE-NEXT:    rlwinm 5, 3, 31, 6, 31
-; PPC64LE-NEXT:    rlwimi 5, 3, 26, 5, 5
-; PPC64LE-NEXT:    ori 3, 4, 18725
-; PPC64LE-NEXT:    li 4, 1
-; PPC64LE-NEXT:    cmplw 5, 3
+; PPC64LE-NEXT:    rlwinm 4, 3, 31, 6, 31
+; PPC64LE-NEXT:    rlwimi 4, 3, 26, 5, 5
+; PPC64LE-NEXT:    lis 3, 146
+; PPC64LE-NEXT:    ori 3, 3, 18725
+; PPC64LE-NEXT:    cmplw 4, 3
 ; PPC64LE-NEXT:    li 3, 0
+; PPC64LE-NEXT:    li 4, 1
 ; PPC64LE-NEXT:    isellt 3, 4, 3
 ; PPC64LE-NEXT:    blr
   %urem = urem i27 %X, 14
@@ -82,14 +82,14 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind {
 ;
 ; PPC64LE-LABEL: test_urem_odd_setne:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    slwi 5, 3, 1
-; PPC64LE-NEXT:    li 4, 0
-; PPC64LE-NEXT:    add 3, 3, 5
+; PPC64LE-NEXT:    slwi 4, 3, 1
+; PPC64LE-NEXT:    add 3, 3, 4
+; PPC64LE-NEXT:    li 4, 1
 ; PPC64LE-NEXT:    neg 3, 3
 ; PPC64LE-NEXT:    clrlwi 3, 3, 28
 ; PPC64LE-NEXT:    cmplwi 3, 3
-; PPC64LE-NEXT:    li 3, 1
-; PPC64LE-NEXT:    iselgt 3, 3, 4
+; PPC64LE-NEXT:    li 3, 0
+; PPC64LE-NEXT:    iselgt 3, 4, 3
 ; PPC64LE-NEXT:    blr
   %urem = urem i4 %X, 5
   %cmp = icmp ne i4 %urem, 0
@@ -112,11 +112,11 @@ define i1 @test_urem_negative_odd(i9 %X) nounwind {
 ; PPC64LE-LABEL: test_urem_negative_odd:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    mulli 3, 3, 307
-; PPC64LE-NEXT:    li 4, 0
+; PPC64LE-NEXT:    li 4, 1
 ; PPC64LE-NEXT:    clrlwi 3, 3, 23
 ; PPC64LE-NEXT:    cmplwi 3, 1
-; PPC64LE-NEXT:    li 3, 1
-; PPC64LE-NEXT:    iselgt 3, 3, 4
+; PPC64LE-NEXT:    li 3, 0
+; PPC64LE-NEXT:    iselgt 3, 4, 3
 ; PPC64LE-NEXT:    blr
   %urem = urem i9 %X, -5
   %cmp = icmp ne i9 %urem, 0
@@ -161,48 +161,48 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
 ;
 ; PPC64LE-LABEL: test_urem_vec:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    addis 6, 2, .LCPI4_0@toc@ha
 ; PPC64LE-NEXT:    mtfprwz 0, 3
+; PPC64LE-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; PPC64LE-NEXT:    mtfprwz 1, 4
+; PPC64LE-NEXT:    addi 3, 3, .LCPI4_0@toc@l
+; PPC64LE-NEXT:    mtvsrwz 36, 5
+; PPC64LE-NEXT:    vspltisw 5, -11
+; PPC64LE-NEXT:    lxvd2x 2, 0, 3
 ; PPC64LE-NEXT:    addis 3, 2, .LCPI4_1@toc@ha
-; PPC64LE-NEXT:    addi 6, 6, .LCPI4_0@toc@l
-; PPC64LE-NEXT:    mtfprwz 2, 4
 ; PPC64LE-NEXT:    addi 3, 3, .LCPI4_1@toc@l
-; PPC64LE-NEXT:    addis 4, 2, .LCPI4_2@toc@ha
-; PPC64LE-NEXT:    lxvd2x 1, 0, 6
-; PPC64LE-NEXT:    mtvsrwz 36, 5
-; PPC64LE-NEXT:    xxmrghw 34, 2, 0
+; PPC64LE-NEXT:    xxmrghw 34, 1, 0
+; PPC64LE-NEXT:    lxvd2x 0, 0, 3
+; PPC64LE-NEXT:    addis 3, 2, .LCPI4_2@toc@ha
+; PPC64LE-NEXT:    addi 3, 3, .LCPI4_2@toc@l
+; PPC64LE-NEXT:    xxswapd 35, 2
+; PPC64LE-NEXT:    vperm 2, 4, 2, 3
+; PPC64LE-NEXT:    xxswapd 35, 0
 ; PPC64LE-NEXT:    lxvd2x 0, 0, 3
-; PPC64LE-NEXT:    addi 3, 4, .LCPI4_2@toc@l
-; PPC64LE-NEXT:    addis 4, 2, .LCPI4_4@toc@ha
-; PPC64LE-NEXT:    addi 4, 4, .LCPI4_4@toc@l
-; PPC64LE-NEXT:    xxswapd 35, 1
-; PPC64LE-NEXT:    lxvd2x 1, 0, 3
 ; PPC64LE-NEXT:    addis 3, 2, .LCPI4_3@toc@ha
 ; PPC64LE-NEXT:    addi 3, 3, .LCPI4_3@toc@l
-; PPC64LE-NEXT:    vperm 2, 4, 2, 3
-; PPC64LE-NEXT:    vspltisw 3, -11
+; PPC64LE-NEXT:    vsubuwm 2, 2, 3
 ; PPC64LE-NEXT:    xxswapd 36, 0
-; PPC64LE-NEXT:    xxswapd 37, 1
 ; PPC64LE-NEXT:    lxvd2x 0, 0, 3
-; PPC64LE-NEXT:    lxvd2x 1, 0, 4
+; PPC64LE-NEXT:    addis 3, 2, .LCPI4_4@toc@ha
+; PPC64LE-NEXT:    addi 3, 3, .LCPI4_4@toc@l
+; PPC64LE-NEXT:    vmuluwm 2, 2, 4
+; PPC64LE-NEXT:    vsrw 4, 5, 5
+; PPC64LE-NEXT:    xxswapd 32, 0
+; PPC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PPC64LE-NEXT:    addis 3, 2, .LCPI4_5@toc@ha
 ; PPC64LE-NEXT:    addi 3, 3, .LCPI4_5@toc@l
-; PPC64LE-NEXT:    vsrw 3, 3, 3
-; PPC64LE-NEXT:    vsubuwm 2, 2, 4
-; PPC64LE-NEXT:    xxswapd 36, 0
+; PPC64LE-NEXT:    vslw 3, 2, 0
+; PPC64LE-NEXT:    xxland 34, 34, 36
+; PPC64LE-NEXT:    xxswapd 33, 0
 ; PPC64LE-NEXT:    lxvd2x 0, 0, 3
-; PPC64LE-NEXT:    vmuluwm 2, 2, 5
-; PPC64LE-NEXT:    xxswapd 37, 1
-; PPC64LE-NEXT:    xxland 32, 34, 35
-; PPC64LE-NEXT:    vslw 2, 2, 4
-; PPC64LE-NEXT:    vsrw 4, 0, 5
-; PPC64LE-NEXT:    xxlor 1, 36, 34
-; PPC64LE-NEXT:    xxswapd 34, 0
-; PPC64LE-NEXT:    xxland 35, 1, 35
-; PPC64LE-NEXT:    vcmpgtuw 2, 3, 2
+; PPC64LE-NEXT:    vsrw 2, 2, 1
+; PPC64LE-NEXT:    xxswapd 38, 0
+; PPC64LE-NEXT:    xxlor 0, 34, 35
+; PPC64LE-NEXT:    xxland 34, 0, 36
+; PPC64LE-NEXT:    vcmpgtuw 2, 2, 6
+; PPC64LE-NEXT:    mfvsrwz 5, 34
 ; PPC64LE-NEXT:    xxswapd 0, 34
 ; PPC64LE-NEXT:    xxsldwi 1, 34, 34, 1
-; PPC64LE-NEXT:    mfvsrwz 5, 34
 ; PPC64LE-NEXT:    mffprwz 3, 0
 ; PPC64LE-NEXT:    mffprwz 4, 1
 ; PPC64LE-NEXT:    blr
@@ -259,25 +259,25 @@ define i1 @test_urem_oversized(i66 %X) nounwind {
 ; PPC64LE-LABEL: test_urem_oversized:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    lis 5, 6028
+; PPC64LE-NEXT:    sldi 7, 3, 1
 ; PPC64LE-NEXT:    ori 5, 5, 51361
 ; PPC64LE-NEXT:    rldic 5, 5, 33, 2
 ; PPC64LE-NEXT:    oris 5, 5, 52741
 ; PPC64LE-NEXT:    ori 5, 5, 40665
 ; PPC64LE-NEXT:    mulhdu 6, 3, 5
 ; PPC64LE-NEXT:    mulld 4, 4, 5
-; PPC64LE-NEXT:    mulld 5, 3, 5
-; PPC64LE-NEXT:    sldi 3, 3, 1
-; PPC64LE-NEXT:    add 3, 6, 3
-; PPC64LE-NEXT:    add 3, 3, 4
-; PPC64LE-NEXT:    lis 4, -8538
-; PPC64LE-NEXT:    rotldi 6, 5, 63
-; PPC64LE-NEXT:    ori 4, 4, 44780
-; PPC64LE-NEXT:    rldimi 6, 3, 63, 0
-; PPC64LE-NEXT:    rlwinm 3, 3, 31, 31, 31
-; PPC64LE-NEXT:    rldicl 4, 4, 4, 28
-; PPC64LE-NEXT:    rlwimi. 3, 5, 1, 30, 30
-; PPC64LE-NEXT:    cmpld 1, 6, 4
+; PPC64LE-NEXT:    mulld 3, 3, 5
+; PPC64LE-NEXT:    add 6, 6, 7
+; PPC64LE-NEXT:    rotldi 5, 3, 63
+; PPC64LE-NEXT:    add 4, 6, 4
+; PPC64LE-NEXT:    lis 6, -8538
+; PPC64LE-NEXT:    ori 6, 6, 44780
+; PPC64LE-NEXT:    rldimi 5, 4, 63, 0
+; PPC64LE-NEXT:    rlwinm 4, 4, 31, 31, 31
+; PPC64LE-NEXT:    rldicl 6, 6, 4, 28
+; PPC64LE-NEXT:    rlwimi. 4, 3, 1, 30, 30
 ; PPC64LE-NEXT:    li 3, 1
+; PPC64LE-NEXT:    cmpld 1, 5, 6
 ; PPC64LE-NEXT:    crnand 20, 2, 4
 ; PPC64LE-NEXT:    isel 3, 0, 3, 20
 ; PPC64LE-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
index b4cdf2844a7310f..98314a02c23fe85 100644
--- a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll
@@ -101,85 +101,85 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
 ; P8LE-LABEL: fold_urem_vec_1:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r3, 689
-; P8LE-NEXT:    lis r8, 528
-; P8LE-NEXT:    lis r9, 668
-; P8LE-NEXT:    lis r10, 65
-; P8LE-NEXT:    ori r3, r3, 55879
-; P8LE-NEXT:    ori r8, r8, 33826
-; P8LE-NEXT:    ori r9, r9, 48149
-; P8LE-NEXT:    ori r10, r10, 22281
-; P8LE-NEXT:    mffprd r4, f0
-; P8LE-NEXT:    clrldi r5, r4, 48
-; P8LE-NEXT:    rldicl r6, r4, 48, 48
-; P8LE-NEXT:    rldicl r7, r4, 32, 48
-; P8LE-NEXT:    rldicl r4, r4, 16, 48
-; P8LE-NEXT:    clrlwi r5, r5, 16
-; P8LE-NEXT:    clrlwi r6, r6, 16
-; P8LE-NEXT:    mulhwu r3, r5, r3
+; P8LE-NEXT:    lis r4, 689
+; P8LE-NEXT:    lis r5, 528
+; P8LE-NEXT:    lis r6, 668
+; P8LE-NEXT:    mffprd r3, f0
+; P8LE-NEXT:    ori r4, r4, 55879
+; P8LE-NEXT:    ori r5, r5, 33826
+; P8LE-NEXT:    ori r6, r6, 48149
+; P8LE-NEXT:    clrldi r7, r3, 48
+; P8LE-NEXT:    clrlwi r7, r7, 16
+; P8LE-NEXT:    mulhwu r4, r7, r4
+; P8LE-NEXT:    mulli r4, r4, 95
+; P8LE-NEXT:    sub r4, r7, r4
+; P8LE-NEXT:    rldicl r7, r3, 48, 48
 ; P8LE-NEXT:    clrlwi r7, r7, 16
-; P8LE-NEXT:    clrlwi r4, r4, 16
-; P8LE-NEXT:    mulhwu r8, r6, r8
-; P8LE-NEXT:    mulhwu r9, r7, r9
-; P8LE-NEXT:    mulhwu r10, r4, r10
-; P8LE-NEXT:    mulli r3, r3, 95
-; P8LE-NEXT:    mulli r8, r8, 124
-; P8LE-NEXT:    mulli r9, r9, 98
-; P8LE-NEXT:    mulli r10, r10, 1003
-; P8LE-NEXT:    sub r3, r5, r3
-; P8LE-NEXT:    sub r5, r6, r8
-; P8LE-NEXT:    mtvsrd v2, r3
-; P8LE-NEXT:    sub r3, r7, r9
-; P8LE-NEXT:    sub r4, r4, r10
+; P8LE-NEXT:    mtvsrd v2, r4
+; P8LE-NEXT:    lis r4, 65
+; P8LE-NEXT:    mulhwu r5, r7, r5
+; P8LE-NEXT:    ori r4, r4, 22281
+; P8LE-NEXT:    mulli r5, r5, 124
+; P8LE-NEXT:    sub r5, r7, r5
+; P8LE-NEXT:    rldicl r7, r3, 32, 48
+; P8LE-NEXT:    rldicl r3, r3, 16, 48
+; P8LE-NEXT:    clrlwi r7, r7, 16
+; P8LE-NEXT:    clrlwi r3, r3, 16
 ; P8LE-NEXT:    mtvsrd v3, r5
+; P8LE-NEXT:    mulhwu r6, r7, r6
+; P8LE-NEXT:    mulhwu r4, r3, r4
+; P8LE-NEXT:    mulli r6, r6, 98
+; P8LE-NEXT:    mulli r4, r4, 1003
+; P8LE-NEXT:    sub r6, r7, r6
+; P8LE-NEXT:    sub r3, r3, r4
 ; P8LE-NEXT:    mtvsrd v4, r3
-; P8LE-NEXT:    mtvsrd v5, r4
 ; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    vmrghh v3, v5, v4
+; P8LE-NEXT:    mtvsrd v3, r6
+; P8LE-NEXT:    vmrghh v3, v4, v3
 ; P8LE-NEXT:    xxmrglw v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fold_urem_vec_1:
 ; P8BE:       # %bb.0:
-; P8BE-NEXT:    mfvsrd r4, v2
-; P8BE-NEXT:    lis r3, 65
-; P8BE-NEXT:    lis r8, 668
-; P8BE-NEXT:    lis r9, 528
-; P8BE-NEXT:    lis r10, 689
-; P8BE-NEXT:    ori r3, r3, 22281
-; P8BE-NEXT:    ori r8, r8, 48149
-; P8BE-NEXT:    ori r9, r9, 33826
-; P8BE-NEXT:    ori r10, r10, 55879
-; P8BE-NEXT:    clrldi r5, r4, 48
-; P8BE-NEXT:    rldicl r6, r4, 48, 48
-; P8BE-NEXT:    clrlwi r5, r5, 16
-; P8BE-NEXT:    rldicl r7, r4, 32, 48
+; P8BE-NEXT:    mfvsrd r3, v2
+; P8BE-NEXT:    addis r6, r2, .LCPI0_0@toc@ha
+; P8BE-NEXT:    lis r4, 65
+; P8BE-NEXT:    lis r5, 668
+; P8BE-NEXT:    lis r7, 528
+; P8BE-NEXT:    addi r6, r6, .LCPI0_0@toc@l
+; P8BE-NEXT:    ori r4, r4, 22281
+; P8BE-NEXT:    ori r5, r5, 48149
+; P8BE-NEXT:    ori r7, r7, 33826
+; P8BE-NEXT:    lxvw4x v2, 0, r6
+; P8BE-NEXT:    clrldi r6, r3, 48
 ; P8BE-NEXT:    clrlwi r6, r6, 16
-; P8BE-NEXT:    rldicl r4, r4, 16, 48
-; P8BE-NEXT:    mulhwu r3, r5, r3
-; P8BE-NEXT:    clrlwi r7, r7, 16
-; P8BE-NEXT:    clrlwi r4, r4, 16
-; P8BE-NEXT:    mulhwu r8, r6, r8
-; P8BE-NEXT:    mulhwu r9, r7, r9
-; P8BE-NEXT:    mulhwu r10, r4, r10
-; P8BE-NEXT:    mulli r3, r3, 1003
-; P8BE-NEXT:    mulli r8, r8, 98
-; P8BE-NEXT:    mulli r9, r9, 124
-; P8BE-NEXT:    mulli r10, r10, 95
-; P8BE-NEXT:    sub r3, r5, r3
-; P8BE-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
-; P8BE-NEXT:    mtvsrwz v2, r3
-; P8BE-NEXT:    addi r3, r5, .LCPI0_0@toc@l
-; P8BE-NEXT:    sub r6, r6, r8
-; P8BE-NEXT:    lxvw4x v3, 0, r3
-; P8BE-NEXT:    sub r3, r7, r9
-; P8BE-NEXT:    sub r4, r4, r10
-; P8BE-NEXT:    mtvsrwz v4, r6
+; P8BE-NEXT:    mulhwu r4, r6, r4
+; P8BE-NEXT:    mulli r4, r4, 1003
+; P8BE-NEXT:    sub r4, r6, r4
+; P8BE-NEXT:    rldicl r6, r3, 48, 48
+; P8BE-NEXT:    clrlwi r6, r6, 16
+; P8BE-NEXT:    mtvsrwz v3, r4
+; P8BE-NEXT:    lis r4, 689
+; P8BE-NEXT:    mulhwu r5, r6, r5
+; P8BE-NEXT:    ori r4, r4, 55879
+; P8BE-NEXT:    mulli r5, r5, 98
+; P8BE-NEXT:    sub r5, r6, r5
+; P8BE-NEXT:    rldicl r6, r3, 32, 48
+; P8BE-NEXT:    rldicl r3, r3, 16, 48
+; P8BE-NEXT:    clrlwi r6, r6, 16
+; P8BE-NEXT:    clrlwi r3, r3, 16
+; P8BE-NEXT:    mtvsrwz v4, r5
+; P8BE-NEXT:    mulhwu r7, r6, r7
+; P8BE-NEXT:    mulhwu r4, r3, r4
+; P8BE-NEXT:    mulli r7, r7, 124
+; P8BE-NEXT:    mulli r4, r4, 95
+; P8BE-NEXT:    sub r6, r6, r7
+; P8BE-NEXT:    sub r3, r3, r4
 ; P8BE-NEXT:    mtvsrwz v5, r3
-; P8BE-NEXT:    mtvsrwz v0, r4
-; P8BE-NEXT:    vperm v2, v4, v2, v3
-; P8BE-NEXT:    vperm v3, v0, v5, v3
-; P8BE-NEXT:    xxmrghw v2, v3, v2
+; P8BE-NEXT:    vperm v3, v4, v3, v2
+; P8BE-NEXT:    mtvsrwz v4, r6
+; P8BE-NEXT:    vperm v2, v5, v4, v2
+; P8BE-NEXT:    xxmrghw v2, v2, v3
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
   ret <4 x i16> %1
@@ -266,73 +266,73 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
 ; P8LE-LABEL: fold_urem_vec_2:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r3, 689
-; P8LE-NEXT:    ori r3, r3, 55879
-; P8LE-NEXT:    mffprd r4, f0
-; P8LE-NEXT:    clrldi r5, r4, 48
-; P8LE-NEXT:    rldicl r6, r4, 48, 48
-; P8LE-NEXT:    rldicl r7, r4, 32, 48
-; P8LE-NEXT:    rldicl r4, r4, 16, 48
+; P8LE-NEXT:    lis r4, 689
+; P8LE-NEXT:    mffprd r3, f0
+; P8LE-NEXT:    ori r4, r4, 55879
+; P8LE-NEXT:    clrldi r5, r3, 48
+; P8LE-NEXT:    rldicl r6, r3, 48, 48
+; P8LE-NEXT:    rldicl r7, r3, 32, 48
+; P8LE-NEXT:    rldicl r3, r3, 16, 48
 ; P8LE-NEXT:    clrlwi r5, r5, 16
 ; P8LE-NEXT:    clrlwi r6, r6, 16
-; P8LE-NEXT:    mulhwu r8, r5, r3
 ; P8LE-NEXT:    clrlwi r7, r7, 16
-; P8LE-NEXT:    clrlwi r4, r4, 16
-; P8LE-NEXT:    mulhwu r9, r6, r3
-; P8LE-NEXT:    mulhwu r10, r7, r3
-; P8LE-NEXT:    mulhwu r3, r4, r3
+; P8LE-NEXT:    clrlwi r3, r3, 16
+; P8LE-NEXT:    mulhwu r8, r5, r4
 ; P8LE-NEXT:    mulli r8, r8, 95
-; P8LE-NEXT:    mulli r9, r9, 95
-; P8LE-NEXT:    mulli r10, r10, 95
-; P8LE-NEXT:    mulli r3, r3, 95
 ; P8LE-NEXT:    sub r5, r5, r8
-; P8LE-NEXT:    sub r6, r6, r9
+; P8LE-NEXT:    mulhwu r8, r6, r4
 ; P8LE-NEXT:    mtvsrd v2, r5
-; P8LE-NEXT:    sub r5, r7, r10
-; P8LE-NEXT:    sub r3, r4, r3
+; P8LE-NEXT:    mulli r8, r8, 95
+; P8LE-NEXT:    sub r6, r6, r8
+; P8LE-NEXT:    mulhwu r8, r7, r4
+; P8LE-NEXT:    mulhwu r4, r3, r4
 ; P8LE-NEXT:    mtvsrd v3, r6
-; P8LE-NEXT:    mtvsrd v4, r5
-; P8LE-NEXT:    mtvsrd v5, r3
+; P8LE-NEXT:    mulli r8, r8, 95
+; P8LE-NEXT:    mulli r4, r4, 95
+; P8LE-NEXT:    sub r7, r7, r8
+; P8LE-NEXT:    sub r3, r3, r4
+; P8LE-NEXT:    mtvsrd v4, r7
 ; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    vmrghh v3, v5, v4
+; P8LE-NEXT:    mtvsrd v3, r3
+; P8LE-NEXT:    vmrghh v3, v3, v4
 ; P8LE-NEXT:    xxmrglw v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fold_urem_vec_2:
 ; P8BE:       # %bb.0:
-; P8BE-NEXT:    mfvsrd r4, v2
-; P8BE-NEXT:    lis r3, 689
-; P8BE-NEXT:    ori r3, r3, 55879
-; P8BE-NEXT:    clrldi r5, r4, 48
-; P8BE-NEXT:    rldicl r6, r4, 48, 48
+; P8BE-NEXT:    mfvsrd r3, v2
+; P8BE-NEXT:    addis r5, r2, .LCPI1_0@toc@ha
+; P8BE-NEXT:    lis r4, 689
+; P8BE-NEXT:    addi r5, r5, .LCPI1_0@toc@l
+; P8BE-NEXT:    ori r4, r4, 55879
+; P8BE-NEXT:    lxvw4x v2, 0, r5
+; P8BE-NEXT:    clrldi r5, r3, 48
+; P8BE-NEXT:    rldicl r6, r3, 48, 48
+; P8BE-NEXT:    rldicl r7, r3, 32, 48
+; P8BE-NEXT:    rldicl r3, r3, 16, 48
 ; P8BE-NEXT:    clrlwi r5, r5, 16
-; P8BE-NEXT:    rldicl r7, r4, 32, 48
 ; P8BE-NEXT:    clrlwi r6, r6, 16
-; P8BE-NEXT:    rldicl r4, r4, 16, 48
-; P8BE-NEXT:    mulhwu r8, r5, r3
 ; P8BE-NEXT:    clrlwi r7, r7, 16
-; P8BE-NEXT:    clrlwi r4, r4, 16
-; P8BE-NEXT:    mulhwu r9, r6, r3
-; P8BE-NEXT:    mulhwu r10, r7, r3
-; P8BE-NEXT:    mulhwu r3, r4, r3
+; P8BE-NEXT:    clrlwi r3, r3, 16
+; P8BE-NEXT:    mulhwu r8, r5, r4
 ; P8BE-NEXT:    mulli r8, r8, 95
-; P8BE-NEXT:    mulli r9, r9, 95
-; P8BE-NEXT:    mulli r10, r10, 95
-; P8BE-NEXT:    mulli r3, r3, 95
 ; P8BE-NEXT:    sub r5, r5, r8
-; P8BE-NEXT:    addis r8, r2, .LCPI1_0@toc@ha
-; P8BE-NEXT:    mtvsrwz v2, r5
-; P8BE-NEXT:    addi r5, r8, .LCPI1_0@toc@l
-; P8BE-NEXT:    sub r6, r6, r9
-; P8BE-NEXT:    lxvw4x v3, 0, r5
-; P8BE-NEXT:    sub r5, r7, r10
-; P8BE-NEXT:    sub r3, r4, r3
+; P8BE-NEXT:    mulhwu r8, r6, r4
+; P8BE-NEXT:    mtvsrwz v3, r5
+; P8BE-NEXT:    mulli r8, r8, 95
+; P8BE-NEXT:    sub r6, r6, r8
+; P8BE-NEXT:    mulhwu r8, r7, r4
+; P8BE-NEXT:    mulhwu r4, r3, r4
 ; P8BE-NEXT:    mtvsrwz v4, r6
-; P8BE-NEXT:    mtvsrwz v5, r5
-; P8BE-NEXT:    mtvsrwz v0, r3
-; P8BE-NEXT:    vperm v2, v4, v2, v3
-; P8BE-NEXT:    vperm v3, v0, v5, v3
-; P8BE-NEXT:    xxmrghw v2, v3, v2
+; P8BE-NEXT:    mulli r8, r8, 95
+; P8BE-NEXT:    mulli r4, r4, 95
+; P8BE-NEXT:    sub r7, r7, r8
+; P8BE-NEXT:    sub r3, r3, r4
+; P8BE-NEXT:    mtvsrwz v5, r7
+; P8BE-NEXT:    vperm v3, v4, v3, v2
+; P8BE-NEXT:    mtvsrwz v4, r3
+; P8BE-NEXT:    vperm v2, v4, v5, v2
+; P8BE-NEXT:    xxmrghw v2, v2, v3
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
   ret <4 x i16> %1
@@ -437,87 +437,87 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
 ; P8LE-LABEL: combine_urem_udiv:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r3, 689
-; P8LE-NEXT:    ori r3, r3, 55879
-; P8LE-NEXT:    mffprd r4, f0
-; P8LE-NEXT:    clrldi r5, r4, 48
-; P8LE-NEXT:    rldicl r6, r4, 48, 48
+; P8LE-NEXT:    lis r4, 689
+; P8LE-NEXT:    mffprd r3, f0
+; P8LE-NEXT:    ori r4, r4, 55879
+; P8LE-NEXT:    clrldi r5, r3, 48
+; P8LE-NEXT:    rldicl r6, r3, 48, 48
+; P8LE-NEXT:    rldicl r7, r3, 32, 48
+; P8LE-NEXT:    rldicl r3, r3, 16, 48
 ; P8LE-NEXT:    clrlwi r5, r5, 16
 ; P8LE-NEXT:    clrlwi r8, r6, 16
-; P8LE-NEXT:    rldicl r7, r4, 32, 48
-; P8LE-NEXT:    rldicl r4, r4, 16, 48
-; P8LE-NEXT:    mulhwu r9, r5, r3
-; P8LE-NEXT:    mulhwu r8, r8, r3
-; P8LE-NEXT:    clrlwi r10, r7, 16
-; P8LE-NEXT:    clrlwi r11, r4, 16
-; P8LE-NEXT:    mulhwu r10, r10, r3
-; P8LE-NEXT:    mulhwu r3, r11, r3
-; P8LE-NEXT:    mulli r11, r9, 95
-; P8LE-NEXT:    mtvsrd v2, r9
-; P8LE-NEXT:    mulli r9, r8, 95
+; P8LE-NEXT:    clrlwi r9, r7, 16
+; P8LE-NEXT:    clrlwi r10, r3, 16
+; P8LE-NEXT:    mulhwu r11, r5, r4
+; P8LE-NEXT:    mulhwu r8, r8, r4
+; P8LE-NEXT:    mulhwu r9, r9, r4
+; P8LE-NEXT:    mulhwu r4, r10, r4
+; P8LE-NEXT:    mulli r10, r11, 95
+; P8LE-NEXT:    mtvsrd v2, r11
 ; P8LE-NEXT:    mtvsrd v3, r8
-; P8LE-NEXT:    mulli r8, r10, 95
-; P8LE-NEXT:    mtvsrd v4, r10
-; P8LE-NEXT:    mulli r10, r3, 95
-; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    sub r5, r5, r11
-; P8LE-NEXT:    sub r6, r6, r9
-; P8LE-NEXT:    mtvsrd v3, r5
-; P8LE-NEXT:    sub r5, r7, r8
+; P8LE-NEXT:    sub r5, r5, r10
+; P8LE-NEXT:    mulli r10, r8, 95
+; P8LE-NEXT:    mtvsrd v4, r5
+; P8LE-NEXT:    sub r6, r6, r10
+; P8LE-NEXT:    mulli r10, r9, 95
 ; P8LE-NEXT:    mtvsrd v5, r6
-; P8LE-NEXT:    sub r4, r4, r10
-; P8LE-NEXT:    mtvsrd v0, r5
-; P8LE-NEXT:    mtvsrd v1, r4
-; P8LE-NEXT:    vmrghh v3, v5, v3
-; P8LE-NEXT:    mtvsrd v5, r3
-; P8LE-NEXT:    vmrghh v0, v1, v0
+; P8LE-NEXT:    sub r7, r7, r10
+; P8LE-NEXT:    mulli r10, r4, 95
+; P8LE-NEXT:    mtvsrd v0, r7
+; P8LE-NEXT:    sub r3, r3, r10
+; P8LE-NEXT:    vmrghh v2, v3, v2
+; P8LE-NEXT:    mtvsrd v3, r9
 ; P8LE-NEXT:    vmrghh v4, v5, v4
-; P8LE-NEXT:    xxmrglw v3, v0, v3
-; P8LE-NEXT:    xxmrglw v2, v4, v2
-; P8LE-NEXT:    vadduhm v2, v3, v2
+; P8LE-NEXT:    mtvsrd v5, r3
+; P8LE-NEXT:    vmrghh v5, v5, v0
+; P8LE-NEXT:    mtvsrd v0, r4
+; P8LE-NEXT:    xxmrglw v4, v5, v4
+; P8LE-NEXT:    vmrghh v3, v0, v3
+; P8LE-NEXT:    xxmrglw v2, v3, v2
+; P8LE-NEXT:    vadduhm v2, v4, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: combine_urem_udiv:
 ; P8BE:       # %bb.0:
-; P8BE-NEXT:    mfvsrd r4, v2
-; P8BE-NEXT:    lis r3, 689
-; P8BE-NEXT:    addis r11, r2, .LCPI2_0@toc@ha
-; P8BE-NEXT:    ori r3, r3, 55879
-; P8BE-NEXT:    addi r11, r11, .LCPI2_0@toc@l
-; P8BE-NEXT:    clrldi r5, r4, 48
-; P8BE-NEXT:    rldicl r6, r4, 48, 48
-; P8BE-NEXT:    lxvw4x v2, 0, r11
+; P8BE-NEXT:    mfvsrd r3, v2
+; P8BE-NEXT:    lis r4, 689
+; P8BE-NEXT:    ori r4, r4, 55879
+; P8BE-NEXT:    clrldi r5, r3, 48
+; P8BE-NEXT:    rldicl r6, r3, 48, 48
+; P8BE-NEXT:    rldicl r7, r3, 32, 48
+; P8BE-NEXT:    rldicl r3, r3, 16, 48
 ; P8BE-NEXT:    clrlwi r8, r5, 16
 ; P8BE-NEXT:    clrlwi r9, r6, 16
-; P8BE-NEXT:    rldicl r7, r4, 32, 48
-; P8BE-NEXT:    rldicl r4, r4, 16, 48
-; P8BE-NEXT:    mulhwu r8, r8, r3
-; P8BE-NEXT:    mulhwu r9, r9, r3
 ; P8BE-NEXT:    clrlwi r10, r7, 16
-; P8BE-NEXT:    clrlwi r4, r4, 16
-; P8BE-NEXT:    mulhwu r10, r10, r3
-; P8BE-NEXT:    mulhwu r3, r4, r3
-; P8BE-NEXT:    mulli r12, r8, 95
+; P8BE-NEXT:    clrlwi r3, r3, 16
+; P8BE-NEXT:    mulhwu r8, r8, r4
+; P8BE-NEXT:    mulhwu r9, r9, r4
+; P8BE-NEXT:    mulhwu r10, r10, r4
+; P8BE-NEXT:    mulhwu r4, r3, r4
+; P8BE-NEXT:    mulli r11, r8, 95
 ; P8BE-NEXT:    mtvsrwz v3, r8
-; P8BE-NEXT:    mulli r8, r9, 95
 ; P8BE-NEXT:    mtvsrwz v4, r9
-; P8BE-NEXT:    mulli r9, r10, 95
-; P8BE-NEXT:    mtvsrwz v5, r10
-; P8BE-NEXT:    mulli r10, r3, 95
-; P8BE-NEXT:    vperm v3, v4, v3, v2
-; P8BE-NEXT:    sub r5, r5, r12
-; P8BE-NEXT:    sub r6, r6, r8
-; P8BE-NEXT:    mtvsrwz v4, r5
-; P8BE-NEXT:    sub r5, r7, r9
+; P8BE-NEXT:    sub r5, r5, r11
+; P8BE-NEXT:    mulli r11, r9, 95
+; P8BE-NEXT:    mtvsrwz v5, r5
+; P8BE-NEXT:    sub r6, r6, r11
+; P8BE-NEXT:    mulli r11, r10, 95
 ; P8BE-NEXT:    mtvsrwz v0, r6
-; P8BE-NEXT:    sub r4, r4, r10
-; P8BE-NEXT:    mtvsrwz v1, r5
-; P8BE-NEXT:    mtvsrwz v6, r4
-; P8BE-NEXT:    vperm v4, v0, v4, v2
+; P8BE-NEXT:    sub r7, r7, r11
+; P8BE-NEXT:    mulli r11, r4, 95
+; P8BE-NEXT:    mtvsrwz v1, r7
+; P8BE-NEXT:    sub r3, r3, r11
+; P8BE-NEXT:    addis r11, r2, .LCPI2_0@toc@ha
+; P8BE-NEXT:    addi r11, r11, .LCPI2_0@toc@l
+; P8BE-NEXT:    lxvw4x v2, 0, r11
+; P8BE-NEXT:    vperm v5, v0, v5, v2
 ; P8BE-NEXT:    mtvsrwz v0, r3
-; P8BE-NEXT:    vperm v1, v6, v1, v2
-; P8BE-NEXT:    vperm v2, v0, v5, v2
-; P8BE-NEXT:    xxmrghw v4, v1, v4
+; P8BE-NEXT:    vperm v3, v4, v3, v2
+; P8BE-NEXT:    mtvsrwz v4, r10
+; P8BE-NEXT:    vperm v0, v0, v1, v2
+; P8BE-NEXT:    mtvsrwz v1, r4
+; P8BE-NEXT:    vperm v2, v1, v4, v2
+; P8BE-NEXT:    xxmrghw v4, v0, v5
 ; P8BE-NEXT:    xxmrghw v2, v2, v3
 ; P8BE-NEXT:    vadduhm v2, v4, v2
 ; P8BE-NEXT:    blr
@@ -591,54 +591,54 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
 ; P8LE-LABEL: dont_fold_urem_power_of_two:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r3, 689
-; P8LE-NEXT:    ori r3, r3, 55879
-; P8LE-NEXT:    mffprd r4, f0
-; P8LE-NEXT:    rldicl r5, r4, 16, 48
-; P8LE-NEXT:    clrldi r6, r4, 48
-; P8LE-NEXT:    clrlwi r5, r5, 16
-; P8LE-NEXT:    clrlwi r6, r6, 26
-; P8LE-NEXT:    mulhwu r3, r5, r3
-; P8LE-NEXT:    rldicl r7, r4, 48, 48
-; P8LE-NEXT:    mtvsrd v2, r6
-; P8LE-NEXT:    rldicl r4, r4, 32, 48
-; P8LE-NEXT:    clrlwi r6, r7, 27
+; P8LE-NEXT:    mffprd r3, f0
+; P8LE-NEXT:    clrldi r4, r3, 48
+; P8LE-NEXT:    clrlwi r4, r4, 26
+; P8LE-NEXT:    mtvsrd v2, r4
+; P8LE-NEXT:    rldicl r4, r3, 48, 48
+; P8LE-NEXT:    clrlwi r4, r4, 27
+; P8LE-NEXT:    mtvsrd v3, r4
+; P8LE-NEXT:    rldicl r4, r3, 32, 48
+; P8LE-NEXT:    rldicl r3, r3, 16, 48
 ; P8LE-NEXT:    clrlwi r4, r4, 29
-; P8LE-NEXT:    mtvsrd v3, r6
-; P8LE-NEXT:    mtvsrd v4, r4
-; P8LE-NEXT:    mulli r3, r3, 95
+; P8LE-NEXT:    clrlwi r3, r3, 16
 ; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    sub r3, r5, r3
-; P8LE-NEXT:    mtvsrd v5, r3
-; P8LE-NEXT:    vmrghh v3, v5, v4
+; P8LE-NEXT:    mtvsrd v3, r4
+; P8LE-NEXT:    lis r4, 689
+; P8LE-NEXT:    ori r4, r4, 55879
+; P8LE-NEXT:    mulhwu r4, r3, r4
+; P8LE-NEXT:    mulli r4, r4, 95
+; P8LE-NEXT:    sub r3, r3, r4
+; P8LE-NEXT:    mtvsrd v4, r3
+; P8LE-NEXT:    vmrghh v3, v4, v3
 ; P8LE-NEXT:    xxmrglw v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_urem_power_of_two:
 ; P8BE:       # %bb.0:
-; P8BE-NEXT:    mfvsrd r4, v2
-; P8BE-NEXT:    lis r3, 689
-; P8BE-NEXT:    addis r7, r2, .LCPI3_0@toc@ha
-; P8BE-NEXT:    ori r3, r3, 55879
-; P8BE-NEXT:    clrldi r5, r4, 48
-; P8BE-NEXT:    rldicl r6, r4, 32, 48
-; P8BE-NEXT:    clrlwi r5, r5, 16
-; P8BE-NEXT:    clrlwi r6, r6, 27
-; P8BE-NEXT:    mulhwu r3, r5, r3
-; P8BE-NEXT:    rldicl r8, r4, 16, 48
-; P8BE-NEXT:    mtvsrwz v2, r6
-; P8BE-NEXT:    addi r6, r7, .LCPI3_0 at toc@l
-; P8BE-NEXT:    rldicl r4, r4, 48, 48
-; P8BE-NEXT:    clrlwi r7, r8, 26
-; P8BE-NEXT:    lxvw4x v3, 0, r6
-; P8BE-NEXT:    clrlwi r4, r4, 29
-; P8BE-NEXT:    mtvsrwz v4, r7
-; P8BE-NEXT:    mtvsrwz v0, r4
-; P8BE-NEXT:    mulli r3, r3, 95
-; P8BE-NEXT:    vperm v2, v4, v2, v3
-; P8BE-NEXT:    sub r3, r5, r3
+; P8BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; P8BE-NEXT:    lis r5, 689
+; P8BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
+; P8BE-NEXT:    ori r5, r5, 55879
+; P8BE-NEXT:    lxvw4x v3, 0, r3
+; P8BE-NEXT:    mfvsrd r3, v2
+; P8BE-NEXT:    rldicl r4, r3, 32, 48
+; P8BE-NEXT:    clrlwi r4, r4, 27
+; P8BE-NEXT:    mtvsrwz v2, r4
+; P8BE-NEXT:    rldicl r4, r3, 16, 48
+; P8BE-NEXT:    clrlwi r4, r4, 26
+; P8BE-NEXT:    mtvsrwz v4, r4
+; P8BE-NEXT:    clrldi r4, r3, 48
+; P8BE-NEXT:    rldicl r3, r3, 48, 48
+; P8BE-NEXT:    clrlwi r4, r4, 16
+; P8BE-NEXT:    clrlwi r3, r3, 29
+; P8BE-NEXT:    mulhwu r5, r4, r5
 ; P8BE-NEXT:    mtvsrwz v5, r3
-; P8BE-NEXT:    vperm v3, v0, v5, v3
+; P8BE-NEXT:    mulli r5, r5, 95
+; P8BE-NEXT:    sub r4, r4, r5
+; P8BE-NEXT:    vperm v2, v4, v2, v3
+; P8BE-NEXT:    mtvsrwz v4, r4
+; P8BE-NEXT:    vperm v3, v5, v4, v3
 ; P8BE-NEXT:    xxmrghw v2, v2, v3
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
@@ -725,73 +725,73 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
 ; P8LE-LABEL: dont_fold_urem_one:
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    lis r3, 100
-; P8LE-NEXT:    lis r7, 2849
-; P8LE-NEXT:    lis r8, 12
-; P8LE-NEXT:    li r9, 0
-; P8LE-NEXT:    ori r3, r3, 13629
-; P8LE-NEXT:    ori r7, r7, 25645
-; P8LE-NEXT:    ori r8, r8, 5560
-; P8LE-NEXT:    mtvsrd v2, r9
-; P8LE-NEXT:    mffprd r4, f0
-; P8LE-NEXT:    rldicl r5, r4, 48, 48
-; P8LE-NEXT:    rldicl r6, r4, 32, 48
-; P8LE-NEXT:    rldicl r4, r4, 16, 48
-; P8LE-NEXT:    clrlwi r5, r5, 16
+; P8LE-NEXT:    li r4, 0
+; P8LE-NEXT:    lis r5, 100
+; P8LE-NEXT:    lis r6, 2849
+; P8LE-NEXT:    mffprd r3, f0
+; P8LE-NEXT:    mtvsrd v2, r4
+; P8LE-NEXT:    ori r4, r5, 13629
+; P8LE-NEXT:    ori r5, r6, 25645
+; P8LE-NEXT:    rldicl r6, r3, 48, 48
 ; P8LE-NEXT:    clrlwi r6, r6, 16
-; P8LE-NEXT:    mulhwu r3, r5, r3
-; P8LE-NEXT:    clrlwi r4, r4, 16
-; P8LE-NEXT:    mulhwu r7, r6, r7
-; P8LE-NEXT:    mulhwu r8, r4, r8
-; P8LE-NEXT:    mulli r3, r3, 654
-; P8LE-NEXT:    mulli r7, r7, 23
-; P8LE-NEXT:    mulli r8, r8, 5423
-; P8LE-NEXT:    sub r3, r5, r3
-; P8LE-NEXT:    sub r5, r6, r7
-; P8LE-NEXT:    mtvsrd v3, r3
-; P8LE-NEXT:    sub r3, r4, r8
-; P8LE-NEXT:    mtvsrd v4, r5
-; P8LE-NEXT:    mtvsrd v5, r3
+; P8LE-NEXT:    mulhwu r4, r6, r4
+; P8LE-NEXT:    mulli r4, r4, 654
+; P8LE-NEXT:    sub r4, r6, r4
+; P8LE-NEXT:    rldicl r6, r3, 32, 48
+; P8LE-NEXT:    rldicl r3, r3, 16, 48
+; P8LE-NEXT:    mtvsrd v3, r4
+; P8LE-NEXT:    lis r4, 12
+; P8LE-NEXT:    clrlwi r6, r6, 16
+; P8LE-NEXT:    clrlwi r3, r3, 16
+; P8LE-NEXT:    ori r4, r4, 5560
+; P8LE-NEXT:    mulhwu r5, r6, r5
+; P8LE-NEXT:    mulhwu r4, r3, r4
+; P8LE-NEXT:    mulli r5, r5, 23
+; P8LE-NEXT:    mulli r4, r4, 5423
+; P8LE-NEXT:    sub r5, r6, r5
+; P8LE-NEXT:    sub r3, r3, r4
+; P8LE-NEXT:    mtvsrd v4, r3
 ; P8LE-NEXT:    vmrghh v2, v3, v2
-; P8LE-NEXT:    vmrghh v3, v5, v4
+; P8LE-NEXT:    mtvsrd v3, r5
+; P8LE-NEXT:    vmrghh v3, v4, v3
 ; P8LE-NEXT:    xxmrglw v2, v3, v2
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_urem_one:
 ; P8BE:       # %bb.0:
-; P8BE-NEXT:    mfvsrd r4, v2
-; P8BE-NEXT:    lis r3, 12
-; P8BE-NEXT:    lis r7, 2849
-; P8BE-NEXT:    lis r8, 100
-; P8BE-NEXT:    addis r9, r2, .LCPI4_0@toc@ha
-; P8BE-NEXT:    li r10, 0
-; P8BE-NEXT:    ori r3, r3, 5560
-; P8BE-NEXT:    ori r7, r7, 25645
-; P8BE-NEXT:    ori r8, r8, 13629
-; P8BE-NEXT:    mtvsrwz v2, r10
-; P8BE-NEXT:    clrldi r5, r4, 48
-; P8BE-NEXT:    rldicl r6, r4, 48, 48
-; P8BE-NEXT:    rldicl r4, r4, 32, 48
-; P8BE-NEXT:    clrlwi r5, r5, 16
+; P8BE-NEXT:    mfvsrd r3, v2
+; P8BE-NEXT:    addis r6, r2, .LCPI4_0@toc@ha
+; P8BE-NEXT:    lis r4, 12
+; P8BE-NEXT:    lis r5, 2849
+; P8BE-NEXT:    addi r6, r6, .LCPI4_0@toc@l
+; P8BE-NEXT:    ori r4, r4, 5560
+; P8BE-NEXT:    ori r5, r5, 25645
+; P8BE-NEXT:    lxvw4x v2, 0, r6
+; P8BE-NEXT:    clrldi r6, r3, 48
 ; P8BE-NEXT:    clrlwi r6, r6, 16
-; P8BE-NEXT:    mulhwu r3, r5, r3
-; P8BE-NEXT:    clrlwi r4, r4, 16
-; P8BE-NEXT:    mulhwu r7, r6, r7
-; P8BE-NEXT:    mulhwu r8, r4, r8
-; P8BE-NEXT:    mulli r3, r3, 5423
-; P8BE-NEXT:    mulli r7, r7, 23
-; P8BE-NEXT:    mulli r8, r8, 654
-; P8BE-NEXT:    sub r3, r5, r3
-; P8BE-NEXT:    addi r5, r9, .LCPI4_0@toc@l
-; P8BE-NEXT:    lxvw4x v3, 0, r5
-; P8BE-NEXT:    sub r5, r6, r7
+; P8BE-NEXT:    mulhwu r4, r6, r4
+; P8BE-NEXT:    mulli r4, r4, 5423
+; P8BE-NEXT:    sub r4, r6, r4
+; P8BE-NEXT:    rldicl r6, r3, 48, 48
+; P8BE-NEXT:    rldicl r3, r3, 32, 48
+; P8BE-NEXT:    clrlwi r6, r6, 16
+; P8BE-NEXT:    clrlwi r3, r3, 16
+; P8BE-NEXT:    mtvsrwz v3, r4
+; P8BE-NEXT:    mulhwu r5, r6, r5
+; P8BE-NEXT:    mulli r5, r5, 23
+; P8BE-NEXT:    sub r5, r6, r5
+; P8BE-NEXT:    lis r6, 100
+; P8BE-NEXT:    ori r6, r6, 13629
+; P8BE-NEXT:    mtvsrwz v4, r5
+; P8BE-NEXT:    mulhwu r6, r3, r6
+; P8BE-NEXT:    mulli r6, r6, 654
+; P8BE-NEXT:    sub r3, r3, r6
+; P8BE-NEXT:    vperm v3, v4, v3, v2
 ; P8BE-NEXT:    mtvsrwz v4, r3
-; P8BE-NEXT:    sub r3, r4, r8
-; P8BE-NEXT:    mtvsrwz v5, r5
-; P8BE-NEXT:    mtvsrwz v0, r3
-; P8BE-NEXT:    vperm v4, v5, v4, v3
-; P8BE-NEXT:    vperm v2, v2, v0, v3
-; P8BE-NEXT:    xxmrghw v2, v2, v4
+; P8BE-NEXT:    li r3, 0
+; P8BE-NEXT:    mtvsrwz v5, r3
+; P8BE-NEXT:    vperm v2, v5, v4, v2
+; P8BE-NEXT:    xxmrghw v2, v2, v3
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
   ret <4 x i16> %1
@@ -893,93 +893,93 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) {
 ; P8LE:       # %bb.0:
 ; P8LE-NEXT:    lis r3, 1602
 ; P8LE-NEXT:    xxswapd vs0, v3
-; P8LE-NEXT:    lis r4, -16037
 ; P8LE-NEXT:    lis r5, 3206
 ; P8LE-NEXT:    mfvsrd r6, v2
+; P8LE-NEXT:    mfvsrd r8, v3
 ; P8LE-NEXT:    ori r3, r3, 51289
-; P8LE-NEXT:    ori r4, r4, 28749
+; P8LE-NEXT:    mffprd r4, f0
 ; P8LE-NEXT:    ori r5, r5, 42889
-; P8LE-NEXT:    mfvsrd r8, v3
 ; P8LE-NEXT:    rldic r3, r3, 36, 1
-; P8LE-NEXT:    rldic r4, r4, 32, 0
-; P8LE-NEXT:    oris r3, r3, 45590
-; P8LE-NEXT:    mffprd r7, f0
 ; P8LE-NEXT:    rldic r5, r5, 35, 1
-; P8LE-NEXT:    oris r4, r4, 52170
-; P8LE-NEXT:    ori r3, r3, 17097
+; P8LE-NEXT:    rldicl r7, r6, 63, 1
+; P8LE-NEXT:    oris r3, r3, 45590
 ; P8LE-NEXT:    oris r5, r5, 1603
-; P8LE-NEXT:    ori r4, r4, 12109
-; P8LE-NEXT:    mulhdu r3, r7, r3
-; P8LE-NEXT:    rldicl r9, r6, 63, 1
+; P8LE-NEXT:    ori r3, r3, 17097
 ; P8LE-NEXT:    ori r5, r5, 21445
-; P8LE-NEXT:    mulhdu r4, r8, r4
-; P8LE-NEXT:    mulhdu r5, r9, r5
-; P8LE-NEXT:    sub r9, r7, r3
-; P8LE-NEXT:    rldicl r9, r9, 63, 1
-; P8LE-NEXT:    rldicl r4, r4, 52, 12
-; P8LE-NEXT:    add r3, r9, r3
+; P8LE-NEXT:    mulhdu r3, r4, r3
+; P8LE-NEXT:    mulhdu r5, r7, r5
+; P8LE-NEXT:    sub r7, r4, r3
 ; P8LE-NEXT:    rldicl r5, r5, 57, 7
-; P8LE-NEXT:    mulli r4, r4, 5423
-; P8LE-NEXT:    rldicl r3, r3, 60, 4
+; P8LE-NEXT:    rldicl r7, r7, 63, 1
 ; P8LE-NEXT:    mulli r5, r5, 654
-; P8LE-NEXT:    mulli r3, r3, 23
-; P8LE-NEXT:    sub r4, r8, r4
+; P8LE-NEXT:    add r3, r7, r3
+; P8LE-NEXT:    lis r7, -16037
+; P8LE-NEXT:    ori r7, r7, 28749
+; P8LE-NEXT:    rldicl r3, r3, 60, 4
 ; P8LE-NEXT:    sub r5, r6, r5
-; P8LE-NEXT:    mtfprd f0, r4
-; P8LE-NEXT:    sub r3, r7, r3
-; P8LE-NEXT:    li r4, 0
-; P8LE-NEXT:    mtfprd f1, r5
-; P8LE-NEXT:    mtfprd f2, r3
-; P8LE-NEXT:    mtfprd f3, r4
-; P8LE-NEXT:    xxmrghd v3, vs0, vs2
-; P8LE-NEXT:    xxmrghd v2, vs1, vs3
+; P8LE-NEXT:    rldic r7, r7, 32, 0
+; P8LE-NEXT:    mulli r3, r3, 23
+; P8LE-NEXT:    oris r7, r7, 52170
+; P8LE-NEXT:    ori r7, r7, 12109
+; P8LE-NEXT:    sub r3, r4, r3
+; P8LE-NEXT:    mulhdu r7, r8, r7
+; P8LE-NEXT:    mtfprd f1, r3
+; P8LE-NEXT:    li r3, 0
+; P8LE-NEXT:    rldicl r7, r7, 52, 12
+; P8LE-NEXT:    mulli r7, r7, 5423
+; P8LE-NEXT:    sub r7, r8, r7
+; P8LE-NEXT:    mtfprd f0, r7
+; P8LE-NEXT:    xxmrghd v3, vs0, vs1
+; P8LE-NEXT:    mtfprd f0, r5
+; P8LE-NEXT:    mtfprd f1, r3
+; P8LE-NEXT:    xxmrghd v2, vs0, vs1
 ; P8LE-NEXT:    blr
 ;
 ; P8BE-LABEL: dont_fold_urem_i64:
 ; P8BE:       # %bb.0:
 ; P8BE-NEXT:    lis r3, 1602
-; P8BE-NEXT:    lis r4, -16037
-; P8BE-NEXT:    xxswapd vs0, v3
-; P8BE-NEXT:    xxswapd vs1, v2
+; P8BE-NEXT:    mfvsrd r4, v3
 ; P8BE-NEXT:    lis r5, 3206
+; P8BE-NEXT:    xxswapd vs0, v2
+; P8BE-NEXT:    xxswapd vs1, v3
 ; P8BE-NEXT:    ori r3, r3, 51289
-; P8BE-NEXT:    ori r4, r4, 28749
-; P8BE-NEXT:    mfvsrd r6, v3
 ; P8BE-NEXT:    ori r5, r5, 42889
+; P8BE-NEXT:    mffprd r6, f0
+; P8BE-NEXT:    mffprd r8, f1
 ; P8BE-NEXT:    rldic r3, r3, 36, 1
-; P8BE-NEXT:    rldic r4, r4, 32, 0
-; P8BE-NEXT:    oris r3, r3, 45590
 ; P8BE-NEXT:    rldic r5, r5, 35, 1
-; P8BE-NEXT:    mffprd r7, f0
-; P8BE-NEXT:    oris r4, r4, 52170
-; P8BE-NEXT:    ori r3, r3, 17097
-; P8BE-NEXT:    mffprd r8, f1
+; P8BE-NEXT:    oris r3, r3, 45590
 ; P8BE-NEXT:    oris r5, r5, 1603
-; P8BE-NEXT:    ori r4, r4, 12109
-; P8BE-NEXT:    mulhdu r3, r6, r3
+; P8BE-NEXT:    rldicl r7, r6, 63, 1
+; P8BE-NEXT:    ori r3, r3, 17097
 ; P8BE-NEXT:    ori r5, r5, 21445
-; P8BE-NEXT:    mulhdu r4, r7, r4
-; P8BE-NEXT:    rldicl r9, r8, 63, 1
-; P8BE-NEXT:    mulhdu r5, r9, r5
-; P8BE-NEXT:    sub r9, r6, r3
-; P8BE-NEXT:    rldicl r9, r9, 63, 1
-; P8BE-NEXT:    rldicl r4, r4, 52, 12
-; P8BE-NEXT:    add r3, r9, r3
-; P8BE-NEXT:    mulli r4, r4, 5423
+; P8BE-NEXT:    mulhdu r3, r4, r3
+; P8BE-NEXT:    mulhdu r5, r7, r5
+; P8BE-NEXT:    sub r7, r4, r3
 ; P8BE-NEXT:    rldicl r5, r5, 57, 7
-; P8BE-NEXT:    rldicl r3, r3, 60, 4
+; P8BE-NEXT:    rldicl r7, r7, 63, 1
 ; P8BE-NEXT:    mulli r5, r5, 654
+; P8BE-NEXT:    add r3, r7, r3
+; P8BE-NEXT:    lis r7, -16037
+; P8BE-NEXT:    ori r7, r7, 28749
+; P8BE-NEXT:    rldicl r3, r3, 60, 4
+; P8BE-NEXT:    sub r5, r6, r5
+; P8BE-NEXT:    rldic r7, r7, 32, 0
 ; P8BE-NEXT:    mulli r3, r3, 23
-; P8BE-NEXT:    sub r4, r7, r4
-; P8BE-NEXT:    mtfprd f0, r4
-; P8BE-NEXT:    sub r4, r8, r5
-; P8BE-NEXT:    sub r3, r6, r3
-; P8BE-NEXT:    mtfprd f1, r4
-; P8BE-NEXT:    li r4, 0
-; P8BE-NEXT:    mtfprd f2, r3
-; P8BE-NEXT:    mtfprd f3, r4
-; P8BE-NEXT:    xxmrghd v3, vs2, vs0
-; P8BE-NEXT:    xxmrghd v2, vs3, vs1
+; P8BE-NEXT:    oris r7, r7, 52170
+; P8BE-NEXT:    ori r7, r7, 12109
+; P8BE-NEXT:    sub r3, r4, r3
+; P8BE-NEXT:    mulhdu r7, r8, r7
+; P8BE-NEXT:    mtfprd f1, r3
+; P8BE-NEXT:    li r3, 0
+; P8BE-NEXT:    rldicl r7, r7, 52, 12
+; P8BE-NEXT:    mulli r7, r7, 5423
+; P8BE-NEXT:    sub r7, r8, r7
+; P8BE-NEXT:    mtfprd f0, r7
+; P8BE-NEXT:    xxmrghd v3, vs1, vs0
+; P8BE-NEXT:    mtfprd f0, r5
+; P8BE-NEXT:    mtfprd f1, r3
+; P8BE-NEXT:    xxmrghd v2, vs1, vs0
 ; P8BE-NEXT:    blr
   %1 = urem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
   ret <4 x i64> %1

diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
index 500e031fa600a0c..25697311df37381 100644
--- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
@@ -45,12 +45,12 @@ define <16 x i8> @test_v16i8_v16i8(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-BE-P8-LABEL: test_v16i8_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
-; CHECK-BE-P8-NEXT:    lbz r4, 0(r4)
 ; CHECK-BE-P8-NEXT:    lbz r3, 0(r3)
+; CHECK-BE-P8-NEXT:    lbz r4, 0(r4)
 ; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI0_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v2, 0, r5
 ; CHECK-BE-P8-NEXT:    mtvsrwz v3, r4
 ; CHECK-BE-P8-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-P8-NEXT:    lxvw4x v2, 0, r5
 ; CHECK-BE-P8-NEXT:    vperm v2, v4, v3, v2
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -66,11 +66,11 @@ define <16 x i8> @test_v16i8_v16i8(ptr nocapture noundef readonly %a, ptr nocapt
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C0(r2) # %const.0
-; CHECK-AIX-64-P8-NEXT:    lbz r4, 0(r4)
 ; CHECK-AIX-64-P8-NEXT:    lbz r3, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r4
+; CHECK-AIX-64-P8-NEXT:    lbz r4, 0(r4)
+; CHECK-AIX-64-P8-NEXT:    ld r5, L..C0(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v2, 0, r5
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r4
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v3, v2
 ; CHECK-AIX-64-P8-NEXT:    blr
@@ -86,11 +86,11 @@ define <16 x i8> @test_v16i8_v16i8(ptr nocapture noundef readonly %a, ptr nocapt
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C0(r2) # %const.0
-; CHECK-AIX-32-P8-NEXT:    lbz r4, 0(r4)
 ; CHECK-AIX-32-P8-NEXT:    lbz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    mtvsrwz v3, r4
+; CHECK-AIX-32-P8-NEXT:    lbz r4, 0(r4)
+; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C0(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
+; CHECK-AIX-32-P8-NEXT:    mtvsrwz v3, r4
 ; CHECK-AIX-32-P8-NEXT:    mtvsrwz v4, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v3, v2
 ; CHECK-AIX-32-P8-NEXT:    blr
@@ -183,13 +183,13 @@ entry:
 define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
 ; CHECK-LE-P8-LABEL: test_none_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
 ; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
-; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI2_0@toc@l
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -205,12 +205,12 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-BE-P8-LABEL: test_none_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
+; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lxvw4x v2, 0, r4
-; CHECK-BE-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r5
-; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_none_v16i8:
@@ -225,11 +225,11 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C2(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C2(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v2, 0, r4
-; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
-; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_none_v16i8:
@@ -243,10 +243,10 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v16i8:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    stb r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r5
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -268,9 +268,9 @@ define <16 x i8> @test_v16i8_v8i16(i16 %arg, i8 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v16i8_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    mtfprd f1, r3
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    mtfprd f0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -321,9 +321,9 @@ define <16 x i8> @test_v16i8_v8i16(i16 %arg, i8 %arg1) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -32
 ; CHECK-AIX-32-P8-NEXT:    stb r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
+; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
@@ -350,9 +350,9 @@ define <16 x i8> @test_v8i16_v16i8(i16 %arg, i8 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v8i16_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    mtfprd f1, r3
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    mtfprd f0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -403,9 +403,9 @@ define <16 x i8> @test_v8i16_v16i8(i16 %arg, i8 %arg1) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -32
 ; CHECK-AIX-32-P8-NEXT:    stb r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
+; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
@@ -431,13 +431,13 @@ entry:
 define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
 ; CHECK-LE-P8-LABEL: test_none_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
 ; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
-; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI5_0@toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI5_0@toc@l
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -453,12 +453,12 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-BE-P8-LABEL: test_none_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
+; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lxvw4x v2, 0, r4
-; CHECK-BE-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI5_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r5
-; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI5_0@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_none_v8i16:
@@ -473,11 +473,11 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C3(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C3(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v2, 0, r4
-; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
-; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
@@ -491,10 +491,10 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r5
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -561,11 +561,11 @@ define <8 x i16> @test_v8i16_none(<8 x i16> %a, i16 %b) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C2(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C2(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -583,9 +583,9 @@ define <16 x i8> @test_v16i8_v4i32(i8 %arg, i32 %arg1, <16 x i8> %a, <4 x i32> %
 ; CHECK-LE-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    mtfprd f0, r4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -600,9 +600,9 @@ define <16 x i8> @test_v16i8_v4i32(i8 %arg, i32 %arg1, <16 x i8> %a, <4 x i32> %
 ; CHECK-BE-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 32
 ; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r4
+; CHECK-BE-P8-NEXT:    sldi r3, r4, 32
+; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-P8-NEXT:    vmrghb v2, v2, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -617,9 +617,9 @@ define <16 x i8> @test_v16i8_v4i32(i8 %arg, i32 %arg1, <16 x i8> %a, <4 x i32> %
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 32
 ; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r4
+; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 32
+; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-AIX-64-P8-NEXT:    vmrghb v2, v2, v3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -633,10 +633,10 @@ define <16 x i8> @test_v16i8_v4i32(i8 %arg, i32 %arg1, <16 x i8> %a, <4 x i32> %
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vmrghb v2, v2, v3
@@ -663,9 +663,9 @@ define <16 x i8> @test_v4i32_v16i8(i32 %arg, i8 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    mtfprd f1, r3
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    mtfprd f0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -713,9 +713,9 @@ define <16 x i8> @test_v4i32_v16i8(i32 %arg, i8 %arg1) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -32
 ; CHECK-AIX-32-P8-NEXT:    stb r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
+; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
@@ -803,9 +803,9 @@ define <4 x i32> @test_none_v4i32(<4 x i32> %a, i64 %b) {
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C3(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C4(r2) # %const.1
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
@@ -828,15 +828,15 @@ entry:
 define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
 ; CHECK-LE-P8-LABEL: test_v4i32_none:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI10_0@toc@ha
 ; CHECK-LE-P8-NEXT:    lbzx r4, 0, r4
-; CHECK-LE-P8-NEXT:    lxsiwzx v3, 0, r3
-; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI10_0@toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT:    lxsiwzx v4, 0, r3
 ; CHECK-LE-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI10_0@toc@ha
+; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI10_0@toc@l
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    vspltb v2, v2, 7
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_none:
@@ -853,11 +853,11 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-BE-P8-LABEL: test_v4i32_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lbzx r4, 0, r4
-; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI10_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI10_0@toc@l
 ; CHECK-BE-P8-NEXT:    mtvsrwz v2, r4
-; CHECK-BE-P8-NEXT:    addi r4, r5, .LCPI10_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r4
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-BE-P8-NEXT:    vspltb v2, v2, 7
 ; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-BE-P8-NEXT:    blr
@@ -876,10 +876,10 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lbzx r4, 0, r4
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C7(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C7(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vspltb v2, v2, 7
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-64-P8-NEXT:    blr
@@ -897,10 +897,10 @@ define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lbzx r4, 0, r4
-; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C5(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C5(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r4
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vspltb v2, v2, 7
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-32-P8-NEXT:    blr
@@ -931,9 +931,9 @@ define <16 x i8> @test_v16i8_v2i64(i8 %arg, i64 %arg1, <16 x i8> %a, <2 x i64> %
 ; CHECK-LE-P8-LABEL: test_v16i8_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    mtfprd f0, r4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -980,10 +980,10 @@ define <16 x i8> @test_v16i8_v2i64(i8 %arg, i64 %arg1, <16 x i8> %a, <2 x i64> %
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vmrghb v2, v2, v3
@@ -1010,9 +1010,9 @@ define <16 x i8> @test_v2i64_v16i8(i64 %arg, i8 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v2i64_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    mtfprd f1, r3
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    mtfprd f0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -1087,15 +1087,15 @@ entry:
 define dso_local <16 x i8> @test_1_2(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_1_2:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI13_0@toc@ha
 ; CHECK-LE-P8-NEXT:    lbzx r3, 0, r3
-; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r4
-; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI13_0@toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT:    lxsdx v4, 0, r4
 ; CHECK-LE-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI13_0@toc@ha
+; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI13_0@toc@l
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    vspltb v2, v2, 7
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_1_2:
@@ -1146,10 +1146,10 @@ define dso_local <16 x i8> @test_1_2(ptr nocapture noundef readonly %a, ptr noca
 ; CHECK-AIX-32-P8-LABEL: test_1_2:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lbzx r3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C6(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C6(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vspltb v2, v2, 7
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-32-P8-NEXT:    blr
@@ -1179,15 +1179,15 @@ entry:
 define <16 x i8> @test_none_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
 ; CHECK-LE-P8-LABEL: test_none_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI14_0@toc@ha
 ; CHECK-LE-P8-NEXT:    lbzx r3, 0, r3
-; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r4
-; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI14_0@toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT:    lxsdx v4, 0, r4
 ; CHECK-LE-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI14_0@toc@ha
+; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI14_0@toc@l
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    vspltb v2, v2, 7
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_none_v2i64:
@@ -1238,10 +1238,10 @@ define <16 x i8> @test_none_v2i64(ptr nocapture noundef readonly %a, ptr nocaptu
 ; CHECK-AIX-32-P8-LABEL: test_none_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lbzx r3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C7(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C7(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vspltb v2, v2, 7
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-32-P8-NEXT:    blr
@@ -1271,15 +1271,15 @@ entry:
 define <16 x i8> @test_v2i64_none(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
 ; CHECK-LE-P8-LABEL: test_v2i64_none:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI15_0@toc@ha
 ; CHECK-LE-P8-NEXT:    lbzx r4, 0, r4
-; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r3
-; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI15_0@toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT:    lxsdx v4, 0, r3
 ; CHECK-LE-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI15_0@toc@ha
+; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI15_0@toc@l
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    vspltb v2, v2, 7
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_none:
@@ -1329,12 +1329,12 @@ define <16 x i8> @test_v2i64_none(ptr nocapture noundef readonly %a, ptr nocaptu
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lbzx r4, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r4
-; CHECK-AIX-32-P8-NEXT:    xxspltw v3, vs0, 1
-; CHECK-AIX-32-P8-NEXT:    vspltb v2, v2, 7
-; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT:    lbzx r3, 0, r4
+; CHECK-AIX-32-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
+; CHECK-AIX-32-P8-NEXT:    vspltb v3, v3, 7
+; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v2i64_none:
@@ -1376,8 +1376,8 @@ define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) {
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI16_0@toc@ha
 ; CHECK-BE-P8-NEXT:    mtvsrwz v3, r4
-; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI16_0@toc@l
 ; CHECK-BE-P8-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI16_0@toc@l
 ; CHECK-BE-P8-NEXT:    lxvw4x v2, 0, r5
 ; CHECK-BE-P8-NEXT:    vperm v2, v4, v3, v2
 ; CHECK-BE-P8-NEXT:    blr
@@ -1395,10 +1395,10 @@ define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) {
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    ld r5, L..C8(r2) # %const.0
-; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r4
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
-; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v2, 0, r5
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v3, v2
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16rhs:
@@ -1412,12 +1412,12 @@ define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r4
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -1442,9 +1442,9 @@ define <16 x i8> @test_v8i16_v4i32(<8 x i16> %a, <4 x i32> %b, i16 %arg, i32 %ar
 ; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r7
-; CHECK-LE-P8-NEXT:    mtfprd f1, r8
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    mtfprd f0, r8
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -1459,9 +1459,9 @@ define <16 x i8> @test_v8i16_v4i32(<8 x i16> %a, <4 x i32> %b, i16 %arg, i32 %ar
 ; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    sldi r3, r7, 48
-; CHECK-BE-P8-NEXT:    sldi r4, r8, 32
 ; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r4
+; CHECK-BE-P8-NEXT:    sldi r3, r8, 32
+; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-P8-NEXT:    vmrghb v2, v2, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -1476,9 +1476,9 @@ define <16 x i8> @test_v8i16_v4i32(<8 x i16> %a, <4 x i32> %b, i16 %arg, i32 %ar
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 32
 ; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r4
+; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 32
+; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-AIX-64-P8-NEXT:    vmrghb v2, v2, v3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -1492,10 +1492,10 @@ define <16 x i8> @test_v8i16_v4i32(<8 x i16> %a, <4 x i32> %b, i16 %arg, i32 %ar
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vmrghb v2, v2, v3
@@ -1522,9 +1522,9 @@ define <16 x i8> @test_v8i16_v2i64(<8 x i16> %a, <2 x i64> %b, i16 %arg, i64 %ar
 ; CHECK-LE-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r7
-; CHECK-LE-P8-NEXT:    mtfprd f1, r8
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    mtfprd f0, r8
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -1571,10 +1571,10 @@ define <16 x i8> @test_v8i16_v2i64(<8 x i16> %a, <2 x i64> %b, i16 %arg, i64 %ar
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vmrghb v2, v2, v3
@@ -1642,12 +1642,12 @@ define <16 x i8> @test_v4i32_v4i32(i32 %arg, i32 %arg1, <4 x i32> %a, <4 x i32>
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r4
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -1672,9 +1672,9 @@ define <16 x i8> @test_v4i32_v8i16(i32 %arg, i16 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    mtfprd f0, r4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -1689,9 +1689,9 @@ define <16 x i8> @test_v4i32_v8i16(i32 %arg, i16 %arg1) {
 ; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 48
 ; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r4
+; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
+; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -1706,9 +1706,9 @@ define <16 x i8> @test_v4i32_v8i16(i32 %arg, i16 %arg1) {
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 32
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 48
 ; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r4
+; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
+; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -1722,10 +1722,10 @@ define <16 x i8> @test_v4i32_v8i16(i32 %arg, i16 %arg1) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
@@ -1793,11 +1793,11 @@ define <16 x i8> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C8(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C8(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -1872,12 +1872,12 @@ define <16 x i8> @test_v2i64_v4i32(i64 %arg, i32 %arg1, <2 x i64> %a, <4 x i32>
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    stw r5, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT:    stw r5, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r4
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -1902,9 +1902,9 @@ define <16 x i8> @test_v2i64_v8i16(i64 %arg, i16 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    mtfprd f0, r4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -1951,10 +1951,10 @@ define <16 x i8> @test_v2i64_v8i16(i64 %arg, i16 %arg1) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    sth r5, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
@@ -1980,14 +1980,14 @@ entry:
 define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
 ; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI24_0@toc@ha
 ; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-LE-P8-NEXT:    lfdx f1, 0, r4
-; CHECK-LE-P8-NEXT:    addi r3, r5, .LCPI24_0@toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs2, 0, r3
+; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI24_0@toc@ha
+; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI24_0@toc@l
 ; CHECK-LE-P8-NEXT:    xxswapd v2, f0
-; CHECK-LE-P8-NEXT:    xxswapd v3, f1
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs2
+; CHECK-LE-P8-NEXT:    lfdx f0, 0, r4
+; CHECK-LE-P8-NEXT:    xxswapd v3, f0
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -2006,9 +2006,9 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI24_0@toc@ha
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI24_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r4
-; CHECK-BE-P8-NEXT:    addi r3, r5, .LCPI24_0@toc@l
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI24_0@toc@l
 ; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-BE-P8-NEXT:    xxsldwi v2, f0, f0, 1
 ; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
@@ -2027,11 +2027,11 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C9(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C9(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r5
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -2047,16 +2047,16 @@ define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r5, 4(r4)
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    stw r5, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    lwz r3, 4(r4)
+; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C9(r2) # %const.0
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw v3, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4

diff  --git a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
index d96cf3eb72277f1..8bb71e073e81462 100644
--- a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
@@ -70,10 +70,10 @@ define <2 x i64> @test_v16i8_v16i8(i8 %arg1, i8 %arg) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    stb r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs0, vs1
@@ -114,8 +114,8 @@ define <2 x i64> @test_none_v16i8(i8 %arg1, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-BE-P8-LABEL: test_none_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, vs0
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -128,8 +128,8 @@ define <2 x i64> @test_none_v16i8(i8 %arg1, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, vs0
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -142,10 +142,10 @@ define <2 x i64> @test_none_v16i8(i8 %arg1, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v16i8:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stb r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvd2x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, v2, vs0
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -182,8 +182,8 @@ define <2 x i64> @test_v16i8_none(i8 %arg1, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, v2
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -196,8 +196,8 @@ define <2 x i64> @test_v16i8_none(i8 %arg1, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, v2
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -210,10 +210,10 @@ define <2 x i64> @test_v16i8_none(i8 %arg1, ptr nocapture noundef readonly %b) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stb r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvd2x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs0, v2
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -254,9 +254,9 @@ define <2 x i64> @test_v16i8_v8i16(i8 %arg1, i16 %arg) {
 ; CHECK-BE-P8-LABEL: test_v16i8_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 48
 ; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    mtfprd f1, r4
+; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
+; CHECK-BE-P8-NEXT:    mtfprd f1, r3
 ; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -272,9 +272,9 @@ define <2 x i64> @test_v16i8_v8i16(i8 %arg1, i16 %arg) {
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 48
 ; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r4
+; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
+; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -289,10 +289,10 @@ define <2 x i64> @test_v16i8_v8i16(i8 %arg1, i16 %arg) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs0, vs1
@@ -337,9 +337,9 @@ define <2 x i64> @test_v8i16_v16i8(i8 %arg1, i16 %arg) {
 ; CHECK-BE-P8-LABEL: test_v8i16_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 48
 ; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    mtfprd f1, r4
+; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
+; CHECK-BE-P8-NEXT:    mtfprd f1, r3
 ; CHECK-BE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -355,9 +355,9 @@ define <2 x i64> @test_v8i16_v16i8(i8 %arg1, i16 %arg) {
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 48
 ; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r4
+; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
+; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -372,10 +372,10 @@ define <2 x i64> @test_v8i16_v16i8(i8 %arg1, i16 %arg) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs1, vs0
@@ -416,8 +416,8 @@ define <2 x i64> @test_v8i16_none(i16 %arg1, ptr nocapture noundef readonly %b)
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, v2
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -430,8 +430,8 @@ define <2 x i64> @test_v8i16_none(i16 %arg1, ptr nocapture noundef readonly %b)
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, v2
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -444,10 +444,10 @@ define <2 x i64> @test_v8i16_none(i16 %arg1, ptr nocapture noundef readonly %b)
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvd2x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs0, v2
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -484,8 +484,8 @@ define <2 x i64> @test_none_v8i16(i16 %arg1, ptr nocapture noundef readonly %b)
 ;
 ; CHECK-BE-P8-LABEL: test_none_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, vs0
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -498,8 +498,8 @@ define <2 x i64> @test_none_v8i16(i16 %arg1, ptr nocapture noundef readonly %b)
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, vs0
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -512,10 +512,10 @@ define <2 x i64> @test_none_v8i16(i16 %arg1, ptr nocapture noundef readonly %b)
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvd2x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, v2, vs0
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -555,9 +555,9 @@ define <2 x i64> @test_v16i8_v4i32(i8 %arg1, i32 %arg) {
 ; CHECK-BE-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 32
 ; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    mtfprd f1, r4
+; CHECK-BE-P8-NEXT:    sldi r3, r4, 32
+; CHECK-BE-P8-NEXT:    mtfprd f1, r3
 ; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -572,9 +572,9 @@ define <2 x i64> @test_v16i8_v4i32(i8 %arg1, i32 %arg) {
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 32
 ; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r4
+; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 32
+; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -588,10 +588,10 @@ define <2 x i64> @test_v16i8_v4i32(i8 %arg1, i32 %arg) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs0, vs1
@@ -635,9 +635,9 @@ define <2 x i64> @test_v4i32_v16i8(i8 %arg1, i32 %arg) {
 ; CHECK-BE-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 32
 ; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    mtfprd f1, r4
+; CHECK-BE-P8-NEXT:    sldi r3, r4, 32
+; CHECK-BE-P8-NEXT:    mtfprd f1, r3
 ; CHECK-BE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -652,9 +652,9 @@ define <2 x i64> @test_v4i32_v16i8(i8 %arg1, i32 %arg) {
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 32
 ; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r4
+; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 32
+; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -668,10 +668,10 @@ define <2 x i64> @test_v4i32_v16i8(i8 %arg1, i32 %arg) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs1, vs0
@@ -712,8 +712,8 @@ define <2 x i64> @test_none_v4i32(i32 %arg1, ptr nocapture noundef readonly %b)
 ;
 ; CHECK-BE-P8-LABEL: test_none_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, vs0
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -726,8 +726,8 @@ define <2 x i64> @test_none_v4i32(i32 %arg1, ptr nocapture noundef readonly %b)
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, vs0
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -740,10 +740,10 @@ define <2 x i64> @test_none_v4i32(i32 %arg1, ptr nocapture noundef readonly %b)
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvd2x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, v2, vs0
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -780,8 +780,8 @@ define <2 x i64> @test_v4i32_none(i32 %arg1, ptr nocapture noundef readonly %b)
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, v2
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -794,8 +794,8 @@ define <2 x i64> @test_v4i32_none(i32 %arg1, ptr nocapture noundef readonly %b)
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, v2
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -808,10 +808,10 @@ define <2 x i64> @test_v4i32_none(i32 %arg1, ptr nocapture noundef readonly %b)
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvd2x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs0, v2
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -884,19 +884,19 @@ define <2 x i64> @test_v16i8_v2i64(i8 %arg1, i64 %arg) {
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C0(r2) # %const.0
-; CHECK-AIX-32-P8-NEXT:    addi r6, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r6
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    stw r5, -48(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C1(r2) # %const.1
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -48
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -48
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -969,15 +969,15 @@ define <2 x i64> @test_v2i64_v16i8(i8 %arg1, i64 %arg) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v16i8:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r6, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r6
 ; CHECK-AIX-32-P8-NEXT:    stw r5, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -48(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -48
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs2, 0, r4
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -48
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs1, vs2, vs1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    blr
@@ -1048,18 +1048,18 @@ define <2 x i64> @test_none_v2i64(ptr nocapture noundef readonly %b, i64 %arg) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r6, L..C2(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    stw r5, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C2(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C3(r2) # %const.1
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r6
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v4, v2
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -1097,9 +1097,9 @@ define <2 x i64> @test_v2i64_none(ptr nocapture noundef readonly %b, i64 %arg) {
 ; CHECK-BE-P8-LABEL: test_v2i64_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    mtfprd f0, r4
-; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r3
-; CHECK-BE-P8-NEXT:    xxspltd v3, vs0, 0
-; CHECK-BE-P8-NEXT:    xxmrghd v2, v3, v2
+; CHECK-BE-P8-NEXT:    lxvd2x v3, 0, r3
+; CHECK-BE-P8-NEXT:    xxspltd v2, vs0, 0
+; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v2i64_none:
@@ -1112,9 +1112,9 @@ define <2 x i64> @test_v2i64_none(ptr nocapture noundef readonly %b, i64 %arg) {
 ; CHECK-AIX-64-P8-LABEL: test_v2i64_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r4
-; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v3, vs0, vs0
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v3, v2
+; CHECK-AIX-64-P8-NEXT:    lxvd2x v3, 0, r3
+; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs0
+; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, v3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v2i64_none:
@@ -1127,12 +1127,12 @@ define <2 x i64> @test_v2i64_none(ptr nocapture noundef readonly %b, i64 %arg) {
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lxvd2x v2, 0, r3
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stw r5, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r4
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs0, v2
 ; CHECK-AIX-32-P8-NEXT:    blr
@@ -1199,10 +1199,10 @@ define <2 x i64> @test_v8i16_v8i16(i16 %arg1, i16 %arg) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs0, vs1
@@ -1246,9 +1246,9 @@ define <2 x i64> @test_v8i16_v4i32(i16 %arg1, i32 %arg) {
 ; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 32
 ; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    mtfprd f1, r4
+; CHECK-BE-P8-NEXT:    sldi r3, r4, 32
+; CHECK-BE-P8-NEXT:    mtfprd f1, r3
 ; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -1263,9 +1263,9 @@ define <2 x i64> @test_v8i16_v4i32(i16 %arg1, i32 %arg) {
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 32
 ; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r4
+; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 32
+; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -1279,10 +1279,10 @@ define <2 x i64> @test_v8i16_v4i32(i16 %arg1, i32 %arg) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs0, vs1
@@ -1359,19 +1359,19 @@ define <2 x i64> @test_v8i16_v2i64(i16 %arg1, i64 %arg) {
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C4(r2) # %const.0
-; CHECK-AIX-32-P8-NEXT:    addi r6, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r6
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    stw r5, -48(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C5(r2) # %const.1
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -48
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -48
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -1438,10 +1438,10 @@ define <2 x i64> @test_v4i32_v4i32(i32 %arg1, i32 %arg) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs0, vs1
@@ -1485,9 +1485,9 @@ define <2 x i64> @test_v4i32_v8i16(i32 %arg1, i16 %arg) {
 ; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 48
 ; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    mtfprd f1, r4
+; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
+; CHECK-BE-P8-NEXT:    mtfprd f1, r3
 ; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -1502,9 +1502,9 @@ define <2 x i64> @test_v4i32_v8i16(i32 %arg1, i16 %arg) {
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 32
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 48
 ; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r4
+; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
+; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -1518,10 +1518,10 @@ define <2 x i64> @test_v4i32_v8i16(i32 %arg1, i16 %arg) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs0, vs1
@@ -1594,17 +1594,17 @@ define <2 x i64> @test_v4i32_v2i64(i32 %arg1, i64 %arg) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -48(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -48
+; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    stw r5, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -48
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C6(r2) # %const.0
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r4
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -16
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-AIX-32-P8-NEXT:    blr
@@ -1634,10 +1634,10 @@ define <2 x i64> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-LE-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    ld r3, 0(r3)
-; CHECK-LE-P8-NEXT:    lfdx f0, 0, r4
-; CHECK-LE-P8-NEXT:    mtfprd f1, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P8-NEXT:    xxmrghd v3, vs0, vs1
+; CHECK-LE-P8-NEXT:    lfdx f1, 0, r4
+; CHECK-LE-P8-NEXT:    mtfprd f0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v3, vs1, vs0
 ; CHECK-LE-P8-NEXT:    vaddudm v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -1686,23 +1686,23 @@ define <2 x i64> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapt
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lwz r5, 4(r3)
-; CHECK-AIX-32-P8-NEXT:    addi r6, r1, -32
 ; CHECK-AIX-32-P8-NEXT:    stw r5, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, 4(r4)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r6
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -48(r1)
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -64
-; CHECK-AIX-32-P8-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -64(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -48
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs2, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs3, 0, r4
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs3, vs2
+; CHECK-AIX-32-P8-NEXT:    xxmrghw v2, vs1, vs0
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -64
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v3, v2, vs0
 ; CHECK-AIX-32-P8-NEXT:    vaddudm v2, v3, v2
 ; CHECK-AIX-32-P8-NEXT:    blr
@@ -1744,9 +1744,9 @@ define <2 x i64> @test_v2i64_v4i32(i64 %arg1, i32 %arg) {
 ; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs1
+; CHECK-LE-P8-NEXT:    mtfprd f0, r4
+; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -1790,15 +1790,15 @@ define <2 x i64> @test_v2i64_v4i32(i64 %arg1, i32 %arg) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r6, r1, -48
 ; CHECK-AIX-32-P8-NEXT:    stw r5, -48(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r6
-; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -48
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs2, 0, r4
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs1, vs2, vs1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    blr
@@ -1827,9 +1827,9 @@ define <2 x i64> @test_v2i64_v8i16(i64 %arg1, i16 %arg) {
 ; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs1
+; CHECK-LE-P8-NEXT:    mtfprd f0, r4
+; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -1874,15 +1874,15 @@ define <2 x i64> @test_v2i64_v8i16(i64 %arg1, i16 %arg) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r6, r1, -48
 ; CHECK-AIX-32-P8-NEXT:    sth r5, -48(r1)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r6
-; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -48
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs2, 0, r4
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs1, vs2, vs1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
index 24d3f8273d79c36..4ca55d276647bf1 100644
--- a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
@@ -29,13 +29,13 @@ define void @test_none_v8i16(ptr %a) {
 ; CHECK-LE-P8-LABEL: test_none_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
-; CHECK-LE-P8-NEXT:    lxsdx v2, 0, r3
+; CHECK-LE-P8-NEXT:    lxsdx v4, 0, r3
 ; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI0_0@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-LE-P8-NEXT:    mtvsrd v4, r4
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r4
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v3, v4, v2
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-LE-P8-NEXT:    stfdx f0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
@@ -55,10 +55,10 @@ define void @test_none_v8i16(ptr %a) {
 ; CHECK-BE-P8-LABEL: test_none_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-BE-P8-NEXT:    lfdx f0, 0, r3
-; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P8-NEXT:    mtfprd f1, r3
-; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-BE-P8-NEXT:    lfdx f1, 0, r3
+; CHECK-BE-P8-NEXT:    sldi r4, r4, 48
+; CHECK-BE-P8-NEXT:    mtfprd f0, r4
+; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-BE-P8-NEXT:    stfdx f0, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -74,10 +74,10 @@ define void @test_none_v8i16(ptr %a) {
 ; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    lfdx f0, 0, r3
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    lfdx f1, 0, r3
+; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 48
+; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-64-P8-NEXT:    stfdx f0, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -95,8 +95,8 @@ define void @test_none_v8i16(ptr %a) {
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    stw r3, 0(r3)
@@ -150,9 +150,9 @@ define void @test_v8i16_none(ptr %a) {
 ; CHECK-BE-P8-LABEL: test_v8i16_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-BE-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
-; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-BE-P8-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-BE-P8-NEXT:    mtfprwz f0, r4
+; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -167,9 +167,9 @@ define void @test_v8i16_none(ptr %a) {
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-AIX-64-P8-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -184,9 +184,9 @@ define void @test_v8i16_none(ptr %a) {
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    mtfprwz f1, r4
-; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    mtfprwz f0, r4
+; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -216,8 +216,8 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI2_0@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r3
 ; CHECK-LE-P8-NEXT:    mffprwz r3, f0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-LE-P8-NEXT:    mtvsrwz v4, r3
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r5
@@ -238,12 +238,12 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-BE-P8-LABEL: test_none_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-P8-NEXT:    mffprwz r3, f0
+; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
 ; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-BE-P8-NEXT:    mffprwz r4, f0
-; CHECK-BE-P8-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r5
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -262,11 +262,11 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-AIX-64-P8-NEXT:    mffprwz r4, f0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r4
 ; CHECK-AIX-64-P8-NEXT:    ld r4, L..C0(r2) # %const.0
-; CHECK-AIX-64-P8-NEXT:    mffprwz r5, f0
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r5
-; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r4
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -283,10 +283,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -16
+; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C0(r2) # %const.0
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
-; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r5
-; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
@@ -321,8 +321,8 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI3_0@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r3
 ; CHECK-LE-P8-NEXT:    mffprwz r3, f0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-LE-P8-NEXT:    mtvsrwz v4, r3
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r5
@@ -343,12 +343,12 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-BE-P8-LABEL: test_v4i32_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-BE-P8-NEXT:    mffprwz r3, f0
+; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
 ; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI3_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
-; CHECK-BE-P8-NEXT:    mffprwz r4, f0
-; CHECK-BE-P8-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r5
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -367,11 +367,11 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-AIX-64-P8-NEXT:    mffprwz r4, f0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r4
 ; CHECK-AIX-64-P8-NEXT:    ld r4, L..C1(r2) # %const.0
-; CHECK-AIX-64-P8-NEXT:    mffprwz r5, f0
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r5
-; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r4
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -388,10 +388,10 @@ define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -16
+; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C1(r2) # %const.0
-; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
-; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r5
-; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
@@ -430,9 +430,9 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
 ; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
-; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT:    lxsdx v4, 0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
 ;
@@ -451,13 +451,13 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI4_0@toc@ha
 ; CHECK-BE-P8-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-P8-NEXT:    addis r4, r2, .LCPI4_1@toc@ha
 ; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI4_0@toc@l
-; CHECK-BE-P8-NEXT:    addi r4, r4, .LCPI4_1@toc@l
 ; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r5
 ; CHECK-BE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r3
-; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r4
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI4_1@toc@ha
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI4_1@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
 ;
@@ -476,11 +476,11 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    ld r5, L..C2(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r4
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C3(r2) # %const.1
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r3
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r4
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C3(r2) # %const.1
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
 ;
@@ -496,13 +496,13 @@ define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_ad
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C2(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C2(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v5, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C3(r2) # %const.1
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v5, v2, v4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
@@ -679,11 +679,11 @@ define void @test_v8i16_v4i32(ptr %a) {
 ; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, f0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs0, vs1
+; CHECK-LE-P8-NEXT:    lfiwzx f1, 0, r3
+; CHECK-LE-P8-NEXT:    mtfprd f0, r4
+; CHECK-LE-P8-NEXT:    xxswapd vs1, f1
+; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs1, vs0
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
@@ -701,10 +701,10 @@ define void @test_v8i16_v4i32(ptr %a) {
 ; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P8-NEXT:    xxsldwi vs0, f0, f0, 1
+; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
+; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
 ; CHECK-BE-P8-NEXT:    mtfprd f1, r3
+; CHECK-BE-P8-NEXT:    xxsldwi vs0, f0, f0, 1
 ; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
@@ -722,10 +722,10 @@ define void @test_v8i16_v4i32(ptr %a) {
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, f0, f0, 1
+; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
+; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
 ; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
+; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, f0, f0, 1
 ; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
@@ -782,11 +782,11 @@ define void @test_v8i16_v2i64(ptr %a) {
 ; CHECK-LE-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-LE-P8-NEXT:    lfdx f0, 0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, f0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs0, vs1
+; CHECK-LE-P8-NEXT:    lfdx f1, 0, r3
+; CHECK-LE-P8-NEXT:    mtfprd f0, r4
+; CHECK-LE-P8-NEXT:    xxswapd vs1, f1
+; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs1, vs0
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
@@ -804,10 +804,10 @@ define void @test_v8i16_v2i64(ptr %a) {
 ; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-BE-P8-NEXT:    lfdx f0, 0, r3
-; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P8-NEXT:    mtfprd f1, r3
-; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-BE-P8-NEXT:    lfdx f1, 0, r3
+; CHECK-BE-P8-NEXT:    sldi r4, r4, 48
+; CHECK-BE-P8-NEXT:    mtfprd f0, r4
+; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -823,10 +823,10 @@ define void @test_v8i16_v2i64(ptr %a) {
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    lfdx f0, 0, r3
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-AIX-64-P8-NEXT:    lfdx f1, 0, r3
+; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 48
+; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -881,12 +881,12 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
 ; CHECK-LE-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI9_0@toc@ha
-; CHECK-LE-P8-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-LE-P8-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-LE-P8-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-LE-P8-NEXT:    lxsiwzx v4, 0, r4
 ; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI9_0@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v4, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v4i32:
@@ -901,11 +901,11 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI9_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI9_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI9_0@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -921,10 +921,10 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C4(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C4(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -939,10 +939,10 @@ define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C4(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C4(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -966,11 +966,11 @@ define void @test_v4i32_v8i16(ptr %a) {
 ; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, f0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs1, vs0
+; CHECK-LE-P8-NEXT:    lfiwzx f1, 0, r3
+; CHECK-LE-P8-NEXT:    mtfprd f0, r4
+; CHECK-LE-P8-NEXT:    xxswapd vs1, f1
+; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs0, vs1
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
@@ -988,10 +988,10 @@ define void @test_v4i32_v8i16(ptr %a) {
 ; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P8-NEXT:    xxsldwi vs0, f0, f0, 1
+; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
+; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
 ; CHECK-BE-P8-NEXT:    mtfprd f1, r3
+; CHECK-BE-P8-NEXT:    xxsldwi vs0, f0, f0, 1
 ; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
@@ -1009,10 +1009,10 @@ define void @test_v4i32_v8i16(ptr %a) {
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, f0, f0, 1
+; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
+; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
 ; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
+; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, f0, f0, 1
 ; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
@@ -1089,10 +1089,10 @@ define void @test_v4i32_v2i64(ptr %a) {
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    lfiwzx f1, 0, r3
-; CHECK-BE-P8-NEXT:    lfdx f0, 0, r3
-; CHECK-BE-P8-NEXT:    xxsldwi vs1, f1, f1, 1
-; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r3
+; CHECK-BE-P8-NEXT:    lfdx f1, 0, r3
+; CHECK-BE-P8-NEXT:    xxsldwi vs0, f0, f0, 1
+; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -1107,10 +1107,10 @@ define void @test_v4i32_v2i64(ptr %a) {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    lfiwzx f1, 0, r3
-; CHECK-AIX-64-P8-NEXT:    lfdx f0, 0, r3
-; CHECK-AIX-64-P8-NEXT:    xxsldwi vs1, f1, f1, 1
-; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs1, vs0
+; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT:    lfdx f1, 0, r3
+; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, f0, f0, 1
+; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -1127,12 +1127,12 @@ define void @test_v4i32_v2i64(ptr %a) {
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lwz r4, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r4
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
@@ -1212,17 +1212,17 @@ define void @test_v2i64_v2i64(ptr %a) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r5, 4(r3)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -16
-; CHECK-AIX-32-P8-NEXT:    stw r5, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    lwz r4, 4(r3)
+; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lfiwzx f2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
-; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs2, 1
+; CHECK-AIX-32-P8-NEXT:    lfiwzx f1, 0, r3
+; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
@@ -1272,10 +1272,10 @@ define void @test_v2i64_v4i32(ptr %a) {
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    lfiwzx f1, 0, r3
-; CHECK-BE-P8-NEXT:    lfdx f0, 0, r3
-; CHECK-BE-P8-NEXT:    xxsldwi vs1, f1, f1, 1
-; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r3
+; CHECK-BE-P8-NEXT:    lfdx f1, 0, r3
+; CHECK-BE-P8-NEXT:    xxsldwi vs0, f0, f0, 1
+; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -1290,10 +1290,10 @@ define void @test_v2i64_v4i32(ptr %a) {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    lfiwzx f1, 0, r3
-; CHECK-AIX-64-P8-NEXT:    lfdx f0, 0, r3
-; CHECK-AIX-64-P8-NEXT:    xxsldwi vs1, f1, f1, 1
-; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT:    lfdx f1, 0, r3
+; CHECK-AIX-64-P8-NEXT:    xxsldwi vs0, f0, f0, 1
+; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -1311,11 +1311,11 @@ define void @test_v2i64_v4i32(ptr %a) {
 ; CHECK-AIX-32-P8-NEXT:    lwz r4, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r4
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
@@ -1347,11 +1347,11 @@ define void @test_v2i64_v8i16(ptr %a) {
 ; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-LE-P8-NEXT:    lfdx f0, 0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, f0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs1, vs0
+; CHECK-LE-P8-NEXT:    lfdx f1, 0, r3
+; CHECK-LE-P8-NEXT:    mtfprd f0, r4
+; CHECK-LE-P8-NEXT:    xxswapd vs1, f1
+; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT:    xxmrglw vs0, vs0, vs1
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
@@ -1369,10 +1369,10 @@ define void @test_v2i64_v8i16(ptr %a) {
 ; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-BE-P8-NEXT:    lfdx f0, 0, r3
-; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P8-NEXT:    mtfprd f1, r3
-; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-BE-P8-NEXT:    lfdx f1, 0, r3
+; CHECK-BE-P8-NEXT:    sldi r4, r4, 48
+; CHECK-BE-P8-NEXT:    mtfprd f0, r4
+; CHECK-BE-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-BE-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -1388,10 +1388,10 @@ define void @test_v2i64_v8i16(ptr %a) {
 ; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    lfdx f0, 0, r3
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    lfdx f1, 0, r3
+; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 48
+; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrghw vs0, vs1, vs0
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x vs0, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
index ad4b112bebe0702..37820afeae082fa 100644
--- a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
@@ -31,8 +31,8 @@ define void @test_none_v8i16(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
 ; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI0_0@toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
@@ -51,12 +51,12 @@ define void @test_none_v8i16(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ;
 ; CHECK-BE-P8-LABEL: test_none_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-BE-P8-NEXT:    addi r4, r4, .LCPI0_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-BE-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
+; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -72,11 +72,11 @@ define void @test_none_v8i16(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C0(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C0(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -91,11 +91,11 @@ define void @test_none_v8i16(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C0(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v4, v3
+; CHECK-AIX-32-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C0(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -131,8 +131,8 @@ define void @test_v8i16_none(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
@@ -157,12 +157,12 @@ define void @test_v8i16_none(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-BE-P8-NEXT:    addi r4, r4, .LCPI1_0@toc@l
 ; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-BE-P8-NEXT:    addis r4, r2, .LCPI1_1@toc@ha
-; CHECK-BE-P8-NEXT:    addi r4, r4, .LCPI1_1@toc@l
-; CHECK-BE-P8-NEXT:    vperm v2, v4, v2, v3
-; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-BE-P8-NEXT:    mtvsrwz v4, r3
 ; CHECK-BE-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI1_1@toc@ha
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI1_1@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -184,11 +184,11 @@ define void @test_v8i16_none(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r5
 ; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C2(r2) # %const.1
-; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C2(r2) # %const.1
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -209,11 +209,11 @@ define void @test_v8i16_none(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %a
 ; CHECK-AIX-32-P8-NEXT:    mtvsrwz v4, r5
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C2(r2) # %const.1
-; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v2, v3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-AIX-32-P8-NEXT:    mtvsrwz v4, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-AIX-32-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C2(r2) # %const.1
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -243,6 +243,7 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
 ; CHECK-LE-P8-NEXT:    mtvsrd v3, r5
+; CHECK-LE-P8-NEXT:    lxsiwzx v4, 0, r3
 ; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI2_0@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
@@ -250,9 +251,8 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    vperm v2, v3, v3, v2
-; CHECK-LE-P8-NEXT:    lxsiwzx v3, 0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-LE-P8-NEXT:    stfdx f0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
@@ -276,14 +276,14 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
 ; CHECK-BE-P8-LABEL: test_none_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
-; CHECK-BE-P8-NEXT:    mtvsrwz v3, r5
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r5
 ; CHECK-BE-P8-NEXT:    addi r4, r4, .LCPI2_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v2, 0, r4
-; CHECK-BE-P8-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
-; CHECK-BE-P8-NEXT:    addi r4, r4, .LCPI2_1@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r4
-; CHECK-BE-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r4
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-BE-P8-NEXT:    lxsiwzx v3, 0, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI2_1@toc@ha
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI2_1@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-BE-P8-NEXT:    stxsdx v2, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
@@ -306,12 +306,12 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
 ; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    ld r4, L..C3(r2) # %const.0
-; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r5
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v2, 0, r4
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C4(r2) # %const.1
-; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v3, v2
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r5
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r3
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r4
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C4(r2) # %const.1
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-64-P8-NEXT:    stxsdx v2, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
@@ -331,11 +331,11 @@ define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
 ; CHECK-AIX-32-P8-NEXT:    stb r5, -32(r1)
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C3(r2) # %const.0
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    vmrghh v3, v3, v3
@@ -375,12 +375,12 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ; CHECK-LE-P8-LABEL: test_v4i32_none:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
-; CHECK-LE-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-LE-P8-NEXT:    lxsiwzx v3, 0, r3
 ; CHECK-LE-P8-NEXT:    xxlxor v4, v4, v4
 ; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI3_0@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v4, v3, v2
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
@@ -398,11 +398,11 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
 ; CHECK-BE-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-BE-P8-NEXT:    addi r4, r4, .LCPI3_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r4
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI3_0@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-BE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
@@ -420,10 +420,10 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C5(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C5(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
@@ -440,10 +440,10 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C4(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C4(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    blr
@@ -469,17 +469,17 @@ entry:
 define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) {
 ; CHECK-LE-P8-LABEL: test_none_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI4_0@toc@ha
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-LE-P8-NEXT:    lxsdx v2, 0, r3
+; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
+; CHECK-LE-P8-NEXT:    lxsdx v4, 0, r3
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI4_1@toc@ha
-; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI4_0@toc@l
+; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI4_0@toc@l
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI4_1@toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs1
-; CHECK-LE-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    xxlxor v4, v4, v4
 ; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
@@ -505,11 +505,11 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ;
 ; CHECK-BE-P8-LABEL: test_none_v2i64:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI4_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lxsdx v2, 0, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI4_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI4_0@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-BE-P8-NEXT:    xxlxor v3, v3, v3
 ; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
@@ -531,10 +531,10 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v2i64:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C6(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsdx v2, 0, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C6(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-64-P8-NEXT:    xxlxor v3, v3, v3
 ; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
@@ -555,10 +555,10 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C5(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C5(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r4
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-32-P8-NEXT:    xxlxor v3, v3, v3
 ; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
@@ -590,12 +590,12 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ; CHECK-LE-P8-LABEL: test_v2i64_none:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
-; CHECK-LE-P8-NEXT:    lxsdx v2, 0, r3
+; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r3
 ; CHECK-LE-P8-NEXT:    xxlxor v4, v4, v4
 ; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI5_0@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v4, v3, v2
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-LE-P8-NEXT:    blr
@@ -613,11 +613,11 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lxsdx v2, 0, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
 ; CHECK-BE-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-BE-P8-NEXT:    addi r4, r4, .LCPI5_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r4
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI5_0@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-BE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-BE-P8-NEXT:    blr
@@ -635,10 +635,10 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v2i64_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C7(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsdx v2, 0, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C7(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    blr
@@ -658,13 +658,13 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ; CHECK-AIX-32-P8-NEXT:    lwz r4, 4(r3)
 ; CHECK-AIX-32-P8-NEXT:    xxlxor v4, v4, v4
 ; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C6(r2) # %const.0
-; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v2, v3
@@ -699,13 +699,13 @@ define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) {
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
 ; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-LE-P8-NEXT:    lhz r4, 0(r4)
 ; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI6_0@toc@l
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r3
+; CHECK-LE-P8-NEXT:    lhz r3, 0(r4)
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-LE-P8-NEXT:    mtvsrd v2, r3
-; CHECK-LE-P8-NEXT:    mtvsrd v4, r4
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v4, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v8i16:
@@ -720,14 +720,14 @@ define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) {
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-BE-P8-NEXT:    lhz r4, 0(r4)
-; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI6_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v2, 0, r5
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-P8-NEXT:    lhz r3, 0(r4)
 ; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
-; CHECK-BE-P8-NEXT:    mtvsrwz v4, r4
-; CHECK-BE-P8-NEXT:    vperm v2, v3, v4, v2
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI6_0@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v8i16:
@@ -742,13 +742,13 @@ define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C8(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    lhz r4, 0(r4)
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r3
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
-; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r4
-; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
+; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r4)
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C8(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16:
@@ -762,13 +762,13 @@ define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C7(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-32-P8-NEXT:    lhz r4, 0(r4)
 ; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r3
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r5
-; CHECK-AIX-32-P8-NEXT:    mtvsrwz v4, r4
-; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v4, v3
+; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r4)
+; CHECK-AIX-32-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C7(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
 ; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16:
@@ -791,10 +791,10 @@ define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
+; CHECK-LE-P8-NEXT:    mtfprd f0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r4
-; CHECK-LE-P8-NEXT:    mtfprd f1, r3
 ; CHECK-LE-P8-NEXT:    xxswapd v3, f0
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs1
 ; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -809,11 +809,11 @@ define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr {
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
+; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P8-NEXT:    xxsldwi v2, f0, f0, 1
 ; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
+; CHECK-BE-P8-NEXT:    xxsldwi v2, f0, f0, 1
 ; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -828,11 +828,11 @@ define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
+; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
 ; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
+; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
 ; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -849,9 +849,9 @@ define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
@@ -885,10 +885,10 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
+; CHECK-LE-P8-NEXT:    mtfprd f0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    lfdx f0, 0, r4
-; CHECK-LE-P8-NEXT:    mtfprd f1, r3
 ; CHECK-LE-P8-NEXT:    xxswapd v3, f0
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs1
 ; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -904,10 +904,10 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-BE-P8-NEXT:    lxsdx v2, 0, r4
+; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r4
 ; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
+; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v2i64:
@@ -921,10 +921,10 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    lxsdx v2, 0, r4
+; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
+; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64:
@@ -939,9 +939,9 @@ define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
@@ -975,17 +975,17 @@ define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readon
 ; CHECK-LE-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI9_0@toc@ha
-; CHECK-LE-P8-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-LE-P8-NEXT:    lxsiwzx v3, 0, r4
+; CHECK-LE-P8-NEXT:    lxsiwzx v3, 0, r3
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI9_1@toc@ha
+; CHECK-LE-P8-NEXT:    lxsiwzx v4, 0, r4
 ; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI9_0@toc@l
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI9_1@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P8-NEXT:    vperm v2, v2, v3, v4
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v3, v4, v2
 ; CHECK-LE-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
@@ -1009,11 +1009,11 @@ define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readon
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI9_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI9_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI9_0@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-BE-P8-NEXT:    xxlxor v3, v3, v3
 ; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
@@ -1035,10 +1035,10 @@ define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readon
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C9(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C9(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-64-P8-NEXT:    xxlxor v3, v3, v3
 ; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
@@ -1059,10 +1059,10 @@ define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readon
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C8(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C8(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-32-P8-NEXT:    xxlxor v3, v3, v3
 ; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
@@ -1093,10 +1093,10 @@ define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
+; CHECK-LE-P8-NEXT:    mtfprd f0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r4
-; CHECK-LE-P8-NEXT:    mtfprd f1, r3
 ; CHECK-LE-P8-NEXT:    xxswapd v3, f0
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs1
 ; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -1111,11 +1111,11 @@ define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
+; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P8-NEXT:    xxsldwi v2, f0, f0, 1
 ; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
+; CHECK-BE-P8-NEXT:    xxsldwi v2, f0, f0, 1
 ; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
@@ -1130,11 +1130,11 @@ define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
+; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
 ; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
+; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
 ; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
@@ -1151,9 +1151,9 @@ define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
@@ -1187,9 +1187,9 @@ define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-LE-P8-NEXT:    lfdx f1, 0, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, f0
-; CHECK-LE-P8-NEXT:    xxswapd v3, f1
+; CHECK-LE-P8-NEXT:    lfdx f0, 0, r4
+; CHECK-LE-P8-NEXT:    xxswapd v3, f0
 ; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -1236,10 +1236,10 @@ define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C9(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C9(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -1268,17 +1268,17 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
 ; CHECK-LE-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI12_0@toc@ha
-; CHECK-LE-P8-NEXT:    lxsdx v2, 0, r3
-; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r4
+; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r3
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI12_1@toc@ha
+; CHECK-LE-P8-NEXT:    lxsdx v4, 0, r4
 ; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI12_0@toc@l
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI12_1@toc@l
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-LE-P8-NEXT:    vperm v2, v2, v3, v4
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    vperm v2, v3, v4, v2
 ; CHECK-LE-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
@@ -1302,11 +1302,11 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI12_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lxsdx v2, 0, r3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI12_0@toc@ha
 ; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r4
-; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI12_0@toc@l
-; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI12_0@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-BE-P8-NEXT:    xxlxor v3, v3, v3
 ; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
@@ -1328,10 +1328,10 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C10(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsdx v2, 0, r3
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C10(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
-; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-64-P8-NEXT:    xxlxor v3, v3, v3
 ; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
@@ -1352,10 +1352,10 @@ define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readon
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C10(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C10(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-32-P8-NEXT:    xxlxor v3, v3, v3
 ; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
@@ -1386,9 +1386,9 @@ define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-LE-P8-NEXT:    lfdx f1, 0, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, f0
-; CHECK-LE-P8-NEXT:    xxswapd v3, f1
+; CHECK-LE-P8-NEXT:    lfdx f0, 0, r4
+; CHECK-LE-P8-NEXT:    xxswapd v3, f0
 ; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -1435,10 +1435,10 @@ define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr {
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C11(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C11(r2) # %const.0
 ; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-32-P8-NEXT:    blr
 ;
@@ -1467,10 +1467,10 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
+; CHECK-LE-P8-NEXT:    mtfprd f0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    lfdx f0, 0, r4
-; CHECK-LE-P8-NEXT:    mtfprd f1, r3
 ; CHECK-LE-P8-NEXT:    xxswapd v3, f0
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs1
 ; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -1486,10 +1486,10 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-BE-P8-NEXT:    lxsdx v2, 0, r4
+; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r4
 ; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
+; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v2i64_v8i16:
@@ -1503,10 +1503,10 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
-; CHECK-AIX-64-P8-NEXT:    lxsdx v2, 0, r4
+; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
+; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16:
@@ -1521,9 +1521,9 @@ define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr {
 ; CHECK-AIX-32-P8:       # %bb.0: # %entry
 ; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
 ; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
 ; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r4
 ; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
 ; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
 ; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3

diff  --git a/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll b/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
index 3aa84a00070f48d..183f5566e3f2803 100644
--- a/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
+++ b/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
@@ -15,8 +15,8 @@ define zeroext i32 @geti(<4 x i32> %a, i32 zeroext %b) {
 ; CHECK-NEXT:    lvsl 3, 0, 3
 ; CHECK-NEXT:    li 3, 1
 ; CHECK-NEXT:    and 3, 3, 5
-; CHECK-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-NEXT:    sldi 3, 3, 5
+; CHECK-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-NEXT:    mfvsrd 4, 34
 ; CHECK-NEXT:    srd 3, 4, 3
 ; CHECK-NEXT:    clrldi 3, 3, 32
@@ -24,11 +24,11 @@ define zeroext i32 @geti(<4 x i32> %a, i32 zeroext %b) {
 ;
 ; CHECK-BE-LABEL: geti:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    andi. 4, 5, 2
+; CHECK-BE-NEXT:    andi. 3, 5, 2
+; CHECK-BE-NEXT:    sldi 3, 3, 2
+; CHECK-BE-NEXT:    lvsl 3, 0, 3
 ; CHECK-BE-NEXT:    li 3, 1
-; CHECK-BE-NEXT:    sldi 4, 4, 2
 ; CHECK-BE-NEXT:    andc 3, 3, 5
-; CHECK-BE-NEXT:    lvsl 3, 0, 4
 ; CHECK-BE-NEXT:    sldi 3, 3, 5
 ; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-BE-NEXT:    mfvsrd 4, 34

diff  --git a/llvm/test/CodeGen/PowerPC/vavg.ll b/llvm/test/CodeGen/PowerPC/vavg.ll
index 482e76c3b7d7e0f..f762eda1fbc17d7 100644
--- a/llvm/test/CodeGen/PowerPC/vavg.ll
+++ b/llvm/test/CodeGen/PowerPC/vavg.ll
@@ -150,12 +150,12 @@ define <8 x i16> @test_v8i16_sign_negative(<8 x i16> %m, <8 x i16> %n) {
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
 ; CHECK-P8-NEXT:    vadduhm 2, 2, 3
-; CHECK-P8-NEXT:    vspltish 3, 1
+; CHECK-P8-NEXT:    vspltish 5, 1
 ; CHECK-P8-NEXT:    addi 3, 3, .LCPI6_0@toc@l
 ; CHECK-P8-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-P8-NEXT:    xxswapd 36, 0
 ; CHECK-P8-NEXT:    vadduhm 2, 2, 4
-; CHECK-P8-NEXT:    vsrah 2, 2, 3
+; CHECK-P8-NEXT:    vsrah 2, 2, 5
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: test_v8i16_sign_negative:
@@ -219,11 +219,11 @@ define <4 x i32> @test_v4i32_sign_negative(<4 x i32> %m, <4 x i32> %n) {
 ;
 ; CHECK-P8-LABEL: test_v4i32_sign_negative:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xxleqv 36, 36, 36
 ; CHECK-P8-NEXT:    vadduwm 2, 2, 3
-; CHECK-P8-NEXT:    vspltisw 3, 1
-; CHECK-P8-NEXT:    vadduwm 2, 2, 4
-; CHECK-P8-NEXT:    vsraw 2, 2, 3
+; CHECK-P8-NEXT:    xxleqv 35, 35, 35
+; CHECK-P8-NEXT:    vspltisw 4, 1
+; CHECK-P8-NEXT:    vadduwm 2, 2, 3
+; CHECK-P8-NEXT:    vsraw 2, 2, 4
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: test_v4i32_sign_negative:

diff  --git a/llvm/test/CodeGen/PowerPC/vec-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
index 51f0d75d27bb5eb..37a1e46927b1e17 100644
--- a/llvm/test/CodeGen/PowerPC/vec-itofp.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
@@ -12,43 +12,43 @@
 define void @test8(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
 ; CHECK-P8-LABEL: test8:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI0_2@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
+; CHECK-P8-NEXT:    xxlxor v1, v1, v1
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI0_0@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI0_1@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI0_1@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI0_3@toc@ha
-; CHECK-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI0_0@toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI0_3@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    addi r5, r6, .LCPI0_2@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI0_1@toc@ha
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI0_2@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI0_2@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI0_1@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs4, 0, r5
-; CHECK-P8-NEXT:    xxswapd v0, vs3
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    vperm v4, v1, v2, v4
 ; CHECK-P8-NEXT:    xxswapd v5, vs2
-; CHECK-P8-NEXT:    xxswapd v1, vs4
-; CHECK-P8-NEXT:    vperm v0, v4, v2, v0
-; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
-; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
-; CHECK-P8-NEXT:    vperm v2, v4, v2, v1
-; CHECK-P8-NEXT:    xvcvuxddp vs2, v0
-; CHECK-P8-NEXT:    xvcvuxddp vs0, v3
+; CHECK-P8-NEXT:    vperm v5, v1, v2, v5
 ; CHECK-P8-NEXT:    xvcvuxddp vs1, v5
+; CHECK-P8-NEXT:    xxswapd v3, vs3
+; CHECK-P8-NEXT:    vperm v3, v1, v2, v3
+; CHECK-P8-NEXT:    xvcvuxddp vs2, v3
+; CHECK-P8-NEXT:    xxswapd v0, vs0
+; CHECK-P8-NEXT:    xvcvuxddp vs0, v4
+; CHECK-P8-NEXT:    vperm v2, v1, v2, v0
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    xvcvuxddp vs3, v2
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -121,21 +121,21 @@ entry:
 define void @test4(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
 ; CHECK-P8-LABEL: test4:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI1_0@toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI1_1@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI1_0@toc@l
-; CHECK-P8-NEXT:    addi r4, r6, .LCPI1_1@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
-; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
+; CHECK-P8-NEXT:    xxlxor v5, v5, v5
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI1_0@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI1_1@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI1_1@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xxswapd v5, vs2
-; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
-; CHECK-P8-NEXT:    vperm v2, v4, v2, v5
+; CHECK-P8-NEXT:    vperm v3, v5, v2, v3
+; CHECK-P8-NEXT:    xxswapd v4, vs0
 ; CHECK-P8-NEXT:    xvcvuxddp vs0, v3
+; CHECK-P8-NEXT:    vperm v2, v5, v2, v4
 ; CHECK-P8-NEXT:    xvcvuxddp vs1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
@@ -188,13 +188,13 @@ entry:
 define void @test2(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
 ; CHECK-P8-LABEL: test2:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
 ; CHECK-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_0@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-P8-NEXT:    xvcvuxddp vs0, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
@@ -234,53 +234,53 @@ entry:
 define void @stest8(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
 ; CHECK-P8-LABEL: stest8:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_2@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_0@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_2@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_4@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_4@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_0@toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_3@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    addi r5, r6, .LCPI3_2@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_4@toc@ha
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_1@toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_4@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs4, 0, r5
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xxswapd v5, vs3
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    xxswapd v4, vs2
-; CHECK-P8-NEXT:    xxswapd v0, vs4
-; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P8-NEXT:    xxswapd v4, vs1
 ; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
+; CHECK-P8-NEXT:    xxswapd v5, vs2
 ; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
+; CHECK-P8-NEXT:    xxswapd v3, vs3
+; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P8-NEXT:    xxswapd v0, vs0
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v0
 ; CHECK-P8-NEXT:    lxvd2x v0, 0, r4
 ; CHECK-P8-NEXT:    li r4, 48
 ; CHECK-P8-NEXT:    vsld v3, v3, v0
+; CHECK-P8-NEXT:    vsld v2, v2, v0
 ; CHECK-P8-NEXT:    vsld v4, v4, v0
 ; CHECK-P8-NEXT:    vsld v5, v5, v0
-; CHECK-P8-NEXT:    vsld v2, v2, v0
 ; CHECK-P8-NEXT:    vsrad v3, v3, v0
 ; CHECK-P8-NEXT:    vsrad v2, v2, v0
 ; CHECK-P8-NEXT:    vsrad v4, v4, v0
 ; CHECK-P8-NEXT:    vsrad v5, v5, v0
-; CHECK-P8-NEXT:    xvcvsxddp vs2, v2
-; CHECK-P8-NEXT:    xvcvsxddp vs0, v3
+; CHECK-P8-NEXT:    xvcvsxddp vs2, v3
+; CHECK-P8-NEXT:    xvcvsxddp vs3, v2
+; CHECK-P8-NEXT:    xvcvsxddp vs0, v4
 ; CHECK-P8-NEXT:    xvcvsxddp vs1, v5
-; CHECK-P8-NEXT:    xvcvsxddp vs3, v4
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    li r4, 32
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -359,19 +359,19 @@ entry:
 define void @stest4(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
 ; CHECK-P8-LABEL: stest4:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI4_0@toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI4_2@toc@ha
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI4_0@toc@l
-; CHECK-P8-NEXT:    addi r4, r6, .LCPI4_2@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI4_1@toc@ha
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI4_0@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI4_2@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI4_2@toc@l
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI4_1@toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI4_1@toc@l
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xxswapd v4, vs2
 ; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P8-NEXT:    xxswapd v4, vs0
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v4
 ; CHECK-P8-NEXT:    lxvd2x v4, 0, r4
 ; CHECK-P8-NEXT:    li r4, 16
@@ -381,8 +381,8 @@ define void @stest4(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
 ; CHECK-P8-NEXT:    vsrad v2, v2, v4
 ; CHECK-P8-NEXT:    xvcvsxddp vs0, v3
 ; CHECK-P8-NEXT:    xvcvsxddp vs1, v2
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
@@ -434,14 +434,14 @@ entry:
 define void @stest2(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
 ; CHECK-P8-LABEL: stest2:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_0@toc@l
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_1@toc@ha
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI5_0@toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_1@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-P8-NEXT:    lxvd2x v3, 0, r4
 ; CHECK-P8-NEXT:    vsld v2, v2, v3

diff  --git a/llvm/test/CodeGen/PowerPC/vec-min-max.ll b/llvm/test/CodeGen/PowerPC/vec-min-max.ll
index a3718bab682fd3a..2b94002b733755d 100644
--- a/llvm/test/CodeGen/PowerPC/vec-min-max.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-min-max.ll
@@ -241,14 +241,14 @@ define i128 @invalidv1i128(<2 x i128> %v1, <2 x i128> %v2) {
 ; CHECK-LABEL: invalidv1i128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mfvsrd 3, 36
-; CHECK-NEXT:    xxswapd 0, 36
 ; CHECK-NEXT:    mfvsrd 4, 34
-; CHECK-NEXT:    xxswapd 1, 34
 ; CHECK-NEXT:    cmpld 4, 3
+; CHECK-NEXT:    xxswapd 0, 36
+; CHECK-NEXT:    xxswapd 1, 34
 ; CHECK-NEXT:    cmpd 1, 4, 3
 ; CHECK-NEXT:    mffprd 3, 0
-; CHECK-NEXT:    crandc 20, 4, 2
 ; CHECK-NEXT:    mffprd 4, 1
+; CHECK-NEXT:    crandc 20, 4, 2
 ; CHECK-NEXT:    cmpld 1, 4, 3
 ; CHECK-NEXT:    bc 12, 20, .LBB12_3
 ; CHECK-NEXT:  # %bb.1:

diff  --git a/llvm/test/CodeGen/PowerPC/vec-promote.ll b/llvm/test/CodeGen/PowerPC/vec-promote.ll
index 1fbb0e8f4205e0d..628c5101c079652 100644
--- a/llvm/test/CodeGen/PowerPC/vec-promote.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-promote.ll
@@ -43,24 +43,24 @@ entry:
 define noundef <4 x float> @vec_promote_float_zeroed(ptr nocapture noundef readonly %p) {
 ; CHECK-BE-LABEL: vec_promote_float_zeroed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lfs 1, 0(3)
-; CHECK-BE-NEXT:    xxlxor 0, 0, 0
-; CHECK-BE-NEXT:    xxspltd 2, 0, 0
-; CHECK-BE-NEXT:    xxmrghd 0, 1, 0
-; CHECK-BE-NEXT:    xvcvdpsp 34, 2
-; CHECK-BE-NEXT:    xvcvdpsp 35, 0
-; CHECK-BE-NEXT:    vmrgew 2, 3, 2
+; CHECK-BE-NEXT:    lfs 0, 0(3)
+; CHECK-BE-NEXT:    xxlxor 1, 1, 1
+; CHECK-BE-NEXT:    xxmrghd 0, 0, 1
+; CHECK-BE-NEXT:    xxspltd 1, 1, 0
+; CHECK-BE-NEXT:    xvcvdpsp 34, 0
+; CHECK-BE-NEXT:    xvcvdpsp 35, 1
+; CHECK-BE-NEXT:    vmrgew 2, 2, 3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: vec_promote_float_zeroed:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    lfs 1, 0(3)
-; CHECK-LE-NEXT:    xxlxor 0, 0, 0
-; CHECK-LE-NEXT:    xxspltd 2, 0, 0
-; CHECK-LE-NEXT:    xxmrghd 0, 0, 1
-; CHECK-LE-NEXT:    xvcvdpsp 34, 2
-; CHECK-LE-NEXT:    xvcvdpsp 35, 0
-; CHECK-LE-NEXT:    vmrgew 2, 2, 3
+; CHECK-LE-NEXT:    lfs 0, 0(3)
+; CHECK-LE-NEXT:    xxlxor 1, 1, 1
+; CHECK-LE-NEXT:    xxmrghd 0, 1, 0
+; CHECK-LE-NEXT:    xxspltd 1, 1, 0
+; CHECK-LE-NEXT:    xvcvdpsp 34, 0
+; CHECK-LE-NEXT:    xvcvdpsp 35, 1
+; CHECK-LE-NEXT:    vmrgew 2, 3, 2
 ; CHECK-LE-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 8
@@ -132,10 +132,10 @@ define noundef <4 x i32> @vec_promote_int_zeroed(ptr nocapture noundef readonly
 ; CHECK-BE-NEXT:    lwz 3, 0(3)
 ; CHECK-BE-NEXT:    li 4, 0
 ; CHECK-BE-NEXT:    li 5, 0
-; CHECK-BE-NEXT:    rldimi 5, 5, 32, 0
-; CHECK-BE-NEXT:    rldimi 4, 3, 32, 0
-; CHECK-BE-NEXT:    mtfprd 1, 5
-; CHECK-BE-NEXT:    mtfprd 0, 4
+; CHECK-BE-NEXT:    rldimi 4, 4, 32, 0
+; CHECK-BE-NEXT:    rldimi 5, 3, 32, 0
+; CHECK-BE-NEXT:    mtfprd 1, 4
+; CHECK-BE-NEXT:    mtfprd 0, 5
 ; CHECK-BE-NEXT:    xxmrghd 34, 0, 1
 ; CHECK-BE-NEXT:    blr
 ;
@@ -178,11 +178,11 @@ define noundef <8 x i16> @vec_promote_short_zeroed(ptr nocapture noundef readonl
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 4, 2, .LCPI8_0@toc@ha
 ; CHECK-BE-NEXT:    lhz 3, 0(3)
-; CHECK-BE-NEXT:    li 5, 0
 ; CHECK-BE-NEXT:    addi 4, 4, .LCPI8_0@toc@l
-; CHECK-BE-NEXT:    mtvsrwz 35, 5
-; CHECK-BE-NEXT:    lxvw4x 34, 0, 4
 ; CHECK-BE-NEXT:    mtvsrwz 36, 3
+; CHECK-BE-NEXT:    lxvw4x 34, 0, 4
+; CHECK-BE-NEXT:    li 4, 0
+; CHECK-BE-NEXT:    mtvsrwz 35, 4
 ; CHECK-BE-NEXT:    vperm 2, 4, 3, 2
 ; CHECK-BE-NEXT:    blr
 ;
@@ -191,12 +191,12 @@ define noundef <8 x i16> @vec_promote_short_zeroed(ptr nocapture noundef readonl
 ; CHECK-LE-NEXT:    addis 4, 2, .LCPI8_0@toc@ha
 ; CHECK-LE-NEXT:    lhz 3, 0(3)
 ; CHECK-LE-NEXT:    addi 4, 4, .LCPI8_0@toc@l
+; CHECK-LE-NEXT:    mtvsrd 36, 3
 ; CHECK-LE-NEXT:    lxvd2x 0, 0, 4
 ; CHECK-LE-NEXT:    li 4, 0
-; CHECK-LE-NEXT:    mtvsrd 36, 3
-; CHECK-LE-NEXT:    mtvsrd 34, 4
-; CHECK-LE-NEXT:    xxswapd 35, 0
-; CHECK-LE-NEXT:    vperm 2, 2, 4, 3
+; CHECK-LE-NEXT:    mtvsrd 35, 4
+; CHECK-LE-NEXT:    xxswapd 34, 0
+; CHECK-LE-NEXT:    vperm 2, 3, 4, 2
 ; CHECK-LE-NEXT:    blr
 entry:
   %0 = load i16, ptr %p, align 2
@@ -229,11 +229,11 @@ define noundef <16 x i8> @vec_promote_char_zeroed(ptr nocapture noundef readonly
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis 4, 2, .LCPI10_0@toc@ha
 ; CHECK-BE-NEXT:    lbz 3, 0(3)
-; CHECK-BE-NEXT:    li 5, 0
 ; CHECK-BE-NEXT:    addi 4, 4, .LCPI10_0@toc@l
-; CHECK-BE-NEXT:    mtvsrwz 35, 5
-; CHECK-BE-NEXT:    lxvw4x 34, 0, 4
 ; CHECK-BE-NEXT:    mtvsrwz 36, 3
+; CHECK-BE-NEXT:    lxvw4x 34, 0, 4
+; CHECK-BE-NEXT:    li 4, 0
+; CHECK-BE-NEXT:    mtvsrwz 35, 4
 ; CHECK-BE-NEXT:    vperm 2, 4, 3, 2
 ; CHECK-BE-NEXT:    blr
 ;
@@ -242,12 +242,12 @@ define noundef <16 x i8> @vec_promote_char_zeroed(ptr nocapture noundef readonly
 ; CHECK-LE-NEXT:    addis 4, 2, .LCPI10_0@toc@ha
 ; CHECK-LE-NEXT:    lbz 3, 0(3)
 ; CHECK-LE-NEXT:    addi 4, 4, .LCPI10_0@toc@l
+; CHECK-LE-NEXT:    mtvsrd 36, 3
 ; CHECK-LE-NEXT:    lxvd2x 0, 0, 4
 ; CHECK-LE-NEXT:    li 4, 0
-; CHECK-LE-NEXT:    mtvsrd 36, 3
-; CHECK-LE-NEXT:    mtvsrd 34, 4
-; CHECK-LE-NEXT:    xxswapd 35, 0
-; CHECK-LE-NEXT:    vperm 2, 2, 4, 3
+; CHECK-LE-NEXT:    mtvsrd 35, 4
+; CHECK-LE-NEXT:    xxswapd 34, 0
+; CHECK-LE-NEXT:    vperm 2, 3, 4, 2
 ; CHECK-LE-NEXT:    blr
 entry:
   %0 = load i8, ptr %p, align 1

diff  --git a/llvm/test/CodeGen/PowerPC/vec-trunc.ll b/llvm/test/CodeGen/PowerPC/vec-trunc.ll
index e7f4ec1b978c7ff..b28e8c3a774208e 100644
--- a/llvm/test/CodeGen/PowerPC/vec-trunc.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-trunc.ll
@@ -61,12 +61,12 @@ entry:
 define void @test4i8w(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
 ; CHECK-LABEL: test4i8w:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
 ; CHECK-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-NEXT:    addi r5, r5, .LCPI2_0@toc@l
-; CHECK-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
+; CHECK-NEXT:    addi r4, r4, .LCPI2_0@toc@l
 ; CHECK-NEXT:    xxswapd v2, vs0
-; CHECK-NEXT:    xxswapd v3, vs1
+; CHECK-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-NEXT:    xxswapd v3, vs0
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    xxsldwi vs0, v2, v2, 2
 ; CHECK-NEXT:    stfiwx f0, 0, r3
@@ -174,12 +174,12 @@ entry:
 define void @test2i16d(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
 ; CHECK-LABEL: test2i16d:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
 ; CHECK-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-NEXT:    addi r5, r5, .LCPI6_0@toc@l
-; CHECK-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
+; CHECK-NEXT:    addi r4, r4, .LCPI6_0@toc@l
 ; CHECK-NEXT:    xxswapd v2, vs0
-; CHECK-NEXT:    xxswapd v3, vs1
+; CHECK-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-NEXT:    xxswapd v3, vs0
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    xxsldwi vs0, v2, v2, 2
 ; CHECK-NEXT:    stfiwx f0, 0, r3

diff  --git a/llvm/test/CodeGen/PowerPC/vec-trunc2.ll b/llvm/test/CodeGen/PowerPC/vec-trunc2.ll
index f5f09b4e85a6d80..7a09d5a5e8bb2e2 100644
--- a/llvm/test/CodeGen/PowerPC/vec-trunc2.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-trunc2.ll
@@ -9,20 +9,20 @@
 define dso_local <8 x i8> @test8x32(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8) {
 ; CHECK-LABEL: test8x32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addis r11, r2, .LCPI0_0@toc@ha
 ; CHECK-NEXT:    rldimi r3, r4, 32, 0
 ; CHECK-NEXT:    rldimi r5, r6, 32, 0
 ; CHECK-NEXT:    mtfprd f0, r3
-; CHECK-NEXT:    addi r3, r11, .LCPI0_0@toc@l
+; CHECK-NEXT:    mtfprd f1, r5
 ; CHECK-NEXT:    rldimi r7, r8, 32, 0
 ; CHECK-NEXT:    rldimi r9, r10, 32, 0
-; CHECK-NEXT:    lxvd2x vs3, 0, r3
-; CHECK-NEXT:    mtfprd f1, r5
 ; CHECK-NEXT:    mtfprd f2, r7
-; CHECK-NEXT:    mtfprd f4, r9
+; CHECK-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-NEXT:    addi r3, r3, .LCPI0_0@toc@l
 ; CHECK-NEXT:    xxmrghd v2, vs1, vs0
-; CHECK-NEXT:    xxswapd v4, vs3
-; CHECK-NEXT:    xxmrghd v3, vs4, vs2
+; CHECK-NEXT:    mtfprd f0, r9
+; CHECK-NEXT:    xxmrghd v3, vs0, vs2
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    xxswapd v4, vs0
 ; CHECK-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-NEXT:    blr
 ;
@@ -78,16 +78,16 @@ ret <8 x i8> %v2
 define dso_local <4 x i16> @test4x64(i64 %i1, i64 %i2, i64 %i3, i64 %i4) {
 ; CHECK-LABEL: test4x64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addis r7, r2, .LCPI1_0@toc@ha
 ; CHECK-NEXT:    mtfprd f0, r5
-; CHECK-NEXT:    addi r5, r7, .LCPI1_0@toc@l
 ; CHECK-NEXT:    mtfprd f1, r6
-; CHECK-NEXT:    lxvd2x vs3, 0, r5
-; CHECK-NEXT:    mtfprd f2, r3
-; CHECK-NEXT:    mtfprd f4, r4
 ; CHECK-NEXT:    xxmrghd v2, vs1, vs0
-; CHECK-NEXT:    xxmrghd v3, vs4, vs2
-; CHECK-NEXT:    xxswapd v4, vs3
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    mtfprd f1, r4
+; CHECK-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; CHECK-NEXT:    xxmrghd v3, vs1, vs0
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    xxswapd v4, vs0
 ; CHECK-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-NEXT:    blr
 ;
@@ -122,15 +122,15 @@ define dso_local <8 x i16> @test8x24(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5
 ; CHECK-NEXT:    mtvsrd v4, r5
 ; CHECK-NEXT:    mtvsrd v5, r6
 ; CHECK-NEXT:    mtvsrd v0, r7
-; CHECK-NEXT:    mtvsrd v1, r8
 ; CHECK-NEXT:    vmrghh v2, v3, v2
-; CHECK-NEXT:    mtvsrd v3, r9
+; CHECK-NEXT:    mtvsrd v3, r8
 ; CHECK-NEXT:    vmrghh v4, v5, v4
-; CHECK-NEXT:    mtvsrd v5, r10
-; CHECK-NEXT:    vmrghh v0, v1, v0
-; CHECK-NEXT:    vmrghh v3, v5, v3
+; CHECK-NEXT:    mtvsrd v5, r9
 ; CHECK-NEXT:    xxmrglw vs0, v4, v2
-; CHECK-NEXT:    xxmrglw vs1, v3, v0
+; CHECK-NEXT:    vmrghh v3, v3, v0
+; CHECK-NEXT:    mtvsrd v0, r10
+; CHECK-NEXT:    vmrghh v5, v0, v5
+; CHECK-NEXT:    xxmrglw vs1, v5, v3
 ; CHECK-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
index eadb1c4371c5bf5..07a8fb06caa1169 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll
@@ -15,14 +15,14 @@ define i32 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    mtfprd f0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprwz r3, f0
@@ -76,26 +76,26 @@ define i64 @test4elt(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f1, v2
-; CHECK-P8-NEXT:    xxswapd vs2, v2
-; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    xxswapd vs1, v2
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xscvspdpn f3, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    mffprwz r3, f3
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    mtvsrd v5, r3
 ; CHECK-P8-NEXT:    vmrghh v3, v4, v3
-; CHECK-P8-NEXT:    vmrghh v2, v2, v5
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    vmrghh v2, v2, v4
 ; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -168,51 +168,51 @@ define <8 x i16> @test8elt(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 3
-; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 1
-; CHECK-P8-NEXT:    xscvspdpn f3, v2
-; CHECK-P8-NEXT:    xxsldwi vs6, v3, v3, 3
-; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 1
-; CHECK-P8-NEXT:    xscvspdpn f5, v3
-; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xscvspdpn f4, vs4
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvspdpn f6, vs6
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    xscvspdpn f7, vs7
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    xscvdpsxws f0, f6
-; CHECK-P8-NEXT:    xscvdpsxws f2, f7
-; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
 ; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvspdpn f0, vs2
+; CHECK-P8-NEXT:    xxswapd v3, vs2
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mffprwz r4, f5
-; CHECK-P8-NEXT:    vmrghh v2, v2, v4
-; CHECK-P8-NEXT:    vmrghh v3, v3, v5
-; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxsldwi vs4, v3, v3, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 1
+; CHECK-P8-NEXT:    vmrghh v4, v4, v5
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs5
+; CHECK-P8-NEXT:    mtvsrd v0, r4
+; CHECK-P8-NEXT:    vmrghh v2, v2, v5
+; CHECK-P8-NEXT:    mtvsrd v5, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f1
+; CHECK-P8-NEXT:    mtvsrd v3, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    vmrghh v5, v5, v0
 ; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P8-NEXT:    vmrghh v4, v4, v0
-; CHECK-P8-NEXT:    vmrghh v5, v5, v1
-; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
+; CHECK-P8-NEXT:    xxmrglw vs0, v2, v4
+; CHECK-P8-NEXT:    vmrghh v3, v3, v0
+; CHECK-P8-NEXT:    xxmrglw vs1, v3, v5
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -330,102 +330,102 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr n
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    lxvd2x vs1, r4, r5
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r6
 ; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    lxvd2x vs3, r4, r6
-; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs8, r4, r6
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvspdpn f4, vs1
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs2
-; CHECK-P8-NEXT:    xxswapd v4, vs2
-; CHECK-P8-NEXT:    xxsldwi vs6, v2, v2, 3
-; CHECK-P8-NEXT:    xxsldwi vs8, v2, v2, 1
-; CHECK-P8-NEXT:    xscvspdpn f7, v2
-; CHECK-P8-NEXT:    xxsldwi vs9, v3, v3, 3
-; CHECK-P8-NEXT:    xscvspdpn f5, vs3
-; CHECK-P8-NEXT:    xxswapd v0, vs3
-; CHECK-P8-NEXT:    xscvspdpn f6, vs6
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvspdpn f8, vs8
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    xscvspdpn f9, vs9
-; CHECK-P8-NEXT:    xscvspdpn f2, v3
-; CHECK-P8-NEXT:    xscvdpsxws f6, f6
 ; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f7
-; CHECK-P8-NEXT:    xxsldwi vs7, v0, v0, 3
-; CHECK-P8-NEXT:    mtvsrd v2, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
-; CHECK-P8-NEXT:    xscvdpsxws f4, f8
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    xscvdpsxws f6, f9
-; CHECK-P8-NEXT:    xscvspdpn f10, v4
+; CHECK-P8-NEXT:    xscvspdpn f0, v3
+; CHECK-P8-NEXT:    mtvsrd v0, r4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxsldwi vs1, v3, v3, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs3, v3, v3, 1
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs3
+; CHECK-P8-NEXT:    xxswapd v2, vs2
+; CHECK-P8-NEXT:    xxswapd v4, vs6
+; CHECK-P8-NEXT:    xxswapd v5, vs8
 ; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xxsldwi vs0, v3, v3, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvspdpn f3, v0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs2
+; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 3
+; CHECK-P8-NEXT:    xxsldwi vs5, v2, v2, 1
+; CHECK-P8-NEXT:    xxsldwi vs7, v4, v4, 3
+; CHECK-P8-NEXT:    xxsldwi vs9, v4, v4, 1
+; CHECK-P8-NEXT:    xxsldwi vs10, v5, v5, 3
+; CHECK-P8-NEXT:    xxsldwi vs11, v5, v5, 1
 ; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
-; CHECK-P8-NEXT:    xxsldwi vs4, v4, v4, 3
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
-; CHECK-P8-NEXT:    vmrghh v2, v2, v1
-; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    vmrghh v0, v0, v1
 ; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    xxsldwi vs6, v4, v4, 1
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    vmrghh v3, v3, v1
 ; CHECK-P8-NEXT:    mtvsrd v1, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f10
-; CHECK-P8-NEXT:    vmrghh v4, v5, v1
-; CHECK-P8-NEXT:    xscvspdpn f4, vs4
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f5
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xxsldwi vs2, v0, v0, 1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs5
+; CHECK-P8-NEXT:    mtvsrd v6, r4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs6
+; CHECK-P8-NEXT:    mtvsrd v2, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs7
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    vmrghh v1, v1, v6
+; CHECK-P8-NEXT:    mtvsrd v6, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v4
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvspdpn f6, vs6
+; CHECK-P8-NEXT:    vmrghh v2, v2, v6
 ; CHECK-P8-NEXT:    mtvsrd v6, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f7, vs7
-; CHECK-P8-NEXT:    xscvdpsxws f1, f4
+; CHECK-P8-NEXT:    xscvspdpn f1, vs9
 ; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    xscvdpsxws f3, f6
-; CHECK-P8-NEXT:    mtvsrd v0, r4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs2
-; CHECK-P8-NEXT:    mtvsrd v8, r4
+; CHECK-P8-NEXT:    xscvspdpn f0, vs10
+; CHECK-P8-NEXT:    mtvsrd v4, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f7
-; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mffprwz r4, f3
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghh v6, v6, v8
+; CHECK-P8-NEXT:    vmrghh v6, v6, v7
+; CHECK-P8-NEXT:    mtvsrd v7, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs8
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxmrglw vs1, v2, v1
+; CHECK-P8-NEXT:    vmrghh v4, v4, v7
+; CHECK-P8-NEXT:    mtvsrd v7, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v5
 ; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    vmrghh v5, v5, v9
-; CHECK-P8-NEXT:    mtvsrd v9, r4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs11
+; CHECK-P8-NEXT:    xxmrglw vs2, v4, v6
+; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P8-NEXT:    vmrghh v7, v7, v8
-; CHECK-P8-NEXT:    xxmrglw vs1, v6, v4
+; CHECK-P8-NEXT:    vmrghh v7, v8, v7
 ; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    vmrghh v1, v1, v9
-; CHECK-P8-NEXT:    vmrghh v0, v0, v8
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v0
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
-; CHECK-P8-NEXT:    xxmrglw vs2, v7, v5
 ; CHECK-P8-NEXT:    xxswapd vs1, v2
-; CHECK-P8-NEXT:    xxmrglw vs3, v0, v1
+; CHECK-P8-NEXT:    vmrghh v5, v5, v8
+; CHECK-P8-NEXT:    xxmrglw vs3, v5, v7
 ; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
 ; CHECK-P8-NEXT:    xxswapd vs0, v3
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
@@ -647,14 +647,14 @@ define i32 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    mtfprd f0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprwz r3, f0
@@ -708,26 +708,26 @@ define i64 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f1, v2
-; CHECK-P8-NEXT:    xxswapd vs2, v2
-; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    xxswapd vs1, v2
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xscvspdpn f3, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    mffprwz r3, f3
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    mtvsrd v5, r3
 ; CHECK-P8-NEXT:    vmrghh v3, v4, v3
-; CHECK-P8-NEXT:    vmrghh v2, v2, v5
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    vmrghh v2, v2, v4
 ; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -800,51 +800,51 @@ define <8 x i16> @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #2
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 3
-; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 1
-; CHECK-P8-NEXT:    xscvspdpn f3, v2
-; CHECK-P8-NEXT:    xxsldwi vs6, v3, v3, 3
-; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 1
-; CHECK-P8-NEXT:    xscvspdpn f5, v3
-; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xscvspdpn f4, vs4
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvspdpn f6, vs6
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    xscvspdpn f7, vs7
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    xscvdpsxws f0, f6
-; CHECK-P8-NEXT:    xscvdpsxws f2, f7
-; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
 ; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvspdpn f0, vs2
+; CHECK-P8-NEXT:    xxswapd v3, vs2
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mffprwz r4, f5
-; CHECK-P8-NEXT:    vmrghh v2, v2, v4
-; CHECK-P8-NEXT:    vmrghh v3, v3, v5
-; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxsldwi vs4, v3, v3, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 1
+; CHECK-P8-NEXT:    vmrghh v4, v4, v5
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs5
+; CHECK-P8-NEXT:    mtvsrd v0, r4
+; CHECK-P8-NEXT:    vmrghh v2, v2, v5
+; CHECK-P8-NEXT:    mtvsrd v5, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f1
+; CHECK-P8-NEXT:    mtvsrd v3, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    vmrghh v5, v5, v0
 ; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P8-NEXT:    vmrghh v4, v4, v0
-; CHECK-P8-NEXT:    vmrghh v5, v5, v1
-; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
+; CHECK-P8-NEXT:    xxmrglw vs0, v2, v4
+; CHECK-P8-NEXT:    vmrghh v3, v3, v0
+; CHECK-P8-NEXT:    xxmrglw vs1, v3, v5
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -962,102 +962,102 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i16>) %agg.result
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    lxvd2x vs1, r4, r5
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r6
 ; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    lxvd2x vs3, r4, r6
-; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs8, r4, r6
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvspdpn f4, vs1
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs2
-; CHECK-P8-NEXT:    xxswapd v4, vs2
-; CHECK-P8-NEXT:    xxsldwi vs6, v2, v2, 3
-; CHECK-P8-NEXT:    xxsldwi vs8, v2, v2, 1
-; CHECK-P8-NEXT:    xscvspdpn f7, v2
-; CHECK-P8-NEXT:    xxsldwi vs9, v3, v3, 3
-; CHECK-P8-NEXT:    xscvspdpn f5, vs3
-; CHECK-P8-NEXT:    xxswapd v0, vs3
-; CHECK-P8-NEXT:    xscvspdpn f6, vs6
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvspdpn f8, vs8
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    xscvspdpn f9, vs9
-; CHECK-P8-NEXT:    xscvspdpn f2, v3
-; CHECK-P8-NEXT:    xscvdpsxws f6, f6
 ; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvdpsxws f0, f7
-; CHECK-P8-NEXT:    xxsldwi vs7, v0, v0, 3
-; CHECK-P8-NEXT:    mtvsrd v2, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
-; CHECK-P8-NEXT:    xscvdpsxws f4, f8
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    xscvdpsxws f6, f9
-; CHECK-P8-NEXT:    xscvspdpn f10, v4
+; CHECK-P8-NEXT:    xscvspdpn f0, v3
+; CHECK-P8-NEXT:    mtvsrd v0, r4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxsldwi vs1, v3, v3, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs3, v3, v3, 1
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs3
+; CHECK-P8-NEXT:    xxswapd v2, vs2
+; CHECK-P8-NEXT:    xxswapd v4, vs6
+; CHECK-P8-NEXT:    xxswapd v5, vs8
 ; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xxsldwi vs0, v3, v3, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvspdpn f3, v0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs2
+; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 3
+; CHECK-P8-NEXT:    xxsldwi vs5, v2, v2, 1
+; CHECK-P8-NEXT:    xxsldwi vs7, v4, v4, 3
+; CHECK-P8-NEXT:    xxsldwi vs9, v4, v4, 1
+; CHECK-P8-NEXT:    xxsldwi vs10, v5, v5, 3
+; CHECK-P8-NEXT:    xxsldwi vs11, v5, v5, 1
 ; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
-; CHECK-P8-NEXT:    xxsldwi vs4, v4, v4, 3
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
-; CHECK-P8-NEXT:    vmrghh v2, v2, v1
-; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    vmrghh v0, v0, v1
 ; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    xxsldwi vs6, v4, v4, 1
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    vmrghh v3, v3, v1
 ; CHECK-P8-NEXT:    mtvsrd v1, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f10
-; CHECK-P8-NEXT:    vmrghh v4, v5, v1
-; CHECK-P8-NEXT:    xscvspdpn f4, vs4
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f5
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xxsldwi vs2, v0, v0, 1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs5
+; CHECK-P8-NEXT:    mtvsrd v6, r4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs6
+; CHECK-P8-NEXT:    mtvsrd v2, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs7
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    vmrghh v1, v1, v6
+; CHECK-P8-NEXT:    mtvsrd v6, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v4
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvspdpn f6, vs6
+; CHECK-P8-NEXT:    vmrghh v2, v2, v6
 ; CHECK-P8-NEXT:    mtvsrd v6, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f7, vs7
-; CHECK-P8-NEXT:    xscvdpsxws f1, f4
+; CHECK-P8-NEXT:    xscvspdpn f1, vs9
 ; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    xscvdpsxws f3, f6
-; CHECK-P8-NEXT:    mtvsrd v0, r4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xscvspdpn f0, vs2
-; CHECK-P8-NEXT:    mtvsrd v8, r4
+; CHECK-P8-NEXT:    xscvspdpn f0, vs10
+; CHECK-P8-NEXT:    mtvsrd v4, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvdpsxws f1, f7
-; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mffprwz r4, f3
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghh v6, v6, v8
+; CHECK-P8-NEXT:    vmrghh v6, v6, v7
+; CHECK-P8-NEXT:    mtvsrd v7, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs8
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxmrglw vs1, v2, v1
+; CHECK-P8-NEXT:    vmrghh v4, v4, v7
+; CHECK-P8-NEXT:    mtvsrd v7, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v5
 ; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    vmrghh v5, v5, v9
-; CHECK-P8-NEXT:    mtvsrd v9, r4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs11
+; CHECK-P8-NEXT:    xxmrglw vs2, v4, v6
+; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P8-NEXT:    vmrghh v7, v7, v8
-; CHECK-P8-NEXT:    xxmrglw vs1, v6, v4
+; CHECK-P8-NEXT:    vmrghh v7, v8, v7
 ; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    vmrghh v1, v1, v9
-; CHECK-P8-NEXT:    vmrghh v0, v0, v8
+; CHECK-P8-NEXT:    xxmrglw vs0, v3, v0
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
-; CHECK-P8-NEXT:    xxmrglw vs2, v7, v5
 ; CHECK-P8-NEXT:    xxswapd vs1, v2
-; CHECK-P8-NEXT:    xxmrglw vs3, v0, v1
+; CHECK-P8-NEXT:    vmrghh v5, v5, v8
+; CHECK-P8-NEXT:    xxmrglw vs3, v5, v7
 ; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
 ; CHECK-P8-NEXT:    xxswapd vs0, v3
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
index 3728311bfa2e7ca..e5d5e3a1eb6f26b 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll
@@ -48,9 +48,9 @@ define void @test4elt(ptr noalias nocapture sret(<4 x i64>) %agg.result, <4 x fl
 ; CHECK-P8-NEXT:    xxmrghw vs1, v2, v2
 ; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    xvcvspdp vs0, vs0
-; CHECK-P8-NEXT:    xvcvspdp vs1, vs1
 ; CHECK-P8-NEXT:    xvcvdpuxds v2, vs0
-; CHECK-P8-NEXT:    xvcvdpuxds v3, vs1
+; CHECK-P8-NEXT:    xvcvspdp vs0, vs1
+; CHECK-P8-NEXT:    xvcvdpuxds v3, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v3
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
@@ -90,30 +90,30 @@ define void @test8elt(ptr noalias nocapture sret(<8 x i64>) %agg.result, ptr noc
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxmrglw vs1, v2, v2
+; CHECK-P8-NEXT:    xvcvspdp vs1, vs1
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxmrghw vs0, v2, v2
+; CHECK-P8-NEXT:    xvcvspdp vs0, vs0
+; CHECK-P8-NEXT:    xvcvdpuxds v2, vs1
 ; CHECK-P8-NEXT:    xxmrglw vs2, v3, v3
 ; CHECK-P8-NEXT:    xxmrghw vs3, v3, v3
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v2
-; CHECK-P8-NEXT:    xxmrghw vs1, v2, v2
 ; CHECK-P8-NEXT:    xvcvspdp vs2, vs2
-; CHECK-P8-NEXT:    xvcvspdp vs0, vs0
-; CHECK-P8-NEXT:    xvcvspdp vs1, vs1
 ; CHECK-P8-NEXT:    xvcvspdp vs3, vs3
+; CHECK-P8-NEXT:    xvcvdpuxds v3, vs0
 ; CHECK-P8-NEXT:    xvcvdpuxds v4, vs2
-; CHECK-P8-NEXT:    xvcvdpuxds v2, vs0
-; CHECK-P8-NEXT:    xvcvdpuxds v3, vs1
-; CHECK-P8-NEXT:    xvcvdpuxds v5, vs3
-; CHECK-P8-NEXT:    xxswapd vs3, v4
 ; CHECK-P8-NEXT:    xxswapd vs1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v3
-; CHECK-P8-NEXT:    xxswapd vs2, v5
+; CHECK-P8-NEXT:    xvcvdpuxds v3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, v4
+; CHECK-P8-NEXT:    xxswapd vs2, v3
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
 ; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
 ; CHECK-P8-NEXT:    blr
@@ -171,60 +171,60 @@ entry:
 define void @test16elt(ptr noalias nocapture sret(<16 x i64>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r7, 48
-; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r5, 48
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r8, 64
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    li r7, 16
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
-; CHECK-P8-NEXT:    xxswapd v4, vs2
-; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
 ; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    xxswapd v4, vs3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xxmrghw vs3, v4, v4
+; CHECK-P8-NEXT:    xxmrglw vs6, v4, v4
+; CHECK-P8-NEXT:    xxmrghw vs7, v4, v4
+; CHECK-P8-NEXT:    xvcvspdp vs6, vs6
+; CHECK-P8-NEXT:    xvcvspdp vs7, vs7
+; CHECK-P8-NEXT:    xvcvdpuxds v1, vs7
+; CHECK-P8-NEXT:    xxmrghw vs0, v2, v2
 ; CHECK-P8-NEXT:    xxmrglw vs1, v2, v2
-; CHECK-P8-NEXT:    xxmrghw vs2, v2, v2
-; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxmrglw vs5, v4, v4
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v3
-; CHECK-P8-NEXT:    xvcvspdp vs3, vs3
-; CHECK-P8-NEXT:    xxmrghw vs4, v3, v3
-; CHECK-P8-NEXT:    xxmrglw vs6, v2, v2
-; CHECK-P8-NEXT:    xxmrghw vs7, v2, v2
-; CHECK-P8-NEXT:    xvcvspdp vs5, vs5
 ; CHECK-P8-NEXT:    xvcvspdp vs1, vs1
-; CHECK-P8-NEXT:    xvcvspdp vs2, vs2
 ; CHECK-P8-NEXT:    xvcvspdp vs0, vs0
+; CHECK-P8-NEXT:    xxswapd v2, vs2
+; CHECK-P8-NEXT:    xxmrghw vs3, v3, v3
+; CHECK-P8-NEXT:    xxmrglw vs2, v3, v3
+; CHECK-P8-NEXT:    xvcvspdp vs3, vs3
+; CHECK-P8-NEXT:    xvcvspdp vs2, vs2
+; CHECK-P8-NEXT:    xvcvdpuxds v4, vs0
+; CHECK-P8-NEXT:    xvcvdpuxds v0, vs1
+; CHECK-P8-NEXT:    xvcvdpuxds v5, vs3
+; CHECK-P8-NEXT:    xxmrglw vs4, v2, v2
+; CHECK-P8-NEXT:    xxmrghw vs5, v2, v2
 ; CHECK-P8-NEXT:    xvcvspdp vs4, vs4
-; CHECK-P8-NEXT:    xvcvspdp vs6, vs6
-; CHECK-P8-NEXT:    xvcvspdp vs7, vs7
-; CHECK-P8-NEXT:    xvcvdpuxds v3, vs3
-; CHECK-P8-NEXT:    xvcvdpuxds v5, vs5
-; CHECK-P8-NEXT:    xvcvdpuxds v2, vs1
+; CHECK-P8-NEXT:    xvcvspdp vs5, vs5
+; CHECK-P8-NEXT:    xvcvdpuxds v2, vs4
+; CHECK-P8-NEXT:    xvcvdpuxds v3, vs5
+; CHECK-P8-NEXT:    xxswapd vs4, v1
+; CHECK-P8-NEXT:    xxswapd vs0, v4
 ; CHECK-P8-NEXT:    xvcvdpuxds v4, vs2
-; CHECK-P8-NEXT:    xvcvdpuxds v0, vs4
-; CHECK-P8-NEXT:    xvcvdpuxds v1, vs0
-; CHECK-P8-NEXT:    xvcvdpuxds v6, vs6
-; CHECK-P8-NEXT:    xxswapd vs0, v3
-; CHECK-P8-NEXT:    xvcvdpuxds v7, vs7
-; CHECK-P8-NEXT:    xxswapd vs1, v5
-; CHECK-P8-NEXT:    xxswapd vs4, v2
+; CHECK-P8-NEXT:    xxswapd vs1, v0
+; CHECK-P8-NEXT:    xvcvdpuxds v0, vs6
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-NEXT:    li r4, 96
-; CHECK-P8-NEXT:    xxswapd vs3, v4
-; CHECK-P8-NEXT:    xxswapd vs2, v0
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
 ; CHECK-P8-NEXT:    li r4, 80
-; CHECK-P8-NEXT:    xxswapd vs0, v1
-; CHECK-P8-NEXT:    xxswapd vs5, v6
-; CHECK-P8-NEXT:    xxswapd vs1, v7
+; CHECK-P8-NEXT:    xxswapd vs0, v5
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    li r4, 64
+; CHECK-P8-NEXT:    xxswapd vs3, v2
+; CHECK-P8-NEXT:    xxswapd vs1, v3
+; CHECK-P8-NEXT:    xxswapd vs2, v4
+; CHECK-P8-NEXT:    xxswapd vs5, v0
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r8
-; CHECK-P8-NEXT:    stxvd2x vs3, r3, r7
-; CHECK-P8-NEXT:    stxvd2x vs4, r3, r6
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r7
 ; CHECK-P8-NEXT:    stxvd2x vs5, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -353,9 +353,9 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x i64>) %agg.result,
 ; CHECK-P8-NEXT:    xxmrghw vs1, v2, v2
 ; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    xvcvspdp vs0, vs0
-; CHECK-P8-NEXT:    xvcvspdp vs1, vs1
 ; CHECK-P8-NEXT:    xvcvdpuxds v2, vs0
-; CHECK-P8-NEXT:    xvcvdpuxds v3, vs1
+; CHECK-P8-NEXT:    xvcvspdp vs0, vs1
+; CHECK-P8-NEXT:    xvcvdpuxds v3, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v3
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
@@ -395,30 +395,30 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x i64>) %agg.result,
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxmrglw vs1, v2, v2
+; CHECK-P8-NEXT:    xvcvspdp vs1, vs1
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xxmrghw vs0, v2, v2
+; CHECK-P8-NEXT:    xvcvspdp vs0, vs0
+; CHECK-P8-NEXT:    xvcvdpuxds v2, vs1
 ; CHECK-P8-NEXT:    xxmrglw vs2, v3, v3
 ; CHECK-P8-NEXT:    xxmrghw vs3, v3, v3
-; CHECK-P8-NEXT:    xxmrglw vs0, v2, v2
-; CHECK-P8-NEXT:    xxmrghw vs1, v2, v2
 ; CHECK-P8-NEXT:    xvcvspdp vs2, vs2
-; CHECK-P8-NEXT:    xvcvspdp vs0, vs0
-; CHECK-P8-NEXT:    xvcvspdp vs1, vs1
 ; CHECK-P8-NEXT:    xvcvspdp vs3, vs3
+; CHECK-P8-NEXT:    xvcvdpuxds v3, vs0
 ; CHECK-P8-NEXT:    xvcvdpuxds v4, vs2
-; CHECK-P8-NEXT:    xvcvdpuxds v2, vs0
-; CHECK-P8-NEXT:    xvcvdpuxds v3, vs1
-; CHECK-P8-NEXT:    xvcvdpuxds v5, vs3
-; CHECK-P8-NEXT:    xxswapd vs3, v4
 ; CHECK-P8-NEXT:    xxswapd vs1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v3
-; CHECK-P8-NEXT:    xxswapd vs2, v5
+; CHECK-P8-NEXT:    xvcvdpuxds v3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, v4
+; CHECK-P8-NEXT:    xxswapd vs2, v3
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
 ; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
 ; CHECK-P8-NEXT:    blr
@@ -476,60 +476,60 @@ entry:
 define void @test16elt_signed(ptr noalias nocapture sret(<16 x i64>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r7, 48
-; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r5, 48
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r8, 64
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
+; CHECK-P8-NEXT:    li r7, 16
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
-; CHECK-P8-NEXT:    xxswapd v4, vs2
-; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
 ; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    xxswapd v4, vs3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xxmrghw vs3, v4, v4
+; CHECK-P8-NEXT:    xxmrglw vs6, v4, v4
+; CHECK-P8-NEXT:    xxmrghw vs7, v4, v4
+; CHECK-P8-NEXT:    xvcvspdp vs6, vs6
+; CHECK-P8-NEXT:    xvcvspdp vs7, vs7
+; CHECK-P8-NEXT:    xvcvdpuxds v1, vs7
+; CHECK-P8-NEXT:    xxmrghw vs0, v2, v2
 ; CHECK-P8-NEXT:    xxmrglw vs1, v2, v2
-; CHECK-P8-NEXT:    xxmrghw vs2, v2, v2
-; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxmrglw vs5, v4, v4
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v3
-; CHECK-P8-NEXT:    xvcvspdp vs3, vs3
-; CHECK-P8-NEXT:    xxmrghw vs4, v3, v3
-; CHECK-P8-NEXT:    xxmrglw vs6, v2, v2
-; CHECK-P8-NEXT:    xxmrghw vs7, v2, v2
-; CHECK-P8-NEXT:    xvcvspdp vs5, vs5
 ; CHECK-P8-NEXT:    xvcvspdp vs1, vs1
-; CHECK-P8-NEXT:    xvcvspdp vs2, vs2
 ; CHECK-P8-NEXT:    xvcvspdp vs0, vs0
+; CHECK-P8-NEXT:    xxswapd v2, vs2
+; CHECK-P8-NEXT:    xxmrghw vs3, v3, v3
+; CHECK-P8-NEXT:    xxmrglw vs2, v3, v3
+; CHECK-P8-NEXT:    xvcvspdp vs3, vs3
+; CHECK-P8-NEXT:    xvcvspdp vs2, vs2
+; CHECK-P8-NEXT:    xvcvdpuxds v4, vs0
+; CHECK-P8-NEXT:    xvcvdpuxds v0, vs1
+; CHECK-P8-NEXT:    xvcvdpuxds v5, vs3
+; CHECK-P8-NEXT:    xxmrglw vs4, v2, v2
+; CHECK-P8-NEXT:    xxmrghw vs5, v2, v2
 ; CHECK-P8-NEXT:    xvcvspdp vs4, vs4
-; CHECK-P8-NEXT:    xvcvspdp vs6, vs6
-; CHECK-P8-NEXT:    xvcvspdp vs7, vs7
-; CHECK-P8-NEXT:    xvcvdpuxds v3, vs3
-; CHECK-P8-NEXT:    xvcvdpuxds v5, vs5
-; CHECK-P8-NEXT:    xvcvdpuxds v2, vs1
+; CHECK-P8-NEXT:    xvcvspdp vs5, vs5
+; CHECK-P8-NEXT:    xvcvdpuxds v2, vs4
+; CHECK-P8-NEXT:    xvcvdpuxds v3, vs5
+; CHECK-P8-NEXT:    xxswapd vs4, v1
+; CHECK-P8-NEXT:    xxswapd vs0, v4
 ; CHECK-P8-NEXT:    xvcvdpuxds v4, vs2
-; CHECK-P8-NEXT:    xvcvdpuxds v0, vs4
-; CHECK-P8-NEXT:    xvcvdpuxds v1, vs0
-; CHECK-P8-NEXT:    xvcvdpuxds v6, vs6
-; CHECK-P8-NEXT:    xxswapd vs0, v3
-; CHECK-P8-NEXT:    xvcvdpuxds v7, vs7
-; CHECK-P8-NEXT:    xxswapd vs1, v5
-; CHECK-P8-NEXT:    xxswapd vs4, v2
+; CHECK-P8-NEXT:    xxswapd vs1, v0
+; CHECK-P8-NEXT:    xvcvdpuxds v0, vs6
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-NEXT:    li r4, 96
-; CHECK-P8-NEXT:    xxswapd vs3, v4
-; CHECK-P8-NEXT:    xxswapd vs2, v0
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
 ; CHECK-P8-NEXT:    li r4, 80
-; CHECK-P8-NEXT:    xxswapd vs0, v1
-; CHECK-P8-NEXT:    xxswapd vs5, v6
-; CHECK-P8-NEXT:    xxswapd vs1, v7
+; CHECK-P8-NEXT:    xxswapd vs0, v5
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    li r4, 64
+; CHECK-P8-NEXT:    xxswapd vs3, v2
+; CHECK-P8-NEXT:    xxswapd vs1, v3
+; CHECK-P8-NEXT:    xxswapd vs2, v4
+; CHECK-P8-NEXT:    xxswapd vs5, v0
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r8
-; CHECK-P8-NEXT:    stxvd2x vs3, r3, r7
-; CHECK-P8-NEXT:    stxvd2x vs4, r3, r6
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r7
 ; CHECK-P8-NEXT:    stxvd2x vs5, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
index 908a711195ee2e4..dfa49a82781570e 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
@@ -15,14 +15,14 @@ define i16 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    mtfprd f0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    vmrghb v2, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -83,26 +83,26 @@ define i32 @test4elt(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f1, v2
-; CHECK-P8-NEXT:    xxswapd vs2, v2
-; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    xxswapd vs1, v2
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xscvspdpn f3, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    mffprwz r3, f3
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    mtvsrd v5, r3
 ; CHECK-P8-NEXT:    vmrghb v3, v4, v3
-; CHECK-P8-NEXT:    vmrghb v2, v2, v5
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    vmrghb v2, v2, v4
 ; CHECK-P8-NEXT:    vmrglh v2, v2, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprwz r3, f0
@@ -177,51 +177,51 @@ define i64 @test8elt(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 3
-; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 1
-; CHECK-P8-NEXT:    xscvspdpn f3, v2
-; CHECK-P8-NEXT:    xxsldwi vs6, v3, v3, 3
-; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 1
-; CHECK-P8-NEXT:    xscvspdpn f5, v3
-; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xscvspdpn f4, vs4
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvspdpn f6, vs6
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    xscvspdpn f7, vs7
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    xscvdpsxws f0, f6
-; CHECK-P8-NEXT:    xscvdpsxws f2, f7
-; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
 ; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvspdpn f0, vs2
+; CHECK-P8-NEXT:    xxswapd v3, vs2
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mffprwz r4, f5
-; CHECK-P8-NEXT:    vmrghb v2, v2, v4
-; CHECK-P8-NEXT:    vmrghb v3, v3, v5
-; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxsldwi vs4, v3, v3, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 1
+; CHECK-P8-NEXT:    vmrghb v4, v4, v5
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs5
+; CHECK-P8-NEXT:    mtvsrd v0, r4
+; CHECK-P8-NEXT:    vmrghb v2, v2, v5
+; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f1
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    vmrglh v2, v2, v4
+; CHECK-P8-NEXT:    vmrghb v5, v5, v0
 ; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    vmrghb v4, v4, v0
-; CHECK-P8-NEXT:    vmrghb v5, v5, v1
-; CHECK-P8-NEXT:    vmrglh v2, v3, v2
-; CHECK-P8-NEXT:    vmrglh v3, v5, v4
+; CHECK-P8-NEXT:    vmrghb v3, v3, v0
+; CHECK-P8-NEXT:    vmrglh v3, v3, v5
 ; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -347,98 +347,98 @@ define <16 x i8> @test16elt(ptr nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
 ; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs8, r3, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs2
-; CHECK-P8-NEXT:    xscvspdpn f5, vs2
-; CHECK-P8-NEXT:    xxswapd v5, vs3
-; CHECK-P8-NEXT:    xscvspdpn f6, vs3
-; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f8, v2
-; CHECK-P8-NEXT:    xxsldwi vs2, v3, v3, 3
-; CHECK-P8-NEXT:    xxsldwi vs9, v3, v3, 1
-; CHECK-P8-NEXT:    xscvspdpn f3, v5
-; CHECK-P8-NEXT:    xxswapd v7, vs4
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
-; CHECK-P8-NEXT:    xscvdpsxws f8, f8
-; CHECK-P8-NEXT:    xscvspdpn f9, vs9
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    mffprwz r3, f5
+; CHECK-P8-NEXT:    xscvspdpn f0, v3
+; CHECK-P8-NEXT:    mtvsrd v0, r3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xxsldwi vs1, v3, v3, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs3, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f0, vs2
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxswapd v2, vs2
+; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    xxswapd v4, vs6
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, v3
-; CHECK-P8-NEXT:    mtvsrd v0, r4
 ; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f8
-; CHECK-P8-NEXT:    vmrghb v2, v4, v0
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f5, f9
-; CHECK-P8-NEXT:    mtvsrd v4, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxsldwi vs0, v5, v5, 3
-; CHECK-P8-NEXT:    xscvspdpn f7, vs4
-; CHECK-P8-NEXT:    xxsldwi vs4, v7, v7, 3
-; CHECK-P8-NEXT:    mtvsrd v1, r3
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xxsldwi vs2, v5, v5, 1
-; CHECK-P8-NEXT:    mffprwz r3, f5
-; CHECK-P8-NEXT:    xscvdpsxws f5, f6
-; CHECK-P8-NEXT:    xxsldwi vs6, v7, v7, 1
-; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xscvspdpn f1, v7
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    xscvspdpn f4, vs4
-; CHECK-P8-NEXT:    xscvspdpn f6, vs6
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs5, v2, v2, 1
+; CHECK-P8-NEXT:    xxsldwi vs7, v4, v4, 3
+; CHECK-P8-NEXT:    xxswapd v5, vs8
+; CHECK-P8-NEXT:    xxsldwi vs9, v4, v4, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xxsldwi vs10, v5, v5, 3
+; CHECK-P8-NEXT:    xxsldwi vs11, v5, v5, 1
+; CHECK-P8-NEXT:    vmrghb v0, v0, v1
+; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs5
 ; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    xscvdpsxws f7, f7
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f5
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    vmrghb v4, v4, v1
-; CHECK-P8-NEXT:    vmrghb v5, v0, v8
-; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs7
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    vmrghb v3, v3, v1
+; CHECK-P8-NEXT:    mtvsrd v1, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xscvdpsxws f0, f4
-; CHECK-P8-NEXT:    xscvdpsxws f2, f6
-; CHECK-P8-NEXT:    vmrghb v3, v3, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r3
+; CHECK-P8-NEXT:    xscvspdpn f0, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v4
+; CHECK-P8-NEXT:    vmrglh v3, v3, v0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    vmrghb v1, v1, v6
+; CHECK-P8-NEXT:    mtvsrd v6, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs9
 ; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r3, f7
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    vmrghb v0, v0, v6
-; CHECK-P8-NEXT:    vmrghb v1, v1, v7
+; CHECK-P8-NEXT:    vmrghb v2, v2, v6
 ; CHECK-P8-NEXT:    mtvsrd v6, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs10
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs8
+; CHECK-P8-NEXT:    vmrglh v2, v2, v1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    vmrghb v6, v6, v7
 ; CHECK-P8-NEXT:    mtvsrd v7, r4
+; CHECK-P8-NEXT:    vmrghb v4, v4, v7
+; CHECK-P8-NEXT:    mtvsrd v7, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvspdpn f0, v5
 ; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    vmrghb v6, v6, v8
-; CHECK-P8-NEXT:    vmrghb v7, v7, v9
-; CHECK-P8-NEXT:    vmrglh v2, v4, v2
-; CHECK-P8-NEXT:    vmrglh v3, v5, v3
-; CHECK-P8-NEXT:    vmrglh v4, v1, v0
-; CHECK-P8-NEXT:    vmrglh v5, v7, v6
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs11
+; CHECK-P8-NEXT:    vmrglh v4, v4, v6
+; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    vmrghb v7, v8, v7
+; CHECK-P8-NEXT:    mtvsrd v8, r3
+; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P8-NEXT:    vmrghb v5, v5, v8
+; CHECK-P8-NEXT:    vmrglh v5, v5, v7
 ; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
@@ -655,14 +655,14 @@ define i16 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-NEXT:    mtfprd f0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
 ; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    vmrghb v2, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -723,26 +723,26 @@ define i32 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f1, v2
-; CHECK-P8-NEXT:    xxswapd vs2, v2
-; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    xxswapd vs1, v2
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xscvspdpn f3, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    mffprwz r3, f3
-; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    mtvsrd v5, r3
 ; CHECK-P8-NEXT:    vmrghb v3, v4, v3
-; CHECK-P8-NEXT:    vmrghb v2, v2, v5
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    vmrghb v2, v2, v4
 ; CHECK-P8-NEXT:    vmrglh v2, v2, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprwz r3, f0
@@ -817,51 +817,51 @@ define i64 @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
-; CHECK-P8-NEXT:    xxsldwi vs2, v2, v2, 3
-; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 1
-; CHECK-P8-NEXT:    xscvspdpn f3, v2
-; CHECK-P8-NEXT:    xxsldwi vs6, v3, v3, 3
-; CHECK-P8-NEXT:    xxsldwi vs7, v3, v3, 1
-; CHECK-P8-NEXT:    xscvspdpn f5, v3
-; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xscvspdpn f4, vs4
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvspdpn f6, vs6
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    xscvspdpn f7, vs7
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    xscvdpsxws f4, f4
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    xscvdpsxws f0, f6
-; CHECK-P8-NEXT:    xscvdpsxws f2, f7
-; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
 ; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs3, v2, v2, 1
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xscvspdpn f0, vs2
+; CHECK-P8-NEXT:    xxswapd v3, vs2
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r3, f1
-; CHECK-P8-NEXT:    mffprwz r4, f5
-; CHECK-P8-NEXT:    vmrghb v2, v2, v4
-; CHECK-P8-NEXT:    vmrghb v3, v3, v5
-; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xxsldwi vs4, v3, v3, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs5, v3, v3, 1
+; CHECK-P8-NEXT:    vmrghb v4, v4, v5
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs5
+; CHECK-P8-NEXT:    mtvsrd v0, r4
+; CHECK-P8-NEXT:    vmrghb v2, v2, v5
+; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f1
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    vmrglh v2, v2, v4
+; CHECK-P8-NEXT:    vmrghb v5, v5, v0
 ; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    vmrghb v4, v4, v0
-; CHECK-P8-NEXT:    vmrghb v5, v5, v1
-; CHECK-P8-NEXT:    vmrglh v2, v3, v2
-; CHECK-P8-NEXT:    vmrglh v3, v5, v4
+; CHECK-P8-NEXT:    vmrghb v3, v3, v0
+; CHECK-P8-NEXT:    vmrglh v3, v3, v5
 ; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -987,98 +987,98 @@ define <16 x i8> @test16elt_signed(ptr nocapture readonly) local_unnamed_addr #3
 ; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
 ; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs8, r3, r4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs2
-; CHECK-P8-NEXT:    xscvspdpn f5, vs2
-; CHECK-P8-NEXT:    xxswapd v5, vs3
-; CHECK-P8-NEXT:    xscvspdpn f6, vs3
-; CHECK-P8-NEXT:    xxsldwi vs1, v2, v2, 3
-; CHECK-P8-NEXT:    xscvspdpn f8, v2
-; CHECK-P8-NEXT:    xxsldwi vs2, v3, v3, 3
-; CHECK-P8-NEXT:    xxsldwi vs9, v3, v3, 1
-; CHECK-P8-NEXT:    xscvspdpn f3, v5
-; CHECK-P8-NEXT:    xxswapd v7, vs4
-; CHECK-P8-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
-; CHECK-P8-NEXT:    xscvdpsxws f8, f8
-; CHECK-P8-NEXT:    xscvspdpn f9, vs9
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxsldwi vs0, v2, v2, 1
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    mffprwz r3, f5
+; CHECK-P8-NEXT:    xscvspdpn f0, v3
+; CHECK-P8-NEXT:    mtvsrd v0, r3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xxsldwi vs1, v3, v3, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs1
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs3, v3, v3, 1
+; CHECK-P8-NEXT:    xscvspdpn f0, vs2
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs3
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxswapd v2, vs2
+; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    xxswapd v4, vs6
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f1, v3
-; CHECK-P8-NEXT:    mtvsrd v0, r4
 ; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mffprwz r3, f8
-; CHECK-P8-NEXT:    vmrghb v2, v4, v0
-; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f5, f9
-; CHECK-P8-NEXT:    mtvsrd v4, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    xxsldwi vs0, v5, v5, 3
-; CHECK-P8-NEXT:    xscvspdpn f7, vs4
-; CHECK-P8-NEXT:    xxsldwi vs4, v7, v7, 3
-; CHECK-P8-NEXT:    mtvsrd v1, r3
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvspdpn f0, vs0
-; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xxsldwi vs2, v5, v5, 1
-; CHECK-P8-NEXT:    mffprwz r3, f5
-; CHECK-P8-NEXT:    xscvdpsxws f5, f6
-; CHECK-P8-NEXT:    xxsldwi vs6, v7, v7, 1
-; CHECK-P8-NEXT:    xscvspdpn f2, vs2
-; CHECK-P8-NEXT:    xscvspdpn f1, v7
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    xscvspdpn f4, vs4
-; CHECK-P8-NEXT:    xscvspdpn f6, vs6
+; CHECK-P8-NEXT:    xscvspdpn f0, v2
+; CHECK-P8-NEXT:    xxsldwi vs4, v2, v2, 3
+; CHECK-P8-NEXT:    xscvspdpn f1, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxsldwi vs5, v2, v2, 1
+; CHECK-P8-NEXT:    xxsldwi vs7, v4, v4, 3
+; CHECK-P8-NEXT:    xxswapd v5, vs8
+; CHECK-P8-NEXT:    xxsldwi vs9, v4, v4, 1
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xxsldwi vs10, v5, v5, 3
+; CHECK-P8-NEXT:    xxsldwi vs11, v5, v5, 1
+; CHECK-P8-NEXT:    vmrghb v0, v0, v1
+; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs5
 ; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    xscvdpsxws f7, f7
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f5
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    vmrghb v4, v4, v1
-; CHECK-P8-NEXT:    vmrghb v5, v0, v8
-; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs7
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    vmrghb v3, v3, v1
+; CHECK-P8-NEXT:    mtvsrd v1, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xscvdpsxws f0, f4
-; CHECK-P8-NEXT:    xscvdpsxws f2, f6
-; CHECK-P8-NEXT:    vmrghb v3, v3, v6
-; CHECK-P8-NEXT:    mtvsrd v6, r3
+; CHECK-P8-NEXT:    xscvspdpn f0, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, v4
+; CHECK-P8-NEXT:    vmrglh v3, v3, v0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    vmrghb v1, v1, v6
+; CHECK-P8-NEXT:    mtvsrd v6, r4
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    xscvspdpn f1, vs9
 ; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r3, f7
+; CHECK-P8-NEXT:    xscvdpsxws f1, f1
 ; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    vmrghb v0, v0, v6
-; CHECK-P8-NEXT:    vmrghb v1, v1, v7
+; CHECK-P8-NEXT:    vmrghb v2, v2, v6
 ; CHECK-P8-NEXT:    mtvsrd v6, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs10
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs8
+; CHECK-P8-NEXT:    vmrglh v2, v2, v1
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    vmrghb v6, v6, v7
 ; CHECK-P8-NEXT:    mtvsrd v7, r4
+; CHECK-P8-NEXT:    vmrghb v4, v4, v7
+; CHECK-P8-NEXT:    mtvsrd v7, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f2
+; CHECK-P8-NEXT:    xscvspdpn f0, v5
 ; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    vmrghb v6, v6, v8
-; CHECK-P8-NEXT:    vmrghb v7, v7, v9
-; CHECK-P8-NEXT:    vmrglh v2, v4, v2
-; CHECK-P8-NEXT:    vmrglh v3, v5, v3
-; CHECK-P8-NEXT:    vmrglh v4, v1, v0
-; CHECK-P8-NEXT:    vmrglh v5, v7, v6
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvspdpn f0, vs11
+; CHECK-P8-NEXT:    vmrglh v4, v4, v6
+; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    vmrghb v7, v8, v7
+; CHECK-P8-NEXT:    mtvsrd v8, r3
+; CHECK-P8-NEXT:    xxmrglw vs0, v2, v3
+; CHECK-P8-NEXT:    vmrghb v5, v5, v8
+; CHECK-P8-NEXT:    vmrglh v5, v5, v7
 ; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
index 256ad2769587e87..5dcb2e4be3e37f1 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll
@@ -12,13 +12,13 @@
 define i32 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    xscvdpsxws f1, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprwz r3, f0
@@ -63,25 +63,25 @@ entry:
 define i64 @test4elt(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f2, f0
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f3, f1
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs1, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mtvsrd v3, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f2
 ; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mffprwz r3, f3
 ; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    vmrghh v2, v4, v2
-; CHECK-P8-NEXT:    vmrghh v3, v5, v3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-P8-NEXT:    mtvsrd v3, r3
+; CHECK-P8-NEXT:    vmrghh v3, v4, v3
 ; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -148,45 +148,45 @@ define <8 x i16> @test8elt(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f4, f0
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f5, f1
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f6, f2
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f7, f3
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f6
+; CHECK-P8-NEXT:    xxswapd vs2, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f6
 ; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f7
-; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mffprwz r4, f3
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    xxswapd vs5, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    mffprwz r3, f4
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    mtvsrd v4, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    vmrghh v2, v0, v2
-; CHECK-P8-NEXT:    vmrghh v3, v1, v3
+; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    xxswapd vs7, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f7, f7
+; CHECK-P8-NEXT:    vmrghh v2, v5, v2
+; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    mffprwz r3, f7
 ; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    vmrghh v4, v0, v4
-; CHECK-P8-NEXT:    vmrghh v5, v1, v5
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    vmrghh v3, v5, v3
+; CHECK-P8-NEXT:    mtvsrd v5, r4
 ; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    vmrghh v4, v5, v4
+; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    vmrghh v5, v0, v5
 ; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
@@ -288,92 +288,92 @@ entry:
 define void @test16elt(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    li r5, 80
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r7, 48
-; CHECK-P8-NEXT:    lxvd2x vs1, r4, r5
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r6
 ; CHECK-P8-NEXT:    li r6, 64
-; CHECK-P8-NEXT:    lxvd2x vs3, r4, r7
-; CHECK-P8-NEXT:    lxvd2x vs5, r4, r6
-; CHECK-P8-NEXT:    li r7, 80
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs8, r4, r6
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    lxvd2x vs9, r4, r6
 ; CHECK-P8-NEXT:    li r6, 96
-; CHECK-P8-NEXT:    xscvdpsxws f4, f0
-; CHECK-P8-NEXT:    lxvd2x vs7, r4, r7
-; CHECK-P8-NEXT:    lxvd2x vs10, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs12, r4, r6
 ; CHECK-P8-NEXT:    li r6, 112
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f6, f1
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f8, f2
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f9, f3
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f11, f5
-; CHECK-P8-NEXT:    xxswapd vs5, vs5
-; CHECK-P8-NEXT:    xscvdpsxws f12, f7
-; CHECK-P8-NEXT:    xxswapd vs7, vs7
-; CHECK-P8-NEXT:    mffprwz r7, f4
-; CHECK-P8-NEXT:    lxvd2x vs4, r4, r6
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    xscvdpsxws f13, f10
-; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f8
-; CHECK-P8-NEXT:    xscvdpsxws f6, f4
+; CHECK-P8-NEXT:    lxvd2x v2, r4, r6
+; CHECK-P8-NEXT:    xxswapd vs3, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mffprwz r4, f2
 ; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    mffprwz r4, f9
+; CHECK-P8-NEXT:    xxswapd vs7, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f6, f6
+; CHECK-P8-NEXT:    xxswapd vs1, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f11
+; CHECK-P8-NEXT:    xscvdpsxws f7, f7
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxswapd vs5, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    mffprwz r4, f4
+; CHECK-P8-NEXT:    xxswapd vs10, vs8
+; CHECK-P8-NEXT:    xscvdpsxws f8, f8
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    xscvdpsxws f10, f10
+; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    xxswapd vs11, vs9
+; CHECK-P8-NEXT:    xscvdpsxws f9, f9
+; CHECK-P8-NEXT:    mffprwz r4, f6
+; CHECK-P8-NEXT:    xscvdpsxws f11, f11
 ; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    mffprwz r4, f12
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r4, f9
 ; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f13
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mffprwz r4, f8
+; CHECK-P8-NEXT:    xxswapd vs13, vs12
+; CHECK-P8-NEXT:    xscvdpsxws f13, f13
 ; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    xxswapd vs6, vs10
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
-; CHECK-P8-NEXT:    mtvsrd v7, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xxswapd vs0, vs4
-; CHECK-P8-NEXT:    mtvsrd v2, r7
-; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvdpsxws f7, f7
-; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xscvdpsxws f4, f6
-; CHECK-P8-NEXT:    vmrghh v2, v8, v2
-; CHECK-P8-NEXT:    mtvsrd v8, r4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f12
+; CHECK-P8-NEXT:    mtvsrd v7, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghh v3, v9, v3
-; CHECK-P8-NEXT:    mtvsrd v9, r4
+; CHECK-P8-NEXT:    mtvsrd v8, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    xxswapd v3, v2
+; CHECK-P8-NEXT:    mtvsrd v9, r4
+; CHECK-P8-NEXT:    mffprwz r4, f7
+; CHECK-P8-NEXT:    mtvsrd v10, r4
+; CHECK-P8-NEXT:    mffprwz r4, f11
 ; CHECK-P8-NEXT:    vmrghh v4, v8, v4
 ; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    mffprwz r4, f7
+; CHECK-P8-NEXT:    mffprwz r4, f10
 ; CHECK-P8-NEXT:    vmrghh v5, v9, v5
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
-; CHECK-P8-NEXT:    vmrghh v0, v8, v0
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    vmrghh v0, v10, v0
+; CHECK-P8-NEXT:    mtvsrd v10, r4
+; CHECK-P8-NEXT:    mffprwz r4, f13
+; CHECK-P8-NEXT:    vmrghh v1, v8, v1
 ; CHECK-P8-NEXT:    mtvsrd v8, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P8-NEXT:    vmrghh v1, v9, v1
-; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
+; CHECK-P8-NEXT:    xscvdpsxws f0, v3
+; CHECK-P8-NEXT:    xxmrglw vs1, v1, v0
+; CHECK-P8-NEXT:    vmrghh v6, v9, v6
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    vmrghh v6, v8, v6
-; CHECK-P8-NEXT:    vmrghh v7, v9, v7
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, v2
+; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    mtvsrd v2, r4
+; CHECK-P8-NEXT:    vmrghh v7, v10, v7
+; CHECK-P8-NEXT:    xxmrglw vs2, v7, v6
+; CHECK-P8-NEXT:    vmrghh v8, v8, v9
+; CHECK-P8-NEXT:    xxmrglw vs0, v5, v4
+; CHECK-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-P8-NEXT:    xxmrglw vs3, v2, v8
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
-; CHECK-P8-NEXT:    xxmrglw vs2, v1, v0
-; CHECK-P8-NEXT:    xxswapd vs1, v2
-; CHECK-P8-NEXT:    xxmrglw vs3, v7, v6
 ; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
+; CHECK-P8-NEXT:    xxswapd vs1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v3
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
 ; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
@@ -559,13 +559,13 @@ entry:
 define i32 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    xscvdpsxws f1, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprwz r3, f0
@@ -610,25 +610,25 @@ entry:
 define i64 @test4elt_signed(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f2, f0
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f3, f1
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs1, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mtvsrd v3, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f2
 ; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mffprwz r3, f3
 ; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    vmrghh v2, v4, v2
-; CHECK-P8-NEXT:    vmrghh v3, v5, v3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-P8-NEXT:    mtvsrd v3, r3
+; CHECK-P8-NEXT:    vmrghh v3, v4, v3
 ; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -695,45 +695,45 @@ define <8 x i16> @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #2
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f4, f0
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f5, f1
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f6, f2
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f7, f3
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f6
+; CHECK-P8-NEXT:    xxswapd vs2, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f6
 ; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f7
-; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mffprwz r4, f3
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    xxswapd vs5, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    mffprwz r3, f4
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    mtvsrd v4, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    vmrghh v2, v0, v2
-; CHECK-P8-NEXT:    vmrghh v3, v1, v3
+; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    xxswapd vs7, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f7, f7
+; CHECK-P8-NEXT:    vmrghh v2, v5, v2
+; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    mffprwz r3, f7
 ; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    vmrghh v4, v0, v4
-; CHECK-P8-NEXT:    vmrghh v5, v1, v5
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    vmrghh v3, v5, v3
+; CHECK-P8-NEXT:    mtvsrd v5, r4
 ; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
+; CHECK-P8-NEXT:    vmrghh v4, v5, v4
+; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    vmrghh v5, v0, v5
 ; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
@@ -835,92 +835,92 @@ entry:
 define void @test16elt_signed(ptr noalias nocapture sret(<16 x i16>) %agg.result, ptr nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    li r5, 80
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r7, 48
-; CHECK-P8-NEXT:    lxvd2x vs1, r4, r5
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r6
 ; CHECK-P8-NEXT:    li r6, 64
-; CHECK-P8-NEXT:    lxvd2x vs3, r4, r7
-; CHECK-P8-NEXT:    lxvd2x vs5, r4, r6
-; CHECK-P8-NEXT:    li r7, 80
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs8, r4, r6
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    lxvd2x vs9, r4, r6
 ; CHECK-P8-NEXT:    li r6, 96
-; CHECK-P8-NEXT:    xscvdpsxws f4, f0
-; CHECK-P8-NEXT:    lxvd2x vs7, r4, r7
-; CHECK-P8-NEXT:    lxvd2x vs10, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs12, r4, r6
 ; CHECK-P8-NEXT:    li r6, 112
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f6, f1
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f8, f2
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f9, f3
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f11, f5
-; CHECK-P8-NEXT:    xxswapd vs5, vs5
-; CHECK-P8-NEXT:    xscvdpsxws f12, f7
-; CHECK-P8-NEXT:    xxswapd vs7, vs7
-; CHECK-P8-NEXT:    mffprwz r7, f4
-; CHECK-P8-NEXT:    lxvd2x vs4, r4, r6
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    xscvdpsxws f13, f10
-; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f8
-; CHECK-P8-NEXT:    xscvdpsxws f6, f4
+; CHECK-P8-NEXT:    lxvd2x v2, r4, r6
+; CHECK-P8-NEXT:    xxswapd vs3, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mffprwz r4, f2
 ; CHECK-P8-NEXT:    mtvsrd v4, r4
-; CHECK-P8-NEXT:    mffprwz r4, f9
+; CHECK-P8-NEXT:    xxswapd vs7, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f6, f6
+; CHECK-P8-NEXT:    xxswapd vs1, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f11
+; CHECK-P8-NEXT:    xscvdpsxws f7, f7
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
+; CHECK-P8-NEXT:    xxswapd vs5, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    mffprwz r4, f4
+; CHECK-P8-NEXT:    xxswapd vs10, vs8
+; CHECK-P8-NEXT:    xscvdpsxws f8, f8
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    xscvdpsxws f10, f10
+; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    xxswapd vs11, vs9
+; CHECK-P8-NEXT:    xscvdpsxws f9, f9
+; CHECK-P8-NEXT:    mffprwz r4, f6
+; CHECK-P8-NEXT:    xscvdpsxws f11, f11
 ; CHECK-P8-NEXT:    mtvsrd v0, r4
-; CHECK-P8-NEXT:    mffprwz r4, f12
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mffprwz r4, f9
 ; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f13
-; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mffprwz r4, f8
+; CHECK-P8-NEXT:    xxswapd vs13, vs12
+; CHECK-P8-NEXT:    xscvdpsxws f13, f13
 ; CHECK-P8-NEXT:    mtvsrd v6, r4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    xxswapd vs6, vs10
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
-; CHECK-P8-NEXT:    mtvsrd v7, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xxswapd vs0, vs4
-; CHECK-P8-NEXT:    mtvsrd v2, r7
-; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    xscvdpsxws f7, f7
-; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mffprwz r4, f2
-; CHECK-P8-NEXT:    xscvdpsxws f4, f6
-; CHECK-P8-NEXT:    vmrghh v2, v8, v2
-; CHECK-P8-NEXT:    mtvsrd v8, r4
+; CHECK-P8-NEXT:    xscvdpsxws f0, f12
+; CHECK-P8-NEXT:    mtvsrd v7, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
-; CHECK-P8-NEXT:    vmrghh v3, v9, v3
-; CHECK-P8-NEXT:    mtvsrd v9, r4
+; CHECK-P8-NEXT:    mtvsrd v8, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    xxswapd v3, v2
+; CHECK-P8-NEXT:    mtvsrd v9, r4
+; CHECK-P8-NEXT:    mffprwz r4, f7
+; CHECK-P8-NEXT:    mtvsrd v10, r4
+; CHECK-P8-NEXT:    mffprwz r4, f11
 ; CHECK-P8-NEXT:    vmrghh v4, v8, v4
 ; CHECK-P8-NEXT:    mtvsrd v8, r4
-; CHECK-P8-NEXT:    mffprwz r4, f7
+; CHECK-P8-NEXT:    mffprwz r4, f10
 ; CHECK-P8-NEXT:    vmrghh v5, v9, v5
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mffprwz r4, f4
-; CHECK-P8-NEXT:    vmrghh v0, v8, v0
+; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    vmrghh v0, v10, v0
+; CHECK-P8-NEXT:    mtvsrd v10, r4
+; CHECK-P8-NEXT:    mffprwz r4, f13
+; CHECK-P8-NEXT:    vmrghh v1, v8, v1
 ; CHECK-P8-NEXT:    mtvsrd v8, r4
 ; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
-; CHECK-P8-NEXT:    vmrghh v1, v9, v1
-; CHECK-P8-NEXT:    xxmrglw vs1, v5, v4
+; CHECK-P8-NEXT:    xscvdpsxws f0, v3
+; CHECK-P8-NEXT:    xxmrglw vs1, v1, v0
+; CHECK-P8-NEXT:    vmrghh v6, v9, v6
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    vmrghh v6, v8, v6
-; CHECK-P8-NEXT:    vmrghh v7, v9, v7
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, v2
+; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    mtvsrd v2, r4
+; CHECK-P8-NEXT:    vmrghh v7, v10, v7
+; CHECK-P8-NEXT:    xxmrglw vs2, v7, v6
+; CHECK-P8-NEXT:    vmrghh v8, v8, v9
+; CHECK-P8-NEXT:    xxmrglw vs0, v5, v4
+; CHECK-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-P8-NEXT:    xxmrglw vs3, v2, v8
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
-; CHECK-P8-NEXT:    xxmrglw vs2, v1, v0
-; CHECK-P8-NEXT:    xxswapd vs1, v2
-; CHECK-P8-NEXT:    xxmrglw vs3, v7, v6
 ; CHECK-P8-NEXT:    xxmrgld v3, vs3, vs2
+; CHECK-P8-NEXT:    xxswapd vs1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v3
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
 ; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
index d339a1218bbedba..1cd78ecd5cede4a 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll
@@ -88,24 +88,24 @@ entry:
 define void @test8elt(ptr noalias nocapture sret(<8 x i32>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
-; CHECK-P8-NEXT:    xxmrghd vs4, vs0, vs1
-; CHECK-P8-NEXT:    xxmrgld vs0, vs0, vs1
-; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P8-NEXT:    xxmrgld vs2, vs3, vs2
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
+; CHECK-P8-NEXT:    xxmrghd vs4, vs1, vs2
+; CHECK-P8-NEXT:    xxmrgld vs1, vs1, vs2
+; CHECK-P8-NEXT:    xxmrghd vs2, vs3, vs0
+; CHECK-P8-NEXT:    xxmrgld vs0, vs3, vs0
 ; CHECK-P8-NEXT:    xvcvdpuxws v2, vs4
-; CHECK-P8-NEXT:    xvcvdpuxws v3, vs0
-; CHECK-P8-NEXT:    xvcvdpuxws v4, vs1
-; CHECK-P8-NEXT:    xvcvdpuxws v5, vs2
+; CHECK-P8-NEXT:    xvcvdpuxws v3, vs1
+; CHECK-P8-NEXT:    xvcvdpuxws v4, vs0
 ; CHECK-P8-NEXT:    vmrgew v2, v3, v2
-; CHECK-P8-NEXT:    vmrgew v3, v5, v4
+; CHECK-P8-NEXT:    xvcvdpuxws v3, vs2
 ; CHECK-P8-NEXT:    stxvd2x v2, r3, r5
+; CHECK-P8-NEXT:    vmrgew v3, v4, v3
 ; CHECK-P8-NEXT:    stxvd2x v3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -158,44 +158,44 @@ entry:
 define void @test16elt(ptr noalias nocapture sret(<16 x i32>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    li r7, 64
-; CHECK-P8-NEXT:    li r8, 80
-; CHECK-P8-NEXT:    lxvd2x vs8, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
-; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    lxvd2x vs3, r4, r8
-; CHECK-P8-NEXT:    li r7, 96
-; CHECK-P8-NEXT:    li r8, 112
-; CHECK-P8-NEXT:    lxvd2x vs5, r4, r7
-; CHECK-P8-NEXT:    li r7, 16
-; CHECK-P8-NEXT:    lxvd2x vs6, r4, r8
-; CHECK-P8-NEXT:    xxmrghd vs4, vs0, vs1
-; CHECK-P8-NEXT:    xxmrgld vs0, vs0, vs1
-; CHECK-P8-NEXT:    lxvd2x vs7, r4, r7
-; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT:    li r6, 96
+; CHECK-P8-NEXT:    li r7, 112
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r6
+; CHECK-P8-NEXT:    li r6, 64
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r7
+; CHECK-P8-NEXT:    li r7, 80
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r7
+; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r7
+; CHECK-P8-NEXT:    xxmrghd vs8, vs5, vs6
+; CHECK-P8-NEXT:    xxmrgld vs5, vs5, vs6
+; CHECK-P8-NEXT:    xxmrghd vs6, vs2, vs3
 ; CHECK-P8-NEXT:    xxmrgld vs2, vs2, vs3
-; CHECK-P8-NEXT:    xxmrghd vs3, vs5, vs6
-; CHECK-P8-NEXT:    xvcvdpuxws v3, vs0
-; CHECK-P8-NEXT:    xxmrgld vs0, vs5, vs6
-; CHECK-P8-NEXT:    xvcvdpuxws v4, vs1
-; CHECK-P8-NEXT:    xxmrghd vs1, vs8, vs7
+; CHECK-P8-NEXT:    xxmrghd vs3, vs0, vs1
+; CHECK-P8-NEXT:    xxmrgld vs0, vs0, vs1
+; CHECK-P8-NEXT:    xxmrghd vs1, vs7, vs4
+; CHECK-P8-NEXT:    xxmrgld vs4, vs7, vs4
+; CHECK-P8-NEXT:    xvcvdpuxws v2, vs8
+; CHECK-P8-NEXT:    xvcvdpuxws v3, vs5
+; CHECK-P8-NEXT:    xvcvdpuxws v4, vs6
 ; CHECK-P8-NEXT:    xvcvdpuxws v5, vs2
-; CHECK-P8-NEXT:    xxmrgld vs2, vs8, vs7
-; CHECK-P8-NEXT:    xvcvdpuxws v2, vs4
 ; CHECK-P8-NEXT:    xvcvdpuxws v0, vs3
-; CHECK-P8-NEXT:    xvcvdpuxws v1, vs0
-; CHECK-P8-NEXT:    xvcvdpuxws v6, vs1
-; CHECK-P8-NEXT:    xvcvdpuxws v7, vs2
 ; CHECK-P8-NEXT:    vmrgew v2, v3, v2
-; CHECK-P8-NEXT:    vmrgew v3, v5, v4
-; CHECK-P8-NEXT:    vmrgew v4, v1, v0
-; CHECK-P8-NEXT:    vmrgew v5, v7, v6
+; CHECK-P8-NEXT:    xvcvdpuxws v3, vs0
+; CHECK-P8-NEXT:    vmrgew v4, v5, v4
+; CHECK-P8-NEXT:    xvcvdpuxws v5, vs1
+; CHECK-P8-NEXT:    vmrgew v3, v3, v0
+; CHECK-P8-NEXT:    xvcvdpuxws v0, vs4
+; CHECK-P8-NEXT:    stxvd2x v3, r3, r7
 ; CHECK-P8-NEXT:    stxvd2x v4, r3, r6
-; CHECK-P8-NEXT:    stxvd2x v3, r3, r5
-; CHECK-P8-NEXT:    stxvd2x v2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x v2, r3, r5
+; CHECK-P8-NEXT:    vmrgew v5, v0, v5
 ; CHECK-P8-NEXT:    stxvd2x v5, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -356,24 +356,24 @@ entry:
 define void @test8elt_signed(ptr noalias nocapture sret(<8 x i32>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
-; CHECK-P8-NEXT:    xxmrghd vs4, vs0, vs1
-; CHECK-P8-NEXT:    xxmrgld vs0, vs0, vs1
-; CHECK-P8-NEXT:    xxmrghd vs1, vs3, vs2
-; CHECK-P8-NEXT:    xxmrgld vs2, vs3, vs2
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
+; CHECK-P8-NEXT:    xxmrghd vs4, vs1, vs2
+; CHECK-P8-NEXT:    xxmrgld vs1, vs1, vs2
+; CHECK-P8-NEXT:    xxmrghd vs2, vs3, vs0
+; CHECK-P8-NEXT:    xxmrgld vs0, vs3, vs0
 ; CHECK-P8-NEXT:    xvcvdpsxws v2, vs4
-; CHECK-P8-NEXT:    xvcvdpsxws v3, vs0
-; CHECK-P8-NEXT:    xvcvdpsxws v4, vs1
-; CHECK-P8-NEXT:    xvcvdpsxws v5, vs2
+; CHECK-P8-NEXT:    xvcvdpsxws v3, vs1
+; CHECK-P8-NEXT:    xvcvdpsxws v4, vs0
 ; CHECK-P8-NEXT:    vmrgew v2, v3, v2
-; CHECK-P8-NEXT:    vmrgew v3, v5, v4
+; CHECK-P8-NEXT:    xvcvdpsxws v3, vs2
 ; CHECK-P8-NEXT:    stxvd2x v2, r3, r5
+; CHECK-P8-NEXT:    vmrgew v3, v4, v3
 ; CHECK-P8-NEXT:    stxvd2x v3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -426,44 +426,44 @@ entry:
 define void @test16elt_signed(ptr noalias nocapture sret(<16 x i32>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    li r7, 64
-; CHECK-P8-NEXT:    li r8, 80
-; CHECK-P8-NEXT:    lxvd2x vs8, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
-; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    lxvd2x vs3, r4, r8
-; CHECK-P8-NEXT:    li r7, 96
-; CHECK-P8-NEXT:    li r8, 112
-; CHECK-P8-NEXT:    lxvd2x vs5, r4, r7
-; CHECK-P8-NEXT:    li r7, 16
-; CHECK-P8-NEXT:    lxvd2x vs6, r4, r8
-; CHECK-P8-NEXT:    xxmrghd vs4, vs0, vs1
-; CHECK-P8-NEXT:    xxmrgld vs0, vs0, vs1
-; CHECK-P8-NEXT:    lxvd2x vs7, r4, r7
-; CHECK-P8-NEXT:    xxmrghd vs1, vs2, vs3
+; CHECK-P8-NEXT:    li r6, 96
+; CHECK-P8-NEXT:    li r7, 112
+; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r6
+; CHECK-P8-NEXT:    li r6, 64
+; CHECK-P8-NEXT:    lxvd2x vs1, r4, r7
+; CHECK-P8-NEXT:    li r7, 80
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r5
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r7
+; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r6
+; CHECK-P8-NEXT:    lxvd2x vs6, r4, r7
+; CHECK-P8-NEXT:    xxmrghd vs8, vs5, vs6
+; CHECK-P8-NEXT:    xxmrgld vs5, vs5, vs6
+; CHECK-P8-NEXT:    xxmrghd vs6, vs2, vs3
 ; CHECK-P8-NEXT:    xxmrgld vs2, vs2, vs3
-; CHECK-P8-NEXT:    xxmrghd vs3, vs5, vs6
-; CHECK-P8-NEXT:    xvcvdpsxws v3, vs0
-; CHECK-P8-NEXT:    xxmrgld vs0, vs5, vs6
-; CHECK-P8-NEXT:    xvcvdpsxws v4, vs1
-; CHECK-P8-NEXT:    xxmrghd vs1, vs8, vs7
+; CHECK-P8-NEXT:    xxmrghd vs3, vs0, vs1
+; CHECK-P8-NEXT:    xxmrgld vs0, vs0, vs1
+; CHECK-P8-NEXT:    xxmrghd vs1, vs7, vs4
+; CHECK-P8-NEXT:    xxmrgld vs4, vs7, vs4
+; CHECK-P8-NEXT:    xvcvdpsxws v2, vs8
+; CHECK-P8-NEXT:    xvcvdpsxws v3, vs5
+; CHECK-P8-NEXT:    xvcvdpsxws v4, vs6
 ; CHECK-P8-NEXT:    xvcvdpsxws v5, vs2
-; CHECK-P8-NEXT:    xxmrgld vs2, vs8, vs7
-; CHECK-P8-NEXT:    xvcvdpsxws v2, vs4
 ; CHECK-P8-NEXT:    xvcvdpsxws v0, vs3
-; CHECK-P8-NEXT:    xvcvdpsxws v1, vs0
-; CHECK-P8-NEXT:    xvcvdpsxws v6, vs1
-; CHECK-P8-NEXT:    xvcvdpsxws v7, vs2
 ; CHECK-P8-NEXT:    vmrgew v2, v3, v2
-; CHECK-P8-NEXT:    vmrgew v3, v5, v4
-; CHECK-P8-NEXT:    vmrgew v4, v1, v0
-; CHECK-P8-NEXT:    vmrgew v5, v7, v6
+; CHECK-P8-NEXT:    xvcvdpsxws v3, vs0
+; CHECK-P8-NEXT:    vmrgew v4, v5, v4
+; CHECK-P8-NEXT:    xvcvdpsxws v5, vs1
+; CHECK-P8-NEXT:    vmrgew v3, v3, v0
+; CHECK-P8-NEXT:    xvcvdpsxws v0, vs4
+; CHECK-P8-NEXT:    stxvd2x v3, r3, r7
 ; CHECK-P8-NEXT:    stxvd2x v4, r3, r6
-; CHECK-P8-NEXT:    stxvd2x v3, r3, r5
-; CHECK-P8-NEXT:    stxvd2x v2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x v2, r3, r5
+; CHECK-P8-NEXT:    vmrgew v5, v0, v5
 ; CHECK-P8-NEXT:    stxvd2x v5, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
index 56b47c0634f68a5..dd5cb59a48bf026 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll
@@ -12,13 +12,13 @@
 define i16 @test2elt(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    xscvdpsxws f1, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    vmrghb v2, v2, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -70,25 +70,25 @@ entry:
 define i32 @test4elt(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f2, f0
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f3, f1
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs1, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mtvsrd v3, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f2
 ; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mffprwz r3, f3
 ; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    vmrghb v2, v4, v2
-; CHECK-P8-NEXT:    vmrghb v3, v5, v3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    vmrghb v2, v3, v2
+; CHECK-P8-NEXT:    mtvsrd v3, r3
+; CHECK-P8-NEXT:    vmrghb v3, v4, v3
 ; CHECK-P8-NEXT:    vmrglh v2, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprwz r3, f0
@@ -157,45 +157,45 @@ define i64 @test8elt(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f4, f0
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f5, f1
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f6, f2
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f7, f3
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f6
+; CHECK-P8-NEXT:    xxswapd vs2, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f6
 ; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f7
-; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mffprwz r4, f3
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    xxswapd vs5, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    mffprwz r3, f4
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    mtvsrd v4, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    vmrghb v2, v0, v2
-; CHECK-P8-NEXT:    vmrghb v3, v1, v3
+; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    xxswapd vs7, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f7, f7
+; CHECK-P8-NEXT:    vmrghb v2, v5, v2
+; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    mffprwz r3, f7
 ; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    vmrghb v4, v0, v4
-; CHECK-P8-NEXT:    vmrghb v5, v1, v5
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    vmrghb v3, v5, v3
+; CHECK-P8-NEXT:    mtvsrd v5, r4
 ; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    vmrghb v4, v5, v4
+; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    vmrghb v5, v0, v5
 ; CHECK-P8-NEXT:    vmrglh v3, v5, v4
 ; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
@@ -302,91 +302,91 @@ entry:
 define <16 x i8> @test16elt(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
-; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
 ; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
 ; CHECK-P8-NEXT:    li r4, 64
-; CHECK-P8-NEXT:    xscvdpsxws f4, f0
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    lxvd2x vs5, r3, r4
-; CHECK-P8-NEXT:    li r4, 80
-; CHECK-P8-NEXT:    xscvdpsxws f6, f1
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    lxvd2x vs7, r3, r4
-; CHECK-P8-NEXT:    li r4, 96
-; CHECK-P8-NEXT:    xscvdpsxws f8, f2
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
 ; CHECK-P8-NEXT:    lxvd2x vs9, r3, r4
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    lxvd2x vs12, r3, r4
 ; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    xscvdpsxws f10, f3
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    lxvd2x vs11, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f12, f5
-; CHECK-P8-NEXT:    xxswapd vs5, vs5
-; CHECK-P8-NEXT:    xscvdpsxws f13, f7
-; CHECK-P8-NEXT:    xxswapd vs7, vs7
-; CHECK-P8-NEXT:    xscvdpsxws v2, f9
-; CHECK-P8-NEXT:    xxswapd vs9, vs9
-; CHECK-P8-NEXT:    xscvdpsxws v3, f11
-; CHECK-P8-NEXT:    xxswapd vs11, vs11
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    lxvd2x v2, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs2, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f8
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f10
+; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    xxswapd vs10, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f10, f10
+; CHECK-P8-NEXT:    xxswapd vs7, vs3
 ; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mffprwz r3, f12
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f13
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
 ; CHECK-P8-NEXT:    xscvdpsxws f7, f7
-; CHECK-P8-NEXT:    mtvsrd v6, r3
-; CHECK-P8-NEXT:    mfvsrwz r3, v2
-; CHECK-P8-NEXT:    mtvsrd v2, r4
-; CHECK-P8-NEXT:    mfvsrwz r4, v3
+; CHECK-P8-NEXT:    xxswapd vs5, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    mffprwz r4, f4
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r4, f3
+; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xxswapd vs8, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f6, f6
+; CHECK-P8-NEXT:    mffprwz r3, f6
+; CHECK-P8-NEXT:    mtvsrd v0, r3
+; CHECK-P8-NEXT:    xscvdpsxws f8, f8
+; CHECK-P8-NEXT:    xscvdpsxws f0, f12
+; CHECK-P8-NEXT:    mtvsrd v7, r4
+; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    xxswapd vs11, vs9
 ; CHECK-P8-NEXT:    xscvdpsxws f9, f9
+; CHECK-P8-NEXT:    mffprwz r3, f9
+; CHECK-P8-NEXT:    mtvsrd v6, r3
+; CHECK-P8-NEXT:    mffprwz r3, f2
 ; CHECK-P8-NEXT:    xscvdpsxws f11, f11
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f1
 ; CHECK-P8-NEXT:    mtvsrd v8, r3
+; CHECK-P8-NEXT:    mffprwz r3, f8
+; CHECK-P8-NEXT:    xxswapd vs13, vs12
+; CHECK-P8-NEXT:    xscvdpsxws f13, f13
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    mffprwz r4, f3
+; CHECK-P8-NEXT:    mffprwz r4, f7
+; CHECK-P8-NEXT:    xxswapd v3, v2
 ; CHECK-P8-NEXT:    vmrghb v4, v8, v4
-; CHECK-P8-NEXT:    vmrghb v5, v9, v5
 ; CHECK-P8-NEXT:    mtvsrd v8, r3
+; CHECK-P8-NEXT:    mffprwz r3, f11
+; CHECK-P8-NEXT:    vmrghb v5, v9, v5
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mffprwz r3, f5
-; CHECK-P8-NEXT:    mffprwz r4, f7
+; CHECK-P8-NEXT:    mffprwz r4, f10
 ; CHECK-P8-NEXT:    vmrghb v0, v8, v0
-; CHECK-P8-NEXT:    vmrghb v1, v9, v1
 ; CHECK-P8-NEXT:    mtvsrd v8, r3
+; CHECK-P8-NEXT:    mffprwz r3, f13
+; CHECK-P8-NEXT:    vmrghb v1, v9, v1
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mffprwz r3, f9
-; CHECK-P8-NEXT:    mffprwz r4, f11
 ; CHECK-P8-NEXT:    vmrghb v6, v8, v6
-; CHECK-P8-NEXT:    vmrghb v2, v9, v2
 ; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    vmrghb v3, v8, v3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, v3
 ; CHECK-P8-NEXT:    vmrghb v7, v9, v7
-; CHECK-P8-NEXT:    vmrglh v4, v5, v4
-; CHECK-P8-NEXT:    vmrglh v5, v1, v0
-; CHECK-P8-NEXT:    vmrglh v2, v2, v6
-; CHECK-P8-NEXT:    vmrglh v3, v7, v3
-; CHECK-P8-NEXT:    xxmrglw vs0, v5, v4
-; CHECK-P8-NEXT:    xxmrglw vs1, v3, v2
+; CHECK-P8-NEXT:    mtvsrd v9, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, v2
+; CHECK-P8-NEXT:    mtvsrd v3, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    vmrghb v8, v8, v9
+; CHECK-P8-NEXT:    vmrghb v2, v3, v2
+; CHECK-P8-NEXT:    vmrglh v3, v5, v4
+; CHECK-P8-NEXT:    vmrglh v4, v1, v0
+; CHECK-P8-NEXT:    vmrglh v5, v7, v6
+; CHECK-P8-NEXT:    vmrglh v2, v2, v8
+; CHECK-P8-NEXT:    xxmrglw vs0, v4, v3
+; CHECK-P8-NEXT:    xxmrglw vs1, v2, v5
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;
@@ -567,13 +567,13 @@ entry:
 define i16 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    xscvdpsxws f1, v2
+; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r4, f0
-; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    vmrghb v2, v2, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprd r3, f0
@@ -625,25 +625,25 @@ entry:
 define i32 @test4elt_signed(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f2, f0
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f3, f1
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs1, vs0
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f0
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mtvsrd v3, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f2
 ; CHECK-P8-NEXT:    mffprwz r4, f1
+; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, vs2
+; CHECK-P8-NEXT:    xscvdpsxws f3, f3
+; CHECK-P8-NEXT:    mffprwz r3, f3
 ; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    vmrghb v2, v4, v2
-; CHECK-P8-NEXT:    vmrghb v3, v5, v3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    vmrghb v2, v3, v2
+; CHECK-P8-NEXT:    mtvsrd v3, r3
+; CHECK-P8-NEXT:    vmrghb v3, v4, v3
 ; CHECK-P8-NEXT:    vmrglh v2, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-NEXT:    mffprwz r3, f0
@@ -712,45 +712,45 @@ define i64 @test8elt_signed(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f4, f0
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xscvdpsxws f5, f1
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xscvdpsxws f6, f2
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xscvdpsxws f7, f3
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    xscvdpsxws f2, f2
 ; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    mffprwz r3, f6
+; CHECK-P8-NEXT:    xxswapd vs2, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    xscvdpsxws f0, f6
 ; CHECK-P8-NEXT:    mtvsrd v3, r4
-; CHECK-P8-NEXT:    mffprwz r4, f7
-; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    mffprwz r4, f3
 ; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f1
-; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    xxswapd vs5, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    mffprwz r3, f4
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    mtvsrd v4, r3
 ; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    mffprwz r4, f3
-; CHECK-P8-NEXT:    vmrghb v2, v0, v2
-; CHECK-P8-NEXT:    vmrghb v3, v1, v3
+; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    xxswapd vs7, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f7, f7
+; CHECK-P8-NEXT:    vmrghb v2, v5, v2
+; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    mffprwz r3, f7
 ; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    vmrghb v4, v0, v4
-; CHECK-P8-NEXT:    vmrghb v5, v1, v5
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    vmrghb v3, v5, v3
+; CHECK-P8-NEXT:    mtvsrd v5, r4
 ; CHECK-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-P8-NEXT:    vmrghb v4, v5, v4
+; CHECK-P8-NEXT:    mtvsrd v5, r3
+; CHECK-P8-NEXT:    vmrghb v5, v0, v5
 ; CHECK-P8-NEXT:    vmrglh v3, v5, v4
 ; CHECK-P8-NEXT:    xxmrglw vs0, v3, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
@@ -857,91 +857,91 @@ entry:
 define <16 x i8> @test16elt_signed(ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, r3, r4
-; CHECK-P8-NEXT:    li r4, 32
-; CHECK-P8-NEXT:    lxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
 ; CHECK-P8-NEXT:    lxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    lxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    lxvd2x vs6, r3, r4
 ; CHECK-P8-NEXT:    li r4, 64
-; CHECK-P8-NEXT:    xscvdpsxws f4, f0
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    lxvd2x vs5, r3, r4
-; CHECK-P8-NEXT:    li r4, 80
-; CHECK-P8-NEXT:    xscvdpsxws f6, f1
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    lxvd2x vs7, r3, r4
-; CHECK-P8-NEXT:    li r4, 96
-; CHECK-P8-NEXT:    xscvdpsxws f8, f2
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
 ; CHECK-P8-NEXT:    lxvd2x vs9, r3, r4
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    lxvd2x vs12, r3, r4
 ; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    xscvdpsxws f10, f3
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    lxvd2x vs11, r3, r4
-; CHECK-P8-NEXT:    xscvdpsxws f12, f5
-; CHECK-P8-NEXT:    xxswapd vs5, vs5
-; CHECK-P8-NEXT:    xscvdpsxws f13, f7
-; CHECK-P8-NEXT:    xxswapd vs7, vs7
-; CHECK-P8-NEXT:    xscvdpsxws v2, f9
-; CHECK-P8-NEXT:    xxswapd vs9, vs9
-; CHECK-P8-NEXT:    xscvdpsxws v3, f11
-; CHECK-P8-NEXT:    xxswapd vs11, vs11
-; CHECK-P8-NEXT:    mffprwz r3, f4
-; CHECK-P8-NEXT:    mffprwz r4, f6
-; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    lxvd2x v2, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs2, vs1
 ; CHECK-P8-NEXT:    xscvdpsxws f1, f1
-; CHECK-P8-NEXT:    mtvsrd v4, r3
-; CHECK-P8-NEXT:    mffprwz r3, f8
-; CHECK-P8-NEXT:    mtvsrd v5, r4
-; CHECK-P8-NEXT:    mffprwz r4, f10
+; CHECK-P8-NEXT:    mffprwz r3, f1
 ; CHECK-P8-NEXT:    xscvdpsxws f2, f2
+; CHECK-P8-NEXT:    mtvsrd v4, r3
+; CHECK-P8-NEXT:    xxswapd vs10, vs0
+; CHECK-P8-NEXT:    xscvdpsxws f0, f0
+; CHECK-P8-NEXT:    xscvdpsxws f10, f10
+; CHECK-P8-NEXT:    xxswapd vs7, vs3
 ; CHECK-P8-NEXT:    xscvdpsxws f3, f3
-; CHECK-P8-NEXT:    mtvsrd v0, r3
-; CHECK-P8-NEXT:    mffprwz r3, f12
-; CHECK-P8-NEXT:    mtvsrd v1, r4
-; CHECK-P8-NEXT:    mffprwz r4, f13
-; CHECK-P8-NEXT:    xscvdpsxws f5, f5
 ; CHECK-P8-NEXT:    xscvdpsxws f7, f7
-; CHECK-P8-NEXT:    mtvsrd v6, r3
-; CHECK-P8-NEXT:    mfvsrwz r3, v2
-; CHECK-P8-NEXT:    mtvsrd v2, r4
-; CHECK-P8-NEXT:    mfvsrwz r4, v3
+; CHECK-P8-NEXT:    xxswapd vs5, vs4
+; CHECK-P8-NEXT:    xscvdpsxws f4, f4
+; CHECK-P8-NEXT:    mffprwz r4, f4
+; CHECK-P8-NEXT:    xscvdpsxws f5, f5
+; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    mffprwz r4, f3
+; CHECK-P8-NEXT:    mtvsrd v1, r4
+; CHECK-P8-NEXT:    mffprwz r4, f0
+; CHECK-P8-NEXT:    xxswapd vs8, vs6
+; CHECK-P8-NEXT:    xscvdpsxws f6, f6
+; CHECK-P8-NEXT:    mffprwz r3, f6
+; CHECK-P8-NEXT:    mtvsrd v0, r3
+; CHECK-P8-NEXT:    xscvdpsxws f8, f8
+; CHECK-P8-NEXT:    xscvdpsxws f0, f12
+; CHECK-P8-NEXT:    mtvsrd v7, r4
+; CHECK-P8-NEXT:    mffprwz r4, f5
+; CHECK-P8-NEXT:    xxswapd vs11, vs9
 ; CHECK-P8-NEXT:    xscvdpsxws f9, f9
+; CHECK-P8-NEXT:    mffprwz r3, f9
+; CHECK-P8-NEXT:    mtvsrd v6, r3
+; CHECK-P8-NEXT:    mffprwz r3, f2
 ; CHECK-P8-NEXT:    xscvdpsxws f11, f11
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    mtvsrd v7, r4
-; CHECK-P8-NEXT:    mffprwz r3, f0
-; CHECK-P8-NEXT:    mffprwz r4, f1
 ; CHECK-P8-NEXT:    mtvsrd v8, r3
+; CHECK-P8-NEXT:    mffprwz r3, f8
+; CHECK-P8-NEXT:    xxswapd vs13, vs12
+; CHECK-P8-NEXT:    xscvdpsxws f13, f13
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mffprwz r3, f2
-; CHECK-P8-NEXT:    mffprwz r4, f3
+; CHECK-P8-NEXT:    mffprwz r4, f7
+; CHECK-P8-NEXT:    xxswapd v3, v2
 ; CHECK-P8-NEXT:    vmrghb v4, v8, v4
-; CHECK-P8-NEXT:    vmrghb v5, v9, v5
 ; CHECK-P8-NEXT:    mtvsrd v8, r3
+; CHECK-P8-NEXT:    mffprwz r3, f11
+; CHECK-P8-NEXT:    vmrghb v5, v9, v5
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mffprwz r3, f5
-; CHECK-P8-NEXT:    mffprwz r4, f7
+; CHECK-P8-NEXT:    mffprwz r4, f10
 ; CHECK-P8-NEXT:    vmrghb v0, v8, v0
-; CHECK-P8-NEXT:    vmrghb v1, v9, v1
 ; CHECK-P8-NEXT:    mtvsrd v8, r3
+; CHECK-P8-NEXT:    mffprwz r3, f13
+; CHECK-P8-NEXT:    vmrghb v1, v9, v1
 ; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    mffprwz r3, f9
-; CHECK-P8-NEXT:    mffprwz r4, f11
 ; CHECK-P8-NEXT:    vmrghb v6, v8, v6
-; CHECK-P8-NEXT:    vmrghb v2, v9, v2
 ; CHECK-P8-NEXT:    mtvsrd v8, r3
-; CHECK-P8-NEXT:    mtvsrd v9, r4
-; CHECK-P8-NEXT:    vmrghb v3, v8, v3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, v3
 ; CHECK-P8-NEXT:    vmrghb v7, v9, v7
-; CHECK-P8-NEXT:    vmrglh v4, v5, v4
-; CHECK-P8-NEXT:    vmrglh v5, v1, v0
-; CHECK-P8-NEXT:    vmrglh v2, v2, v6
-; CHECK-P8-NEXT:    vmrglh v3, v7, v3
-; CHECK-P8-NEXT:    xxmrglw vs0, v5, v4
-; CHECK-P8-NEXT:    xxmrglw vs1, v3, v2
+; CHECK-P8-NEXT:    mtvsrd v9, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    xscvdpsxws f0, v2
+; CHECK-P8-NEXT:    mtvsrd v3, r3
+; CHECK-P8-NEXT:    mffprwz r3, f0
+; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    vmrghb v8, v8, v9
+; CHECK-P8-NEXT:    vmrghb v2, v3, v2
+; CHECK-P8-NEXT:    vmrglh v3, v5, v4
+; CHECK-P8-NEXT:    vmrglh v4, v1, v0
+; CHECK-P8-NEXT:    vmrglh v5, v7, v6
+; CHECK-P8-NEXT:    vmrglh v2, v2, v8
+; CHECK-P8-NEXT:    xxmrglw vs0, v4, v3
+; CHECK-P8-NEXT:    xxmrglw vs1, v2, v5
 ; CHECK-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll
index 30fb41fa77a71f5..5b360f099e34dec 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll
@@ -101,21 +101,21 @@ entry:
 define void @test16elt(ptr noalias nocapture sret(<16 x i32>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r5, 48
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    li r7, 16
 ; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    xvcvspuxws vs3, vs3
 ; CHECK-P8-NEXT:    xvcvspuxws vs0, vs0
-; CHECK-P8-NEXT:    xvcvspuxws vs1, vs1
 ; CHECK-P8-NEXT:    xvcvspuxws vs2, vs2
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    xvcvspuxws vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    xvcvspuxws vs0, vs3
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -248,21 +248,21 @@ entry:
 define void @test16elt_signed(ptr noalias nocapture sret(<16 x i32>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r5, 48
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    li r7, 16
 ; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    xvcvspsxws vs3, vs3
 ; CHECK-P8-NEXT:    xvcvspsxws vs0, vs0
-; CHECK-P8-NEXT:    xvcvspsxws vs1, vs1
 ; CHECK-P8-NEXT:    xvcvspsxws vs2, vs2
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    xvcvspsxws vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    xvcvspsxws vs0, vs3
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll
index 9465689509c00e2..b1e4cc1766cfb06 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll
@@ -70,21 +70,21 @@ entry:
 define void @test8elt(ptr noalias nocapture sret(<8 x i64>) %agg.result, ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r5, 48
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    li r7, 16
 ; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    xvcvdpuxds vs3, vs3
 ; CHECK-P8-NEXT:    xvcvdpuxds vs0, vs0
-; CHECK-P8-NEXT:    xvcvdpuxds vs1, vs1
 ; CHECK-P8-NEXT:    xvcvdpuxds vs2, vs2
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    xvcvdpuxds vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    xvcvdpuxds vs0, vs3
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
@@ -128,13 +128,14 @@ entry:
 define void @test16elt(ptr noalias nocapture sret(<16 x i64>) %agg.result, ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r7, 64
-; CHECK-P8-NEXT:    li r8, 96
-; CHECK-P8-NEXT:    li r9, 112
-; CHECK-P8-NEXT:    li r10, 80
-; CHECK-P8-NEXT:    li r11, 48
+; CHECK-P8-NEXT:    li r5, 112
+; CHECK-P8-NEXT:    li r6, 96
+; CHECK-P8-NEXT:    li r7, 80
+; CHECK-P8-NEXT:    li r8, 64
+; CHECK-P8-NEXT:    li r9, 48
+; CHECK-P8-NEXT:    li r10, 32
+; CHECK-P8-NEXT:    li r11, 16
+; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
@@ -142,23 +143,22 @@ define void @test16elt(ptr noalias nocapture sret(<16 x i64>) %agg.result, ptr n
 ; CHECK-P8-NEXT:    lxvd2x vs4, r4, r9
 ; CHECK-P8-NEXT:    lxvd2x vs5, r4, r10
 ; CHECK-P8-NEXT:    lxvd2x vs6, r4, r11
-; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
 ; CHECK-P8-NEXT:    xvcvdpuxds vs0, vs0
-; CHECK-P8-NEXT:    xvcvdpuxds vs1, vs1
-; CHECK-P8-NEXT:    xvcvdpuxds vs2, vs2
-; CHECK-P8-NEXT:    xvcvdpuxds vs3, vs3
-; CHECK-P8-NEXT:    xvcvdpuxds vs4, vs4
-; CHECK-P8-NEXT:    xvcvdpuxds vs5, vs5
 ; CHECK-P8-NEXT:    xvcvdpuxds vs6, vs6
-; CHECK-P8-NEXT:    xvcvdpuxds vs7, vs7
-; CHECK-P8-NEXT:    stxvd2x vs4, r3, r9
+; CHECK-P8-NEXT:    xvcvdpuxds vs5, vs5
+; CHECK-P8-NEXT:    xvcvdpuxds vs4, vs4
+; CHECK-P8-NEXT:    xvcvdpuxds vs3, vs3
+; CHECK-P8-NEXT:    xvcvdpuxds vs2, vs2
+; CHECK-P8-NEXT:    xvcvdpuxds vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    xvcvdpuxds vs0, vs7
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r8
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r9
 ; CHECK-P8-NEXT:    stxvd2x vs5, r3, r10
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
 ; CHECK-P8-NEXT:    stxvd2x vs6, r3, r11
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs7, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -284,21 +284,21 @@ entry:
 define void @test8elt_signed(ptr noalias nocapture sret(<8 x i64>) %agg.result, ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r5, 48
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    li r7, 16
 ; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    xvcvdpsxds vs3, vs3
 ; CHECK-P8-NEXT:    xvcvdpsxds vs0, vs0
-; CHECK-P8-NEXT:    xvcvdpsxds vs1, vs1
 ; CHECK-P8-NEXT:    xvcvdpsxds vs2, vs2
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    xvcvdpsxds vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    xvcvdpsxds vs0, vs3
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
@@ -342,13 +342,14 @@ entry:
 define void @test16elt_signed(ptr noalias nocapture sret(<16 x i64>) %agg.result, ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r7, 64
-; CHECK-P8-NEXT:    li r8, 96
-; CHECK-P8-NEXT:    li r9, 112
-; CHECK-P8-NEXT:    li r10, 80
-; CHECK-P8-NEXT:    li r11, 48
+; CHECK-P8-NEXT:    li r5, 112
+; CHECK-P8-NEXT:    li r6, 96
+; CHECK-P8-NEXT:    li r7, 80
+; CHECK-P8-NEXT:    li r8, 64
+; CHECK-P8-NEXT:    li r9, 48
+; CHECK-P8-NEXT:    li r10, 32
+; CHECK-P8-NEXT:    li r11, 16
+; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
@@ -356,23 +357,22 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x i64>) %agg.result
 ; CHECK-P8-NEXT:    lxvd2x vs4, r4, r9
 ; CHECK-P8-NEXT:    lxvd2x vs5, r4, r10
 ; CHECK-P8-NEXT:    lxvd2x vs6, r4, r11
-; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
 ; CHECK-P8-NEXT:    xvcvdpsxds vs0, vs0
-; CHECK-P8-NEXT:    xvcvdpsxds vs1, vs1
-; CHECK-P8-NEXT:    xvcvdpsxds vs2, vs2
-; CHECK-P8-NEXT:    xvcvdpsxds vs3, vs3
-; CHECK-P8-NEXT:    xvcvdpsxds vs4, vs4
-; CHECK-P8-NEXT:    xvcvdpsxds vs5, vs5
 ; CHECK-P8-NEXT:    xvcvdpsxds vs6, vs6
-; CHECK-P8-NEXT:    xvcvdpsxds vs7, vs7
-; CHECK-P8-NEXT:    stxvd2x vs4, r3, r9
+; CHECK-P8-NEXT:    xvcvdpsxds vs5, vs5
+; CHECK-P8-NEXT:    xvcvdpsxds vs4, vs4
+; CHECK-P8-NEXT:    xvcvdpsxds vs3, vs3
+; CHECK-P8-NEXT:    xvcvdpsxds vs2, vs2
+; CHECK-P8-NEXT:    xvcvdpsxds vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    xvcvdpsxds vs0, vs7
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r8
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r9
 ; CHECK-P8-NEXT:    stxvd2x vs5, r3, r10
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
 ; CHECK-P8-NEXT:    stxvd2x vs6, r3, r11
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs7, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
index 7a689f43dfa964a..f52a92596dec82d 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
@@ -64,9 +64,9 @@ entry:
 define <4 x float> @test4elt(i64 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    xxlxor v2, v2, v2
-; CHECK-P8-NEXT:    mtvsrd v3, r3
-; CHECK-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-P8-NEXT:    mtvsrd v2, r3
+; CHECK-P8-NEXT:    xxlxor v3, v3, v3
+; CHECK-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-P8-NEXT:    blr
 ;
@@ -103,8 +103,8 @@ define void @test8elt(ptr noalias nocapture sret(<8 x float>) %agg.result, <8 x
 ; CHECK-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-P8-NEXT:    xvcvuxwsp vs0, v4
 ; CHECK-P8-NEXT:    xvcvuxwsp vs1, v2
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
@@ -139,37 +139,37 @@ entry:
 define void @test16elt(ptr noalias nocapture sret(<16 x float>) %agg.result, ptr nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
-; CHECK-P8-NEXT:    li r6, 16
+; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_0@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_1@toc@ha
-; CHECK-P8-NEXT:    addi r4, r5, .LCPI3_1@toc@l
-; CHECK-P8-NEXT:    xxswapd v2, vs1
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
-; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    xxlxor v0, v0, v0
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_1@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_0@toc@l
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xxswapd v3, vs0
-; CHECK-P8-NEXT:    xxswapd v0, vs3
-; CHECK-P8-NEXT:    vperm v1, v4, v2, v3
-; CHECK-P8-NEXT:    vperm v3, v4, v5, v3
-; CHECK-P8-NEXT:    vperm v5, v4, v5, v0
-; CHECK-P8-NEXT:    vperm v2, v4, v2, v0
-; CHECK-P8-NEXT:    xvcvuxwsp vs0, v1
+; CHECK-P8-NEXT:    xxswapd v3, vs2
+; CHECK-P8-NEXT:    vperm v1, v0, v2, v3
+; CHECK-P8-NEXT:    vperm v3, v0, v4, v3
+; CHECK-P8-NEXT:    xvcvuxwsp vs2, v1
 ; CHECK-P8-NEXT:    xvcvuxwsp vs1, v3
-; CHECK-P8-NEXT:    xvcvuxwsp vs2, v5
+; CHECK-P8-NEXT:    xxswapd v5, vs0
+; CHECK-P8-NEXT:    vperm v4, v0, v4, v5
+; CHECK-P8-NEXT:    vperm v2, v0, v2, v5
+; CHECK-P8-NEXT:    xvcvuxwsp vs0, v4
 ; CHECK-P8-NEXT:    xvcvuxwsp vs3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs3, r3, r6
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -285,12 +285,12 @@ entry:
 define <4 x float> @test4elt_signed(i64 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    vspltisw v3, 8
-; CHECK-P8-NEXT:    vmrghh v2, v2, v2
-; CHECK-P8-NEXT:    vadduwm v3, v3, v3
-; CHECK-P8-NEXT:    vslw v2, v2, v3
-; CHECK-P8-NEXT:    vsraw v2, v2, v3
+; CHECK-P8-NEXT:    mtvsrd v3, r3
+; CHECK-P8-NEXT:    vspltisw v2, 8
+; CHECK-P8-NEXT:    vadduwm v2, v2, v2
+; CHECK-P8-NEXT:    vmrghh v3, v3, v3
+; CHECK-P8-NEXT:    vslw v3, v3, v2
+; CHECK-P8-NEXT:    vsraw v2, v3, v2
 ; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
 ; CHECK-P8-NEXT:    blr
 ;
@@ -318,19 +318,19 @@ entry:
 define void @test8elt_signed(ptr noalias nocapture sret(<8 x float>) %agg.result, <8 x i16> %a) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    vmrglh v4, v2, v2
 ; CHECK-P8-NEXT:    vspltisw v3, 8
+; CHECK-P8-NEXT:    vmrglh v4, v2, v2
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    vmrghh v2, v2, v2
 ; CHECK-P8-NEXT:    vadduwm v3, v3, v3
+; CHECK-P8-NEXT:    vmrghh v2, v2, v2
 ; CHECK-P8-NEXT:    vslw v4, v4, v3
 ; CHECK-P8-NEXT:    vslw v2, v2, v3
 ; CHECK-P8-NEXT:    vsraw v4, v4, v3
 ; CHECK-P8-NEXT:    vsraw v2, v2, v3
 ; CHECK-P8-NEXT:    xvcvsxwsp vs0, v4
 ; CHECK-P8-NEXT:    xvcvsxwsp vs1, v2
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
@@ -368,37 +368,37 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x float>) %agg.resu
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    vspltisw v2, 8
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    vadduwm v2, v2, v2
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT:    vspltisw v5, 8
-; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    lxvd2x vs1, r4, r5
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    vadduwm v5, v5, v5
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    vmrglh v4, v2, v2
-; CHECK-P8-NEXT:    vmrglh v0, v3, v3
-; CHECK-P8-NEXT:    vmrghh v3, v3, v3
-; CHECK-P8-NEXT:    vmrghh v2, v2, v2
-; CHECK-P8-NEXT:    vslw v4, v4, v5
-; CHECK-P8-NEXT:    vslw v0, v0, v5
-; CHECK-P8-NEXT:    vslw v3, v3, v5
-; CHECK-P8-NEXT:    vslw v2, v2, v5
-; CHECK-P8-NEXT:    vsraw v4, v4, v5
-; CHECK-P8-NEXT:    vsraw v0, v0, v5
-; CHECK-P8-NEXT:    vsraw v3, v3, v5
-; CHECK-P8-NEXT:    vsraw v2, v2, v5
-; CHECK-P8-NEXT:    xvcvsxwsp vs0, v4
-; CHECK-P8-NEXT:    xvcvsxwsp vs1, v0
-; CHECK-P8-NEXT:    xvcvsxwsp vs2, v3
-; CHECK-P8-NEXT:    xvcvsxwsp vs3, v2
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    vmrghh v5, v3, v3
+; CHECK-P8-NEXT:    vmrglh v3, v3, v3
+; CHECK-P8-NEXT:    vslw v3, v3, v2
+; CHECK-P8-NEXT:    vslw v5, v5, v2
+; CHECK-P8-NEXT:    vsraw v3, v3, v2
+; CHECK-P8-NEXT:    xvcvsxwsp vs3, v3
+; CHECK-P8-NEXT:    xxswapd v4, vs0
+; CHECK-P8-NEXT:    vmrglh v0, v4, v4
+; CHECK-P8-NEXT:    vmrghh v4, v4, v4
+; CHECK-P8-NEXT:    vslw v0, v0, v2
+; CHECK-P8-NEXT:    vslw v4, v4, v2
+; CHECK-P8-NEXT:    vsraw v0, v0, v2
+; CHECK-P8-NEXT:    vsraw v4, v4, v2
+; CHECK-P8-NEXT:    vsraw v2, v5, v2
+; CHECK-P8-NEXT:    xvcvsxwsp vs2, v2
+; CHECK-P8-NEXT:    xvcvsxwsp vs0, v0
+; CHECK-P8-NEXT:    xvcvsxwsp vs1, v4
 ; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
-; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
index 0cc5248af634a44..bfb8b72327f5a6f 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
@@ -13,12 +13,12 @@ define <2 x double> @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
-; CHECK-P8-NEXT:    mtvsrwz v2, r3
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI0_0@toc@l
+; CHECK-P8-NEXT:    mtvsrwz v3, r3
 ; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI0_0@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs0
-; CHECK-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    vperm v2, v4, v3, v2
 ; CHECK-P8-NEXT:    xvcvuxddp v2, v2
 ; CHECK-P8-NEXT:    blr
 ;
@@ -53,20 +53,20 @@ define void @test4elt(ptr noalias nocapture sret(<4 x double>) %agg.result, i64
 ; CHECK-P8-LABEL: test4elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI1_0@toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI1_1@toc@ha
-; CHECK-P8-NEXT:    xxlxor v2, v2, v2
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI1_0@toc@l
 ; CHECK-P8-NEXT:    mtvsrd v4, r4
+; CHECK-P8-NEXT:    xxlxor v5, v5, v5
 ; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI1_0@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-P8-NEXT:    addi r5, r6, .LCPI1_1@toc@l
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI1_1@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI1_1@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    xxswapd v3, vs0
-; CHECK-P8-NEXT:    xxswapd v5, vs1
-; CHECK-P8-NEXT:    vperm v3, v2, v4, v3
-; CHECK-P8-NEXT:    vperm v2, v2, v4, v5
-; CHECK-P8-NEXT:    xvcvuxddp vs0, v3
-; CHECK-P8-NEXT:    xvcvuxddp vs1, v2
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    vperm v2, v5, v4, v2
+; CHECK-P8-NEXT:    xvcvuxddp vs0, v2
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vperm v3, v5, v4, v3
+; CHECK-P8-NEXT:    xvcvuxddp vs1, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
@@ -119,40 +119,40 @@ define void @test8elt(ptr noalias nocapture sret(<8 x double>) %agg.result, <8 x
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_2@toc@ha
-; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    xxlxor v1, v1, v1
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_0@toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_2@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_1@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_3@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_1@toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_3@toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_1@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_2@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_2@toc@l
+; CHECK-P8-NEXT:    xxswapd v4, vs0
+; CHECK-P8-NEXT:    vperm v4, v1, v2, v4
+; CHECK-P8-NEXT:    xvcvuxddp vs0, v4
 ; CHECK-P8-NEXT:    xxswapd v5, vs1
-; CHECK-P8-NEXT:    xxswapd v0, vs2
-; CHECK-P8-NEXT:    xxswapd v1, vs3
-; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
-; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
-; CHECK-P8-NEXT:    vperm v0, v4, v2, v0
-; CHECK-P8-NEXT:    vperm v2, v4, v2, v1
-; CHECK-P8-NEXT:    xvcvuxddp vs0, v3
+; CHECK-P8-NEXT:    vperm v5, v1, v2, v5
 ; CHECK-P8-NEXT:    xvcvuxddp vs1, v5
-; CHECK-P8-NEXT:    xvcvuxddp vs2, v0
-; CHECK-P8-NEXT:    xvcvuxddp vs3, v2
+; CHECK-P8-NEXT:    xxswapd v3, vs2
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    vperm v3, v1, v2, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd v0, vs2
+; CHECK-P8-NEXT:    xvcvuxddp vs2, v3
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    vperm v2, v1, v2, v0
+; CHECK-P8-NEXT:    xvcvuxddp vs3, v2
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -222,66 +222,66 @@ entry:
 define void @test16elt(ptr noalias nocapture sret(<16 x double>) %agg.result, ptr nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_2@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT:    xxlxor v4, v4, v4
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_0@toc@l
-; CHECK-P8-NEXT:    addi r6, r6, .LCPI3_2@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lxvd2x vs2, 0, r6
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_1@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs3, r4, r5
-; CHECK-P8-NEXT:    addi r4, r6, .LCPI3_1@toc@l
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_3@toc@ha
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxlxor v5, v5, v5
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_1@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_3@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_2@toc@l
+; CHECK-P8-NEXT:    xxswapd v6, vs1
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    lxvd2x vs4, 0, r4
-; CHECK-P8-NEXT:    addi r4, r6, .LCPI3_3@toc@l
-; CHECK-P8-NEXT:    li r6, 96
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_0@toc@l
+; CHECK-P8-NEXT:    xxswapd v0, vs2
+; CHECK-P8-NEXT:    vperm v9, v5, v2, v0
+; CHECK-P8-NEXT:    xvcvuxddp vs6, v9
+; CHECK-P8-NEXT:    vperm v0, v5, v6, v0
+; CHECK-P8-NEXT:    xvcvuxddp vs1, v0
+; CHECK-P8-NEXT:    xxswapd v3, vs3
+; CHECK-P8-NEXT:    vperm v1, v5, v2, v3
+; CHECK-P8-NEXT:    vperm v3, v5, v6, v3
+; CHECK-P8-NEXT:    xvcvuxddp vs4, v1
+; CHECK-P8-NEXT:    xvcvuxddp vs3, v3
+; CHECK-P8-NEXT:    xxswapd v4, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    xxswapd v0, vs3
-; CHECK-P8-NEXT:    xxswapd v1, vs4
-; CHECK-P8-NEXT:    xxswapd v8, vs0
-; CHECK-P8-NEXT:    vperm v6, v4, v2, v3
-; CHECK-P8-NEXT:    vperm v7, v4, v2, v5
-; CHECK-P8-NEXT:    vperm v3, v4, v0, v3
-; CHECK-P8-NEXT:    vperm v9, v4, v0, v1
-; CHECK-P8-NEXT:    vperm v5, v4, v0, v5
-; CHECK-P8-NEXT:    vperm v0, v4, v0, v8
-; CHECK-P8-NEXT:    vperm v1, v4, v2, v1
-; CHECK-P8-NEXT:    vperm v2, v4, v2, v8
-; CHECK-P8-NEXT:    xvcvuxddp vs1, v3
-; CHECK-P8-NEXT:    xvcvuxddp vs4, v9
-; CHECK-P8-NEXT:    xvcvuxddp vs2, v5
-; CHECK-P8-NEXT:    xvcvuxddp vs3, v0
-; CHECK-P8-NEXT:    xvcvuxddp vs0, v7
-; CHECK-P8-NEXT:    xvcvuxddp vs5, v2
-; CHECK-P8-NEXT:    xvcvuxddp vs6, v6
+; CHECK-P8-NEXT:    vperm v8, v5, v2, v4
+; CHECK-P8-NEXT:    vperm v4, v5, v6, v4
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xvcvuxddp vs7, v1
+; CHECK-P8-NEXT:    xvcvuxddp vs7, v8
+; CHECK-P8-NEXT:    xvcvuxddp vs2, v4
+; CHECK-P8-NEXT:    xxswapd v7, vs0
 ; CHECK-P8-NEXT:    xxswapd vs4, vs4
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
 ; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxswapd vs5, vs5
-; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    vperm v2, v5, v2, v7
+; CHECK-P8-NEXT:    vperm v7, v5, v6, v7
+; CHECK-P8-NEXT:    xvcvuxddp vs5, v2
+; CHECK-P8-NEXT:    xvcvuxddp vs0, v7
+; CHECK-P8-NEXT:    xxswapd vs4, vs6
+; CHECK-P8-NEXT:    xxswapd vs7, vs7
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    stxvd2x vs7, r3, r4
 ; CHECK-P8-NEXT:    li r4, 80
-; CHECK-P8-NEXT:    li r6, 64
-; CHECK-P8-NEXT:    xxswapd vs2, vs7
-; CHECK-P8-NEXT:    xxswapd vs3, vs6
 ; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
-; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r4, 64
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs5, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -424,26 +424,26 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x double>) %agg.resul
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI5_2@toc@ha
-; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mtvsrd v4, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_1@toc@ha
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI5_0@toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_1@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-P8-NEXT:    addi r5, r6, .LCPI5_2@toc@l
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI5_2@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI5_2@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v4, vs1
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
-; CHECK-P8-NEXT:    vperm v3, v3, v3, v4
+; CHECK-P8-NEXT:    vperm v2, v4, v4, v2
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vperm v3, v4, v4, v3
 ; CHECK-P8-NEXT:    lxvd2x v4, 0, r4
 ; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    vsld v2, v2, v4
-; CHECK-P8-NEXT:    vsld v3, v3, v4
 ; CHECK-P8-NEXT:    vsrad v2, v2, v4
-; CHECK-P8-NEXT:    vsrad v3, v3, v4
 ; CHECK-P8-NEXT:    xvcvsxddp vs0, v2
-; CHECK-P8-NEXT:    xvcvsxddp vs1, v3
+; CHECK-P8-NEXT:    vsld v2, v3, v4
+; CHECK-P8-NEXT:    vsrad v2, v2, v4
+; CHECK-P8-NEXT:    xvcvsxddp vs1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
@@ -498,50 +498,50 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x double>) %agg.resul
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_2@toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_0@toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_2@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_2@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_2@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_4@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_4@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_3@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_4@toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_3@toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_4@toc@l
+; CHECK-P8-NEXT:    xxswapd v4, vs0
+; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
+; CHECK-P8-NEXT:    xxswapd v5, vs1
+; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
+; CHECK-P8-NEXT:    xxswapd v3, vs2
 ; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_1@toc@ha
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    xxswapd v3, vs0
-; CHECK-P8-NEXT:    xxswapd v4, vs1
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_1@toc@l
-; CHECK-P8-NEXT:    xxswapd v5, vs2
-; CHECK-P8-NEXT:    xxswapd v0, vs3
 ; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
-; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
-; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
+; CHECK-P8-NEXT:    xxswapd v0, vs2
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v0
 ; CHECK-P8-NEXT:    lxvd2x v0, 0, r4
 ; CHECK-P8-NEXT:    li r4, 48
 ; CHECK-P8-NEXT:    vsld v3, v3, v0
+; CHECK-P8-NEXT:    vsld v2, v2, v0
 ; CHECK-P8-NEXT:    vsld v4, v4, v0
 ; CHECK-P8-NEXT:    vsld v5, v5, v0
-; CHECK-P8-NEXT:    vsld v2, v2, v0
 ; CHECK-P8-NEXT:    vsrad v3, v3, v0
 ; CHECK-P8-NEXT:    vsrad v2, v2, v0
 ; CHECK-P8-NEXT:    vsrad v4, v4, v0
 ; CHECK-P8-NEXT:    vsrad v5, v5, v0
-; CHECK-P8-NEXT:    xvcvsxddp vs2, v2
-; CHECK-P8-NEXT:    xvcvsxddp vs0, v3
+; CHECK-P8-NEXT:    xvcvsxddp vs2, v3
+; CHECK-P8-NEXT:    xvcvsxddp vs3, v2
+; CHECK-P8-NEXT:    xvcvsxddp vs0, v4
 ; CHECK-P8-NEXT:    xvcvsxddp vs1, v5
-; CHECK-P8-NEXT:    xvcvsxddp vs3, v4
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    li r4, 32
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -617,84 +617,84 @@ entry:
 define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.result, ptr nocapture readonly) local_unnamed_addr #3 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_0@toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI7_2@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_0@toc@l
-; CHECK-P8-NEXT:    addi r6, r6, .LCPI7_2@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_3@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs2, 0, r6
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI7_4@toc@ha
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_3@toc@l
-; CHECK-P8-NEXT:    addi r6, r6, .LCPI7_4@toc@l
-; CHECK-P8-NEXT:    xxswapd v2, vs1
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lxvd2x vs4, 0, r6
-; CHECK-P8-NEXT:    li r6, 96
-; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
-; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_2@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_2@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_4@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_4@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_3@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_3@toc@l
+; CHECK-P8-NEXT:    xxswapd v1, vs1
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_0@toc@l
+; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    vperm v8, v2, v2, v5
+; CHECK-P8-NEXT:    vperm v5, v1, v1, v5
+; CHECK-P8-NEXT:    xxswapd v3, vs3
+; CHECK-P8-NEXT:    vperm v0, v2, v2, v3
+; CHECK-P8-NEXT:    vperm v3, v1, v1, v3
+; CHECK-P8-NEXT:    xxswapd v4, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_1@toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_1@toc@l
-; CHECK-P8-NEXT:    xxswapd v5, vs3
-; CHECK-P8-NEXT:    xxswapd v0, vs4
-; CHECK-P8-NEXT:    lxvd2x v9, 0, r4
-; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    xxswapd v6, vs0
-; CHECK-P8-NEXT:    vperm v1, v2, v2, v3
 ; CHECK-P8-NEXT:    vperm v7, v2, v2, v4
-; CHECK-P8-NEXT:    vperm v8, v2, v2, v5
-; CHECK-P8-NEXT:    vperm v2, v2, v2, v0
-; CHECK-P8-NEXT:    vperm v5, v6, v6, v5
-; CHECK-P8-NEXT:    vperm v0, v6, v6, v0
-; CHECK-P8-NEXT:    vperm v3, v6, v6, v3
-; CHECK-P8-NEXT:    vperm v4, v6, v6, v4
-; CHECK-P8-NEXT:    vsld v1, v1, v9
-; CHECK-P8-NEXT:    vsld v6, v7, v9
-; CHECK-P8-NEXT:    vsld v5, v5, v9
-; CHECK-P8-NEXT:    vsld v0, v0, v9
-; CHECK-P8-NEXT:    vsld v3, v3, v9
-; CHECK-P8-NEXT:    vsld v4, v4, v9
-; CHECK-P8-NEXT:    vsrad v5, v5, v9
-; CHECK-P8-NEXT:    vsrad v0, v0, v9
-; CHECK-P8-NEXT:    vsld v7, v8, v9
-; CHECK-P8-NEXT:    vsld v2, v2, v9
-; CHECK-P8-NEXT:    vsrad v3, v3, v9
-; CHECK-P8-NEXT:    vsrad v4, v4, v9
-; CHECK-P8-NEXT:    xvcvsxddp vs2, v5
-; CHECK-P8-NEXT:    xvcvsxddp vs3, v0
-; CHECK-P8-NEXT:    vsrad v1, v1, v9
-; CHECK-P8-NEXT:    vsrad v6, v6, v9
-; CHECK-P8-NEXT:    vsrad v7, v7, v9
-; CHECK-P8-NEXT:    vsrad v2, v2, v9
-; CHECK-P8-NEXT:    xvcvsxddp vs1, v3
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xvcvsxddp vs4, v4
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xvcvsxddp vs0, v7
+; CHECK-P8-NEXT:    vperm v4, v1, v1, v4
+; CHECK-P8-NEXT:    xxswapd v6, vs0
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v6
+; CHECK-P8-NEXT:    vperm v6, v1, v1, v6
+; CHECK-P8-NEXT:    lxvd2x v1, 0, r4
+; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    vsld v0, v0, v1
+; CHECK-P8-NEXT:    vsld v7, v7, v1
+; CHECK-P8-NEXT:    vsld v8, v8, v1
+; CHECK-P8-NEXT:    vsld v2, v2, v1
+; CHECK-P8-NEXT:    vsld v3, v3, v1
+; CHECK-P8-NEXT:    vsld v6, v6, v1
+; CHECK-P8-NEXT:    vsld v5, v5, v1
+; CHECK-P8-NEXT:    vsld v4, v4, v1
+; CHECK-P8-NEXT:    vsrad v0, v0, v1
+; CHECK-P8-NEXT:    vsrad v7, v7, v1
+; CHECK-P8-NEXT:    vsrad v8, v8, v1
+; CHECK-P8-NEXT:    vsrad v2, v2, v1
+; CHECK-P8-NEXT:    vsrad v3, v3, v1
+; CHECK-P8-NEXT:    vsrad v6, v6, v1
+; CHECK-P8-NEXT:    vsrad v5, v5, v1
+; CHECK-P8-NEXT:    vsrad v4, v4, v1
+; CHECK-P8-NEXT:    xvcvsxddp vs4, v0
+; CHECK-P8-NEXT:    xvcvsxddp vs7, v7
+; CHECK-P8-NEXT:    xvcvsxddp vs6, v8
 ; CHECK-P8-NEXT:    xvcvsxddp vs5, v2
-; CHECK-P8-NEXT:    xvcvsxddp vs6, v1
-; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
-; CHECK-P8-NEXT:    li r4, 80
-; CHECK-P8-NEXT:    xvcvsxddp vs7, v6
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r6
-; CHECK-P8-NEXT:    li r6, 64
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xvcvsxddp vs3, v3
+; CHECK-P8-NEXT:    xvcvsxddp vs0, v6
+; CHECK-P8-NEXT:    xvcvsxddp vs1, v5
+; CHECK-P8-NEXT:    xvcvsxddp vs2, v4
 ; CHECK-P8-NEXT:    xxswapd vs4, vs4
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd vs7, vs7
 ; CHECK-P8-NEXT:    xxswapd vs5, vs5
-; CHECK-P8-NEXT:    xxswapd vs3, vs6
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xxswapd vs2, vs7
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
-; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    xxswapd vs4, vs6
+; CHECK-P8-NEXT:    stxvd2x vs7, r3, r4
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    li r4, 64
 ; CHECK-P8-NEXT:    stxvd2x vs5, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll
index b4273f2cd464342..c001f2ff51bf6f0 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll
@@ -46,8 +46,8 @@ define void @test4elt(ptr noalias nocapture sret(<4 x double>) %agg.result, <4 x
 ; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    xvcvuxwdp vs0, v3
 ; CHECK-P8-NEXT:    xvcvuxwdp vs1, v2
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
@@ -81,26 +81,26 @@ define void @test8elt(ptr noalias nocapture sret(<8 x double>) %agg.result, ptr
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxmrghw v4, v2, v2
+; CHECK-P8-NEXT:    xxmrglw v2, v2, v2
+; CHECK-P8-NEXT:    xvcvuxwdp vs1, v4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xvcvuxwdp vs0, v2
 ; CHECK-P8-NEXT:    xxmrglw v5, v3, v3
 ; CHECK-P8-NEXT:    xxmrghw v3, v3, v3
-; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
-; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
 ; CHECK-P8-NEXT:    xvcvuxwdp vs2, v5
-; CHECK-P8-NEXT:    xvcvuxwdp vs0, v4
-; CHECK-P8-NEXT:    xvcvuxwdp vs1, v2
 ; CHECK-P8-NEXT:    xvcvuxwdp vs3, v3
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
 ; CHECK-P8-NEXT:    stxvd2x vs2, 0, r3
 ; CHECK-P8-NEXT:    blr
@@ -150,52 +150,52 @@ entry:
 define void @test16elt(ptr noalias nocapture sret(<16 x double>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    li r7, 32
-; CHECK-P8-NEXT:    li r8, 64
+; CHECK-P8-NEXT:    li r5, 48
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 16
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
-; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    xxswapd v0, vs1
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r6
+; CHECK-P8-NEXT:    xxmrglw v7, v0, v0
+; CHECK-P8-NEXT:    xxmrghw v0, v0, v0
+; CHECK-P8-NEXT:    xvcvuxwdp vs6, v7
+; CHECK-P8-NEXT:    xvcvuxwdp vs7, v0
+; CHECK-P8-NEXT:    xxmrghw v3, v2, v2
+; CHECK-P8-NEXT:    xxmrglw v2, v2, v2
+; CHECK-P8-NEXT:    xvcvuxwdp vs2, v3
+; CHECK-P8-NEXT:    xvcvuxwdp vs5, v2
+; CHECK-P8-NEXT:    xxswapd v4, vs0
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r7
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
-; CHECK-P8-NEXT:    xxmrghw v0, v3, v3
+; CHECK-P8-NEXT:    xxmrglw v1, v4, v4
+; CHECK-P8-NEXT:    xxmrghw v4, v4, v4
+; CHECK-P8-NEXT:    xvcvuxwdp vs4, v4
+; CHECK-P8-NEXT:    xvcvuxwdp vs3, v1
 ; CHECK-P8-NEXT:    xxswapd v5, vs0
-; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
-; CHECK-P8-NEXT:    xxmrglw v3, v3, v3
-; CHECK-P8-NEXT:    xvcvuxwdp vs0, v4
-; CHECK-P8-NEXT:    xxswapd v4, vs1
-; CHECK-P8-NEXT:    xvcvuxwdp vs1, v0
-; CHECK-P8-NEXT:    xxmrghw v0, v5, v5
-; CHECK-P8-NEXT:    xxmrglw v5, v5, v5
-; CHECK-P8-NEXT:    xvcvuxwdp vs2, v2
-; CHECK-P8-NEXT:    xxmrglw v2, v4, v4
-; CHECK-P8-NEXT:    xvcvuxwdp vs3, v3
-; CHECK-P8-NEXT:    xxmrghw v3, v4, v4
-; CHECK-P8-NEXT:    xvcvuxwdp vs4, v0
-; CHECK-P8-NEXT:    xvcvuxwdp vs5, v5
-; CHECK-P8-NEXT:    xvcvuxwdp vs6, v2
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xvcvuxwdp vs7, v3
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
 ; CHECK-P8-NEXT:    li r4, 96
-; CHECK-P8-NEXT:    xxswapd vs4, vs4
-; CHECK-P8-NEXT:    xxswapd vs1, vs5
-; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
-; CHECK-P8-NEXT:    xxswapd vs5, vs6
+; CHECK-P8-NEXT:    stxvd2x vs5, r3, r4
 ; CHECK-P8-NEXT:    li r4, 80
-; CHECK-P8-NEXT:    xxswapd vs3, vs7
-; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r8
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r6
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r7
-; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
+; CHECK-P8-NEXT:    xxmrglw v6, v5, v5
+; CHECK-P8-NEXT:    xxmrghw v5, v5, v5
+; CHECK-P8-NEXT:    xvcvuxwdp vs0, v6
+; CHECK-P8-NEXT:    xvcvuxwdp vs1, v5
+; CHECK-P8-NEXT:    xxswapd vs5, vs6
+; CHECK-P8-NEXT:    xxswapd vs2, vs4
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs4, vs7
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 64
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r7
 ; CHECK-P8-NEXT:    stxvd2x vs5, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -306,8 +306,8 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x double>) %agg.resul
 ; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    xvcvsxwdp vs0, v3
 ; CHECK-P8-NEXT:    xvcvsxwdp vs1, v2
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
@@ -341,26 +341,26 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x double>) %agg.resul
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    xxmrghw v4, v2, v2
+; CHECK-P8-NEXT:    xxmrglw v2, v2, v2
+; CHECK-P8-NEXT:    xvcvsxwdp vs1, v4
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    xvcvsxwdp vs0, v2
 ; CHECK-P8-NEXT:    xxmrglw v5, v3, v3
 ; CHECK-P8-NEXT:    xxmrghw v3, v3, v3
-; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
-; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
 ; CHECK-P8-NEXT:    xvcvsxwdp vs2, v5
-; CHECK-P8-NEXT:    xvcvsxwdp vs0, v4
-; CHECK-P8-NEXT:    xvcvsxwdp vs1, v2
 ; CHECK-P8-NEXT:    xvcvsxwdp vs3, v3
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
 ; CHECK-P8-NEXT:    stxvd2x vs2, 0, r3
 ; CHECK-P8-NEXT:    blr
@@ -410,52 +410,52 @@ entry:
 define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    li r7, 32
-; CHECK-P8-NEXT:    li r8, 64
+; CHECK-P8-NEXT:    li r5, 48
+; CHECK-P8-NEXT:    li r6, 32
+; CHECK-P8-NEXT:    li r7, 16
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
-; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
+; CHECK-P8-NEXT:    xxswapd v0, vs1
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r6
+; CHECK-P8-NEXT:    xxmrglw v7, v0, v0
+; CHECK-P8-NEXT:    xxmrghw v0, v0, v0
+; CHECK-P8-NEXT:    xvcvsxwdp vs6, v7
+; CHECK-P8-NEXT:    xvcvsxwdp vs7, v0
+; CHECK-P8-NEXT:    xxmrghw v3, v2, v2
+; CHECK-P8-NEXT:    xxmrglw v2, v2, v2
+; CHECK-P8-NEXT:    xvcvsxwdp vs2, v3
+; CHECK-P8-NEXT:    xvcvsxwdp vs5, v2
+; CHECK-P8-NEXT:    xxswapd v4, vs0
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r7
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
-; CHECK-P8-NEXT:    xxmrghw v0, v3, v3
+; CHECK-P8-NEXT:    xxmrglw v1, v4, v4
+; CHECK-P8-NEXT:    xxmrghw v4, v4, v4
+; CHECK-P8-NEXT:    xvcvsxwdp vs4, v4
+; CHECK-P8-NEXT:    xvcvsxwdp vs3, v1
 ; CHECK-P8-NEXT:    xxswapd v5, vs0
-; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
-; CHECK-P8-NEXT:    xxmrglw v3, v3, v3
-; CHECK-P8-NEXT:    xvcvsxwdp vs0, v4
-; CHECK-P8-NEXT:    xxswapd v4, vs1
-; CHECK-P8-NEXT:    xvcvsxwdp vs1, v0
-; CHECK-P8-NEXT:    xxmrghw v0, v5, v5
-; CHECK-P8-NEXT:    xxmrglw v5, v5, v5
-; CHECK-P8-NEXT:    xvcvsxwdp vs2, v2
-; CHECK-P8-NEXT:    xxmrglw v2, v4, v4
-; CHECK-P8-NEXT:    xvcvsxwdp vs3, v3
-; CHECK-P8-NEXT:    xxmrghw v3, v4, v4
-; CHECK-P8-NEXT:    xvcvsxwdp vs4, v0
-; CHECK-P8-NEXT:    xvcvsxwdp vs5, v5
-; CHECK-P8-NEXT:    xvcvsxwdp vs6, v2
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xvcvsxwdp vs7, v3
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs5, vs5
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
 ; CHECK-P8-NEXT:    li r4, 96
-; CHECK-P8-NEXT:    xxswapd vs4, vs4
-; CHECK-P8-NEXT:    xxswapd vs1, vs5
-; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
-; CHECK-P8-NEXT:    xxswapd vs5, vs6
+; CHECK-P8-NEXT:    stxvd2x vs5, r3, r4
 ; CHECK-P8-NEXT:    li r4, 80
-; CHECK-P8-NEXT:    xxswapd vs3, vs7
-; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r8
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r6
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r7
-; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
+; CHECK-P8-NEXT:    xxmrglw v6, v5, v5
+; CHECK-P8-NEXT:    xxmrghw v5, v5, v5
+; CHECK-P8-NEXT:    xvcvsxwdp vs0, v6
+; CHECK-P8-NEXT:    xvcvsxwdp vs1, v5
+; CHECK-P8-NEXT:    xxswapd vs5, vs6
+; CHECK-P8-NEXT:    xxswapd vs2, vs4
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs4, vs7
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
+; CHECK-P8-NEXT:    li r4, 64
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r7
 ; CHECK-P8-NEXT:    stxvd2x vs5, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
index 16acda0ca3a3103..6a50e7a6e8e3b1a 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
@@ -53,14 +53,14 @@ define <4 x float> @test4elt(ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xvcvuxdsp vs1, v3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    xvcvuxdsp vs0, v2
-; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
 ; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P8-NEXT:    xvcvuxdsp vs0, v3
+; CHECK-P8-NEXT:    xxsldwi v3, vs0, vs0, 3
 ; CHECK-P8-NEXT:    vpkudum v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -94,30 +94,30 @@ entry:
 define void @test8elt(ptr noalias nocapture sret(<8 x float>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 32
 ; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r6
+; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
-; CHECK-P8-NEXT:    xxswapd v5, vs3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xxswapd v4, vs2
-; CHECK-P8-NEXT:    xvcvuxdsp vs3, v5
-; CHECK-P8-NEXT:    xvcvuxdsp vs0, v2
-; CHECK-P8-NEXT:    xvcvuxdsp vs1, v3
-; CHECK-P8-NEXT:    xvcvuxdsp vs2, v4
-; CHECK-P8-NEXT:    xxsldwi v5, vs3, vs3, 3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v4, vs0
+; CHECK-P8-NEXT:    xvcvuxdsp vs0, v3
+; CHECK-P8-NEXT:    xxswapd v5, vs1
+; CHECK-P8-NEXT:    xvcvuxdsp vs1, v2
 ; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P8-NEXT:    xvcvuxdsp vs0, v4
 ; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-P8-NEXT:    xxsldwi v4, vs2, vs2, 3
 ; CHECK-P8-NEXT:    vpkudum v2, v3, v2
-; CHECK-P8-NEXT:    vpkudum v3, v4, v5
+; CHECK-P8-NEXT:    xxsldwi v4, vs0, vs0, 3
+; CHECK-P8-NEXT:    xvcvuxdsp vs0, v5
+; CHECK-P8-NEXT:    xxsldwi v5, vs0, vs0, 3
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    xxswapd vs1, v3
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    vpkudum v3, v4, v5
+; CHECK-P8-NEXT:    xxswapd vs1, v3
 ; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -170,56 +170,56 @@ entry:
 define void @test16elt(ptr noalias nocapture sret(<16 x float>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    li r7, 64
-; CHECK-P8-NEXT:    lxvd2x vs4, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
-; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    li r7, 80
-; CHECK-P8-NEXT:    lxvd2x vs3, r4, r7
 ; CHECK-P8-NEXT:    li r7, 96
-; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    lxvd2x vs0, r4, r7
-; CHECK-P8-NEXT:    li r7, 112
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    li r6, 112
+; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r7
-; CHECK-P8-NEXT:    li r7, 16
-; CHECK-P8-NEXT:    xxswapd v4, vs2
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    xxswapd v5, vs3
-; CHECK-P8-NEXT:    xvcvuxdsp vs3, v2
-; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    li r7, 64
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r6
+; CHECK-P8-NEXT:    li r6, 80
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r7
+; CHECK-P8-NEXT:    li r7, 32
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r6
+; CHECK-P8-NEXT:    xxswapd v4, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    xxswapd v5, vs1
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v0, vs2
+; CHECK-P8-NEXT:    xxswapd v1, vs3
+; CHECK-P8-NEXT:    xvcvuxdsp vs2, v1
+; CHECK-P8-NEXT:    xvcvuxdsp vs3, v0
+; CHECK-P8-NEXT:    xxswapd v3, vs5
+; CHECK-P8-NEXT:    xxswapd v2, vs4
+; CHECK-P8-NEXT:    xvcvuxdsp vs4, v5
+; CHECK-P8-NEXT:    xvcvuxdsp vs5, v4
+; CHECK-P8-NEXT:    xxswapd v6, vs0
+; CHECK-P8-NEXT:    xxswapd v7, vs1
 ; CHECK-P8-NEXT:    xvcvuxdsp vs0, v3
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xvcvuxdsp vs1, v4
-; CHECK-P8-NEXT:    xxswapd v4, vs2
-; CHECK-P8-NEXT:    xvcvuxdsp vs2, v5
-; CHECK-P8-NEXT:    xxswapd v5, vs4
-; CHECK-P8-NEXT:    xvcvuxdsp vs4, v2
-; CHECK-P8-NEXT:    xvcvuxdsp vs5, v3
-; CHECK-P8-NEXT:    xvcvuxdsp vs6, v4
-; CHECK-P8-NEXT:    xxsldwi v2, vs3, vs3, 3
-; CHECK-P8-NEXT:    xvcvuxdsp vs7, v5
-; CHECK-P8-NEXT:    xxsldwi v3, vs0, vs0, 3
-; CHECK-P8-NEXT:    xxsldwi v4, vs1, vs1, 3
-; CHECK-P8-NEXT:    xxsldwi v5, vs2, vs2, 3
+; CHECK-P8-NEXT:    xvcvuxdsp vs1, v2
+; CHECK-P8-NEXT:    xxsldwi v4, vs2, vs2, 3
+; CHECK-P8-NEXT:    xxsldwi v5, vs3, vs3, 3
 ; CHECK-P8-NEXT:    xxsldwi v0, vs4, vs4, 3
-; CHECK-P8-NEXT:    vpkudum v2, v3, v2
 ; CHECK-P8-NEXT:    xxsldwi v1, vs5, vs5, 3
-; CHECK-P8-NEXT:    xxsldwi v6, vs6, vs6, 3
+; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P8-NEXT:    xvcvuxdsp vs0, v6
+; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P8-NEXT:    vpkudum v2, v3, v2
 ; CHECK-P8-NEXT:    vpkudum v3, v5, v4
-; CHECK-P8-NEXT:    xxsldwi v7, vs7, vs7, 3
 ; CHECK-P8-NEXT:    vpkudum v4, v1, v0
-; CHECK-P8-NEXT:    vpkudum v5, v6, v7
-; CHECK-P8-NEXT:    xxswapd vs2, v2
 ; CHECK-P8-NEXT:    xxswapd vs1, v3
+; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    xxsldwi v6, vs0, vs0, 3
+; CHECK-P8-NEXT:    xvcvuxdsp vs0, v7
+; CHECK-P8-NEXT:    xxsldwi v7, vs0, vs0, 3
 ; CHECK-P8-NEXT:    xxswapd vs0, v4
-; CHECK-P8-NEXT:    xxswapd vs3, v5
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
+; CHECK-P8-NEXT:    vpkudum v5, v6, v7
+; CHECK-P8-NEXT:    xxswapd vs3, v5
 ; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -345,14 +345,14 @@ define <4 x float> @test4elt_signed(ptr nocapture readonly) local_unnamed_addr #
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs0, r3, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xvcvsxdsp vs1, v3
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-P8-NEXT:    xvcvsxdsp vs0, v2
-; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
 ; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P8-NEXT:    xvcvsxdsp vs0, v3
+; CHECK-P8-NEXT:    xxsldwi v3, vs0, vs0, 3
 ; CHECK-P8-NEXT:    vpkudum v2, v2, v3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -386,30 +386,30 @@ entry:
 define void @test8elt_signed(ptr noalias nocapture sret(<8 x float>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 32
 ; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r6
+; CHECK-P8-NEXT:    li r6, 32
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r5
-; CHECK-P8-NEXT:    xxswapd v5, vs3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xxswapd v4, vs2
-; CHECK-P8-NEXT:    xvcvsxdsp vs3, v5
-; CHECK-P8-NEXT:    xvcvsxdsp vs0, v2
-; CHECK-P8-NEXT:    xvcvsxdsp vs1, v3
-; CHECK-P8-NEXT:    xvcvsxdsp vs2, v4
-; CHECK-P8-NEXT:    xxsldwi v5, vs3, vs3, 3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v4, vs0
+; CHECK-P8-NEXT:    xvcvsxdsp vs0, v3
+; CHECK-P8-NEXT:    xxswapd v5, vs1
+; CHECK-P8-NEXT:    xvcvsxdsp vs1, v2
 ; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P8-NEXT:    xvcvsxdsp vs0, v4
 ; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
-; CHECK-P8-NEXT:    xxsldwi v4, vs2, vs2, 3
 ; CHECK-P8-NEXT:    vpkudum v2, v3, v2
-; CHECK-P8-NEXT:    vpkudum v3, v4, v5
+; CHECK-P8-NEXT:    xxsldwi v4, vs0, vs0, 3
+; CHECK-P8-NEXT:    xvcvsxdsp vs0, v5
+; CHECK-P8-NEXT:    xxsldwi v5, vs0, vs0, 3
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    xxswapd vs1, v3
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    vpkudum v3, v4, v5
+; CHECK-P8-NEXT:    xxswapd vs1, v3
 ; CHECK-P8-NEXT:    stxvd2x vs1, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -462,56 +462,56 @@ entry:
 define void @test16elt_signed(ptr noalias nocapture sret(<16 x float>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    li r6, 48
-; CHECK-P8-NEXT:    li r7, 64
-; CHECK-P8-NEXT:    lxvd2x vs4, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
-; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    li r7, 80
-; CHECK-P8-NEXT:    lxvd2x vs3, r4, r7
 ; CHECK-P8-NEXT:    li r7, 96
-; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    lxvd2x vs0, r4, r7
-; CHECK-P8-NEXT:    li r7, 112
-; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    li r6, 112
+; CHECK-P8-NEXT:    li r5, 16
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r7
-; CHECK-P8-NEXT:    li r7, 16
-; CHECK-P8-NEXT:    xxswapd v4, vs2
-; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    xxswapd v5, vs3
-; CHECK-P8-NEXT:    xvcvsxdsp vs3, v2
-; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    li r7, 64
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r6
+; CHECK-P8-NEXT:    li r6, 80
+; CHECK-P8-NEXT:    lxvd2x vs3, r4, r7
+; CHECK-P8-NEXT:    li r7, 32
+; CHECK-P8-NEXT:    lxvd2x vs2, r4, r6
+; CHECK-P8-NEXT:    li r6, 48
+; CHECK-P8-NEXT:    lxvd2x vs5, r4, r7
+; CHECK-P8-NEXT:    lxvd2x vs4, r4, r6
+; CHECK-P8-NEXT:    xxswapd v4, vs0
+; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
+; CHECK-P8-NEXT:    xxswapd v5, vs1
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    xxswapd v0, vs2
+; CHECK-P8-NEXT:    xxswapd v1, vs3
+; CHECK-P8-NEXT:    xvcvsxdsp vs2, v1
+; CHECK-P8-NEXT:    xvcvsxdsp vs3, v0
+; CHECK-P8-NEXT:    xxswapd v3, vs5
+; CHECK-P8-NEXT:    xxswapd v2, vs4
+; CHECK-P8-NEXT:    xvcvsxdsp vs4, v5
+; CHECK-P8-NEXT:    xvcvsxdsp vs5, v4
+; CHECK-P8-NEXT:    xxswapd v6, vs0
+; CHECK-P8-NEXT:    xxswapd v7, vs1
 ; CHECK-P8-NEXT:    xvcvsxdsp vs0, v3
-; CHECK-P8-NEXT:    xxswapd v3, vs1
-; CHECK-P8-NEXT:    xvcvsxdsp vs1, v4
-; CHECK-P8-NEXT:    xxswapd v4, vs2
-; CHECK-P8-NEXT:    xvcvsxdsp vs2, v5
-; CHECK-P8-NEXT:    xxswapd v5, vs4
-; CHECK-P8-NEXT:    xvcvsxdsp vs4, v2
-; CHECK-P8-NEXT:    xvcvsxdsp vs5, v3
-; CHECK-P8-NEXT:    xvcvsxdsp vs6, v4
-; CHECK-P8-NEXT:    xxsldwi v2, vs3, vs3, 3
-; CHECK-P8-NEXT:    xvcvsxdsp vs7, v5
-; CHECK-P8-NEXT:    xxsldwi v3, vs0, vs0, 3
-; CHECK-P8-NEXT:    xxsldwi v4, vs1, vs1, 3
-; CHECK-P8-NEXT:    xxsldwi v5, vs2, vs2, 3
+; CHECK-P8-NEXT:    xvcvsxdsp vs1, v2
+; CHECK-P8-NEXT:    xxsldwi v4, vs2, vs2, 3
+; CHECK-P8-NEXT:    xxsldwi v5, vs3, vs3, 3
 ; CHECK-P8-NEXT:    xxsldwi v0, vs4, vs4, 3
-; CHECK-P8-NEXT:    vpkudum v2, v3, v2
 ; CHECK-P8-NEXT:    xxsldwi v1, vs5, vs5, 3
-; CHECK-P8-NEXT:    xxsldwi v6, vs6, vs6, 3
+; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
+; CHECK-P8-NEXT:    xvcvsxdsp vs0, v6
+; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
+; CHECK-P8-NEXT:    vpkudum v2, v3, v2
 ; CHECK-P8-NEXT:    vpkudum v3, v5, v4
-; CHECK-P8-NEXT:    xxsldwi v7, vs7, vs7, 3
 ; CHECK-P8-NEXT:    vpkudum v4, v1, v0
-; CHECK-P8-NEXT:    vpkudum v5, v6, v7
-; CHECK-P8-NEXT:    xxswapd vs2, v2
 ; CHECK-P8-NEXT:    xxswapd vs1, v3
+; CHECK-P8-NEXT:    xxswapd vs2, v2
+; CHECK-P8-NEXT:    xxsldwi v6, vs0, vs0, 3
+; CHECK-P8-NEXT:    xvcvsxdsp vs0, v7
+; CHECK-P8-NEXT:    xxsldwi v7, vs0, vs0, 3
 ; CHECK-P8-NEXT:    xxswapd vs0, v4
-; CHECK-P8-NEXT:    xxswapd vs3, v5
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
+; CHECK-P8-NEXT:    vpkudum v5, v6, v7
+; CHECK-P8-NEXT:    xxswapd vs3, v5
 ; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
index fe98e94ae79b158..1ff1f6b7bc4e83e 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
@@ -67,12 +67,12 @@ define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
-; CHECK-P8-NEXT:    mtvsrwz v2, r3
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI1_0@toc@l
+; CHECK-P8-NEXT:    mtvsrwz v3, r3
 ; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI1_0@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs0
-; CHECK-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    vperm v2, v4, v3, v2
 ; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
 ; CHECK-P8-NEXT:    blr
 ;
@@ -107,20 +107,20 @@ define void @test8elt(ptr noalias nocapture sret(<8 x float>) %agg.result, i64 %
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI2_1@toc@ha
-; CHECK-P8-NEXT:    xxlxor v2, v2, v2
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
 ; CHECK-P8-NEXT:    mtvsrd v4, r4
+; CHECK-P8-NEXT:    xxlxor v5, v5, v5
 ; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-P8-NEXT:    addi r5, r6, .LCPI2_1@toc@l
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_1@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_1@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    xxswapd v3, vs0
-; CHECK-P8-NEXT:    xxswapd v5, vs1
-; CHECK-P8-NEXT:    vperm v3, v2, v4, v3
-; CHECK-P8-NEXT:    vperm v2, v2, v4, v5
-; CHECK-P8-NEXT:    xvcvuxwsp vs0, v3
-; CHECK-P8-NEXT:    xvcvuxwsp vs1, v2
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    vperm v2, v5, v4, v2
+; CHECK-P8-NEXT:    xvcvuxwsp vs0, v2
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vperm v3, v5, v4, v3
+; CHECK-P8-NEXT:    xvcvuxwsp vs1, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
@@ -173,40 +173,40 @@ define void @test16elt(ptr noalias nocapture sret(<16 x float>) %agg.result, <16
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_2@toc@ha
-; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    xxlxor v1, v1, v1
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_0@toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_2@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_1@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_1@toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_3@toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_1@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
-; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_2@toc@l
+; CHECK-P8-NEXT:    xxswapd v4, vs0
+; CHECK-P8-NEXT:    vperm v4, v1, v2, v4
+; CHECK-P8-NEXT:    xvcvuxwsp vs0, v4
 ; CHECK-P8-NEXT:    xxswapd v5, vs1
-; CHECK-P8-NEXT:    xxswapd v0, vs2
-; CHECK-P8-NEXT:    xxswapd v1, vs3
-; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
-; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
-; CHECK-P8-NEXT:    vperm v0, v4, v2, v0
-; CHECK-P8-NEXT:    vperm v2, v4, v2, v1
-; CHECK-P8-NEXT:    xvcvuxwsp vs0, v3
+; CHECK-P8-NEXT:    vperm v5, v1, v2, v5
 ; CHECK-P8-NEXT:    xvcvuxwsp vs1, v5
-; CHECK-P8-NEXT:    xvcvuxwsp vs2, v0
-; CHECK-P8-NEXT:    xvcvuxwsp vs3, v2
+; CHECK-P8-NEXT:    xxswapd v3, vs2
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    vperm v3, v1, v2, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd v0, vs2
+; CHECK-P8-NEXT:    xvcvuxwsp vs2, v3
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    vperm v2, v1, v2, v0
+; CHECK-P8-NEXT:    xvcvuxwsp vs3, v2
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -335,13 +335,13 @@ define <4 x float> @test4elt_signed(i32 %a.coerce) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
-; CHECK-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-P8-NEXT:    mtvsrwz v4, r3
+; CHECK-P8-NEXT:    vspltisw v3, 12
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_0@toc@l
+; CHECK-P8-NEXT:    vadduwm v3, v3, v3
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
-; CHECK-P8-NEXT:    vspltisw v3, 12
-; CHECK-P8-NEXT:    vadduwm v3, v3, v3
+; CHECK-P8-NEXT:    vperm v2, v4, v4, v2
 ; CHECK-P8-NEXT:    vslw v2, v2, v3
 ; CHECK-P8-NEXT:    vsraw v2, v2, v3
 ; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
@@ -378,25 +378,25 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x float>) %agg.result
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI6_1@toc@ha
-; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mtvsrd v5, r4
+; CHECK-P8-NEXT:    vspltisw v3, 12
 ; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_0@toc@l
+; CHECK-P8-NEXT:    vadduwm v3, v3, v3
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-P8-NEXT:    addi r5, r6, .LCPI6_1@toc@l
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_1@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_1@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v4, vs1
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
-; CHECK-P8-NEXT:    vperm v3, v3, v3, v4
-; CHECK-P8-NEXT:    vspltisw v4, 12
-; CHECK-P8-NEXT:    vadduwm v4, v4, v4
-; CHECK-P8-NEXT:    vslw v2, v2, v4
-; CHECK-P8-NEXT:    vslw v3, v3, v4
-; CHECK-P8-NEXT:    vsraw v2, v2, v4
-; CHECK-P8-NEXT:    vsraw v3, v3, v4
+; CHECK-P8-NEXT:    vperm v2, v5, v5, v2
+; CHECK-P8-NEXT:    vslw v2, v2, v3
+; CHECK-P8-NEXT:    vsraw v2, v2, v3
 ; CHECK-P8-NEXT:    xvcvsxwsp vs0, v2
-; CHECK-P8-NEXT:    xvcvsxwsp vs1, v3
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    vperm v4, v5, v5, v4
+; CHECK-P8-NEXT:    vslw v2, v4, v3
+; CHECK-P8-NEXT:    vsraw v2, v2, v3
+; CHECK-P8-NEXT:    xvcvsxwsp vs1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
@@ -451,49 +451,49 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x float>) %agg.resu
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_2@toc@ha
-; CHECK-P8-NEXT:    vspltisw v1, 12
+; CHECK-P8-NEXT:    vspltisw v3, 12
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_0@toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_2@toc@l
+; CHECK-P8-NEXT:    vadduwm v3, v3, v3
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_1@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_1@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_3@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_1@toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_3@toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_1@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_2@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_2@toc@l
+; CHECK-P8-NEXT:    xxswapd v5, vs0
+; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
+; CHECK-P8-NEXT:    vslw v5, v5, v3
+; CHECK-P8-NEXT:    vsraw v5, v5, v3
+; CHECK-P8-NEXT:    xvcvsxwsp vs0, v5
+; CHECK-P8-NEXT:    xxswapd v0, vs1
+; CHECK-P8-NEXT:    vperm v0, v2, v2, v0
+; CHECK-P8-NEXT:    vslw v0, v0, v3
+; CHECK-P8-NEXT:    vsraw v0, v0, v3
+; CHECK-P8-NEXT:    xvcvsxwsp vs1, v0
+; CHECK-P8-NEXT:    xxswapd v4, vs2
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    xxswapd v3, vs0
-; CHECK-P8-NEXT:    xxswapd v4, vs1
-; CHECK-P8-NEXT:    xxswapd v5, vs2
-; CHECK-P8-NEXT:    xxswapd v0, vs3
-; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
-; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
-; CHECK-P8-NEXT:    vperm v2, v2, v2, v0
-; CHECK-P8-NEXT:    vadduwm v0, v1, v1
-; CHECK-P8-NEXT:    vslw v3, v3, v0
-; CHECK-P8-NEXT:    vslw v4, v4, v0
-; CHECK-P8-NEXT:    vslw v5, v5, v0
-; CHECK-P8-NEXT:    vslw v2, v2, v0
-; CHECK-P8-NEXT:    vsraw v3, v3, v0
-; CHECK-P8-NEXT:    vsraw v4, v4, v0
-; CHECK-P8-NEXT:    vsraw v5, v5, v0
-; CHECK-P8-NEXT:    vsraw v2, v2, v0
-; CHECK-P8-NEXT:    xvcvsxwsp vs0, v3
-; CHECK-P8-NEXT:    xvcvsxwsp vs1, v4
-; CHECK-P8-NEXT:    xvcvsxwsp vs2, v5
-; CHECK-P8-NEXT:    xvcvsxwsp vs3, v2
+; CHECK-P8-NEXT:    vslw v4, v4, v3
+; CHECK-P8-NEXT:    xxswapd v1, vs2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    vperm v2, v2, v2, v1
+; CHECK-P8-NEXT:    vslw v2, v2, v3
+; CHECK-P8-NEXT:    vsraw v2, v2, v3
+; CHECK-P8-NEXT:    vsraw v3, v4, v3
+; CHECK-P8-NEXT:    xvcvsxwsp vs2, v3
+; CHECK-P8-NEXT:    xvcvsxwsp vs3, v2
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
 ; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    li r4, 32
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
index 08730dace391c37..af3132f88f001a8 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
@@ -13,12 +13,12 @@ define <2 x double> @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
-; CHECK-P8-NEXT:    mtvsrd v2, r3
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI0_0@toc@l
+; CHECK-P8-NEXT:    mtvsrd v3, r3
 ; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI0_0@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT:    xxswapd v3, vs0
-; CHECK-P8-NEXT:    vperm v2, v4, v2, v3
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    vperm v2, v4, v3, v2
 ; CHECK-P8-NEXT:    xvcvuxddp v2, v2
 ; CHECK-P8-NEXT:    blr
 ;
@@ -53,20 +53,20 @@ define void @test4elt(ptr noalias nocapture sret(<4 x double>) %agg.result, i32
 ; CHECK-P8-LABEL: test4elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI1_0@toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI1_1@toc@ha
-; CHECK-P8-NEXT:    xxlxor v2, v2, v2
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI1_0@toc@l
 ; CHECK-P8-NEXT:    mtvsrwz v4, r4
+; CHECK-P8-NEXT:    xxlxor v5, v5, v5
 ; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI1_0@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-P8-NEXT:    addi r5, r6, .LCPI1_1@toc@l
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI1_1@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI1_1@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    xxswapd v3, vs0
-; CHECK-P8-NEXT:    xxswapd v5, vs1
-; CHECK-P8-NEXT:    vperm v3, v2, v4, v3
-; CHECK-P8-NEXT:    vperm v2, v2, v4, v5
-; CHECK-P8-NEXT:    xvcvuxddp vs0, v3
-; CHECK-P8-NEXT:    xvcvuxddp vs1, v2
+; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    vperm v2, v5, v4, v2
+; CHECK-P8-NEXT:    xvcvuxddp vs0, v2
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vperm v3, v5, v4, v3
+; CHECK-P8-NEXT:    xvcvuxddp vs1, v3
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
@@ -119,41 +119,41 @@ define void @test8elt(ptr noalias nocapture sret(<8 x double>) %agg.result, i64
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI2_2@toc@ha
-; CHECK-P8-NEXT:    xxlxor v2, v2, v2
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
-; CHECK-P8-NEXT:    addi r6, r6, .LCPI2_2@toc@l
-; CHECK-P8-NEXT:    mtvsrd v4, r4
+; CHECK-P8-NEXT:    mtvsrd v0, r4
+; CHECK-P8-NEXT:    xxlxor v1, v1, v1
 ; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_1@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_1@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_3@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r6
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI2_1@toc@ha
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_3@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
-; CHECK-P8-NEXT:    addi r5, r6, .LCPI2_1@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_2@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_2@toc@l
 ; CHECK-P8-NEXT:    xxswapd v3, vs0
-; CHECK-P8-NEXT:    xxswapd v5, vs1
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    xxswapd v0, vs2
-; CHECK-P8-NEXT:    xxswapd v1, vs3
-; CHECK-P8-NEXT:    vperm v3, v2, v4, v3
-; CHECK-P8-NEXT:    vperm v5, v2, v4, v5
-; CHECK-P8-NEXT:    vperm v0, v2, v4, v0
-; CHECK-P8-NEXT:    vperm v2, v2, v4, v1
+; CHECK-P8-NEXT:    vperm v3, v1, v0, v3
 ; CHECK-P8-NEXT:    xvcvuxddp vs0, v3
-; CHECK-P8-NEXT:    xvcvuxddp vs1, v5
-; CHECK-P8-NEXT:    xvcvuxddp vs2, v0
-; CHECK-P8-NEXT:    xvcvuxddp vs3, v2
+; CHECK-P8-NEXT:    xxswapd v4, vs1
+; CHECK-P8-NEXT:    vperm v4, v1, v0, v4
+; CHECK-P8-NEXT:    xvcvuxddp vs1, v4
+; CHECK-P8-NEXT:    xxswapd v2, vs2
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
+; CHECK-P8-NEXT:    vperm v2, v1, v0, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd v5, vs2
+; CHECK-P8-NEXT:    xvcvuxddp vs2, v2
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    vperm v5, v1, v0, v5
+; CHECK-P8-NEXT:    xvcvuxddp vs3, v5
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -227,77 +227,77 @@ define void @test16elt(ptr noalias nocapture sret(<16 x double>) %agg.result, <1
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_1@toc@ha
-; CHECK-P8-NEXT:    xxlxor v4, v4, v4
+; CHECK-P8-NEXT:    xxlxor v0, v0, v0
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_0@toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_1@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_4@toc@ha
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_2@toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_4@toc@l
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_1@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_3@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_4@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_4@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_6@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_7@toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_6@toc@l
-; CHECK-P8-NEXT:    xxswapd v3, vs0
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_7@toc@l
-; CHECK-P8-NEXT:    xxswapd v5, vs1
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT:    lxvd2x vs4, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_7@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_7@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs5, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_5@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_3@toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_5@toc@l
-; CHECK-P8-NEXT:    xxswapd v0, vs2
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_3@toc@l
-; CHECK-P8-NEXT:    xxswapd v1, vs3
+; CHECK-P8-NEXT:    xxswapd v7, vs0
+; CHECK-P8-NEXT:    vperm v7, v0, v2, v7
+; CHECK-P8-NEXT:    xvcvuxddp vs0, v7
+; CHECK-P8-NEXT:    xxswapd v8, vs1
+; CHECK-P8-NEXT:    vperm v8, v0, v2, v8
+; CHECK-P8-NEXT:    xvcvuxddp vs1, v8
+; CHECK-P8-NEXT:    xxswapd v1, vs2
+; CHECK-P8-NEXT:    vperm v1, v0, v2, v1
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
+; CHECK-P8-NEXT:    xxswapd v6, vs3
+; CHECK-P8-NEXT:    vperm v6, v0, v2, v6
+; CHECK-P8-NEXT:    xvcvuxddp vs6, v6
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd v3, vs4
+; CHECK-P8-NEXT:    lxvd2x vs4, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_2@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
-; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
 ; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    li r5, 96
-; CHECK-P8-NEXT:    xxswapd v6, vs0
-; CHECK-P8-NEXT:    xxswapd v7, vs1
-; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
-; CHECK-P8-NEXT:    xxswapd v8, vs2
-; CHECK-P8-NEXT:    xxswapd v9, vs3
-; CHECK-P8-NEXT:    vperm v6, v4, v2, v6
-; CHECK-P8-NEXT:    vperm v7, v4, v2, v7
-; CHECK-P8-NEXT:    vperm v8, v4, v2, v8
-; CHECK-P8-NEXT:    vperm v0, v4, v2, v0
-; CHECK-P8-NEXT:    vperm v1, v4, v2, v1
-; CHECK-P8-NEXT:    vperm v2, v4, v2, v9
-; CHECK-P8-NEXT:    xvcvuxddp vs2, v6
-; CHECK-P8-NEXT:    xvcvuxddp vs3, v7
-; CHECK-P8-NEXT:    xvcvuxddp vs4, v8
-; CHECK-P8-NEXT:    xvcvuxddp vs0, v0
-; CHECK-P8-NEXT:    xvcvuxddp vs1, v1
-; CHECK-P8-NEXT:    xvcvuxddp vs5, v2
-; CHECK-P8-NEXT:    xvcvuxddp vs6, v3
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xvcvuxddp vs7, v5
+; CHECK-P8-NEXT:    vperm v3, v0, v2, v3
+; CHECK-P8-NEXT:    xvcvuxddp vs3, v3
+; CHECK-P8-NEXT:    xxswapd v4, vs5
+; CHECK-P8-NEXT:    xvcvuxddp vs5, v1
+; CHECK-P8-NEXT:    vperm v4, v0, v2, v4
+; CHECK-P8-NEXT:    xxswapd v5, vs4
+; CHECK-P8-NEXT:    xvcvuxddp vs4, v4
+; CHECK-P8-NEXT:    vperm v5, v0, v2, v5
 ; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xvcvuxddp vs7, v5
+; CHECK-P8-NEXT:    xxswapd v9, vs2
 ; CHECK-P8-NEXT:    xxswapd vs4, vs4
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    vperm v2, v0, v2, v9
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    xvcvuxddp vs2, v2
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
 ; CHECK-P8-NEXT:    li r4, 80
-; CHECK-P8-NEXT:    xxswapd vs5, vs5
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
-; CHECK-P8-NEXT:    li r5, 64
+; CHECK-P8-NEXT:    xxswapd vs4, vs5
+; CHECK-P8-NEXT:    xxswapd vs7, vs7
 ; CHECK-P8-NEXT:    xxswapd vs3, vs6
-; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs7, r3, r4
+; CHECK-P8-NEXT:    li r4, 64
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
 ; CHECK-P8-NEXT:    li r4, 48
-; CHECK-P8-NEXT:    xxswapd vs2, vs7
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    stxvd2x vs5, r3, r4
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -459,26 +459,26 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x double>) %agg.resul
 ; CHECK-P8-LABEL: test4elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI5_2@toc@ha
-; CHECK-P8-NEXT:    mtvsrwz v3, r4
+; CHECK-P8-NEXT:    mtvsrwz v4, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_1@toc@ha
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI5_0@toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_1@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-P8-NEXT:    addi r5, r6, .LCPI5_2@toc@l
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI5_2@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI5_2@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    xxswapd v4, vs1
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
-; CHECK-P8-NEXT:    vperm v3, v3, v3, v4
+; CHECK-P8-NEXT:    vperm v2, v4, v4, v2
+; CHECK-P8-NEXT:    xxswapd v3, vs1
+; CHECK-P8-NEXT:    vperm v3, v4, v4, v3
 ; CHECK-P8-NEXT:    lxvd2x v4, 0, r4
 ; CHECK-P8-NEXT:    li r4, 16
 ; CHECK-P8-NEXT:    vsld v2, v2, v4
-; CHECK-P8-NEXT:    vsld v3, v3, v4
 ; CHECK-P8-NEXT:    vsrad v2, v2, v4
-; CHECK-P8-NEXT:    vsrad v3, v3, v4
 ; CHECK-P8-NEXT:    xvcvsxddp vs0, v2
-; CHECK-P8-NEXT:    xvcvsxddp vs1, v3
+; CHECK-P8-NEXT:    vsld v2, v3, v4
+; CHECK-P8-NEXT:    vsrad v2, v2, v4
+; CHECK-P8-NEXT:    xvcvsxddp vs1, v2
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
@@ -533,51 +533,51 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x double>) %agg.resul
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI6_2@toc@ha
-; CHECK-P8-NEXT:    mtvsrd v3, r4
+; CHECK-P8-NEXT:    mtvsrd v0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_1@toc@ha
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_0@toc@l
-; CHECK-P8-NEXT:    addi r6, r6, .LCPI6_2@toc@l
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_1@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_2@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_2@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_4@toc@ha
+; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_4@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
 ; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_3@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r6
-; CHECK-P8-NEXT:    addis r6, r2, .LCPI6_4@toc@ha
 ; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_3@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
-; CHECK-P8-NEXT:    addi r5, r6, .LCPI6_4@toc@l
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
-; CHECK-P8-NEXT:    xxswapd v2, vs0
+; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    vperm v3, v0, v0, v3
 ; CHECK-P8-NEXT:    xxswapd v4, vs1
-; CHECK-P8-NEXT:    li r5, 32
+; CHECK-P8-NEXT:    vperm v4, v0, v0, v4
+; CHECK-P8-NEXT:    xxswapd v2, vs2
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
+; CHECK-P8-NEXT:    vperm v2, v0, v0, v2
 ; CHECK-P8-NEXT:    xxswapd v5, vs2
-; CHECK-P8-NEXT:    xxswapd v0, vs3
-; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
-; CHECK-P8-NEXT:    vperm v4, v3, v3, v4
-; CHECK-P8-NEXT:    vperm v5, v3, v3, v5
-; CHECK-P8-NEXT:    vperm v3, v3, v3, v0
+; CHECK-P8-NEXT:    vperm v5, v0, v0, v5
 ; CHECK-P8-NEXT:    lxvd2x v0, 0, r4
 ; CHECK-P8-NEXT:    li r4, 48
 ; CHECK-P8-NEXT:    vsld v2, v2, v0
-; CHECK-P8-NEXT:    vsld v4, v4, v0
 ; CHECK-P8-NEXT:    vsld v5, v5, v0
 ; CHECK-P8-NEXT:    vsld v3, v3, v0
+; CHECK-P8-NEXT:    vsld v4, v4, v0
 ; CHECK-P8-NEXT:    vsrad v2, v2, v0
+; CHECK-P8-NEXT:    vsrad v5, v5, v0
 ; CHECK-P8-NEXT:    vsrad v3, v3, v0
 ; CHECK-P8-NEXT:    vsrad v4, v4, v0
-; CHECK-P8-NEXT:    vsrad v5, v5, v0
-; CHECK-P8-NEXT:    xvcvsxddp vs2, v3
-; CHECK-P8-NEXT:    xvcvsxddp vs0, v2
-; CHECK-P8-NEXT:    xvcvsxddp vs1, v5
-; CHECK-P8-NEXT:    xvcvsxddp vs3, v4
+; CHECK-P8-NEXT:    xvcvsxddp vs2, v2
+; CHECK-P8-NEXT:    xvcvsxddp vs3, v5
+; CHECK-P8-NEXT:    xvcvsxddp vs0, v3
+; CHECK-P8-NEXT:    xvcvsxddp vs1, v4
 ; CHECK-P8-NEXT:    xxswapd vs2, vs2
-; CHECK-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-P8-NEXT:    xxswapd vs1, vs1
 ; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs1, vs1
+; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
+; CHECK-P8-NEXT:    li r4, 32
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
@@ -657,95 +657,95 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.res
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_2@toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_0@toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_2@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_3@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_4@toc@ha
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_3@toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_4@toc@l
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_2@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_2@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_4@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_4@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
-; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_5@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_6@toc@ha
-; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_5@toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_6@toc@l
-; CHECK-P8-NEXT:    xxswapd v3, vs0
-; CHECK-P8-NEXT:    xxswapd v4, vs1
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_7@toc@ha
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
-; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_8@toc@ha
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_7@toc@l
-; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_8@toc@l
-; CHECK-P8-NEXT:    xxswapd v5, vs2
-; CHECK-P8-NEXT:    xxswapd v0, vs3
-; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
-; CHECK-P8-NEXT:    lxvd2x vs3, 0, r5
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_8@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_8@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs4, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_6@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_6@toc@l
+; CHECK-P8-NEXT:    xxswapd v6, vs0
+; CHECK-P8-NEXT:    vperm v6, v2, v2, v6
+; CHECK-P8-NEXT:    xxswapd v7, vs1
+; CHECK-P8-NEXT:    vperm v7, v2, v2, v7
+; CHECK-P8-NEXT:    xxswapd v0, vs2
+; CHECK-P8-NEXT:    vperm v0, v2, v2, v0
+; CHECK-P8-NEXT:    xxswapd v3, vs3
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_5@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_5@toc@l
 ; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P8-NEXT:    xxswapd v4, vs4
+; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
+; CHECK-P8-NEXT:    xxswapd v5, vs3
+; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
+; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_3@toc@ha
+; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_3@toc@l
+; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_1@toc@ha
-; CHECK-P8-NEXT:    li r5, 96
-; CHECK-P8-NEXT:    xxswapd v1, vs0
-; CHECK-P8-NEXT:    xxswapd v6, vs1
 ; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_1@toc@l
-; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
-; CHECK-P8-NEXT:    lxvd2x v9, 0, r4
-; CHECK-P8-NEXT:    li r4, 112
-; CHECK-P8-NEXT:    xxswapd v7, vs2
-; CHECK-P8-NEXT:    xxswapd v8, vs3
 ; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
-; CHECK-P8-NEXT:    vperm v0, v2, v2, v0
+; CHECK-P8-NEXT:    xxswapd v1, vs3
 ; CHECK-P8-NEXT:    vperm v1, v2, v2, v1
-; CHECK-P8-NEXT:    vperm v6, v2, v2, v6
-; CHECK-P8-NEXT:    vperm v7, v2, v2, v7
+; CHECK-P8-NEXT:    xxswapd v8, vs2
 ; CHECK-P8-NEXT:    vperm v2, v2, v2, v8
-; CHECK-P8-NEXT:    vsld v3, v3, v9
-; CHECK-P8-NEXT:    vsld v0, v0, v9
-; CHECK-P8-NEXT:    vsld v1, v1, v9
-; CHECK-P8-NEXT:    vsld v6, v6, v9
-; CHECK-P8-NEXT:    vsld v7, v7, v9
-; CHECK-P8-NEXT:    vsld v2, v2, v9
-; CHECK-P8-NEXT:    vsrad v7, v7, v9
-; CHECK-P8-NEXT:    vsrad v2, v2, v9
-; CHECK-P8-NEXT:    vsld v4, v4, v9
-; CHECK-P8-NEXT:    vsld v5, v5, v9
-; CHECK-P8-NEXT:    vsrad v6, v6, v9
-; CHECK-P8-NEXT:    vsrad v0, v0, v9
-; CHECK-P8-NEXT:    vsrad v1, v1, v9
-; CHECK-P8-NEXT:    xvcvsxddp vs2, v7
-; CHECK-P8-NEXT:    xvcvsxddp vs3, v2
-; CHECK-P8-NEXT:    vsrad v3, v3, v9
-; CHECK-P8-NEXT:    vsrad v4, v4, v9
-; CHECK-P8-NEXT:    vsrad v5, v5, v9
-; CHECK-P8-NEXT:    xvcvsxddp vs4, v6
-; CHECK-P8-NEXT:    xvcvsxddp vs1, v1
-; CHECK-P8-NEXT:    xxswapd vs2, vs2
+; CHECK-P8-NEXT:    lxvd2x v8, 0, r4
+; CHECK-P8-NEXT:    li r4, 112
+; CHECK-P8-NEXT:    vsld v4, v4, v8
+; CHECK-P8-NEXT:    vsld v3, v3, v8
+; CHECK-P8-NEXT:    vsld v5, v5, v8
+; CHECK-P8-NEXT:    vsld v1, v1, v8
+; CHECK-P8-NEXT:    vsld v0, v0, v8
+; CHECK-P8-NEXT:    vsld v2, v2, v8
+; CHECK-P8-NEXT:    vsld v6, v6, v8
+; CHECK-P8-NEXT:    vsld v7, v7, v8
+; CHECK-P8-NEXT:    vsrad v4, v4, v8
+; CHECK-P8-NEXT:    vsrad v3, v3, v8
+; CHECK-P8-NEXT:    vsrad v5, v5, v8
+; CHECK-P8-NEXT:    vsrad v1, v1, v8
+; CHECK-P8-NEXT:    vsrad v0, v0, v8
+; CHECK-P8-NEXT:    vsrad v2, v2, v8
+; CHECK-P8-NEXT:    vsrad v6, v6, v8
+; CHECK-P8-NEXT:    vsrad v7, v7, v8
+; CHECK-P8-NEXT:    xvcvsxddp vs4, v4
+; CHECK-P8-NEXT:    xvcvsxddp vs3, v3
+; CHECK-P8-NEXT:    xvcvsxddp vs7, v5
+; CHECK-P8-NEXT:    xvcvsxddp vs6, v1
 ; CHECK-P8-NEXT:    xvcvsxddp vs5, v0
-; CHECK-P8-NEXT:    xxswapd vs3, vs3
-; CHECK-P8-NEXT:    xvcvsxddp vs0, v5
-; CHECK-P8-NEXT:    xvcvsxddp vs6, v3
-; CHECK-P8-NEXT:    xvcvsxddp vs7, v4
-; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
-; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    xvcvsxddp vs2, v2
+; CHECK-P8-NEXT:    xvcvsxddp vs0, v6
+; CHECK-P8-NEXT:    xvcvsxddp vs1, v7
 ; CHECK-P8-NEXT:    xxswapd vs4, vs4
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r5
-; CHECK-P8-NEXT:    li r5, 64
+; CHECK-P8-NEXT:    xxswapd vs3, vs3
+; CHECK-P8-NEXT:    xxswapd vs7, vs7
+; CHECK-P8-NEXT:    xxswapd vs2, vs2
 ; CHECK-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-P8-NEXT:    xxswapd vs5, vs5
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
-; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    li r4, 96
+; CHECK-P8-NEXT:    xxswapd vs4, vs5
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 80
+; CHECK-P8-NEXT:    stxvd2x vs7, r3, r4
+; CHECK-P8-NEXT:    li r4, 64
 ; CHECK-P8-NEXT:    xxswapd vs3, vs6
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
-; CHECK-P8-NEXT:    li r5, 32
-; CHECK-P8-NEXT:    xxswapd vs2, vs7
-; CHECK-P8-NEXT:    stxvd2x vs5, r3, r4
-; CHECK-P8-NEXT:    li r4, 16
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
+; CHECK-P8-NEXT:    li r4, 48
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
+; CHECK-P8-NEXT:    li r4, 32
 ; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
-; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    li r4, 16
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll
index e25a9adeb2ceacf..e3aa898d6e516d9 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll
@@ -101,21 +101,21 @@ entry:
 define void @test16elt(ptr noalias nocapture sret(<16 x float>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r5, 48
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    li r7, 16
 ; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    xvcvuxwsp vs3, vs3
 ; CHECK-P8-NEXT:    xvcvuxwsp vs0, vs0
-; CHECK-P8-NEXT:    xvcvuxwsp vs1, vs1
 ; CHECK-P8-NEXT:    xvcvuxwsp vs2, vs2
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    xvcvuxwsp vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    xvcvuxwsp vs0, vs3
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -248,21 +248,21 @@ entry:
 define void @test16elt_signed(ptr noalias nocapture sret(<16 x float>) %agg.result, ptr nocapture readonly) local_unnamed_addr #2 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r5, 48
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    li r7, 16
 ; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    xvcvsxwsp vs3, vs3
 ; CHECK-P8-NEXT:    xvcvsxwsp vs0, vs0
-; CHECK-P8-NEXT:    xvcvsxwsp vs1, vs1
 ; CHECK-P8-NEXT:    xvcvsxwsp vs2, vs2
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    xvcvsxwsp vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    xvcvsxwsp vs0, vs3
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll
index 6d7ca6577665c54..5458c174ec88dc7 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll
@@ -70,21 +70,21 @@ entry:
 define void @test8elt(ptr noalias nocapture sret(<8 x double>) %agg.result, ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test8elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r5, 48
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    li r7, 16
 ; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    xvcvuxddp vs3, vs3
 ; CHECK-P8-NEXT:    xvcvuxddp vs0, vs0
-; CHECK-P8-NEXT:    xvcvuxddp vs1, vs1
 ; CHECK-P8-NEXT:    xvcvuxddp vs2, vs2
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    xvcvuxddp vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    xvcvuxddp vs0, vs3
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt:
@@ -128,13 +128,14 @@ entry:
 define void @test16elt(ptr noalias nocapture sret(<16 x double>) %agg.result, ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test16elt:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r7, 64
-; CHECK-P8-NEXT:    li r8, 96
-; CHECK-P8-NEXT:    li r9, 112
-; CHECK-P8-NEXT:    li r10, 80
-; CHECK-P8-NEXT:    li r11, 48
+; CHECK-P8-NEXT:    li r5, 112
+; CHECK-P8-NEXT:    li r6, 96
+; CHECK-P8-NEXT:    li r7, 80
+; CHECK-P8-NEXT:    li r8, 64
+; CHECK-P8-NEXT:    li r9, 48
+; CHECK-P8-NEXT:    li r10, 32
+; CHECK-P8-NEXT:    li r11, 16
+; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
@@ -142,23 +143,22 @@ define void @test16elt(ptr noalias nocapture sret(<16 x double>) %agg.result, pt
 ; CHECK-P8-NEXT:    lxvd2x vs4, r4, r9
 ; CHECK-P8-NEXT:    lxvd2x vs5, r4, r10
 ; CHECK-P8-NEXT:    lxvd2x vs6, r4, r11
-; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
 ; CHECK-P8-NEXT:    xvcvuxddp vs0, vs0
-; CHECK-P8-NEXT:    xvcvuxddp vs1, vs1
-; CHECK-P8-NEXT:    xvcvuxddp vs2, vs2
-; CHECK-P8-NEXT:    xvcvuxddp vs3, vs3
-; CHECK-P8-NEXT:    xvcvuxddp vs4, vs4
-; CHECK-P8-NEXT:    xvcvuxddp vs5, vs5
 ; CHECK-P8-NEXT:    xvcvuxddp vs6, vs6
-; CHECK-P8-NEXT:    xvcvuxddp vs7, vs7
-; CHECK-P8-NEXT:    stxvd2x vs4, r3, r9
+; CHECK-P8-NEXT:    xvcvuxddp vs5, vs5
+; CHECK-P8-NEXT:    xvcvuxddp vs4, vs4
+; CHECK-P8-NEXT:    xvcvuxddp vs3, vs3
+; CHECK-P8-NEXT:    xvcvuxddp vs2, vs2
+; CHECK-P8-NEXT:    xvcvuxddp vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    xvcvuxddp vs0, vs7
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r8
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r9
 ; CHECK-P8-NEXT:    stxvd2x vs5, r3, r10
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
 ; CHECK-P8-NEXT:    stxvd2x vs6, r3, r11
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs7, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt:
@@ -284,21 +284,21 @@ entry:
 define void @test8elt_signed(ptr noalias nocapture sret(<8 x double>) %agg.result, ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test8elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
+; CHECK-P8-NEXT:    li r5, 48
 ; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r7, 48
+; CHECK-P8-NEXT:    li r7, 16
 ; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
-; CHECK-P8-NEXT:    xvcvsxddp vs3, vs3
 ; CHECK-P8-NEXT:    xvcvsxddp vs0, vs0
-; CHECK-P8-NEXT:    xvcvsxddp vs1, vs1
 ; CHECK-P8-NEXT:    xvcvsxddp vs2, vs2
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    xvcvsxddp vs1, vs1
 ; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs3, 0, r3
+; CHECK-P8-NEXT:    xvcvsxddp vs0, vs3
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test8elt_signed:
@@ -342,13 +342,14 @@ entry:
 define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.result, ptr nocapture readonly) local_unnamed_addr #1 {
 ; CHECK-P8-LABEL: test16elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    li r5, 16
-; CHECK-P8-NEXT:    li r6, 32
-; CHECK-P8-NEXT:    li r7, 64
-; CHECK-P8-NEXT:    li r8, 96
-; CHECK-P8-NEXT:    li r9, 112
-; CHECK-P8-NEXT:    li r10, 80
-; CHECK-P8-NEXT:    li r11, 48
+; CHECK-P8-NEXT:    li r5, 112
+; CHECK-P8-NEXT:    li r6, 96
+; CHECK-P8-NEXT:    li r7, 80
+; CHECK-P8-NEXT:    li r8, 64
+; CHECK-P8-NEXT:    li r9, 48
+; CHECK-P8-NEXT:    li r10, 32
+; CHECK-P8-NEXT:    li r11, 16
+; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
 ; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
 ; CHECK-P8-NEXT:    lxvd2x vs1, r4, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, r4, r7
@@ -356,23 +357,22 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.res
 ; CHECK-P8-NEXT:    lxvd2x vs4, r4, r9
 ; CHECK-P8-NEXT:    lxvd2x vs5, r4, r10
 ; CHECK-P8-NEXT:    lxvd2x vs6, r4, r11
-; CHECK-P8-NEXT:    lxvd2x vs7, 0, r4
 ; CHECK-P8-NEXT:    xvcvsxddp vs0, vs0
-; CHECK-P8-NEXT:    xvcvsxddp vs1, vs1
-; CHECK-P8-NEXT:    xvcvsxddp vs2, vs2
-; CHECK-P8-NEXT:    xvcvsxddp vs3, vs3
-; CHECK-P8-NEXT:    xvcvsxddp vs4, vs4
-; CHECK-P8-NEXT:    xvcvsxddp vs5, vs5
 ; CHECK-P8-NEXT:    xvcvsxddp vs6, vs6
-; CHECK-P8-NEXT:    xvcvsxddp vs7, vs7
-; CHECK-P8-NEXT:    stxvd2x vs4, r3, r9
+; CHECK-P8-NEXT:    xvcvsxddp vs5, vs5
+; CHECK-P8-NEXT:    xvcvsxddp vs4, vs4
+; CHECK-P8-NEXT:    xvcvsxddp vs3, vs3
+; CHECK-P8-NEXT:    xvcvsxddp vs2, vs2
+; CHECK-P8-NEXT:    xvcvsxddp vs1, vs1
+; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
+; CHECK-P8-NEXT:    xvcvsxddp vs0, vs7
+; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
+; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
 ; CHECK-P8-NEXT:    stxvd2x vs3, r3, r8
+; CHECK-P8-NEXT:    stxvd2x vs4, r3, r9
 ; CHECK-P8-NEXT:    stxvd2x vs5, r3, r10
-; CHECK-P8-NEXT:    stxvd2x vs2, r3, r7
 ; CHECK-P8-NEXT:    stxvd2x vs6, r3, r11
-; CHECK-P8-NEXT:    stxvd2x vs1, r3, r6
-; CHECK-P8-NEXT:    stxvd2x vs0, r3, r5
-; CHECK-P8-NEXT:    stxvd2x vs7, 0, r3
+; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: test16elt_signed:

diff  --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
index 99fb4cd2c5a5da0..b98aed8616509e8 100644
--- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
@@ -39,8 +39,8 @@ define <16 x i8> @testByte(<16 x i8> %a, i64 %b, i64 %idx) {
 ;
 ; AIX-P8-64-LABEL: testByte:
 ; AIX-P8-64:       # %bb.0: # %entry
-; AIX-P8-64-NEXT:    addi r5, r1, -16
 ; AIX-P8-64-NEXT:    clrldi r4, r4, 60
+; AIX-P8-64-NEXT:    addi r5, r1, -16
 ; AIX-P8-64-NEXT:    stxvw4x v2, 0, r5
 ; AIX-P8-64-NEXT:    stbx r3, r5, r4
 ; AIX-P8-64-NEXT:    lxvw4x v2, 0, r5
@@ -48,11 +48,11 @@ define <16 x i8> @testByte(<16 x i8> %a, i64 %b, i64 %idx) {
 ;
 ; AIX-P8-32-LABEL: testByte:
 ; AIX-P8-32:       # %bb.0: # %entry
-; AIX-P8-32-NEXT:    addi r3, r1, -16
-; AIX-P8-32-NEXT:    clrlwi r5, r6, 28
-; AIX-P8-32-NEXT:    stxvw4x v2, 0, r3
-; AIX-P8-32-NEXT:    stbx r4, r3, r5
-; AIX-P8-32-NEXT:    lxvw4x v2, 0, r3
+; AIX-P8-32-NEXT:    clrlwi r3, r6, 28
+; AIX-P8-32-NEXT:    addi r5, r1, -16
+; AIX-P8-32-NEXT:    stxvw4x v2, 0, r5
+; AIX-P8-32-NEXT:    stbx r4, r5, r3
+; AIX-P8-32-NEXT:    lxvw4x v2, 0, r5
 ; AIX-P8-32-NEXT:    blr
 entry:
   %conv = trunc i64 %b to i8
@@ -86,8 +86,8 @@ define <8 x i16> @testHalf(<8 x i16> %a, i64 %b, i64 %idx) {
 ;
 ; AIX-P8-64-LABEL: testHalf:
 ; AIX-P8-64:       # %bb.0: # %entry
-; AIX-P8-64-NEXT:    addi r5, r1, -16
 ; AIX-P8-64-NEXT:    rlwinm r4, r4, 1, 28, 30
+; AIX-P8-64-NEXT:    addi r5, r1, -16
 ; AIX-P8-64-NEXT:    stxvw4x v2, 0, r5
 ; AIX-P8-64-NEXT:    sthx r3, r5, r4
 ; AIX-P8-64-NEXT:    lxvw4x v2, 0, r5
@@ -95,11 +95,11 @@ define <8 x i16> @testHalf(<8 x i16> %a, i64 %b, i64 %idx) {
 ;
 ; AIX-P8-32-LABEL: testHalf:
 ; AIX-P8-32:       # %bb.0: # %entry
-; AIX-P8-32-NEXT:    addi r3, r1, -16
-; AIX-P8-32-NEXT:    rlwinm r5, r6, 1, 28, 30
-; AIX-P8-32-NEXT:    stxvw4x v2, 0, r3
-; AIX-P8-32-NEXT:    sthx r4, r3, r5
-; AIX-P8-32-NEXT:    lxvw4x v2, 0, r3
+; AIX-P8-32-NEXT:    rlwinm r3, r6, 1, 28, 30
+; AIX-P8-32-NEXT:    addi r5, r1, -16
+; AIX-P8-32-NEXT:    stxvw4x v2, 0, r5
+; AIX-P8-32-NEXT:    sthx r4, r5, r3
+; AIX-P8-32-NEXT:    lxvw4x v2, 0, r5
 ; AIX-P8-32-NEXT:    blr
 entry:
   %conv = trunc i64 %b to i16
@@ -133,8 +133,8 @@ define <4 x i32> @testWord(<4 x i32> %a, i64 %b, i64 %idx) {
 ;
 ; AIX-P8-64-LABEL: testWord:
 ; AIX-P8-64:       # %bb.0: # %entry
-; AIX-P8-64-NEXT:    addi r5, r1, -16
 ; AIX-P8-64-NEXT:    rlwinm r4, r4, 2, 28, 29
+; AIX-P8-64-NEXT:    addi r5, r1, -16
 ; AIX-P8-64-NEXT:    stxvw4x v2, 0, r5
 ; AIX-P8-64-NEXT:    stwx r3, r5, r4
 ; AIX-P8-64-NEXT:    lxvw4x v2, 0, r5
@@ -142,11 +142,11 @@ define <4 x i32> @testWord(<4 x i32> %a, i64 %b, i64 %idx) {
 ;
 ; AIX-P8-32-LABEL: testWord:
 ; AIX-P8-32:       # %bb.0: # %entry
-; AIX-P8-32-NEXT:    addi r3, r1, -16
-; AIX-P8-32-NEXT:    rlwinm r5, r6, 2, 28, 29
-; AIX-P8-32-NEXT:    stxvw4x v2, 0, r3
-; AIX-P8-32-NEXT:    stwx r4, r3, r5
-; AIX-P8-32-NEXT:    lxvw4x v2, 0, r3
+; AIX-P8-32-NEXT:    rlwinm r3, r6, 2, 28, 29
+; AIX-P8-32-NEXT:    addi r5, r1, -16
+; AIX-P8-32-NEXT:    stxvw4x v2, 0, r5
+; AIX-P8-32-NEXT:    stwx r4, r5, r3
+; AIX-P8-32-NEXT:    lxvw4x v2, 0, r5
 ; AIX-P8-32-NEXT:    blr
 entry:
   %conv = trunc i64 %b to i32
@@ -191,9 +191,9 @@ define <4 x i32> @testWordImm(<4 x i32> %a, i64 %b) {
 ; AIX-P8-32:       # %bb.0: # %entry
 ; AIX-P8-32-NEXT:    lwz r3, L..C0(r2) # %const.0
 ; AIX-P8-32-NEXT:    stw r4, -16(r1)
-; AIX-P8-32-NEXT:    addi r4, r1, -16
-; AIX-P8-32-NEXT:    lxvw4x v4, 0, r4
 ; AIX-P8-32-NEXT:    lxvw4x v3, 0, r3
+; AIX-P8-32-NEXT:    addi r3, r1, -16
+; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
 ; AIX-P8-32-NEXT:    lwz r3, L..C1(r2) # %const.1
 ; AIX-P8-32-NEXT:    vperm v2, v2, v4, v3
 ; AIX-P8-32-NEXT:    lxvw4x v3, 0, r3
@@ -232,8 +232,8 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
 ;
 ; AIX-P8-64-LABEL: testDoubleword:
 ; AIX-P8-64:       # %bb.0: # %entry
-; AIX-P8-64-NEXT:    addi r5, r1, -16
 ; AIX-P8-64-NEXT:    rlwinm r4, r4, 3, 28, 28
+; AIX-P8-64-NEXT:    addi r5, r1, -16
 ; AIX-P8-64-NEXT:    stxvd2x v2, 0, r5
 ; AIX-P8-64-NEXT:    stdx r3, r5, r4
 ; AIX-P8-64-NEXT:    lxvd2x v2, 0, r5
@@ -246,13 +246,13 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
 ; AIX-P8-32-NEXT:    rlwinm r7, r6, 2, 28, 29
 ; AIX-P8-32-NEXT:    stxvw4x v2, 0, r5
 ; AIX-P8-32-NEXT:    stwx r3, r5, r7
-; AIX-P8-32-NEXT:    addi r3, r6, 1
+; AIX-P8-32-NEXT:    addi r3, r1, -16
 ; AIX-P8-32-NEXT:    lxvw4x vs0, 0, r5
-; AIX-P8-32-NEXT:    addi r5, r1, -16
-; AIX-P8-32-NEXT:    rlwinm r3, r3, 2, 28, 29
-; AIX-P8-32-NEXT:    stxvw4x vs0, 0, r5
-; AIX-P8-32-NEXT:    stwx r4, r5, r3
-; AIX-P8-32-NEXT:    lxvw4x v2, 0, r5
+; AIX-P8-32-NEXT:    addi r5, r6, 1
+; AIX-P8-32-NEXT:    rlwinm r5, r5, 2, 28, 29
+; AIX-P8-32-NEXT:    stxvw4x vs0, 0, r3
+; AIX-P8-32-NEXT:    stwx r4, r3, r5
+; AIX-P8-32-NEXT:    lxvw4x v2, 0, r3
 ; AIX-P8-32-NEXT:    blr
 entry:
   %vecins = insertelement <2 x i64> %a, i64 %b, i64 %idx
@@ -286,17 +286,17 @@ define <2 x i64> @testDoublewordImm(<2 x i64> %a, i64 %b) {
 ;
 ; AIX-P8-32-LABEL: testDoublewordImm:
 ; AIX-P8-32:       # %bb.0: # %entry
-; AIX-P8-32-NEXT:    lwz r5, L..C2(r2) # %const.0
 ; AIX-P8-32-NEXT:    stw r3, -16(r1)
+; AIX-P8-32-NEXT:    lwz r3, L..C2(r2) # %const.0
 ; AIX-P8-32-NEXT:    stw r4, -32(r1)
+; AIX-P8-32-NEXT:    lxvw4x v3, 0, r3
 ; AIX-P8-32-NEXT:    addi r3, r1, -16
-; AIX-P8-32-NEXT:    addi r4, r1, -32
 ; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
 ; AIX-P8-32-NEXT:    lwz r3, L..C3(r2) # %const.1
-; AIX-P8-32-NEXT:    lxvw4x v3, 0, r5
 ; AIX-P8-32-NEXT:    vperm v2, v2, v4, v3
 ; AIX-P8-32-NEXT:    lxvw4x v3, 0, r3
-; AIX-P8-32-NEXT:    lxvw4x v4, 0, r4
+; AIX-P8-32-NEXT:    addi r3, r1, -32
+; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
 ; AIX-P8-32-NEXT:    vperm v2, v2, v4, v3
 ; AIX-P8-32-NEXT:    blr
 entry:
@@ -329,17 +329,17 @@ define <2 x i64> @testDoublewordImm2(<2 x i64> %a, i64 %b) {
 ;
 ; AIX-P8-32-LABEL: testDoublewordImm2:
 ; AIX-P8-32:       # %bb.0: # %entry
-; AIX-P8-32-NEXT:    lwz r5, L..C4(r2) # %const.0
 ; AIX-P8-32-NEXT:    stw r3, -16(r1)
+; AIX-P8-32-NEXT:    lwz r3, L..C4(r2) # %const.0
 ; AIX-P8-32-NEXT:    stw r4, -32(r1)
+; AIX-P8-32-NEXT:    lxvw4x v3, 0, r3
 ; AIX-P8-32-NEXT:    addi r3, r1, -16
-; AIX-P8-32-NEXT:    addi r4, r1, -32
 ; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
 ; AIX-P8-32-NEXT:    lwz r3, L..C5(r2) # %const.1
-; AIX-P8-32-NEXT:    lxvw4x v3, 0, r5
 ; AIX-P8-32-NEXT:    vperm v2, v4, v2, v3
 ; AIX-P8-32-NEXT:    lxvw4x v3, 0, r3
-; AIX-P8-32-NEXT:    lxvw4x v4, 0, r4
+; AIX-P8-32-NEXT:    addi r3, r1, -32
+; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
 ; AIX-P8-32-NEXT:    vperm v2, v2, v4, v3
 ; AIX-P8-32-NEXT:    blr
 entry:
@@ -375,11 +375,11 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
 ;
 ; AIX-P8-LABEL: testFloat1:
 ; AIX-P8:       # %bb.0: # %entry
-; AIX-P8-NEXT:    addi r3, r1, -16
-; AIX-P8-NEXT:    rlwinm r4, r4, 2, 28, 29
-; AIX-P8-NEXT:    stxvw4x v2, 0, r3
-; AIX-P8-NEXT:    stfsx f1, r3, r4
-; AIX-P8-NEXT:    lxvw4x v2, 0, r3
+; AIX-P8-NEXT:    rlwinm r3, r4, 2, 28, 29
+; AIX-P8-NEXT:    addi r4, r1, -16
+; AIX-P8-NEXT:    stxvw4x v2, 0, r4
+; AIX-P8-NEXT:    stfsx f1, r4, r3
+; AIX-P8-NEXT:    lxvw4x v2, 0, r4
 ; AIX-P8-NEXT:    blr
 entry:
   %vecins = insertelement <4 x float> %a, float %b, i32 %idx1
@@ -425,18 +425,18 @@ define <4 x float> @testFloat2(<4 x float> %a, ptr %b, i32 zeroext %idx1, i32 ze
 ;
 ; AIX-P8-LABEL: testFloat2:
 ; AIX-P8:       # %bb.0: # %entry
-; AIX-P8-NEXT:    lwz r7, 0(r3)
-; AIX-P8-NEXT:    addi r6, r1, -32
+; AIX-P8-NEXT:    lwz r6, 0(r3)
 ; AIX-P8-NEXT:    rlwinm r4, r4, 2, 28, 29
-; AIX-P8-NEXT:    stxvw4x v2, 0, r6
-; AIX-P8-NEXT:    stwx r7, r6, r4
-; AIX-P8-NEXT:    rlwinm r4, r5, 2, 28, 29
-; AIX-P8-NEXT:    addi r5, r1, -16
-; AIX-P8-NEXT:    lxvw4x vs0, 0, r6
+; AIX-P8-NEXT:    addi r7, r1, -32
+; AIX-P8-NEXT:    stxvw4x v2, 0, r7
+; AIX-P8-NEXT:    rlwinm r5, r5, 2, 28, 29
+; AIX-P8-NEXT:    stwx r6, r7, r4
+; AIX-P8-NEXT:    addi r4, r1, -16
+; AIX-P8-NEXT:    lxvw4x vs0, 0, r7
 ; AIX-P8-NEXT:    lwz r3, 1(r3)
-; AIX-P8-NEXT:    stxvw4x vs0, 0, r5
-; AIX-P8-NEXT:    stwx r3, r5, r4
-; AIX-P8-NEXT:    lxvw4x v2, 0, r5
+; AIX-P8-NEXT:    stxvw4x vs0, 0, r4
+; AIX-P8-NEXT:    stwx r3, r4, r5
+; AIX-P8-NEXT:    lxvw4x v2, 0, r4
 ; AIX-P8-NEXT:    blr
 entry:
   %add.ptr1 = getelementptr inbounds i8, ptr %b, i64 1
@@ -496,6 +496,7 @@ define <4 x float> @testFloat3(<4 x float> %a, ptr %b, i32 zeroext %idx1, i32 ze
 ; AIX-P8-64-NEXT:    lis r6, 1
 ; AIX-P8-64-NEXT:    rlwinm r4, r4, 2, 28, 29
 ; AIX-P8-64-NEXT:    addi r7, r1, -32
+; AIX-P8-64-NEXT:    rlwinm r5, r5, 2, 28, 29
 ; AIX-P8-64-NEXT:    lwzx r6, r3, r6
 ; AIX-P8-64-NEXT:    stxvw4x v2, 0, r7
 ; AIX-P8-64-NEXT:    stwx r6, r7, r4
@@ -503,11 +504,10 @@ define <4 x float> @testFloat3(<4 x float> %a, ptr %b, i32 zeroext %idx1, i32 ze
 ; AIX-P8-64-NEXT:    lxvw4x vs0, 0, r7
 ; AIX-P8-64-NEXT:    rldic r4, r4, 36, 27
 ; AIX-P8-64-NEXT:    lwzx r3, r3, r4
-; AIX-P8-64-NEXT:    rlwinm r4, r5, 2, 28, 29
-; AIX-P8-64-NEXT:    addi r5, r1, -16
-; AIX-P8-64-NEXT:    stxvw4x vs0, 0, r5
-; AIX-P8-64-NEXT:    stwx r3, r5, r4
-; AIX-P8-64-NEXT:    lxvw4x v2, 0, r5
+; AIX-P8-64-NEXT:    addi r4, r1, -16
+; AIX-P8-64-NEXT:    stxvw4x vs0, 0, r4
+; AIX-P8-64-NEXT:    stwx r3, r4, r5
+; AIX-P8-64-NEXT:    lxvw4x v2, 0, r4
 ; AIX-P8-64-NEXT:    blr
 ;
 ; AIX-P8-32-LABEL: testFloat3:
@@ -515,16 +515,16 @@ define <4 x float> @testFloat3(<4 x float> %a, ptr %b, i32 zeroext %idx1, i32 ze
 ; AIX-P8-32-NEXT:    lis r6, 1
 ; AIX-P8-32-NEXT:    rlwinm r4, r4, 2, 28, 29
 ; AIX-P8-32-NEXT:    addi r7, r1, -32
+; AIX-P8-32-NEXT:    rlwinm r5, r5, 2, 28, 29
 ; AIX-P8-32-NEXT:    lwzx r6, r3, r6
 ; AIX-P8-32-NEXT:    stxvw4x v2, 0, r7
 ; AIX-P8-32-NEXT:    stwx r6, r7, r4
-; AIX-P8-32-NEXT:    rlwinm r4, r5, 2, 28, 29
-; AIX-P8-32-NEXT:    addi r5, r1, -16
+; AIX-P8-32-NEXT:    addi r4, r1, -16
 ; AIX-P8-32-NEXT:    lxvw4x vs0, 0, r7
 ; AIX-P8-32-NEXT:    lwz r3, 0(r3)
-; AIX-P8-32-NEXT:    stxvw4x vs0, 0, r5
-; AIX-P8-32-NEXT:    stwx r3, r5, r4
-; AIX-P8-32-NEXT:    lxvw4x v2, 0, r5
+; AIX-P8-32-NEXT:    stxvw4x vs0, 0, r4
+; AIX-P8-32-NEXT:    stwx r3, r4, r5
+; AIX-P8-32-NEXT:    lxvw4x v2, 0, r4
 ; AIX-P8-32-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i8, ptr %b, i64 65536
@@ -563,23 +563,23 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
 ; AIX-P8-64-LABEL: testFloatImm1:
 ; AIX-P8-64:       # %bb.0: # %entry
 ; AIX-P8-64-NEXT:    ld r3, L..C2(r2) # %const.0
-; AIX-P8-64-NEXT:    xscvdpspn v3, f1
-; AIX-P8-64-NEXT:    lxvw4x v4, 0, r3
+; AIX-P8-64-NEXT:    xscvdpspn v4, f1
+; AIX-P8-64-NEXT:    lxvw4x v3, 0, r3
 ; AIX-P8-64-NEXT:    ld r3, L..C3(r2) # %const.1
-; AIX-P8-64-NEXT:    vperm v2, v3, v2, v4
-; AIX-P8-64-NEXT:    lxvw4x v4, 0, r3
-; AIX-P8-64-NEXT:    vperm v2, v2, v3, v4
+; AIX-P8-64-NEXT:    vperm v2, v4, v2, v3
+; AIX-P8-64-NEXT:    lxvw4x v3, 0, r3
+; AIX-P8-64-NEXT:    vperm v2, v2, v4, v3
 ; AIX-P8-64-NEXT:    blr
 ;
 ; AIX-P8-32-LABEL: testFloatImm1:
 ; AIX-P8-32:       # %bb.0: # %entry
 ; AIX-P8-32-NEXT:    lwz r3, L..C6(r2) # %const.0
-; AIX-P8-32-NEXT:    xscvdpspn v3, f1
-; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
+; AIX-P8-32-NEXT:    xscvdpspn v4, f1
+; AIX-P8-32-NEXT:    lxvw4x v3, 0, r3
 ; AIX-P8-32-NEXT:    lwz r3, L..C7(r2) # %const.1
-; AIX-P8-32-NEXT:    vperm v2, v3, v2, v4
-; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
-; AIX-P8-32-NEXT:    vperm v2, v2, v3, v4
+; AIX-P8-32-NEXT:    vperm v2, v4, v2, v3
+; AIX-P8-32-NEXT:    lxvw4x v3, 0, r3
+; AIX-P8-32-NEXT:    vperm v2, v2, v4, v3
 ; AIX-P8-32-NEXT:    blr
 entry:
   %vecins = insertelement <4 x float> %a, float %b, i32 0
@@ -617,26 +617,26 @@ define <4 x float> @testFloatImm2(<4 x float> %a, ptr %b) {
 ; AIX-P8-64-LABEL: testFloatImm2:
 ; AIX-P8-64:       # %bb.0: # %entry
 ; AIX-P8-64-NEXT:    ld r4, L..C4(r2) # %const.0
-; AIX-P8-64-NEXT:    lxsiwzx v3, 0, r3
-; AIX-P8-64-NEXT:    li r5, 4
-; AIX-P8-64-NEXT:    lxvw4x v4, 0, r4
-; AIX-P8-64-NEXT:    ld r4, L..C5(r2) # %const.1
-; AIX-P8-64-NEXT:    vperm v2, v3, v2, v4
-; AIX-P8-64-NEXT:    lxsiwzx v3, r3, r5
-; AIX-P8-64-NEXT:    lxvw4x v4, 0, r4
+; AIX-P8-64-NEXT:    lxsiwzx v4, 0, r3
+; AIX-P8-64-NEXT:    lxvw4x v3, 0, r4
+; AIX-P8-64-NEXT:    li r4, 4
+; AIX-P8-64-NEXT:    vperm v2, v4, v2, v3
+; AIX-P8-64-NEXT:    lxsiwzx v3, r3, r4
+; AIX-P8-64-NEXT:    ld r3, L..C5(r2) # %const.1
+; AIX-P8-64-NEXT:    lxvw4x v4, 0, r3
 ; AIX-P8-64-NEXT:    vperm v2, v2, v3, v4
 ; AIX-P8-64-NEXT:    blr
 ;
 ; AIX-P8-32-LABEL: testFloatImm2:
 ; AIX-P8-32:       # %bb.0: # %entry
 ; AIX-P8-32-NEXT:    lwz r4, L..C8(r2) # %const.0
-; AIX-P8-32-NEXT:    lxsiwzx v3, 0, r3
-; AIX-P8-32-NEXT:    li r5, 4
-; AIX-P8-32-NEXT:    lxvw4x v4, 0, r4
-; AIX-P8-32-NEXT:    lwz r4, L..C9(r2) # %const.1
-; AIX-P8-32-NEXT:    vperm v2, v3, v2, v4
-; AIX-P8-32-NEXT:    lxsiwzx v3, r3, r5
-; AIX-P8-32-NEXT:    lxvw4x v4, 0, r4
+; AIX-P8-32-NEXT:    lxsiwzx v4, 0, r3
+; AIX-P8-32-NEXT:    lxvw4x v3, 0, r4
+; AIX-P8-32-NEXT:    li r4, 4
+; AIX-P8-32-NEXT:    vperm v2, v4, v2, v3
+; AIX-P8-32-NEXT:    lxsiwzx v3, r3, r4
+; AIX-P8-32-NEXT:    lwz r3, L..C9(r2) # %const.1
+; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
 ; AIX-P8-32-NEXT:    vperm v2, v2, v3, v4
 ; AIX-P8-32-NEXT:    blr
 entry:
@@ -684,24 +684,24 @@ define <4 x float> @testFloatImm3(<4 x float> %a, ptr %b) {
 ;
 ; AIX-P8-64-LABEL: testFloatImm3:
 ; AIX-P8-64:       # %bb.0: # %entry
+; AIX-P8-64-NEXT:    lis r4, 4
+; AIX-P8-64-NEXT:    lxsiwzx v3, r3, r4
 ; AIX-P8-64-NEXT:    ld r4, L..C6(r2) # %const.0
-; AIX-P8-64-NEXT:    lis r5, 4
-; AIX-P8-64-NEXT:    lxsiwzx v3, r3, r5
-; AIX-P8-64-NEXT:    li r5, 1
-; AIX-P8-64-NEXT:    rldic r5, r5, 38, 25
 ; AIX-P8-64-NEXT:    lxvw4x v4, 0, r4
-; AIX-P8-64-NEXT:    ld r4, L..C7(r2) # %const.1
+; AIX-P8-64-NEXT:    li r4, 1
+; AIX-P8-64-NEXT:    rldic r4, r4, 38, 25
 ; AIX-P8-64-NEXT:    vperm v2, v3, v2, v4
-; AIX-P8-64-NEXT:    lxsiwzx v3, r3, r5
-; AIX-P8-64-NEXT:    lxvw4x v4, 0, r4
+; AIX-P8-64-NEXT:    lxsiwzx v3, r3, r4
+; AIX-P8-64-NEXT:    ld r3, L..C7(r2) # %const.1
+; AIX-P8-64-NEXT:    lxvw4x v4, 0, r3
 ; AIX-P8-64-NEXT:    vperm v2, v2, v3, v4
 ; AIX-P8-64-NEXT:    blr
 ;
 ; AIX-P8-32-LABEL: testFloatImm3:
 ; AIX-P8-32:       # %bb.0: # %entry
+; AIX-P8-32-NEXT:    lis r4, 4
+; AIX-P8-32-NEXT:    lxsiwzx v3, r3, r4
 ; AIX-P8-32-NEXT:    lwz r4, L..C10(r2) # %const.0
-; AIX-P8-32-NEXT:    lis r5, 4
-; AIX-P8-32-NEXT:    lxsiwzx v3, r3, r5
 ; AIX-P8-32-NEXT:    lxvw4x v4, 0, r4
 ; AIX-P8-32-NEXT:    lwz r4, L..C11(r2) # %const.1
 ; AIX-P8-32-NEXT:    vperm v2, v3, v2, v4
@@ -747,20 +747,20 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1)
 ;
 ; AIX-P8-64-LABEL: testDouble1:
 ; AIX-P8-64:       # %bb.0: # %entry
-; AIX-P8-64-NEXT:    addi r3, r1, -16
-; AIX-P8-64-NEXT:    rlwinm r4, r4, 3, 28, 28
-; AIX-P8-64-NEXT:    stxvd2x v2, 0, r3
-; AIX-P8-64-NEXT:    stfdx f1, r3, r4
-; AIX-P8-64-NEXT:    lxvd2x v2, 0, r3
+; AIX-P8-64-NEXT:    rlwinm r3, r4, 3, 28, 28
+; AIX-P8-64-NEXT:    addi r4, r1, -16
+; AIX-P8-64-NEXT:    stxvd2x v2, 0, r4
+; AIX-P8-64-NEXT:    stfdx f1, r4, r3
+; AIX-P8-64-NEXT:    lxvd2x v2, 0, r4
 ; AIX-P8-64-NEXT:    blr
 ;
 ; AIX-P8-32-LABEL: testDouble1:
 ; AIX-P8-32:       # %bb.0: # %entry
-; AIX-P8-32-NEXT:    addi r3, r1, -16
-; AIX-P8-32-NEXT:    rlwinm r4, r5, 3, 28, 28
-; AIX-P8-32-NEXT:    stxvd2x v2, 0, r3
-; AIX-P8-32-NEXT:    stfdx f1, r3, r4
-; AIX-P8-32-NEXT:    lxvd2x v2, 0, r3
+; AIX-P8-32-NEXT:    rlwinm r3, r5, 3, 28, 28
+; AIX-P8-32-NEXT:    addi r4, r1, -16
+; AIX-P8-32-NEXT:    stxvd2x v2, 0, r4
+; AIX-P8-32-NEXT:    stfdx f1, r4, r3
+; AIX-P8-32-NEXT:    lxvd2x v2, 0, r4
 ; AIX-P8-32-NEXT:    blr
 entry:
   %vecins = insertelement <2 x double> %a, double %b, i32 %idx1
@@ -807,35 +807,35 @@ define <2 x double> @testDouble2(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32
 ;
 ; AIX-P8-64-LABEL: testDouble2:
 ; AIX-P8-64:       # %bb.0: # %entry
-; AIX-P8-64-NEXT:    ld r7, 0(r3)
-; AIX-P8-64-NEXT:    addi r6, r1, -32
+; AIX-P8-64-NEXT:    ld r6, 0(r3)
 ; AIX-P8-64-NEXT:    rlwinm r4, r4, 3, 28, 28
-; AIX-P8-64-NEXT:    stxvd2x v2, 0, r6
-; AIX-P8-64-NEXT:    stdx r7, r6, r4
+; AIX-P8-64-NEXT:    addi r7, r1, -32
+; AIX-P8-64-NEXT:    stxvd2x v2, 0, r7
+; AIX-P8-64-NEXT:    rlwinm r5, r5, 3, 28, 28
+; AIX-P8-64-NEXT:    stdx r6, r7, r4
 ; AIX-P8-64-NEXT:    li r4, 1
-; AIX-P8-64-NEXT:    lxvd2x vs0, 0, r6
+; AIX-P8-64-NEXT:    lxvd2x vs0, 0, r7
 ; AIX-P8-64-NEXT:    ldx r3, r3, r4
-; AIX-P8-64-NEXT:    rlwinm r4, r5, 3, 28, 28
-; AIX-P8-64-NEXT:    addi r5, r1, -16
-; AIX-P8-64-NEXT:    stxvd2x vs0, 0, r5
-; AIX-P8-64-NEXT:    stdx r3, r5, r4
-; AIX-P8-64-NEXT:    lxvd2x v2, 0, r5
+; AIX-P8-64-NEXT:    addi r4, r1, -16
+; AIX-P8-64-NEXT:    stxvd2x vs0, 0, r4
+; AIX-P8-64-NEXT:    stdx r3, r4, r5
+; AIX-P8-64-NEXT:    lxvd2x v2, 0, r4
 ; AIX-P8-64-NEXT:    blr
 ;
 ; AIX-P8-32-LABEL: testDouble2:
 ; AIX-P8-32:       # %bb.0: # %entry
 ; AIX-P8-32-NEXT:    lfd f0, 0(r3)
-; AIX-P8-32-NEXT:    addi r6, r1, -32
 ; AIX-P8-32-NEXT:    rlwinm r4, r4, 3, 28, 28
+; AIX-P8-32-NEXT:    addi r6, r1, -32
+; AIX-P8-32-NEXT:    rlwinm r5, r5, 3, 28, 28
 ; AIX-P8-32-NEXT:    stxvd2x v2, 0, r6
 ; AIX-P8-32-NEXT:    stfdx f0, r6, r4
-; AIX-P8-32-NEXT:    addi r4, r1, -16
 ; AIX-P8-32-NEXT:    lxvd2x vs0, 0, r6
 ; AIX-P8-32-NEXT:    lfd f1, 1(r3)
-; AIX-P8-32-NEXT:    rlwinm r3, r5, 3, 28, 28
-; AIX-P8-32-NEXT:    stxvd2x vs0, 0, r4
-; AIX-P8-32-NEXT:    stfdx f1, r4, r3
-; AIX-P8-32-NEXT:    lxvd2x v2, 0, r4
+; AIX-P8-32-NEXT:    addi r3, r1, -16
+; AIX-P8-32-NEXT:    stxvd2x vs0, 0, r3
+; AIX-P8-32-NEXT:    stfdx f1, r3, r5
+; AIX-P8-32-NEXT:    lxvd2x v2, 0, r3
 ; AIX-P8-32-NEXT:    blr
 entry:
   %add.ptr1 = getelementptr inbounds i8, ptr %b, i64 1
@@ -895,35 +895,35 @@ define <2 x double> @testDouble3(<2 x double> %a, ptr %b, i32 zeroext %idx1, i32
 ; AIX-P8-64-NEXT:    lis r6, 1
 ; AIX-P8-64-NEXT:    rlwinm r4, r4, 3, 28, 28
 ; AIX-P8-64-NEXT:    addi r7, r1, -32
-; AIX-P8-64-NEXT:    li r8, 1
+; AIX-P8-64-NEXT:    rlwinm r5, r5, 3, 28, 28
 ; AIX-P8-64-NEXT:    ldx r6, r3, r6
 ; AIX-P8-64-NEXT:    stxvd2x v2, 0, r7
 ; AIX-P8-64-NEXT:    stdx r6, r7, r4
-; AIX-P8-64-NEXT:    rldic r4, r8, 36, 27
+; AIX-P8-64-NEXT:    li r4, 1
 ; AIX-P8-64-NEXT:    lxvd2x vs0, 0, r7
+; AIX-P8-64-NEXT:    rldic r4, r4, 36, 27
 ; AIX-P8-64-NEXT:    ldx r3, r3, r4
-; AIX-P8-64-NEXT:    rlwinm r4, r5, 3, 28, 28
-; AIX-P8-64-NEXT:    addi r5, r1, -16
-; AIX-P8-64-NEXT:    stxvd2x vs0, 0, r5
-; AIX-P8-64-NEXT:    stdx r3, r5, r4
-; AIX-P8-64-NEXT:    lxvd2x v2, 0, r5
+; AIX-P8-64-NEXT:    addi r4, r1, -16
+; AIX-P8-64-NEXT:    stxvd2x vs0, 0, r4
+; AIX-P8-64-NEXT:    stdx r3, r4, r5
+; AIX-P8-64-NEXT:    lxvd2x v2, 0, r4
 ; AIX-P8-64-NEXT:    blr
 ;
 ; AIX-P8-32-LABEL: testDouble3:
 ; AIX-P8-32:       # %bb.0: # %entry
 ; AIX-P8-32-NEXT:    lis r6, 1
 ; AIX-P8-32-NEXT:    rlwinm r4, r4, 3, 28, 28
+; AIX-P8-32-NEXT:    rlwinm r5, r5, 3, 28, 28
 ; AIX-P8-32-NEXT:    lfdx f0, r3, r6
 ; AIX-P8-32-NEXT:    addi r6, r1, -32
 ; AIX-P8-32-NEXT:    stxvd2x v2, 0, r6
 ; AIX-P8-32-NEXT:    stfdx f0, r6, r4
-; AIX-P8-32-NEXT:    addi r4, r1, -16
 ; AIX-P8-32-NEXT:    lxvd2x vs0, 0, r6
 ; AIX-P8-32-NEXT:    lfd f1, 0(r3)
-; AIX-P8-32-NEXT:    rlwinm r3, r5, 3, 28, 28
-; AIX-P8-32-NEXT:    stxvd2x vs0, 0, r4
-; AIX-P8-32-NEXT:    stfdx f1, r4, r3
-; AIX-P8-32-NEXT:    lxvd2x v2, 0, r4
+; AIX-P8-32-NEXT:    addi r3, r1, -16
+; AIX-P8-32-NEXT:    stxvd2x vs0, 0, r3
+; AIX-P8-32-NEXT:    stfdx f1, r3, r5
+; AIX-P8-32-NEXT:    lxvd2x v2, 0, r3
 ; AIX-P8-32-NEXT:    blr
 entry:
   %add.ptr = getelementptr inbounds i8, ptr %b, i64 65536
@@ -1128,18 +1128,18 @@ define dso_local <4 x float> @testInsertDoubleToFloat(<4 x float> %a, double %b)
 ; AIX-P8-64:       # %bb.0: # %entry
 ; AIX-P8-64-NEXT:    xsrsp f0, f1
 ; AIX-P8-64-NEXT:    ld r3, L..C8(r2) # %const.0
-; AIX-P8-64-NEXT:    lxvw4x v4, 0, r3
-; AIX-P8-64-NEXT:    xscvdpspn v3, f0
-; AIX-P8-64-NEXT:    vperm v2, v2, v3, v4
+; AIX-P8-64-NEXT:    lxvw4x v3, 0, r3
+; AIX-P8-64-NEXT:    xscvdpspn v4, f0
+; AIX-P8-64-NEXT:    vperm v2, v2, v4, v3
 ; AIX-P8-64-NEXT:    blr
 ;
 ; AIX-P8-32-LABEL: testInsertDoubleToFloat:
 ; AIX-P8-32:       # %bb.0: # %entry
 ; AIX-P8-32-NEXT:    xsrsp f0, f1
 ; AIX-P8-32-NEXT:    lwz r3, L..C12(r2) # %const.0
-; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
-; AIX-P8-32-NEXT:    xscvdpspn v3, f0
-; AIX-P8-32-NEXT:    vperm v2, v2, v3, v4
+; AIX-P8-32-NEXT:    lxvw4x v3, 0, r3
+; AIX-P8-32-NEXT:    xscvdpspn v4, f0
+; AIX-P8-32-NEXT:    vperm v2, v2, v4, v3
 ; AIX-P8-32-NEXT:    blr
 entry:
   %conv = fptrunc double %b to float

diff  --git a/llvm/test/CodeGen/PowerPC/vec_select.ll b/llvm/test/CodeGen/PowerPC/vec_select.ll
index 2a839675b2a87a0..fae63b264d19f8d 100644
--- a/llvm/test/CodeGen/PowerPC/vec_select.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_select.ll
@@ -122,14 +122,14 @@ entry:
 define <1 x i128> @test7(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c, <1 x i128> %d) {
 ; CHECK-VSX-LABEL: test7:
 ; CHECK-VSX:       # %bb.0: # %entry
-; CHECK-VSX-NEXT:    xxswapd vs0, v5
-; CHECK-VSX-NEXT:    xxswapd vs1, v4
 ; CHECK-VSX-NEXT:    mfvsrd r3, v5
 ; CHECK-VSX-NEXT:    mfvsrd r4, v4
-; CHECK-VSX-NEXT:    mffprd r5, f0
-; CHECK-VSX-NEXT:    mffprd r6, f1
 ; CHECK-VSX-NEXT:    xor r3, r4, r3
-; CHECK-VSX-NEXT:    xor r4, r6, r5
+; CHECK-VSX-NEXT:    xxswapd vs0, v5
+; CHECK-VSX-NEXT:    xxswapd vs1, v4
+; CHECK-VSX-NEXT:    mffprd r4, f0
+; CHECK-VSX-NEXT:    mffprd r5, f1
+; CHECK-VSX-NEXT:    xor r4, r5, r4
 ; CHECK-VSX-NEXT:    or. r3, r4, r3
 ; CHECK-VSX-NEXT:    bclr 12, eq, 0
 ; CHECK-VSX-NEXT:  # %bb.1: # %entry
@@ -139,15 +139,15 @@ define <1 x i128> @test7(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c, <1 x i128>
 ; CHECK-NOVSX-BE-LABEL: test7:
 ; CHECK-NOVSX-BE:       # %bb.0: # %entry
 ; CHECK-NOVSX-BE-NEXT:    addi r3, r1, -16
-; CHECK-NOVSX-BE-NEXT:    addi r4, r1, -32
 ; CHECK-NOVSX-BE-NEXT:    stvx v5, 0, r3
-; CHECK-NOVSX-BE-NEXT:    stvx v4, 0, r4
+; CHECK-NOVSX-BE-NEXT:    addi r3, r1, -32
+; CHECK-NOVSX-BE-NEXT:    stvx v4, 0, r3
 ; CHECK-NOVSX-BE-NEXT:    ld r3, -16(r1)
 ; CHECK-NOVSX-BE-NEXT:    ld r4, -32(r1)
-; CHECK-NOVSX-BE-NEXT:    ld r5, -8(r1)
-; CHECK-NOVSX-BE-NEXT:    ld r6, -24(r1)
+; CHECK-NOVSX-BE-NEXT:    ld r5, -24(r1)
 ; CHECK-NOVSX-BE-NEXT:    xor r3, r4, r3
-; CHECK-NOVSX-BE-NEXT:    xor r4, r6, r5
+; CHECK-NOVSX-BE-NEXT:    ld r4, -8(r1)
+; CHECK-NOVSX-BE-NEXT:    xor r4, r5, r4
 ; CHECK-NOVSX-BE-NEXT:    or. r3, r4, r3
 ; CHECK-NOVSX-BE-NEXT:    bclr 12, eq, 0
 ; CHECK-NOVSX-BE-NEXT:  # %bb.1: # %entry
@@ -157,15 +157,15 @@ define <1 x i128> @test7(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c, <1 x i128>
 ; CHECK-NOVSX-LE-LABEL: test7:
 ; CHECK-NOVSX-LE:       # %bb.0: # %entry
 ; CHECK-NOVSX-LE-NEXT:    addi r3, r1, -16
-; CHECK-NOVSX-LE-NEXT:    addi r4, r1, -32
 ; CHECK-NOVSX-LE-NEXT:    stvx v5, 0, r3
-; CHECK-NOVSX-LE-NEXT:    stvx v4, 0, r4
+; CHECK-NOVSX-LE-NEXT:    addi r3, r1, -32
+; CHECK-NOVSX-LE-NEXT:    stvx v4, 0, r3
 ; CHECK-NOVSX-LE-NEXT:    ld r3, -8(r1)
 ; CHECK-NOVSX-LE-NEXT:    ld r4, -24(r1)
-; CHECK-NOVSX-LE-NEXT:    ld r5, -16(r1)
-; CHECK-NOVSX-LE-NEXT:    ld r6, -32(r1)
+; CHECK-NOVSX-LE-NEXT:    ld r5, -32(r1)
 ; CHECK-NOVSX-LE-NEXT:    xor r3, r4, r3
-; CHECK-NOVSX-LE-NEXT:    xor r4, r6, r5
+; CHECK-NOVSX-LE-NEXT:    ld r4, -16(r1)
+; CHECK-NOVSX-LE-NEXT:    xor r4, r5, r4
 ; CHECK-NOVSX-LE-NEXT:    or. r3, r4, r3
 ; CHECK-NOVSX-LE-NEXT:    bclr 12, eq, 0
 ; CHECK-NOVSX-LE-NEXT:  # %bb.1: # %entry

diff  --git a/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll b/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll
index 2756716609bd0dd..065a09e0513259d 100644
--- a/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll
@@ -28,9 +28,9 @@ define void @VPKUDUM(ptr %A, ptr %B) {
 ; CHECK-LABEL: VPKUDUM:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lxvd2x 0, 0, 3
-; CHECK-NEXT:    lxvd2x 1, 0, 4
 ; CHECK-NEXT:    xxswapd 34, 0
-; CHECK-NEXT:    xxswapd 35, 1
+; CHECK-NEXT:    lxvd2x 0, 0, 4
+; CHECK-NEXT:    xxswapd 35, 0
 ; CHECK-NEXT:    vpkudum 2, 3, 2
 ; CHECK-NEXT:    xxswapd 0, 34
 ; CHECK-NEXT:    stxvd2x 0, 0, 3

diff  --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index b94dac42e1fff34..9cabe0c17d849d9 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -44,29 +44,29 @@ define <3 x float> @constrained_vector_fdiv_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-LABEL: constrained_vector_fdiv_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 35
-; PC64LE-NEXT:    xxsldwi 1, 35, 35, 3
+; PC64LE-NEXT:    xxswapd 1, 34
 ; PC64LE-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
-; PC64LE-NEXT:    xxsldwi 2, 34, 34, 3
-; PC64LE-NEXT:    xxswapd 3, 34
-; PC64LE-NEXT:    addi 3, 3, .LCPI2_0@toc@l
-; PC64LE-NEXT:    xxsldwi 4, 35, 35, 1
-; PC64LE-NEXT:    xxsldwi 5, 34, 34, 1
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvspdpn 2, 2
-; PC64LE-NEXT:    xscvspdpn 3, 3
-; PC64LE-NEXT:    xscvspdpn 4, 4
-; PC64LE-NEXT:    xscvspdpn 5, 5
+; PC64LE-NEXT:    xxsldwi 2, 35, 35, 3
+; PC64LE-NEXT:    xxsldwi 3, 34, 34, 3
+; PC64LE-NEXT:    addi 3, 3, .LCPI2_0@toc@l
+; PC64LE-NEXT:    xxsldwi 5, 34, 34, 1
+; PC64LE-NEXT:    xxsldwi 4, 35, 35, 1
+; PC64LE-NEXT:    xsdivsp 0, 1, 0
+; PC64LE-NEXT:    xscvspdpn 1, 2
+; PC64LE-NEXT:    xscvspdpn 2, 3
 ; PC64LE-NEXT:    xsdivsp 1, 2, 1
-; PC64LE-NEXT:    xsdivsp 0, 3, 0
-; PC64LE-NEXT:    xsdivsp 2, 5, 4
-; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 34, 2
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xscvspdpn 1, 5
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    xscvspdpn 0, 4
+; PC64LE-NEXT:    xsdivsp 0, 1, 0
+; PC64LE-NEXT:    xscvdpspn 36, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fdiv_v3f32:
@@ -107,17 +107,17 @@ entry:
 define <3 x double> @constrained_vector_fdiv_v3f64(<3 x double> %x, <3 x double> %y) #0 {
 ; PC64LE-LABEL: constrained_vector_fdiv_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
 ; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
 ; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
-; PC64LE-NEXT:    xsdivdp 3, 3, 6
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
+; PC64LE-NEXT:    xsdivdp 3, 3, 6
 ; PC64LE-NEXT:    xvdivdp 2, 1, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fdiv_v3f64:
@@ -217,8 +217,8 @@ define <2 x double> @constrained_vector_frem_v2f64(<2 x double> %x, <2 x double>
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 62
-; PC64LE-NEXT:    xxswapd 2, 63
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    xxswapd 2, 63
 ; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
@@ -283,13 +283,13 @@ define <3 x float> @constrained_vector_frem_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-NEXT:    xxsldwi 2, 35, 35, 1
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    std 0, 112(1)
-; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
 ; PC64LE-NEXT:    bl fmodf
@@ -308,21 +308,21 @@ define <3 x float> @constrained_vector_frem_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    bl fmodf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xscvdpspn 1, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 80(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 36, 31
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    addi 3, 3, .LCPI7_0@toc@l
-; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -392,8 +392,8 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stfd 28, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    fmr 28, 2
-; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    fmr 2, 4
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    stfd 29, 72(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    fmr 30, 5
@@ -410,20 +410,20 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
-; PC64LE-NEXT:    fmr 2, 31
 ; PC64LE-NEXT:    fmr 1, 29
+; PC64LE-NEXT:    fmr 2, 31
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xxswapd 0, 63
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxswapd 1, 63
 ; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 80(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    lfd 29, 72(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    lfd 28, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    fmr 1, 0
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -493,33 +493,33 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE-NEXT:    vmr 28, 2
 ; PC64LE-NEXT:    stxvd2x 61, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 96
+; PC64LE-NEXT:    xxlor 1, 60, 60
 ; PC64LE-NEXT:    vmr 29, 3
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 30, 4
 ; PC64LE-NEXT:    li 3, 112
+; PC64LE-NEXT:    xxlor 2, 62, 62
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 5
-; PC64LE-NEXT:    xxlor 1, 60, 60
-; PC64LE-NEXT:    xxlor 2, 62, 62
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 59, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 60
-; PC64LE-NEXT:    xxswapd 2, 62
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    xxswapd 2, 62
 ; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 59, 1
-; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    xxlor 1, 61, 61
+; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 60, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 61
-; PC64LE-NEXT:    xxswapd 2, 63
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    xxswapd 2, 63
 ; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
@@ -641,29 +641,29 @@ define <3 x float> @constrained_vector_fmul_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-LABEL: constrained_vector_fmul_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 35
-; PC64LE-NEXT:    xxsldwi 1, 35, 35, 3
+; PC64LE-NEXT:    xxswapd 1, 34
 ; PC64LE-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
-; PC64LE-NEXT:    xxsldwi 2, 34, 34, 3
-; PC64LE-NEXT:    xxswapd 3, 34
-; PC64LE-NEXT:    addi 3, 3, .LCPI12_0@toc@l
-; PC64LE-NEXT:    xxsldwi 4, 35, 35, 1
-; PC64LE-NEXT:    xxsldwi 5, 34, 34, 1
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvspdpn 2, 2
-; PC64LE-NEXT:    xscvspdpn 3, 3
-; PC64LE-NEXT:    xscvspdpn 4, 4
-; PC64LE-NEXT:    xscvspdpn 5, 5
+; PC64LE-NEXT:    xxsldwi 2, 35, 35, 3
+; PC64LE-NEXT:    xxsldwi 3, 34, 34, 3
+; PC64LE-NEXT:    addi 3, 3, .LCPI12_0@toc@l
+; PC64LE-NEXT:    xxsldwi 5, 34, 34, 1
+; PC64LE-NEXT:    xxsldwi 4, 35, 35, 1
+; PC64LE-NEXT:    xsmulsp 0, 1, 0
+; PC64LE-NEXT:    xscvspdpn 1, 2
+; PC64LE-NEXT:    xscvspdpn 2, 3
 ; PC64LE-NEXT:    xsmulsp 1, 2, 1
-; PC64LE-NEXT:    xsmulsp 0, 3, 0
-; PC64LE-NEXT:    xsmulsp 2, 5, 4
-; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 34, 2
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xscvspdpn 1, 5
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    xscvspdpn 0, 4
+; PC64LE-NEXT:    xsmulsp 0, 1, 0
+; PC64LE-NEXT:    xscvdpspn 36, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fmul_v3f32:
@@ -704,17 +704,17 @@ entry:
 define <3 x double> @constrained_vector_fmul_v3f64(<3 x double> %x, <3 x double> %y) #0 {
 ; PC64LE-LABEL: constrained_vector_fmul_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
 ; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
 ; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
-; PC64LE-NEXT:    xsmuldp 3, 3, 6
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
+; PC64LE-NEXT:    xsmuldp 3, 3, 6
 ; PC64LE-NEXT:    xvmuldp 2, 1, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fmul_v3f64:
@@ -803,29 +803,29 @@ define <3 x float> @constrained_vector_fadd_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-LABEL: constrained_vector_fadd_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 35
-; PC64LE-NEXT:    xxsldwi 1, 35, 35, 3
+; PC64LE-NEXT:    xxswapd 1, 34
 ; PC64LE-NEXT:    addis 3, 2, .LCPI17_0@toc@ha
-; PC64LE-NEXT:    xxsldwi 2, 34, 34, 3
-; PC64LE-NEXT:    xxswapd 3, 34
-; PC64LE-NEXT:    addi 3, 3, .LCPI17_0@toc@l
-; PC64LE-NEXT:    xxsldwi 4, 35, 35, 1
-; PC64LE-NEXT:    xxsldwi 5, 34, 34, 1
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvspdpn 2, 2
-; PC64LE-NEXT:    xscvspdpn 3, 3
-; PC64LE-NEXT:    xscvspdpn 4, 4
-; PC64LE-NEXT:    xscvspdpn 5, 5
+; PC64LE-NEXT:    xxsldwi 2, 35, 35, 3
+; PC64LE-NEXT:    xxsldwi 3, 34, 34, 3
+; PC64LE-NEXT:    addi 3, 3, .LCPI17_0@toc@l
+; PC64LE-NEXT:    xxsldwi 5, 34, 34, 1
+; PC64LE-NEXT:    xxsldwi 4, 35, 35, 1
+; PC64LE-NEXT:    xsaddsp 0, 1, 0
+; PC64LE-NEXT:    xscvspdpn 1, 2
+; PC64LE-NEXT:    xscvspdpn 2, 3
 ; PC64LE-NEXT:    xsaddsp 1, 2, 1
-; PC64LE-NEXT:    xsaddsp 0, 3, 0
-; PC64LE-NEXT:    xsaddsp 2, 5, 4
-; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 34, 2
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xscvspdpn 1, 5
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    xscvspdpn 0, 4
+; PC64LE-NEXT:    xsaddsp 0, 1, 0
+; PC64LE-NEXT:    xscvdpspn 36, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fadd_v3f32:
@@ -866,17 +866,17 @@ entry:
 define <3 x double> @constrained_vector_fadd_v3f64(<3 x double> %x, <3 x double> %y) #0 {
 ; PC64LE-LABEL: constrained_vector_fadd_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
 ; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
 ; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
-; PC64LE-NEXT:    xsadddp 3, 3, 6
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
+; PC64LE-NEXT:    xsadddp 3, 3, 6
 ; PC64LE-NEXT:    xvadddp 2, 1, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fadd_v3f64:
@@ -965,29 +965,29 @@ define <3 x float> @constrained_vector_fsub_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-LABEL: constrained_vector_fsub_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 35
-; PC64LE-NEXT:    xxsldwi 1, 35, 35, 3
+; PC64LE-NEXT:    xxswapd 1, 34
 ; PC64LE-NEXT:    addis 3, 2, .LCPI22_0@toc@ha
-; PC64LE-NEXT:    xxsldwi 2, 34, 34, 3
-; PC64LE-NEXT:    xxswapd 3, 34
-; PC64LE-NEXT:    addi 3, 3, .LCPI22_0@toc@l
-; PC64LE-NEXT:    xxsldwi 4, 35, 35, 1
-; PC64LE-NEXT:    xxsldwi 5, 34, 34, 1
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvspdpn 2, 2
-; PC64LE-NEXT:    xscvspdpn 3, 3
-; PC64LE-NEXT:    xscvspdpn 4, 4
-; PC64LE-NEXT:    xscvspdpn 5, 5
+; PC64LE-NEXT:    xxsldwi 2, 35, 35, 3
+; PC64LE-NEXT:    xxsldwi 3, 34, 34, 3
+; PC64LE-NEXT:    addi 3, 3, .LCPI22_0@toc@l
+; PC64LE-NEXT:    xxsldwi 5, 34, 34, 1
+; PC64LE-NEXT:    xxsldwi 4, 35, 35, 1
+; PC64LE-NEXT:    xssubsp 0, 1, 0
+; PC64LE-NEXT:    xscvspdpn 1, 2
+; PC64LE-NEXT:    xscvspdpn 2, 3
 ; PC64LE-NEXT:    xssubsp 1, 2, 1
-; PC64LE-NEXT:    xssubsp 0, 3, 0
-; PC64LE-NEXT:    xssubsp 2, 5, 4
-; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 34, 2
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xscvspdpn 1, 5
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    xscvspdpn 0, 4
+; PC64LE-NEXT:    xssubsp 0, 1, 0
+; PC64LE-NEXT:    xscvdpspn 36, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fsub_v3f32:
@@ -1028,17 +1028,17 @@ entry:
 define <3 x double> @constrained_vector_fsub_v3f64(<3 x double> %x, <3 x double> %y) #0 {
 ; PC64LE-LABEL: constrained_vector_fsub_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
 ; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
 ; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
-; PC64LE-NEXT:    xssubdp 3, 3, 6
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
+; PC64LE-NEXT:    xssubdp 3, 3, 6
 ; PC64LE-NEXT:    xvsubdp 2, 1, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fsub_v3f64:
@@ -1124,24 +1124,24 @@ entry:
 define <3 x float> @constrained_vector_sqrt_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sqrt_v3f32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT:    xxswapd 1, 34
+; PC64LE-NEXT:    xxswapd 0, 34
+; PC64LE-NEXT:    xxsldwi 1, 34, 34, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI27_0@toc@ha
-; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI27_0@toc@l
-; PC64LE-NEXT:    lxvd2x 3, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvspdpn 2, 2
-; PC64LE-NEXT:    xxswapd 36, 3
+; PC64LE-NEXT:    addi 3, 3, .LCPI27_0@toc@l
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xssqrtsp 0, 0
 ; PC64LE-NEXT:    xssqrtsp 1, 1
-; PC64LE-NEXT:    xssqrtsp 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 2
-; PC64LE-NEXT:    xxmrghw 35, 1, 0
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    xscvspdpn 0, 2
+; PC64LE-NEXT:    xssqrtsp 0, 0
+; PC64LE-NEXT:    xscvdpspn 36, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sqrt_v3f32:
@@ -1181,8 +1181,8 @@ define <3 x double> @constrained_vector_sqrt_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    xssqrtdp 3, 3
 ; PC64LE-NEXT:    xvsqrtdp 2, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sqrt_v3f64:
@@ -1277,8 +1277,8 @@ define <2 x double> @constrained_vector_pow_v2f64(<2 x double> %x, <2 x double>
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 62
-; PC64LE-NEXT:    xxswapd 2, 63
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    xxswapd 2, 63
 ; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
@@ -1343,13 +1343,13 @@ define <3 x float> @constrained_vector_pow_v3f32(<3 x float> %x, <3 x float> %y)
 ; PC64LE-NEXT:    xxsldwi 2, 35, 35, 1
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    std 0, 112(1)
-; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
 ; PC64LE-NEXT:    bl powf
@@ -1368,21 +1368,21 @@ define <3 x float> @constrained_vector_pow_v3f32(<3 x float> %x, <3 x float> %y)
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    bl powf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xscvdpspn 1, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI32_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 80(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 36, 31
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    addi 3, 3, .LCPI32_0@toc@l
-; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -1452,8 +1452,8 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stfd 28, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    fmr 28, 2
-; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    fmr 2, 4
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    stfd 29, 72(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    fmr 30, 5
@@ -1470,20 +1470,20 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
-; PC64LE-NEXT:    fmr 2, 31
 ; PC64LE-NEXT:    fmr 1, 29
+; PC64LE-NEXT:    fmr 2, 31
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xxswapd 0, 63
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxswapd 1, 63
 ; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 80(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    lfd 29, 72(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    lfd 28, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    fmr 1, 0
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -1553,33 +1553,33 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE-NEXT:    vmr 28, 2
 ; PC64LE-NEXT:    stxvd2x 61, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 96
+; PC64LE-NEXT:    xxlor 1, 60, 60
 ; PC64LE-NEXT:    vmr 29, 3
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 30, 4
 ; PC64LE-NEXT:    li 3, 112
+; PC64LE-NEXT:    xxlor 2, 62, 62
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 5
-; PC64LE-NEXT:    xxlor 1, 60, 60
-; PC64LE-NEXT:    xxlor 2, 62, 62
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 59, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 60
-; PC64LE-NEXT:    xxswapd 2, 62
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    xxswapd 2, 62
 ; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 59, 1
-; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    xxlor 1, 61, 61
+; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 60, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 61
-; PC64LE-NEXT:    xxswapd 2, 63
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    xxswapd 2, 63
 ; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
@@ -1704,17 +1704,17 @@ define <2 x double> @constrained_vector_powi_v2f64(<2 x double> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    std 30, 80(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    clrldi 30, 5, 32
-; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 2
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
@@ -1780,10 +1780,10 @@ define <3 x float> @constrained_vector_powi_v3f32(<3 x float> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    std 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    clrldi 30, 5, 32
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    mr 4, 30
-; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    xscvspdpn 1, 0
+; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 2
 ; PC64LE-NEXT:    bl __powisf2
@@ -1800,20 +1800,20 @@ define <3 x float> @constrained_vector_powi_v3f32(<3 x float> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl __powisf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xscvdpspn 1, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI37_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 80(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xscvdpspn 36, 31
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    ld 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 1, 1
 ; PC64LE-NEXT:    addi 3, 3, .LCPI37_0@toc@l
-; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -1881,12 +1881,12 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    std 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    clrldi 30, 6, 32
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    fmr 31, 3
 ; PC64LE-NEXT:    fmr 30, 2
+; PC64LE-NEXT:    mr 4, 30
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 63, 1, 1
@@ -1896,19 +1896,19 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
-; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    fmr 1, 31
+; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xxswapd 0, 63
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxswapd 1, 63
 ; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 80(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    ld 30, 64(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    fmr 1, 0
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -1970,32 +1970,32 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    std 0, 128(1)
 ; PC64LE-NEXT:    std 30, 96(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    clrldi 30, 7, 32
-; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    stxvd2x 61, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    li 3, 80
+; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
-; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 62
-; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
-; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
@@ -2172,9 +2172,9 @@ define <3 x float> @constrained_vector_sin_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 2
-; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl sinf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
@@ -2187,19 +2187,19 @@ define <3 x float> @constrained_vector_sin_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl sinf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xscvdpspn 1, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI42_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 36, 31
+; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    addi 3, 3, .LCPI42_0@toc@l
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xscvdpspn 34, 31
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -2274,14 +2274,14 @@ define <3 x double> @constrained_vector_sin_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xxswapd 0, 63
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxswapd 1, 63
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    fmr 1, 0
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -2339,9 +2339,9 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    li 3, 80
+; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
-; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
@@ -2523,9 +2523,9 @@ define <3 x float> @constrained_vector_cos_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 2
-; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl cosf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
@@ -2538,20 +2538,20 @@ define <3 x float> @constrained_vector_cos_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl cosf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xscvdpspn 1, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI47_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 36, 31
+; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    addi 3, 3, .LCPI47_0@toc@l
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xscvdpspn 34, 31
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
-; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -2625,14 +2625,14 @@ define <3 x double> @constrained_vector_cos_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xxswapd 0, 63
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxswapd 1, 63
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    fmr 1, 0
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -2690,9 +2690,9 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    li 3, 80
+; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
-; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
@@ -2874,9 +2874,9 @@ define <3 x float> @constrained_vector_exp_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 2
-; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl expf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
@@ -2889,19 +2889,19 @@ define <3 x float> @constrained_vector_exp_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl expf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xscvdpspn 1, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI52_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 36, 31
+; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    addi 3, 3, .LCPI52_0@toc@l
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xscvdpspn 34, 31
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -2976,14 +2976,14 @@ define <3 x double> @constrained_vector_exp_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xxswapd 0, 63
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxswapd 1, 63
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    fmr 1, 0
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -3041,9 +3041,9 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    li 3, 80
+; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
-; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
@@ -3225,9 +3225,9 @@ define <3 x float> @constrained_vector_exp2_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 2
-; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl exp2f
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
@@ -3240,19 +3240,19 @@ define <3 x float> @constrained_vector_exp2_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl exp2f
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xscvdpspn 1, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI57_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 36, 31
+; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    addi 3, 3, .LCPI57_0@toc@l
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xscvdpspn 34, 31
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -3327,14 +3327,14 @@ define <3 x double> @constrained_vector_exp2_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xxswapd 0, 63
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxswapd 1, 63
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    fmr 1, 0
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -3392,9 +3392,9 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    li 3, 80
+; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
-; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
@@ -3576,9 +3576,9 @@ define <3 x float> @constrained_vector_log_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 2
-; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl logf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
@@ -3591,19 +3591,19 @@ define <3 x float> @constrained_vector_log_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl logf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xscvdpspn 1, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI62_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 36, 31
+; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    addi 3, 3, .LCPI62_0@toc@l
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xscvdpspn 34, 31
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -3678,14 +3678,14 @@ define <3 x double> @constrained_vector_log_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xxswapd 0, 63
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxswapd 1, 63
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    fmr 1, 0
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -3743,9 +3743,9 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    li 3, 80
+; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
-; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
@@ -3927,9 +3927,9 @@ define <3 x float> @constrained_vector_log10_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 2
-; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl log10f
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
@@ -3942,19 +3942,19 @@ define <3 x float> @constrained_vector_log10_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl log10f
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xscvdpspn 1, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI67_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 36, 31
+; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    addi 3, 3, .LCPI67_0@toc@l
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xscvdpspn 34, 31
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -4029,14 +4029,14 @@ define <3 x double> @constrained_vector_log10_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xxswapd 0, 63
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxswapd 1, 63
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    fmr 1, 0
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -4094,9 +4094,9 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    li 3, 80
+; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
-; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
@@ -4278,9 +4278,9 @@ define <3 x float> @constrained_vector_log2_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 2
-; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl log2f
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
@@ -4293,19 +4293,19 @@ define <3 x float> @constrained_vector_log2_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl log2f
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xscvdpspn 1, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI72_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 36, 31
+; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    addi 3, 3, .LCPI72_0@toc@l
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xscvdpspn 34, 31
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -4380,14 +4380,14 @@ define <3 x double> @constrained_vector_log2_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xxswapd 0, 63
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxswapd 1, 63
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    fmr 1, 0
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -4445,9 +4445,9 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    li 3, 80
+; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
-; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
@@ -4564,24 +4564,24 @@ entry:
 define <3 x float> @constrained_vector_rint_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_rint_v3f32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT:    xxswapd 1, 34
+; PC64LE-NEXT:    xxswapd 0, 34
+; PC64LE-NEXT:    xxsldwi 1, 34, 34, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI77_0@toc@ha
-; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI77_0@toc@l
-; PC64LE-NEXT:    lxvd2x 3, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvspdpn 2, 2
-; PC64LE-NEXT:    xxswapd 36, 3
+; PC64LE-NEXT:    addi 3, 3, .LCPI77_0@toc@l
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xsrdpic 0, 0
 ; PC64LE-NEXT:    xsrdpic 1, 1
-; PC64LE-NEXT:    xsrdpic 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 2
-; PC64LE-NEXT:    xxmrghw 35, 1, 0
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    xscvspdpn 0, 2
+; PC64LE-NEXT:    xsrdpic 0, 0
+; PC64LE-NEXT:    xscvdpspn 36, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_rint_v3f32:
@@ -4621,8 +4621,8 @@ define <3 x double> @constrained_vector_rint_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    xsrdpic 3, 3
 ; PC64LE-NEXT:    xvrdpic 2, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_rint_v3f64:
@@ -4768,9 +4768,9 @@ define <3 x float> @constrained_vector_nearbyint_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 2
-; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl nearbyintf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
@@ -4783,19 +4783,19 @@ define <3 x float> @constrained_vector_nearbyint_v3f32(<3 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl nearbyintf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xscvdpspn 1, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI82_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 36, 31
+; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    addi 3, 3, .LCPI82_0@toc@l
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xscvdpspn 34, 31
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -4870,14 +4870,14 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xxswapd 0, 63
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxswapd 1, 63
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    fmr 1, 0
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -4935,9 +4935,9 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    li 3, 80
+; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
-; PC64LE-NEXT:    xxlor 1, 62, 62
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
@@ -5073,13 +5073,13 @@ define <3 x float> @constrained_vector_maxnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE-NEXT:    xxsldwi 2, 35, 35, 1
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    std 0, 112(1)
-; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
 ; PC64LE-NEXT:    bl fmaxf
@@ -5098,21 +5098,21 @@ define <3 x float> @constrained_vector_maxnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    bl fmaxf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xscvdpspn 1, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI87_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 80(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 36, 31
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    addi 3, 3, .LCPI87_0@toc@l
-; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -5178,26 +5178,26 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
 ; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
 ; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    std 0, 80(1)
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    std 0, 80(1)
 ; PC64LE-NEXT:    fmr 2, 6
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    xvmaxdp 63, 1, 0
 ; PC64LE-NEXT:    fmr 1, 3
 ; PC64LE-NEXT:    bl fmax
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xxswapd 0, 63
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxswapd 1, 63
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    fmr 1, 0
 ; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -5315,13 +5315,13 @@ define <3 x float> @constrained_vector_minnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE-NEXT:    xxsldwi 2, 35, 35, 1
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    std 0, 112(1)
-; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
 ; PC64LE-NEXT:    bl fminf
@@ -5340,21 +5340,21 @@ define <3 x float> @constrained_vector_minnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE-NEXT:    xscvspdpn 2, 2
 ; PC64LE-NEXT:    bl fminf
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xscvdpspn 1, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI92_0@toc@ha
 ; PC64LE-NEXT:    lfd 30, 80(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 36, 31
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    addi 3, 3, .LCPI92_0@toc@l
-; PC64LE-NEXT:    xscvdpspn 34, 31
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 0, 1
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -5420,26 +5420,26 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
 ; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
 ; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    std 0, 80(1)
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    std 0, 80(1)
 ; PC64LE-NEXT:    fmr 2, 6
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    xvmindp 63, 1, 0
 ; PC64LE-NEXT:    fmr 1, 3
 ; PC64LE-NEXT:    bl fmin
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xxswapd 0, 63
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxswapd 1, 63
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    fmr 1, 0
 ; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -5528,9 +5528,9 @@ define <2 x i32> @constrained_vector_fptosi_v2i32_v2f32(<2 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvdpsxws 0, 0
 ; PC64LE-NEXT:    xscvdpsxws 1, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mffprwz 4, 1
 ; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mtfprwz 1, 4
+; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mtfprwz 1, 3
 ; PC64LE-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
@@ -5560,23 +5560,23 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
 ; PC64LE-NEXT:    xxswapd 1, 34
-; PC64LE-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
-; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI97_0@toc@l
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xscvdpsxws 0, 0
 ; PC64LE-NEXT:    xscvdpsxws 1, 1
-; PC64LE-NEXT:    xscvdpsxws 2, 2
-; PC64LE-NEXT:    mffprwz 4, 0
+; PC64LE-NEXT:    mffprwz 3, 0
+; PC64LE-NEXT:    mtfprwz 0, 3
+; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mtfprwz 1, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI97_0@toc@l
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtfprwz 1, 4
-; PC64LE-NEXT:    mtfprwz 3, 5
 ; PC64LE-NEXT:    xxswapd 35, 0
-; PC64LE-NEXT:    mffprwz 3, 2
-; PC64LE-NEXT:    xxmrghw 34, 3, 1
+; PC64LE-NEXT:    xscvspdpn 0, 2
+; PC64LE-NEXT:    xscvdpsxws 0, 0
+; PC64LE-NEXT:    mffprwz 3, 0
 ; PC64LE-NEXT:    mtvsrwz 36, 3
 ; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
@@ -5657,9 +5657,9 @@ define <2 x i64> @constrained_vector_fptosi_v2i64_v2f32(<2 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvdpsxds 0, 0
 ; PC64LE-NEXT:    xscvdpsxds 1, 1
 ; PC64LE-NEXT:    mffprd 3, 0
-; PC64LE-NEXT:    mffprd 4, 1
 ; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    mtfprd 1, 4
+; PC64LE-NEXT:    mffprd 3, 1
+; PC64LE-NEXT:    mtfprd 1, 3
 ; PC64LE-NEXT:    xxmrghd 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
@@ -5687,16 +5687,16 @@ define <3 x i64> @constrained_vector_fptosi_v3i64_v3f32(<3 x float> %x) #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
 ; PC64LE-NEXT:    xxswapd 1, 34
-; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xscvspdpn 0, 0
-; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xscvdpsxds 0, 0
-; PC64LE-NEXT:    xscvdpsxds 1, 1
-; PC64LE-NEXT:    xscvdpsxds 2, 2
 ; PC64LE-NEXT:    mffprd 3, 0
-; PC64LE-NEXT:    mffprd 4, 1
-; PC64LE-NEXT:    mffprd 5, 2
+; PC64LE-NEXT:    xscvspdpn 0, 1
+; PC64LE-NEXT:    xscvdpsxds 0, 0
+; PC64LE-NEXT:    mffprd 4, 0
+; PC64LE-NEXT:    xscvspdpn 0, 2
+; PC64LE-NEXT:    xscvdpsxds 0, 0
+; PC64LE-NEXT:    mffprd 5, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptosi_v3i64_v3f32:
@@ -5725,26 +5725,27 @@ define <4 x i64> @constrained_vector_fptosi_v4i64_v4f32(<4 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptosi_v4i64_v4f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT:    xscvspdpn 1, 34
-; PC64LE-NEXT:    xxswapd 2, 34
-; PC64LE-NEXT:    xxsldwi 3, 34, 34, 1
+; PC64LE-NEXT:    xxswapd 1, 34
 ; PC64LE-NEXT:    xscvspdpn 0, 0
-; PC64LE-NEXT:    xscvspdpn 2, 2
-; PC64LE-NEXT:    xscvspdpn 3, 3
-; PC64LE-NEXT:    xscvdpsxds 1, 1
+; PC64LE-NEXT:    xscvspdpn 1, 1
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xscvdpsxds 0, 0
-; PC64LE-NEXT:    xscvdpsxds 2, 2
-; PC64LE-NEXT:    xscvdpsxds 3, 3
+; PC64LE-NEXT:    xscvdpsxds 1, 1
+; PC64LE-NEXT:    mffprd 3, 0
+; PC64LE-NEXT:    mtfprd 0, 3
 ; PC64LE-NEXT:    mffprd 3, 1
 ; PC64LE-NEXT:    mtfprd 1, 3
+; PC64LE-NEXT:    xxmrghd 36, 1, 0
+; PC64LE-NEXT:    xscvspdpn 0, 34
+; PC64LE-NEXT:    xscvspdpn 1, 2
+; PC64LE-NEXT:    vmr 2, 4
+; PC64LE-NEXT:    xscvdpsxds 0, 0
+; PC64LE-NEXT:    xscvdpsxds 1, 1
 ; PC64LE-NEXT:    mffprd 3, 0
-; PC64LE-NEXT:    mffprd 4, 2
 ; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    mffprd 3, 3
-; PC64LE-NEXT:    mtfprd 2, 4
-; PC64LE-NEXT:    mtfprd 3, 3
-; PC64LE-NEXT:    xxmrghd 34, 2, 0
-; PC64LE-NEXT:    xxmrghd 35, 1, 3
+; PC64LE-NEXT:    mffprd 3, 1
+; PC64LE-NEXT:    mtfprd 1, 3
+; PC64LE-NEXT:    xxmrghd 35, 0, 1
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptosi_v4i64_v4f32:
@@ -5798,14 +5799,14 @@ entry:
 define <2 x i32> @constrained_vector_fptosi_v2i32_v2f64(<2 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptosi_v2i32_v2f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    xscvdpsxws 1, 34
+; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    xscvdpsxws 0, 0
 ; PC64LE-NEXT:    mffprwz 3, 1
-; PC64LE-NEXT:    mffprwz 4, 0
+; PC64LE-NEXT:    mtfprwz 1, 3
+; PC64LE-NEXT:    mffprwz 3, 0
 ; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mtfprwz 1, 4
-; PC64LE-NEXT:    xxmrghw 34, 0, 1
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptosi_v2i32_v2f64:
@@ -5830,18 +5831,18 @@ define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptosi_v3i32_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xscvdpsxws 0, 1
-; PC64LE-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
 ; PC64LE-NEXT:    xscvdpsxws 1, 2
-; PC64LE-NEXT:    addi 3, 3, .LCPI105_0@toc@l
-; PC64LE-NEXT:    xscvdpsxws 2, 3
-; PC64LE-NEXT:    mffprwz 4, 0
+; PC64LE-NEXT:    mffprwz 3, 0
+; PC64LE-NEXT:    mtfprwz 0, 3
+; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mtfprwz 1, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI105_0@toc@l
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtfprwz 1, 4
-; PC64LE-NEXT:    mtfprwz 3, 5
 ; PC64LE-NEXT:    xxswapd 35, 0
-; PC64LE-NEXT:    mffprwz 3, 2
-; PC64LE-NEXT:    xxmrghw 34, 3, 1
+; PC64LE-NEXT:    xscvdpsxws 0, 3
+; PC64LE-NEXT:    mffprwz 3, 0
 ; PC64LE-NEXT:    mtvsrwz 36, 3
 ; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
@@ -5874,19 +5875,19 @@ define <4 x i32> @constrained_vector_fptosi_v4i32_v4f64(<4 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptosi_v4i32_v4f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 34
-; PC64LE-NEXT:    xxswapd 2, 35
-; PC64LE-NEXT:    xscvdpsxws 1, 34
-; PC64LE-NEXT:    xscvdpsxws 3, 35
+; PC64LE-NEXT:    xscvdpsxws 2, 34
+; PC64LE-NEXT:    xxswapd 1, 35
 ; PC64LE-NEXT:    xscvdpsxws 0, 0
-; PC64LE-NEXT:    xscvdpsxws 2, 2
-; PC64LE-NEXT:    mffprwz 3, 1
-; PC64LE-NEXT:    mffprwz 4, 3
-; PC64LE-NEXT:    mffprwz 5, 0
-; PC64LE-NEXT:    mffprwz 6, 2
-; PC64LE-NEXT:    rldimi 5, 3, 32, 0
-; PC64LE-NEXT:    rldimi 6, 4, 32, 0
-; PC64LE-NEXT:    mtfprd 0, 5
-; PC64LE-NEXT:    mtfprd 1, 6
+; PC64LE-NEXT:    xscvdpsxws 1, 1
+; PC64LE-NEXT:    mffprwz 3, 2
+; PC64LE-NEXT:    xscvdpsxws 2, 35
+; PC64LE-NEXT:    mffprwz 4, 0
+; PC64LE-NEXT:    rldimi 4, 3, 32, 0
+; PC64LE-NEXT:    mffprwz 3, 2
+; PC64LE-NEXT:    mtfprd 0, 4
+; PC64LE-NEXT:    mffprwz 4, 1
+; PC64LE-NEXT:    rldimi 4, 3, 32, 0
+; PC64LE-NEXT:    mtfprd 1, 4
 ; PC64LE-NEXT:    xxmrghd 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
@@ -5953,11 +5954,11 @@ define <3 x i64> @constrained_vector_fptosi_v3i64_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptosi_v3i64_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xscvdpsxds 0, 1
-; PC64LE-NEXT:    xscvdpsxds 1, 2
-; PC64LE-NEXT:    xscvdpsxds 2, 3
 ; PC64LE-NEXT:    mffprd 3, 0
-; PC64LE-NEXT:    mffprd 4, 1
-; PC64LE-NEXT:    mffprd 5, 2
+; PC64LE-NEXT:    xscvdpsxds 0, 2
+; PC64LE-NEXT:    mffprd 4, 0
+; PC64LE-NEXT:    xscvdpsxds 0, 3
+; PC64LE-NEXT:    mffprd 5, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptosi_v3i64_v3f64:
@@ -6024,9 +6025,9 @@ define <2 x i32> @constrained_vector_fptoui_v2i32_v2f32(<2 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvdpuxws 0, 0
 ; PC64LE-NEXT:    xscvdpuxws 1, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mffprwz 4, 1
 ; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mtfprwz 1, 4
+; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mtfprwz 1, 3
 ; PC64LE-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
@@ -6056,23 +6057,23 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32(<3 x float> %x) #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
 ; PC64LE-NEXT:    xxswapd 1, 34
-; PC64LE-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI113_0@toc@l
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xscvdpuxws 0, 0
 ; PC64LE-NEXT:    xscvdpuxws 1, 1
-; PC64LE-NEXT:    xscvdpuxws 2, 2
-; PC64LE-NEXT:    mffprwz 4, 0
+; PC64LE-NEXT:    mffprwz 3, 0
+; PC64LE-NEXT:    mtfprwz 0, 3
+; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mtfprwz 1, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI113_0@toc@l
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtfprwz 1, 4
-; PC64LE-NEXT:    mtfprwz 3, 5
 ; PC64LE-NEXT:    xxswapd 35, 0
-; PC64LE-NEXT:    mffprwz 3, 2
-; PC64LE-NEXT:    xxmrghw 34, 3, 1
+; PC64LE-NEXT:    xscvspdpn 0, 2
+; PC64LE-NEXT:    xscvdpuxws 0, 0
+; PC64LE-NEXT:    mffprwz 3, 0
 ; PC64LE-NEXT:    mtvsrwz 36, 3
 ; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
@@ -6153,9 +6154,9 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32(<2 x float> %x) #0 {
 ; PC64LE-NEXT:    xscvdpuxds 0, 0
 ; PC64LE-NEXT:    xscvdpuxds 1, 1
 ; PC64LE-NEXT:    mffprd 3, 0
-; PC64LE-NEXT:    mffprd 4, 1
 ; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    mtfprd 1, 4
+; PC64LE-NEXT:    mffprd 3, 1
+; PC64LE-NEXT:    mtfprd 1, 3
 ; PC64LE-NEXT:    xxmrghd 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
@@ -6183,16 +6184,16 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32(<3 x float> %x) #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
 ; PC64LE-NEXT:    xxswapd 1, 34
-; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xscvspdpn 0, 0
-; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xscvdpuxds 0, 0
-; PC64LE-NEXT:    xscvdpuxds 1, 1
-; PC64LE-NEXT:    xscvdpuxds 2, 2
 ; PC64LE-NEXT:    mffprd 3, 0
-; PC64LE-NEXT:    mffprd 4, 1
-; PC64LE-NEXT:    mffprd 5, 2
+; PC64LE-NEXT:    xscvspdpn 0, 1
+; PC64LE-NEXT:    xscvdpuxds 0, 0
+; PC64LE-NEXT:    mffprd 4, 0
+; PC64LE-NEXT:    xscvspdpn 0, 2
+; PC64LE-NEXT:    xscvdpuxds 0, 0
+; PC64LE-NEXT:    mffprd 5, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptoui_v3i64_v3f32:
@@ -6221,26 +6222,27 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32(<4 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptoui_v4i64_v4f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT:    xscvspdpn 1, 34
-; PC64LE-NEXT:    xxswapd 2, 34
-; PC64LE-NEXT:    xxsldwi 3, 34, 34, 1
+; PC64LE-NEXT:    xxswapd 1, 34
 ; PC64LE-NEXT:    xscvspdpn 0, 0
-; PC64LE-NEXT:    xscvspdpn 2, 2
-; PC64LE-NEXT:    xscvspdpn 3, 3
-; PC64LE-NEXT:    xscvdpuxds 1, 1
+; PC64LE-NEXT:    xscvspdpn 1, 1
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xscvdpuxds 0, 0
-; PC64LE-NEXT:    xscvdpuxds 2, 2
-; PC64LE-NEXT:    xscvdpuxds 3, 3
+; PC64LE-NEXT:    xscvdpuxds 1, 1
+; PC64LE-NEXT:    mffprd 3, 0
+; PC64LE-NEXT:    mtfprd 0, 3
 ; PC64LE-NEXT:    mffprd 3, 1
 ; PC64LE-NEXT:    mtfprd 1, 3
+; PC64LE-NEXT:    xxmrghd 36, 1, 0
+; PC64LE-NEXT:    xscvspdpn 0, 34
+; PC64LE-NEXT:    xscvspdpn 1, 2
+; PC64LE-NEXT:    vmr 2, 4
+; PC64LE-NEXT:    xscvdpuxds 0, 0
+; PC64LE-NEXT:    xscvdpuxds 1, 1
 ; PC64LE-NEXT:    mffprd 3, 0
-; PC64LE-NEXT:    mffprd 4, 2
 ; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    mffprd 3, 3
-; PC64LE-NEXT:    mtfprd 2, 4
-; PC64LE-NEXT:    mtfprd 3, 3
-; PC64LE-NEXT:    xxmrghd 34, 2, 0
-; PC64LE-NEXT:    xxmrghd 35, 1, 3
+; PC64LE-NEXT:    mffprd 3, 1
+; PC64LE-NEXT:    mtfprd 1, 3
+; PC64LE-NEXT:    xxmrghd 35, 0, 1
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptoui_v4i64_v4f32:
@@ -6293,14 +6295,14 @@ entry:
 define <2 x i32> @constrained_vector_fptoui_v2i32_v2f64(<2 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptoui_v2i32_v2f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    xscvdpuxws 1, 34
+; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    xscvdpuxws 0, 0
 ; PC64LE-NEXT:    mffprwz 3, 1
-; PC64LE-NEXT:    mffprwz 4, 0
+; PC64LE-NEXT:    mtfprwz 1, 3
+; PC64LE-NEXT:    mffprwz 3, 0
 ; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mtfprwz 1, 4
-; PC64LE-NEXT:    xxmrghw 34, 0, 1
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptoui_v2i32_v2f64:
@@ -6325,18 +6327,18 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptoui_v3i32_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xscvdpuxws 0, 1
-; PC64LE-NEXT:    addis 3, 2, .LCPI121_0@toc@ha
 ; PC64LE-NEXT:    xscvdpuxws 1, 2
+; PC64LE-NEXT:    mffprwz 3, 0
+; PC64LE-NEXT:    mtfprwz 0, 3
+; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mtfprwz 1, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI121_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI121_0@toc@l
-; PC64LE-NEXT:    xscvdpuxws 2, 3
-; PC64LE-NEXT:    mffprwz 4, 0
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtfprwz 1, 4
-; PC64LE-NEXT:    mtfprwz 3, 5
 ; PC64LE-NEXT:    xxswapd 35, 0
-; PC64LE-NEXT:    mffprwz 3, 2
-; PC64LE-NEXT:    xxmrghw 34, 3, 1
+; PC64LE-NEXT:    xscvdpuxws 0, 3
+; PC64LE-NEXT:    mffprwz 3, 0
 ; PC64LE-NEXT:    mtvsrwz 36, 3
 ; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
@@ -6369,19 +6371,19 @@ define <4 x i32> @constrained_vector_fptoui_v4i32_v4f64(<4 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptoui_v4i32_v4f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 34
-; PC64LE-NEXT:    xxswapd 2, 35
-; PC64LE-NEXT:    xscvdpuxws 1, 34
-; PC64LE-NEXT:    xscvdpuxws 3, 35
+; PC64LE-NEXT:    xscvdpuxws 2, 34
+; PC64LE-NEXT:    xxswapd 1, 35
 ; PC64LE-NEXT:    xscvdpuxws 0, 0
-; PC64LE-NEXT:    xscvdpuxws 2, 2
-; PC64LE-NEXT:    mffprwz 3, 1
-; PC64LE-NEXT:    mffprwz 4, 3
-; PC64LE-NEXT:    mffprwz 5, 0
-; PC64LE-NEXT:    mffprwz 6, 2
-; PC64LE-NEXT:    rldimi 5, 3, 32, 0
-; PC64LE-NEXT:    rldimi 6, 4, 32, 0
-; PC64LE-NEXT:    mtfprd 0, 5
-; PC64LE-NEXT:    mtfprd 1, 6
+; PC64LE-NEXT:    xscvdpuxws 1, 1
+; PC64LE-NEXT:    mffprwz 3, 2
+; PC64LE-NEXT:    xscvdpuxws 2, 35
+; PC64LE-NEXT:    mffprwz 4, 0
+; PC64LE-NEXT:    rldimi 4, 3, 32, 0
+; PC64LE-NEXT:    mffprwz 3, 2
+; PC64LE-NEXT:    mtfprd 0, 4
+; PC64LE-NEXT:    mffprwz 4, 1
+; PC64LE-NEXT:    rldimi 4, 3, 32, 0
+; PC64LE-NEXT:    mtfprd 1, 4
 ; PC64LE-NEXT:    xxmrghd 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
@@ -6448,11 +6450,11 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptoui_v3i64_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xscvdpuxds 0, 1
-; PC64LE-NEXT:    xscvdpuxds 1, 2
-; PC64LE-NEXT:    xscvdpuxds 2, 3
 ; PC64LE-NEXT:    mffprd 3, 0
-; PC64LE-NEXT:    mffprd 4, 1
-; PC64LE-NEXT:    mffprd 5, 2
+; PC64LE-NEXT:    xscvdpuxds 0, 2
+; PC64LE-NEXT:    mffprd 4, 0
+; PC64LE-NEXT:    xscvdpuxds 0, 3
+; PC64LE-NEXT:    mffprd 5, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptoui_v3i64_v3f64:
@@ -6540,17 +6542,17 @@ define <3 x float> @constrained_vector_fptrunc_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_fptrunc_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xsrsp 0, 1
-; PC64LE-NEXT:    addis 3, 2, .LCPI129_0@toc@ha
 ; PC64LE-NEXT:    xsrsp 1, 2
+; PC64LE-NEXT:    addis 3, 2, .LCPI129_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI129_0@toc@l
-; PC64LE-NEXT:    xsrsp 2, 3
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 2
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    xxmrghw 35, 1, 0
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    xsrsp 0, 3
+; PC64LE-NEXT:    xscvdpspn 36, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fptrunc_v3f64:
@@ -6646,8 +6648,8 @@ define <3 x double> @constrained_vector_fpext_v3f32(<3 x float> %x) #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE-NEXT:    xxswapd 1, 34
-; PC64LE-NEXT:    xxsldwi 4, 34, 34, 3
 ; PC64LE-NEXT:    xscvspdpn 3, 0
+; PC64LE-NEXT:    xxsldwi 4, 34, 34, 3
 ; PC64LE-NEXT:    xscvspdpn 2, 1
 ; PC64LE-NEXT:    xscvspdpn 1, 4
 ; PC64LE-NEXT:    blr
@@ -6673,13 +6675,14 @@ define <4 x double> @constrained_vector_fpext_v4f32(<4 x float> %x) #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
 ; PC64LE-NEXT:    xxswapd 1, 34
-; PC64LE-NEXT:    xxsldwi 3, 34, 34, 1
-; PC64LE-NEXT:    xscvspdpn 2, 34
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvspdpn 3, 3
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
-; PC64LE-NEXT:    xxmrghd 35, 2, 3
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
+; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    xxmrghd 0, 1, 0
+; PC64LE-NEXT:    xscvspdpn 1, 34
+; PC64LE-NEXT:    xxlor 34, 0, 0
+; PC64LE-NEXT:    xxmrghd 35, 1, 2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fpext_v4f32:
@@ -6739,24 +6742,24 @@ entry:
 define <3 x float> @constrained_vector_ceil_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_ceil_v3f32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT:    xxswapd 1, 34
+; PC64LE-NEXT:    xxswapd 0, 34
+; PC64LE-NEXT:    xxsldwi 1, 34, 34, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI137_0@toc@ha
-; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI137_0@toc@l
-; PC64LE-NEXT:    lxvd2x 3, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvspdpn 2, 2
-; PC64LE-NEXT:    xxswapd 36, 3
+; PC64LE-NEXT:    addi 3, 3, .LCPI137_0@toc@l
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xsrdpip 0, 0
 ; PC64LE-NEXT:    xsrdpip 1, 1
-; PC64LE-NEXT:    xsrdpip 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 2
-; PC64LE-NEXT:    xxmrghw 35, 1, 0
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    xscvspdpn 0, 2
+; PC64LE-NEXT:    xsrdpip 0, 0
+; PC64LE-NEXT:    xscvdpspn 36, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_ceil_v3f32:
@@ -6795,8 +6798,8 @@ define <3 x double> @constrained_vector_ceil_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    xsrdpip 3, 3
 ; PC64LE-NEXT:    xvrdpip 2, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_ceil_v3f64:
@@ -6855,24 +6858,24 @@ entry:
 define <3 x float> @constrained_vector_floor_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_floor_v3f32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT:    xxswapd 1, 34
+; PC64LE-NEXT:    xxswapd 0, 34
+; PC64LE-NEXT:    xxsldwi 1, 34, 34, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI141_0@toc@ha
-; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI141_0@toc@l
-; PC64LE-NEXT:    lxvd2x 3, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvspdpn 2, 2
-; PC64LE-NEXT:    xxswapd 36, 3
+; PC64LE-NEXT:    addi 3, 3, .LCPI141_0@toc@l
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xsrdpim 0, 0
 ; PC64LE-NEXT:    xsrdpim 1, 1
-; PC64LE-NEXT:    xsrdpim 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 2
-; PC64LE-NEXT:    xxmrghw 35, 1, 0
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    xscvspdpn 0, 2
+; PC64LE-NEXT:    xsrdpim 0, 0
+; PC64LE-NEXT:    xscvdpspn 36, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_floor_v3f32:
@@ -6911,8 +6914,8 @@ define <3 x double> @constrained_vector_floor_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    xsrdpim 3, 3
 ; PC64LE-NEXT:    xvrdpim 2, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_floor_v3f64:
@@ -6970,24 +6973,24 @@ entry:
 define <3 x float> @constrained_vector_round_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_round_v3f32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT:    xxswapd 1, 34
+; PC64LE-NEXT:    xxswapd 0, 34
+; PC64LE-NEXT:    xxsldwi 1, 34, 34, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI145_0@toc@ha
-; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI145_0@toc@l
-; PC64LE-NEXT:    lxvd2x 3, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvspdpn 2, 2
-; PC64LE-NEXT:    xxswapd 36, 3
+; PC64LE-NEXT:    addi 3, 3, .LCPI145_0@toc@l
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xsrdpi 0, 0
 ; PC64LE-NEXT:    xsrdpi 1, 1
-; PC64LE-NEXT:    xsrdpi 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 2
-; PC64LE-NEXT:    xxmrghw 35, 1, 0
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    xscvspdpn 0, 2
+; PC64LE-NEXT:    xsrdpi 0, 0
+; PC64LE-NEXT:    xscvdpspn 36, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_round_v3f32:
@@ -7027,8 +7030,8 @@ define <3 x double> @constrained_vector_round_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    xsrdpi 3, 3
 ; PC64LE-NEXT:    xvrdpi 2, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_round_v3f64:
@@ -7086,24 +7089,24 @@ entry:
 define <3 x float> @constrained_vector_trunc_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_trunc_v3f32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT:    xxswapd 1, 34
+; PC64LE-NEXT:    xxswapd 0, 34
+; PC64LE-NEXT:    xxsldwi 1, 34, 34, 3
 ; PC64LE-NEXT:    addis 3, 2, .LCPI149_0@toc@ha
-; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI149_0@toc@l
-; PC64LE-NEXT:    lxvd2x 3, 0, 3
 ; PC64LE-NEXT:    xscvspdpn 0, 0
 ; PC64LE-NEXT:    xscvspdpn 1, 1
-; PC64LE-NEXT:    xscvspdpn 2, 2
-; PC64LE-NEXT:    xxswapd 36, 3
+; PC64LE-NEXT:    addi 3, 3, .LCPI149_0@toc@l
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    xsrdpiz 0, 0
 ; PC64LE-NEXT:    xsrdpiz 1, 1
-; PC64LE-NEXT:    xsrdpiz 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 2
-; PC64LE-NEXT:    xxmrghw 35, 1, 0
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxmrghw 34, 0, 1
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    xscvspdpn 0, 2
+; PC64LE-NEXT:    xsrdpiz 0, 0
+; PC64LE-NEXT:    xscvdpspn 36, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_trunc_v3f32:
@@ -7142,8 +7145,8 @@ define <3 x double> @constrained_vector_trunc_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    xsrdpiz 3, 3
 ; PC64LE-NEXT:    xvrdpiz 2, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_trunc_v3f64:
@@ -7283,9 +7286,9 @@ define <2 x double> @constrained_vector_sitofp_v2f64_v2i32(<2 x i32> %x) #0 {
 ; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mffprwz 4, 1
 ; PC64LE-NEXT:    mtfprwa 0, 3
-; PC64LE-NEXT:    mtfprwa 1, 4
+; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mtfprwa 1, 3
 ; PC64LE-NEXT:    xscvsxddp 0, 0
 ; PC64LE-NEXT:    xscvsxddp 1, 1
 ; PC64LE-NEXT:    xxmrghd 34, 1, 0
@@ -7317,9 +7320,9 @@ define <2 x float> @constrained_vector_sitofp_v2f32_v2i32(<2 x i32> %x) #0 {
 ; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mffprwz 4, 1
 ; PC64LE-NEXT:    mtfprwa 0, 3
-; PC64LE-NEXT:    mtfprwa 1, 4
+; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mtfprwa 1, 3
 ; PC64LE-NEXT:    xscvsxdsp 0, 0
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
 ; PC64LE-NEXT:    xscvdpspn 0, 0
@@ -7370,16 +7373,16 @@ entry:
 define <2 x float> @constrained_vector_sitofp_v2f32_v2i64(<2 x i64> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sitofp_v2f32_v2i64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    mfvsrd 3, 34
-; PC64LE-NEXT:    mffprd 4, 0
+; PC64LE-NEXT:    xxswapd 0, 34
+; PC64LE-NEXT:    mtfprd 1, 3
+; PC64LE-NEXT:    mffprd 3, 0
 ; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    mtfprd 1, 4
-; PC64LE-NEXT:    xscvsxdsp 0, 0
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
-; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvsxdsp 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxmrghw 34, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sitofp_v2f32_v2i64:
@@ -7406,16 +7409,16 @@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sitofp_v3f64_v3i32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 34
-; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
-; PC64LE-NEXT:    mfvsrwz 3, 34
-; PC64LE-NEXT:    mtfprwa 3, 3
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mffprwz 4, 1
 ; PC64LE-NEXT:    mtfprwa 0, 3
-; PC64LE-NEXT:    mtfprwa 2, 4
+; PC64LE-NEXT:    mffprwz 3, 2
 ; PC64LE-NEXT:    xscvsxddp 1, 0
-; PC64LE-NEXT:    xscvsxddp 2, 2
-; PC64LE-NEXT:    xscvsxddp 3, 3
+; PC64LE-NEXT:    mtfprwa 0, 3
+; PC64LE-NEXT:    mfvsrwz 3, 34
+; PC64LE-NEXT:    xscvsxddp 2, 0
+; PC64LE-NEXT:    mtfprwa 0, 3
+; PC64LE-NEXT:    xscvsxddp 3, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sitofp_v3f64_v3i32:
@@ -7445,23 +7448,23 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
+; PC64LE-NEXT:    mffprwz 3, 0
+; PC64LE-NEXT:    mtfprwa 0, 3
+; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mtfprwa 1, 3
+; PC64LE-NEXT:    xscvsxdsp 0, 0
 ; PC64LE-NEXT:    addis 3, 2, .LCPI161_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI161_0@toc@l
-; PC64LE-NEXT:    lxvd2x 3, 0, 3
-; PC64LE-NEXT:    mffprwz 4, 0
-; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtfprwa 0, 4
-; PC64LE-NEXT:    xxswapd 36, 3
-; PC64LE-NEXT:    mtfprwa 1, 5
-; PC64LE-NEXT:    mfvsrwz 4, 34
-; PC64LE-NEXT:    xscvsxdsp 0, 0
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
-; PC64LE-NEXT:    mtfprwa 2, 4
-; PC64LE-NEXT:    xscvsxdsp 2, 2
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 2
 ; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    mfvsrwz 3, 34
+; PC64LE-NEXT:    xxswapd 36, 0
+; PC64LE-NEXT:    mtfprwa 0, 3
+; PC64LE-NEXT:    xscvsxdsp 0, 0
+; PC64LE-NEXT:    xscvdpspn 34, 0
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
@@ -7499,11 +7502,11 @@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i64(<3 x i64> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sitofp_v3f64_v3i64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    mtfprd 2, 4
-; PC64LE-NEXT:    mtfprd 3, 5
 ; PC64LE-NEXT:    xscvsxddp 1, 0
-; PC64LE-NEXT:    xscvsxddp 2, 2
-; PC64LE-NEXT:    xscvsxddp 3, 3
+; PC64LE-NEXT:    mtfprd 0, 4
+; PC64LE-NEXT:    xscvsxddp 2, 0
+; PC64LE-NEXT:    mtfprd 0, 5
+; PC64LE-NEXT:    xscvsxddp 3, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sitofp_v3f64_v3i64:
@@ -7527,20 +7530,20 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sitofp_v3f32_v3i64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    addis 6, 2, .LCPI163_0@toc@ha
 ; PC64LE-NEXT:    mtfprd 1, 4
-; PC64LE-NEXT:    addi 3, 6, .LCPI163_0@toc@l
+; PC64LE-NEXT:    addis 3, 2, .LCPI163_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI163_0@toc@l
 ; PC64LE-NEXT:    xscvsxdsp 0, 0
 ; PC64LE-NEXT:    xscvsxdsp 1, 1
-; PC64LE-NEXT:    mtfprd 2, 5
-; PC64LE-NEXT:    xscvsxdsp 2, 2
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 2
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xxmrghw 35, 1, 0
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    mtfprd 0, 5
+; PC64LE-NEXT:    xscvsxdsp 0, 0
+; PC64LE-NEXT:    xscvdpspn 36, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i64:
@@ -7572,22 +7575,23 @@ define <4 x double> @constrained_vector_sitofp_v4f64_v4i32(<4 x i32> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sitofp_v4f64_v4i32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 34
-; PC64LE-NEXT:    mfvsrwz 3, 34
 ; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
-; PC64LE-NEXT:    mtfprwa 2, 3
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT:    mffprwz 4, 1
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 3
+; PC64LE-NEXT:    mtfprwa 0, 3
+; PC64LE-NEXT:    mffprwz 3, 1
 ; PC64LE-NEXT:    mtfprwa 1, 3
-; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mtfprwa 3, 4
-; PC64LE-NEXT:    xscvsxddp 0, 2
+; PC64LE-NEXT:    xscvsxddp 0, 0
+; PC64LE-NEXT:    mfvsrwz 3, 34
+; PC64LE-NEXT:    xscvsxddp 1, 1
+; PC64LE-NEXT:    xxmrghd 0, 1, 0
+; PC64LE-NEXT:    mtfprwa 1, 3
+; PC64LE-NEXT:    mffprwz 3, 2
+; PC64LE-NEXT:    xxlor 34, 0, 0
 ; PC64LE-NEXT:    mtfprwa 2, 3
 ; PC64LE-NEXT:    xscvsxddp 1, 1
-; PC64LE-NEXT:    xscvsxddp 3, 3
 ; PC64LE-NEXT:    xscvsxddp 2, 2
-; PC64LE-NEXT:    xxmrghd 34, 3, 1
-; PC64LE-NEXT:    xxmrghd 35, 2, 0
+; PC64LE-NEXT:    xxmrghd 35, 2, 1
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sitofp_v4f64_v4i32:
@@ -7622,8 +7626,8 @@ entry:
 define <4 x float> @constrained_vector_sitofp_v4f32_v4i16(<4 x i16> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sitofp_v4f32_v4i16:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    vmrglh 2, 2, 2
 ; PC64LE-NEXT:    vspltisw 3, 8
+; PC64LE-NEXT:    vmrglh 2, 2, 2
 ; PC64LE-NEXT:    vadduwm 3, 3, 3
 ; PC64LE-NEXT:    vslw 2, 2, 3
 ; PC64LE-NEXT:    vsraw 2, 2, 3
@@ -7685,24 +7689,24 @@ entry:
 define <4 x float> @constrained_vector_sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sitofp_v4f32_v4i64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    mfvsrd 3, 34
-; PC64LE-NEXT:    xxswapd 2, 35
-; PC64LE-NEXT:    mfvsrd 4, 35
-; PC64LE-NEXT:    mtfprd 1, 3
-; PC64LE-NEXT:    mffprd 3, 0
-; PC64LE-NEXT:    mtfprd 0, 4
-; PC64LE-NEXT:    mtfprd 3, 3
-; PC64LE-NEXT:    mffprd 3, 2
-; PC64LE-NEXT:    xscvsxdsp 1, 1
+; PC64LE-NEXT:    xxswapd 0, 34
+; PC64LE-NEXT:    xxswapd 1, 35
 ; PC64LE-NEXT:    mtfprd 2, 3
-; PC64LE-NEXT:    xscvsxdsp 0, 0
-; PC64LE-NEXT:    xscvsxdsp 3, 3
+; PC64LE-NEXT:    mfvsrd 3, 35
+; PC64LE-NEXT:    mtfprd 3, 3
+; PC64LE-NEXT:    mffprd 3, 0
 ; PC64LE-NEXT:    xscvsxdsp 2, 2
-; PC64LE-NEXT:    xxmrghd 0, 0, 1
-; PC64LE-NEXT:    xxmrghd 1, 2, 3
-; PC64LE-NEXT:    xvcvdpsp 34, 0
-; PC64LE-NEXT:    xvcvdpsp 35, 1
+; PC64LE-NEXT:    mtfprd 0, 3
+; PC64LE-NEXT:    mffprd 3, 1
+; PC64LE-NEXT:    xscvsxdsp 3, 3
+; PC64LE-NEXT:    mtfprd 1, 3
+; PC64LE-NEXT:    xscvsxdsp 0, 0
+; PC64LE-NEXT:    xscvsxdsp 1, 1
+; PC64LE-NEXT:    xxmrghd 2, 3, 2
+; PC64LE-NEXT:    xvcvdpsp 34, 2
+; PC64LE-NEXT:    xxmrghd 0, 1, 0
+; PC64LE-NEXT:    xvcvdpsp 35, 0
 ; PC64LE-NEXT:    vmrgew 2, 2, 3
 ; PC64LE-NEXT:    blr
 ;
@@ -7849,9 +7853,9 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i32(<2 x i32> %x) #0 {
 ; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mffprwz 4, 1
 ; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mtfprwz 1, 4
+; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mtfprwz 1, 3
 ; PC64LE-NEXT:    xscvuxddp 0, 0
 ; PC64LE-NEXT:    xscvuxddp 1, 1
 ; PC64LE-NEXT:    xxmrghd 34, 1, 0
@@ -7883,9 +7887,9 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i32(<2 x i32> %x) #0 {
 ; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mffprwz 4, 1
 ; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mtfprwz 1, 4
+; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mtfprwz 1, 3
 ; PC64LE-NEXT:    xscvuxdsp 0, 0
 ; PC64LE-NEXT:    xscvuxdsp 1, 1
 ; PC64LE-NEXT:    xscvdpspn 0, 0
@@ -7936,16 +7940,16 @@ entry:
 define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_uitofp_v2f32_v2i64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    mfvsrd 3, 34
-; PC64LE-NEXT:    mffprd 4, 0
+; PC64LE-NEXT:    xxswapd 0, 34
+; PC64LE-NEXT:    mtfprd 1, 3
+; PC64LE-NEXT:    mffprd 3, 0
 ; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    mtfprd 1, 4
-; PC64LE-NEXT:    xscvuxdsp 0, 0
 ; PC64LE-NEXT:    xscvuxdsp 1, 1
-; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvuxdsp 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxmrghw 34, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_uitofp_v2f32_v2i64:
@@ -7972,16 +7976,16 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_uitofp_v3f64_v3i32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 34
-; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
-; PC64LE-NEXT:    mfvsrwz 3, 34
-; PC64LE-NEXT:    mtfprwz 3, 3
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 1
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mffprwz 4, 1
 ; PC64LE-NEXT:    mtfprwz 0, 3
-; PC64LE-NEXT:    mtfprwz 2, 4
+; PC64LE-NEXT:    mffprwz 3, 2
 ; PC64LE-NEXT:    xscvuxddp 1, 0
-; PC64LE-NEXT:    xscvuxddp 2, 2
-; PC64LE-NEXT:    xscvuxddp 3, 3
+; PC64LE-NEXT:    mtfprwz 0, 3
+; PC64LE-NEXT:    mfvsrwz 3, 34
+; PC64LE-NEXT:    xscvuxddp 2, 0
+; PC64LE-NEXT:    mtfprwz 0, 3
+; PC64LE-NEXT:    xscvuxddp 3, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_uitofp_v3f64_v3i32:
@@ -8011,23 +8015,23 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
+; PC64LE-NEXT:    mffprwz 3, 0
+; PC64LE-NEXT:    mtfprwz 0, 3
+; PC64LE-NEXT:    mffprwz 3, 1
+; PC64LE-NEXT:    mtfprwz 1, 3
+; PC64LE-NEXT:    xscvuxdsp 0, 0
 ; PC64LE-NEXT:    addis 3, 2, .LCPI179_0@toc@ha
 ; PC64LE-NEXT:    addi 3, 3, .LCPI179_0@toc@l
-; PC64LE-NEXT:    lxvd2x 3, 0, 3
-; PC64LE-NEXT:    mffprwz 4, 0
-; PC64LE-NEXT:    mffprwz 5, 1
-; PC64LE-NEXT:    mtfprwz 0, 4
-; PC64LE-NEXT:    xxswapd 36, 3
-; PC64LE-NEXT:    mtfprwz 1, 5
-; PC64LE-NEXT:    mfvsrwz 4, 34
-; PC64LE-NEXT:    xscvuxdsp 0, 0
 ; PC64LE-NEXT:    xscvuxdsp 1, 1
-; PC64LE-NEXT:    mtfprwz 2, 4
-; PC64LE-NEXT:    xscvuxdsp 2, 2
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 2
 ; PC64LE-NEXT:    xxmrghw 35, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    mfvsrwz 3, 34
+; PC64LE-NEXT:    xxswapd 36, 0
+; PC64LE-NEXT:    mtfprwz 0, 3
+; PC64LE-NEXT:    xscvuxdsp 0, 0
+; PC64LE-NEXT:    xscvdpspn 34, 0
 ; PC64LE-NEXT:    vperm 2, 2, 3, 4
 ; PC64LE-NEXT:    blr
 ;
@@ -8065,11 +8069,11 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_uitofp_v3f64_v3i64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    mtfprd 2, 4
-; PC64LE-NEXT:    mtfprd 3, 5
 ; PC64LE-NEXT:    xscvuxddp 1, 0
-; PC64LE-NEXT:    xscvuxddp 2, 2
-; PC64LE-NEXT:    xscvuxddp 3, 3
+; PC64LE-NEXT:    mtfprd 0, 4
+; PC64LE-NEXT:    xscvuxddp 2, 0
+; PC64LE-NEXT:    mtfprd 0, 5
+; PC64LE-NEXT:    xscvuxddp 3, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_uitofp_v3f64_v3i64:
@@ -8093,20 +8097,20 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_uitofp_v3f32_v3i64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mtfprd 0, 3
-; PC64LE-NEXT:    addis 6, 2, .LCPI181_0@toc@ha
 ; PC64LE-NEXT:    mtfprd 1, 4
-; PC64LE-NEXT:    addi 3, 6, .LCPI181_0@toc@l
+; PC64LE-NEXT:    addis 3, 2, .LCPI181_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI181_0@toc@l
 ; PC64LE-NEXT:    xscvuxdsp 0, 0
 ; PC64LE-NEXT:    xscvuxdsp 1, 1
-; PC64LE-NEXT:    mtfprd 2, 5
-; PC64LE-NEXT:    xscvuxdsp 2, 2
 ; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 34, 2
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xxmrghw 35, 1, 0
-; PC64LE-NEXT:    xxswapd 36, 2
-; PC64LE-NEXT:    vperm 2, 2, 3, 4
+; PC64LE-NEXT:    xxmrghw 34, 1, 0
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 35, 0
+; PC64LE-NEXT:    mtfprd 0, 5
+; PC64LE-NEXT:    xscvuxdsp 0, 0
+; PC64LE-NEXT:    xscvdpspn 36, 0
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_uitofp_v3f32_v3i64:
@@ -8138,22 +8142,23 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_uitofp_v4f64_v4i32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    xxswapd 0, 34
-; PC64LE-NEXT:    mfvsrwz 3, 34
 ; PC64LE-NEXT:    xxsldwi 1, 34, 34, 1
-; PC64LE-NEXT:    mtfprwz 2, 3
 ; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    xxsldwi 0, 34, 34, 3
-; PC64LE-NEXT:    mffprwz 4, 1
+; PC64LE-NEXT:    xxsldwi 2, 34, 34, 3
+; PC64LE-NEXT:    mtfprwz 0, 3
+; PC64LE-NEXT:    mffprwz 3, 1
 ; PC64LE-NEXT:    mtfprwz 1, 3
-; PC64LE-NEXT:    mffprwz 3, 0
-; PC64LE-NEXT:    mtfprwz 3, 4
-; PC64LE-NEXT:    xscvuxddp 0, 2
+; PC64LE-NEXT:    xscvuxddp 0, 0
+; PC64LE-NEXT:    mfvsrwz 3, 34
+; PC64LE-NEXT:    xscvuxddp 1, 1
+; PC64LE-NEXT:    xxmrghd 0, 1, 0
+; PC64LE-NEXT:    mtfprwz 1, 3
+; PC64LE-NEXT:    mffprwz 3, 2
+; PC64LE-NEXT:    xxlor 34, 0, 0
 ; PC64LE-NEXT:    mtfprwz 2, 3
 ; PC64LE-NEXT:    xscvuxddp 1, 1
-; PC64LE-NEXT:    xscvuxddp 3, 3
 ; PC64LE-NEXT:    xscvuxddp 2, 2
-; PC64LE-NEXT:    xxmrghd 34, 3, 1
-; PC64LE-NEXT:    xxmrghd 35, 2, 0
+; PC64LE-NEXT:    xxmrghd 35, 2, 1
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_uitofp_v4f64_v4i32:
@@ -8248,24 +8253,24 @@ entry:
 define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_uitofp_v4f32_v4i64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    xxswapd 0, 34
 ; PC64LE-NEXT:    mfvsrd 3, 34
-; PC64LE-NEXT:    xxswapd 2, 35
-; PC64LE-NEXT:    mfvsrd 4, 35
-; PC64LE-NEXT:    mtfprd 1, 3
-; PC64LE-NEXT:    mffprd 3, 0
-; PC64LE-NEXT:    mtfprd 0, 4
-; PC64LE-NEXT:    mtfprd 3, 3
-; PC64LE-NEXT:    mffprd 3, 2
-; PC64LE-NEXT:    xscvuxdsp 1, 1
+; PC64LE-NEXT:    xxswapd 0, 34
+; PC64LE-NEXT:    xxswapd 1, 35
 ; PC64LE-NEXT:    mtfprd 2, 3
-; PC64LE-NEXT:    xscvuxdsp 0, 0
-; PC64LE-NEXT:    xscvuxdsp 3, 3
+; PC64LE-NEXT:    mfvsrd 3, 35
+; PC64LE-NEXT:    mtfprd 3, 3
+; PC64LE-NEXT:    mffprd 3, 0
 ; PC64LE-NEXT:    xscvuxdsp 2, 2
-; PC64LE-NEXT:    xxmrghd 0, 0, 1
-; PC64LE-NEXT:    xxmrghd 1, 2, 3
-; PC64LE-NEXT:    xvcvdpsp 34, 0
-; PC64LE-NEXT:    xvcvdpsp 35, 1
+; PC64LE-NEXT:    mtfprd 0, 3
+; PC64LE-NEXT:    mffprd 3, 1
+; PC64LE-NEXT:    xscvuxdsp 3, 3
+; PC64LE-NEXT:    mtfprd 1, 3
+; PC64LE-NEXT:    xscvuxdsp 0, 0
+; PC64LE-NEXT:    xscvuxdsp 1, 1
+; PC64LE-NEXT:    xxmrghd 2, 3, 2
+; PC64LE-NEXT:    xvcvdpsp 34, 2
+; PC64LE-NEXT:    xxmrghd 0, 1, 0
+; PC64LE-NEXT:    xvcvdpsp 35, 0
 ; PC64LE-NEXT:    vmrgew 2, 2, 3
 ; PC64LE-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vector-ldst.ll b/llvm/test/CodeGen/PowerPC/vector-ldst.ll
index f39c663eadebf77..d756ad5edf6b912 100644
--- a/llvm/test/CodeGen/PowerPC/vector-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-ldst.ll
@@ -525,9 +525,9 @@ define dso_local <16 x i8> @ld_disjoint_align32_vector(i64 %ptr) {
 ; CHECK-P8-LE-LABEL: ld_disjoint_align32_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    lis r4, -15264
-; CHECK-P8-LE-NEXT:    lis r5, 15258
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
-; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-LE-NEXT:    lis r4, 15258
+; CHECK-P8-LE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-LE-NEXT:    lxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-LE-NEXT:    blr
@@ -535,9 +535,9 @@ define dso_local <16 x i8> @ld_disjoint_align32_vector(i64 %ptr) {
 ; CHECK-P8-BE-LABEL: ld_disjoint_align32_vector:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    lis r4, -15264
-; CHECK-P8-BE-NEXT:    lis r5, 15258
 ; CHECK-P8-BE-NEXT:    and r3, r3, r4
-; CHECK-P8-BE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-BE-NEXT:    lis r4, 15258
+; CHECK-P8-BE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-BE-NEXT:    lxvw4x v2, r3, r4
 ; CHECK-P8-BE-NEXT:    blr
 entry:
@@ -988,8 +988,8 @@ define dso_local void @st_unalign32_vector(ptr nocapture %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_unalign32_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
-; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    lis r4, 1
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 34463
 ; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
@@ -1022,8 +1022,8 @@ define dso_local void @st_align32_vector(ptr nocapture %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_align32_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
-; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    lis r4, 1525
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 56600
 ; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
@@ -1246,8 +1246,8 @@ define dso_local void @st_disjoint_unalign16_vector(i64 %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_disjoint_unalign16_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
-; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    rldicr r3, r3, 0, 51
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 6
 ; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
@@ -1283,8 +1283,8 @@ define dso_local void @st_disjoint_align16_vector(i64 %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_disjoint_align16_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
-; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    rldicr r3, r3, 0, 51
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 24
 ; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
@@ -1314,8 +1314,8 @@ define dso_local void @st_not_disjoint32_vector(i64 %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_not_disjoint32_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
-; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 34463
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    oris r3, r3, 1
 ; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
@@ -1351,8 +1351,8 @@ define dso_local void @st_disjoint_unalign32_vector(i64 %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_disjoint_unalign32_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
-; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    lis r4, 1
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    rldicr r3, r3, 0, 43
 ; CHECK-P8-LE-NEXT:    ori r4, r4, 34463
 ; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
@@ -1393,20 +1393,20 @@ define dso_local void @st_disjoint_align32_vector(i64 %ptr, <16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_disjoint_align32_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
-; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    lis r4, -15264
-; CHECK-P8-LE-NEXT:    lis r5, 15258
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    and r3, r3, r4
-; CHECK-P8-LE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-LE-NEXT:    lis r4, 15258
+; CHECK-P8-LE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-LE-NEXT:    stxvd2x vs0, r3, r4
 ; CHECK-P8-LE-NEXT:    blr
 ;
 ; CHECK-P8-BE-LABEL: st_disjoint_align32_vector:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    lis r4, -15264
-; CHECK-P8-BE-NEXT:    lis r5, 15258
 ; CHECK-P8-BE-NEXT:    and r3, r3, r4
-; CHECK-P8-BE-NEXT:    ori r4, r5, 41712
+; CHECK-P8-BE-NEXT:    lis r4, 15258
+; CHECK-P8-BE-NEXT:    ori r4, r4, 41712
 ; CHECK-P8-BE-NEXT:    stxvw4x v2, r3, r4
 ; CHECK-P8-BE-NEXT:    blr
 entry:
@@ -1625,8 +1625,8 @@ define dso_local void @st_cst_unalign32_vector(<16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_cst_unalign32_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
-; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    lis r3, 1
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 34463
 ; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr
@@ -1659,8 +1659,8 @@ define dso_local void @st_cst_align32_vector(<16 x i8> %str) {
 ;
 ; CHECK-P8-LE-LABEL: st_cst_align32_vector:
 ; CHECK-P8-LE:       # %bb.0: # %entry
-; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    lis r3, 152
+; CHECK-P8-LE-NEXT:    xxswapd vs0, v2
 ; CHECK-P8-LE-NEXT:    ori r3, r3, 38428
 ; CHECK-P8-LE-NEXT:    stxvd2x vs0, 0, r3
 ; CHECK-P8-LE-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/vperm-swap.ll b/llvm/test/CodeGen/PowerPC/vperm-swap.ll
index 6a2750faa61a803..0a3b5ae613db8a5 100644
--- a/llvm/test/CodeGen/PowerPC/vperm-swap.ll
+++ b/llvm/test/CodeGen/PowerPC/vperm-swap.ll
@@ -22,13 +22,13 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
 ; CHECK-LE-P8-NEXT: .byte   31                              # 0x1f
 ; CHECK-LE-P8-LABEL: test_none_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
 ; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
-; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI0_0@toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI0_0@toc@l
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/vselect-constants.ll b/llvm/test/CodeGen/PowerPC/vselect-constants.ll
index d2d331b0a078c5b..b72142943dd8be6 100644
--- a/llvm/test/CodeGen/PowerPC/vselect-constants.ll
+++ b/llvm/test/CodeGen/PowerPC/vselect-constants.ll
@@ -10,20 +10,20 @@
 define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) {
 ; CHECK-LABEL: sel_C1_or_C2_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
 ; CHECK-NEXT:    vspltisw 3, -16
 ; CHECK-NEXT:    vspltisw 4, 15
-; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; CHECK-NEXT:    addis 4, 2, .LCPI0_1@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI0_0@toc@l
-; CHECK-NEXT:    addi 4, 4, .LCPI0_1@toc@l
-; CHECK-NEXT:    lxvd2x 0, 0, 3
-; CHECK-NEXT:    lxvd2x 1, 0, 4
 ; CHECK-NEXT:    vsubuwm 3, 4, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
 ; CHECK-NEXT:    vslw 2, 2, 3
-; CHECK-NEXT:    xxswapd 36, 1
+; CHECK-NEXT:    addi 3, 3, .LCPI0_1@toc@l
 ; CHECK-NEXT:    vsraw 2, 2, 3
-; CHECK-NEXT:    xxswapd 35, 0
-; CHECK-NEXT:    xxsel 34, 36, 35, 34
+; CHECK-NEXT:    xxswapd 37, 0
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 32, 0
+; CHECK-NEXT:    xxsel 34, 32, 37, 34
 ; CHECK-NEXT:    blr
   %add = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
   ret <4 x i32> %add
@@ -33,15 +33,15 @@ define <4 x i32> @cmp_sel_C1_or_C2_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: cmp_sel_C1_or_C2_vec:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
-; CHECK-NEXT:    addis 4, 2, .LCPI1_1@toc@ha
 ; CHECK-NEXT:    vcmpequw 2, 2, 3
 ; CHECK-NEXT:    addi 3, 3, .LCPI1_0@toc@l
-; CHECK-NEXT:    addi 4, 4, .LCPI1_1@toc@l
 ; CHECK-NEXT:    lxvd2x 0, 0, 3
-; CHECK-NEXT:    lxvd2x 1, 0, 4
-; CHECK-NEXT:    xxswapd 35, 0
-; CHECK-NEXT:    xxswapd 36, 1
-; CHECK-NEXT:    xxsel 34, 36, 35, 34
+; CHECK-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
+; CHECK-NEXT:    addi 3, 3, .LCPI1_1@toc@l
+; CHECK-NEXT:    xxswapd 36, 0
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    xxswapd 37, 0
+; CHECK-NEXT:    xxsel 34, 37, 36, 34
 ; CHECK-NEXT:    blr
   %cond = icmp eq <4 x i32> %x, %y
   %add = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
@@ -54,8 +54,8 @@ define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) {
 ; CHECK-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
 ; CHECK-NEXT:    vspltisw 3, 1
 ; CHECK-NEXT:    addi 3, 3, .LCPI2_0@toc@l
-; CHECK-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-NEXT:    xxland 34, 34, 35
+; CHECK-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-NEXT:    xxswapd 36, 0
 ; CHECK-NEXT:    vadduwm 2, 2, 4
 ; CHECK-NEXT:    blr
@@ -70,8 +70,8 @@ define <4 x i32> @cmp_sel_Cplus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-NEXT:    vcmpequw 2, 2, 3
 ; CHECK-NEXT:    addi 3, 3, .LCPI3_0@toc@l
 ; CHECK-NEXT:    lxvd2x 0, 0, 3
-; CHECK-NEXT:    xxswapd 35, 0
-; CHECK-NEXT:    vsubuwm 2, 3, 2
+; CHECK-NEXT:    xxswapd 36, 0
+; CHECK-NEXT:    vsubuwm 2, 4, 2
 ; CHECK-NEXT:    blr
   %cond = icmp eq <4 x i32> %x, %y
   %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
@@ -81,16 +81,16 @@ define <4 x i32> @cmp_sel_Cplus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) {
 ; CHECK-LABEL: sel_Cminus1_or_C_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
 ; CHECK-NEXT:    vspltisw 3, -16
 ; CHECK-NEXT:    vspltisw 4, 15
-; CHECK-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI4_0@toc@l
-; CHECK-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-NEXT:    vsubuwm 3, 4, 3
+; CHECK-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-NEXT:    vslw 2, 2, 3
 ; CHECK-NEXT:    vsraw 2, 2, 3
-; CHECK-NEXT:    xxswapd 35, 0
-; CHECK-NEXT:    vadduwm 2, 2, 3
+; CHECK-NEXT:    xxswapd 37, 0
+; CHECK-NEXT:    vadduwm 2, 2, 5
 ; CHECK-NEXT:    blr
   %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
   ret <4 x i32> %add
@@ -103,8 +103,8 @@ define <4 x i32> @cmp_sel_Cminus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-NEXT:    vcmpequw 2, 2, 3
 ; CHECK-NEXT:    addi 3, 3, .LCPI5_0@toc@l
 ; CHECK-NEXT:    lxvd2x 0, 0, 3
-; CHECK-NEXT:    xxswapd 35, 0
-; CHECK-NEXT:    vadduwm 2, 2, 3
+; CHECK-NEXT:    xxswapd 36, 0
+; CHECK-NEXT:    vadduwm 2, 2, 4
 ; CHECK-NEXT:    blr
   %cond = icmp eq <4 x i32> %x, %y
   %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
@@ -170,9 +170,9 @@ define <4 x i32> @sel_1_or_0_vec(<4 x i1> %cond) {
 define <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: cmp_sel_1_or_0_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vspltisw 4, 1
 ; CHECK-NEXT:    vcmpequw 2, 2, 3
-; CHECK-NEXT:    vspltisw 3, 1
-; CHECK-NEXT:    xxland 34, 34, 35
+; CHECK-NEXT:    xxland 34, 34, 36
 ; CHECK-NEXT:    blr
   %cond = icmp eq <4 x i32> %x, %y
   %add = select <4 x i1> %cond, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
@@ -193,9 +193,9 @@ define <4 x i32> @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: cmp_sel_0_or_1_vec:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vcmpequw 2, 2, 3
-; CHECK-NEXT:    vspltisw 3, 1
+; CHECK-NEXT:    vspltisw 4, 1
 ; CHECK-NEXT:    xxlnor 0, 34, 34
-; CHECK-NEXT:    xxland 34, 0, 35
+; CHECK-NEXT:    xxland 34, 0, 36
 ; CHECK-NEXT:    blr
   %cond = icmp eq <4 x i32> %x, %y
   %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>

diff  --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index e42e2ae24332626..f2e68a78a495f39 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -2423,11 +2423,11 @@ define <2 x i32> @test80(i32 %v) {
 ;
 ; CHECK-LE-LABEL: test80:
 ; CHECK-LE:       # %bb.0:
-; CHECK-LE-NEXT:    addis r4, r2, .LCPI65_0@toc@ha
-; CHECK-LE-NEXT:    mtfprwz f1, r3
-; CHECK-LE-NEXT:    addi r4, r4, .LCPI65_0@toc@l
-; CHECK-LE-NEXT:    lxvd2x vs0, 0, r4
-; CHECK-LE-NEXT:    xxspltw v2, vs1, 1
+; CHECK-LE-NEXT:    mtfprwz f0, r3
+; CHECK-LE-NEXT:    addis r3, r2, .LCPI65_0@toc@ha
+; CHECK-LE-NEXT:    addi r3, r3, .LCPI65_0@toc@l
+; CHECK-LE-NEXT:    xxspltw v2, vs0, 1
+; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-LE-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-NEXT:    vadduwm v2, v2, v3
 ; CHECK-LE-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
index 6bbd94b28faafdf..16aa63cac0ab712 100644
--- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -191,19 +191,19 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; LE-64BIT-LABEL: lshr_16bytes:
 ; LE-64BIT:       # %bb.0:
 ; LE-64BIT-NEXT:    lwz 4, 0(4)
-; LE-64BIT-NEXT:    ld 6, 0(3)
-; LE-64BIT-NEXT:    ld 3, 8(3)
+; LE-64BIT-NEXT:    ld 6, 8(3)
+; LE-64BIT-NEXT:    ld 3, 0(3)
 ; LE-64BIT-NEXT:    slwi 4, 4, 3
 ; LE-64BIT-NEXT:    subfic 7, 4, 64
-; LE-64BIT-NEXT:    srd 6, 6, 4
-; LE-64BIT-NEXT:    addi 8, 4, -64
-; LE-64BIT-NEXT:    sld 7, 3, 7
-; LE-64BIT-NEXT:    or 6, 6, 7
-; LE-64BIT-NEXT:    srd 7, 3, 8
-; LE-64BIT-NEXT:    or 6, 6, 7
 ; LE-64BIT-NEXT:    srd 3, 3, 4
-; LE-64BIT-NEXT:    std 3, 8(5)
-; LE-64BIT-NEXT:    std 6, 0(5)
+; LE-64BIT-NEXT:    sld 7, 6, 7
+; LE-64BIT-NEXT:    or 3, 3, 7
+; LE-64BIT-NEXT:    addi 7, 4, -64
+; LE-64BIT-NEXT:    srd 4, 6, 4
+; LE-64BIT-NEXT:    srd 7, 6, 7
+; LE-64BIT-NEXT:    std 4, 8(5)
+; LE-64BIT-NEXT:    or 3, 3, 7
+; LE-64BIT-NEXT:    std 3, 0(5)
 ; LE-64BIT-NEXT:    blr
 ;
 ; BE-LABEL: lshr_16bytes:
@@ -265,19 +265,19 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; LE-64BIT-LABEL: shl_16bytes:
 ; LE-64BIT:       # %bb.0:
 ; LE-64BIT-NEXT:    lwz 4, 0(4)
-; LE-64BIT-NEXT:    ld 6, 8(3)
-; LE-64BIT-NEXT:    ld 3, 0(3)
+; LE-64BIT-NEXT:    ld 6, 0(3)
+; LE-64BIT-NEXT:    ld 3, 8(3)
 ; LE-64BIT-NEXT:    slwi 4, 4, 3
 ; LE-64BIT-NEXT:    subfic 7, 4, 64
-; LE-64BIT-NEXT:    sld 6, 6, 4
-; LE-64BIT-NEXT:    addi 8, 4, -64
-; LE-64BIT-NEXT:    srd 7, 3, 7
-; LE-64BIT-NEXT:    or 6, 6, 7
-; LE-64BIT-NEXT:    sld 7, 3, 8
-; LE-64BIT-NEXT:    or 6, 6, 7
 ; LE-64BIT-NEXT:    sld 3, 3, 4
-; LE-64BIT-NEXT:    std 3, 0(5)
-; LE-64BIT-NEXT:    std 6, 8(5)
+; LE-64BIT-NEXT:    srd 7, 6, 7
+; LE-64BIT-NEXT:    or 3, 3, 7
+; LE-64BIT-NEXT:    addi 7, 4, -64
+; LE-64BIT-NEXT:    sld 4, 6, 4
+; LE-64BIT-NEXT:    sld 7, 6, 7
+; LE-64BIT-NEXT:    std 4, 0(5)
+; LE-64BIT-NEXT:    or 3, 3, 7
+; LE-64BIT-NEXT:    std 3, 8(5)
 ; LE-64BIT-NEXT:    blr
 ;
 ; BE-LABEL: shl_16bytes:
@@ -338,20 +338,20 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; LE-64BIT-LABEL: ashr_16bytes:
 ; LE-64BIT:       # %bb.0:
 ; LE-64BIT-NEXT:    lwz 4, 0(4)
-; LE-64BIT-NEXT:    ld 6, 0(3)
-; LE-64BIT-NEXT:    ld 3, 8(3)
+; LE-64BIT-NEXT:    ld 6, 8(3)
+; LE-64BIT-NEXT:    ld 3, 0(3)
 ; LE-64BIT-NEXT:    slwi 4, 4, 3
 ; LE-64BIT-NEXT:    subfic 7, 4, 64
-; LE-64BIT-NEXT:    srd 6, 6, 4
-; LE-64BIT-NEXT:    addi 8, 4, -64
-; LE-64BIT-NEXT:    sld 7, 3, 7
-; LE-64BIT-NEXT:    cmpwi 8, 1
-; LE-64BIT-NEXT:    or 6, 6, 7
-; LE-64BIT-NEXT:    srad 7, 3, 8
-; LE-64BIT-NEXT:    isellt 6, 6, 7
-; LE-64BIT-NEXT:    srad 3, 3, 4
-; LE-64BIT-NEXT:    std 3, 8(5)
-; LE-64BIT-NEXT:    std 6, 0(5)
+; LE-64BIT-NEXT:    srd 3, 3, 4
+; LE-64BIT-NEXT:    sld 7, 6, 7
+; LE-64BIT-NEXT:    or 3, 3, 7
+; LE-64BIT-NEXT:    addi 7, 4, -64
+; LE-64BIT-NEXT:    srad 4, 6, 4
+; LE-64BIT-NEXT:    cmpwi 7, 1
+; LE-64BIT-NEXT:    srad 8, 6, 7
+; LE-64BIT-NEXT:    std 4, 8(5)
+; LE-64BIT-NEXT:    isellt 3, 3, 8
+; LE-64BIT-NEXT:    std 3, 0(5)
 ; LE-64BIT-NEXT:    blr
 ;
 ; BE-LABEL: ashr_16bytes:
@@ -422,18 +422,18 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
 ; LE-64BIT-NEXT:    xxlxor 2, 2, 2
 ; LE-64BIT-NEXT:    addi 7, 1, -64
-; LE-64BIT-NEXT:    li 8, 32
 ; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
 ; LE-64BIT-NEXT:    lwz 3, 0(4)
 ; LE-64BIT-NEXT:    li 4, 48
 ; LE-64BIT-NEXT:    stxvd2x 2, 7, 4
-; LE-64BIT-NEXT:    stxvd2x 2, 7, 8
+; LE-64BIT-NEXT:    li 4, 32
 ; LE-64BIT-NEXT:    clrldi 3, 3, 59
+; LE-64BIT-NEXT:    stxvd2x 2, 7, 4
 ; LE-64BIT-NEXT:    stxvd2x 0, 7, 6
 ; LE-64BIT-NEXT:    stxvd2x 1, 0, 7
-; LE-64BIT-NEXT:    add 4, 7, 3
 ; LE-64BIT-NEXT:    lxvd2x 0, 7, 3
-; LE-64BIT-NEXT:    lxvd2x 1, 4, 6
+; LE-64BIT-NEXT:    add 3, 7, 3
+; LE-64BIT-NEXT:    lxvd2x 1, 3, 6
 ; LE-64BIT-NEXT:    stxvd2x 1, 5, 6
 ; LE-64BIT-NEXT:    stxvd2x 0, 0, 5
 ; LE-64BIT-NEXT:    blr
@@ -530,23 +530,23 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; LE-64BIT-LABEL: shl_32bytes:
 ; LE-64BIT:       # %bb.0:
 ; LE-64BIT-NEXT:    li 6, 16
-; LE-64BIT-NEXT:    lwz 4, 0(4)
-; LE-64BIT-NEXT:    xxlxor 1, 1, 1
-; LE-64BIT-NEXT:    lxvd2x 2, 0, 3
+; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
+; LE-64BIT-NEXT:    xxlxor 2, 2, 2
 ; LE-64BIT-NEXT:    li 7, 48
-; LE-64BIT-NEXT:    addi 8, 1, -32
 ; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
-; LE-64BIT-NEXT:    addi 3, 1, -64
-; LE-64BIT-NEXT:    clrlwi 4, 4, 27
-; LE-64BIT-NEXT:    stxvd2x 1, 3, 6
-; LE-64BIT-NEXT:    neg 4, 4
-; LE-64BIT-NEXT:    stxvd2x 0, 3, 7
+; LE-64BIT-NEXT:    lwz 3, 0(4)
+; LE-64BIT-NEXT:    addi 4, 1, -64
+; LE-64BIT-NEXT:    stxvd2x 2, 4, 6
+; LE-64BIT-NEXT:    clrlwi 3, 3, 27
+; LE-64BIT-NEXT:    stxvd2x 0, 4, 7
 ; LE-64BIT-NEXT:    li 7, 32
-; LE-64BIT-NEXT:    extsw 4, 4
-; LE-64BIT-NEXT:    stxvd2x 2, 3, 7
-; LE-64BIT-NEXT:    stxvd2x 1, 0, 3
-; LE-64BIT-NEXT:    add 3, 8, 4
-; LE-64BIT-NEXT:    lxvd2x 0, 8, 4
+; LE-64BIT-NEXT:    neg 3, 3
+; LE-64BIT-NEXT:    stxvd2x 1, 4, 7
+; LE-64BIT-NEXT:    stxvd2x 2, 0, 4
+; LE-64BIT-NEXT:    extsw 3, 3
+; LE-64BIT-NEXT:    addi 4, 1, -32
+; LE-64BIT-NEXT:    lxvd2x 0, 4, 3
+; LE-64BIT-NEXT:    add 3, 4, 3
 ; LE-64BIT-NEXT:    lxvd2x 1, 3, 6
 ; LE-64BIT-NEXT:    stxvd2x 1, 5, 6
 ; LE-64BIT-NEXT:    stxvd2x 0, 0, 5
@@ -639,25 +639,25 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; LE-64BIT-LABEL: ashr_32bytes:
 ; LE-64BIT:       # %bb.0:
-; LE-64BIT-NEXT:    ld 7, 16(3)
-; LE-64BIT-NEXT:    ld 8, 24(3)
 ; LE-64BIT-NEXT:    lxvd2x 0, 0, 3
+; LE-64BIT-NEXT:    ld 6, 16(3)
+; LE-64BIT-NEXT:    ld 3, 24(3)
+; LE-64BIT-NEXT:    addi 7, 1, -64
 ; LE-64BIT-NEXT:    lwz 4, 0(4)
-; LE-64BIT-NEXT:    addi 6, 1, -64
-; LE-64BIT-NEXT:    sradi 3, 8, 63
-; LE-64BIT-NEXT:    clrldi 4, 4, 59
-; LE-64BIT-NEXT:    std 8, 24(6)
-; LE-64BIT-NEXT:    std 7, 16(6)
-; LE-64BIT-NEXT:    std 3, 56(6)
-; LE-64BIT-NEXT:    std 3, 48(6)
-; LE-64BIT-NEXT:    li 7, 16
-; LE-64BIT-NEXT:    std 3, 40(6)
-; LE-64BIT-NEXT:    std 3, 32(6)
-; LE-64BIT-NEXT:    add 3, 6, 4
-; LE-64BIT-NEXT:    stxvd2x 0, 0, 6
-; LE-64BIT-NEXT:    lxvd2x 0, 6, 4
-; LE-64BIT-NEXT:    lxvd2x 1, 3, 7
-; LE-64BIT-NEXT:    stxvd2x 1, 5, 7
+; LE-64BIT-NEXT:    li 8, 16
+; LE-64BIT-NEXT:    std 3, 24(7)
+; LE-64BIT-NEXT:    sradi 3, 3, 63
+; LE-64BIT-NEXT:    std 6, 16(7)
+; LE-64BIT-NEXT:    std 3, 56(7)
+; LE-64BIT-NEXT:    std 3, 48(7)
+; LE-64BIT-NEXT:    std 3, 40(7)
+; LE-64BIT-NEXT:    std 3, 32(7)
+; LE-64BIT-NEXT:    clrldi 3, 4, 59
+; LE-64BIT-NEXT:    stxvd2x 0, 0, 7
+; LE-64BIT-NEXT:    lxvd2x 0, 7, 3
+; LE-64BIT-NEXT:    add 3, 7, 3
+; LE-64BIT-NEXT:    lxvd2x 1, 3, 8
+; LE-64BIT-NEXT:    stxvd2x 1, 5, 8
 ; LE-64BIT-NEXT:    stxvd2x 0, 0, 5
 ; LE-64BIT-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
index dd150f4aee0fcbd..abfe6a953dd6c83 100644
--- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
@@ -173,18 +173,18 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; LE-64BIT-LABEL: lshr_16bytes:
 ; LE-64BIT:       # %bb.0:
 ; LE-64BIT-NEXT:    lwz 4, 0(4)
-; LE-64BIT-NEXT:    ld 6, 0(3)
-; LE-64BIT-NEXT:    ld 3, 8(3)
+; LE-64BIT-NEXT:    ld 6, 8(3)
 ; LE-64BIT-NEXT:    subfic 7, 4, 64
-; LE-64BIT-NEXT:    srd 6, 6, 4
-; LE-64BIT-NEXT:    addi 8, 4, -64
-; LE-64BIT-NEXT:    sld 7, 3, 7
-; LE-64BIT-NEXT:    or 6, 6, 7
-; LE-64BIT-NEXT:    srd 7, 3, 8
-; LE-64BIT-NEXT:    or 6, 6, 7
+; LE-64BIT-NEXT:    ld 3, 0(3)
 ; LE-64BIT-NEXT:    srd 3, 3, 4
-; LE-64BIT-NEXT:    std 3, 8(5)
-; LE-64BIT-NEXT:    std 6, 0(5)
+; LE-64BIT-NEXT:    sld 7, 6, 7
+; LE-64BIT-NEXT:    or 3, 3, 7
+; LE-64BIT-NEXT:    addi 7, 4, -64
+; LE-64BIT-NEXT:    srd 4, 6, 4
+; LE-64BIT-NEXT:    srd 7, 6, 7
+; LE-64BIT-NEXT:    std 4, 8(5)
+; LE-64BIT-NEXT:    or 3, 3, 7
+; LE-64BIT-NEXT:    std 3, 0(5)
 ; LE-64BIT-NEXT:    blr
 ;
 ; BE-LABEL: lshr_16bytes:
@@ -260,18 +260,18 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; LE-64BIT-LABEL: shl_16bytes:
 ; LE-64BIT:       # %bb.0:
 ; LE-64BIT-NEXT:    lwz 4, 0(4)
-; LE-64BIT-NEXT:    ld 6, 8(3)
-; LE-64BIT-NEXT:    ld 3, 0(3)
+; LE-64BIT-NEXT:    ld 6, 0(3)
 ; LE-64BIT-NEXT:    subfic 7, 4, 64
-; LE-64BIT-NEXT:    sld 6, 6, 4
-; LE-64BIT-NEXT:    addi 8, 4, -64
-; LE-64BIT-NEXT:    srd 7, 3, 7
-; LE-64BIT-NEXT:    or 6, 6, 7
-; LE-64BIT-NEXT:    sld 7, 3, 8
-; LE-64BIT-NEXT:    or 6, 6, 7
+; LE-64BIT-NEXT:    ld 3, 8(3)
 ; LE-64BIT-NEXT:    sld 3, 3, 4
-; LE-64BIT-NEXT:    std 3, 0(5)
-; LE-64BIT-NEXT:    std 6, 8(5)
+; LE-64BIT-NEXT:    srd 7, 6, 7
+; LE-64BIT-NEXT:    or 3, 3, 7
+; LE-64BIT-NEXT:    addi 7, 4, -64
+; LE-64BIT-NEXT:    sld 4, 6, 4
+; LE-64BIT-NEXT:    sld 7, 6, 7
+; LE-64BIT-NEXT:    std 4, 0(5)
+; LE-64BIT-NEXT:    or 3, 3, 7
+; LE-64BIT-NEXT:    std 3, 8(5)
 ; LE-64BIT-NEXT:    blr
 ;
 ; BE-LABEL: shl_16bytes:
@@ -346,19 +346,19 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; LE-64BIT-LABEL: ashr_16bytes:
 ; LE-64BIT:       # %bb.0:
 ; LE-64BIT-NEXT:    lwz 4, 0(4)
-; LE-64BIT-NEXT:    ld 6, 0(3)
-; LE-64BIT-NEXT:    ld 3, 8(3)
+; LE-64BIT-NEXT:    ld 6, 8(3)
 ; LE-64BIT-NEXT:    subfic 7, 4, 64
-; LE-64BIT-NEXT:    srd 6, 6, 4
-; LE-64BIT-NEXT:    addi 8, 4, -64
-; LE-64BIT-NEXT:    sld 7, 3, 7
-; LE-64BIT-NEXT:    cmpwi 8, 1
-; LE-64BIT-NEXT:    or 6, 6, 7
-; LE-64BIT-NEXT:    srad 7, 3, 8
-; LE-64BIT-NEXT:    isellt 6, 6, 7
-; LE-64BIT-NEXT:    srad 3, 3, 4
-; LE-64BIT-NEXT:    std 3, 8(5)
-; LE-64BIT-NEXT:    std 6, 0(5)
+; LE-64BIT-NEXT:    ld 3, 0(3)
+; LE-64BIT-NEXT:    srd 3, 3, 4
+; LE-64BIT-NEXT:    sld 7, 6, 7
+; LE-64BIT-NEXT:    or 3, 3, 7
+; LE-64BIT-NEXT:    addi 7, 4, -64
+; LE-64BIT-NEXT:    srad 4, 6, 4
+; LE-64BIT-NEXT:    cmpwi 7, 1
+; LE-64BIT-NEXT:    srad 8, 6, 7
+; LE-64BIT-NEXT:    std 4, 8(5)
+; LE-64BIT-NEXT:    isellt 3, 3, 8
+; LE-64BIT-NEXT:    std 3, 0(5)
 ; LE-64BIT-NEXT:    blr
 ;
 ; BE-LABEL: ashr_16bytes:
@@ -440,42 +440,42 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; LE-64BIT-LABEL: lshr_32bytes:
 ; LE-64BIT:       # %bb.0:
 ; LE-64BIT-NEXT:    li 6, 16
-; LE-64BIT-NEXT:    lxvd2x 2, 0, 3
-; LE-64BIT-NEXT:    xxlxor 0, 0, 0
-; LE-64BIT-NEXT:    lwz 4, 0(4)
+; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
+; LE-64BIT-NEXT:    xxlxor 2, 2, 2
 ; LE-64BIT-NEXT:    addi 7, 1, -64
 ; LE-64BIT-NEXT:    li 8, 32
-; LE-64BIT-NEXT:    lxvd2x 1, 3, 6
-; LE-64BIT-NEXT:    li 3, 48
-; LE-64BIT-NEXT:    stxvd2x 0, 7, 3
-; LE-64BIT-NEXT:    stxvd2x 0, 7, 8
-; LE-64BIT-NEXT:    rlwinm 3, 4, 29, 27, 31
-; LE-64BIT-NEXT:    stxvd2x 1, 7, 6
-; LE-64BIT-NEXT:    stxvd2x 2, 0, 7
-; LE-64BIT-NEXT:    ldux 6, 3, 7
-; LE-64BIT-NEXT:    li 7, 7
-; LE-64BIT-NEXT:    nand 7, 4, 7
-; LE-64BIT-NEXT:    clrlwi 4, 4, 29
-; LE-64BIT-NEXT:    clrlwi 7, 7, 26
-; LE-64BIT-NEXT:    subfic 11, 4, 64
-; LE-64BIT-NEXT:    ld 8, 16(3)
-; LE-64BIT-NEXT:    ld 9, 8(3)
-; LE-64BIT-NEXT:    ld 3, 24(3)
-; LE-64BIT-NEXT:    srd 6, 6, 4
+; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
+; LE-64BIT-NEXT:    lwz 3, 0(4)
+; LE-64BIT-NEXT:    li 4, 48
+; LE-64BIT-NEXT:    stxvd2x 2, 7, 4
+; LE-64BIT-NEXT:    stxvd2x 2, 7, 8
+; LE-64BIT-NEXT:    rlwinm 4, 3, 29, 27, 31
+; LE-64BIT-NEXT:    stxvd2x 0, 7, 6
+; LE-64BIT-NEXT:    stxvd2x 1, 0, 7
+; LE-64BIT-NEXT:    li 6, 7
+; LE-64BIT-NEXT:    ldux 7, 4, 7
+; LE-64BIT-NEXT:    ld 8, 16(4)
+; LE-64BIT-NEXT:    nand 6, 3, 6
+; LE-64BIT-NEXT:    ld 9, 8(4)
+; LE-64BIT-NEXT:    clrlwi 3, 3, 29
+; LE-64BIT-NEXT:    ld 4, 24(4)
+; LE-64BIT-NEXT:    clrlwi 6, 6, 26
+; LE-64BIT-NEXT:    srd 7, 7, 3
 ; LE-64BIT-NEXT:    sldi 10, 8, 1
-; LE-64BIT-NEXT:    srd 8, 8, 4
-; LE-64BIT-NEXT:    sld 7, 10, 7
-; LE-64BIT-NEXT:    srd 10, 9, 4
-; LE-64BIT-NEXT:    sld 9, 9, 11
-; LE-64BIT-NEXT:    sld 11, 3, 11
-; LE-64BIT-NEXT:    or 7, 10, 7
-; LE-64BIT-NEXT:    or 8, 11, 8
-; LE-64BIT-NEXT:    or 6, 9, 6
-; LE-64BIT-NEXT:    std 7, 8(5)
-; LE-64BIT-NEXT:    srd 3, 3, 4
-; LE-64BIT-NEXT:    std 6, 0(5)
-; LE-64BIT-NEXT:    std 8, 16(5)
+; LE-64BIT-NEXT:    srd 11, 9, 3
+; LE-64BIT-NEXT:    srd 8, 8, 3
+; LE-64BIT-NEXT:    sld 6, 10, 6
+; LE-64BIT-NEXT:    subfic 10, 3, 64
+; LE-64BIT-NEXT:    srd 3, 4, 3
+; LE-64BIT-NEXT:    or 6, 11, 6
+; LE-64BIT-NEXT:    sld 11, 4, 10
+; LE-64BIT-NEXT:    sld 9, 9, 10
 ; LE-64BIT-NEXT:    std 3, 24(5)
+; LE-64BIT-NEXT:    or 7, 9, 7
+; LE-64BIT-NEXT:    or 3, 11, 8
+; LE-64BIT-NEXT:    std 6, 8(5)
+; LE-64BIT-NEXT:    std 7, 0(5)
+; LE-64BIT-NEXT:    std 3, 16(5)
 ; LE-64BIT-NEXT:    blr
 ;
 ; BE-LABEL: lshr_32bytes:
@@ -628,44 +628,44 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; LE-64BIT:       # %bb.0:
 ; LE-64BIT-NEXT:    li 6, 16
 ; LE-64BIT-NEXT:    lwz 4, 0(4)
-; LE-64BIT-NEXT:    xxlxor 1, 1, 1
-; LE-64BIT-NEXT:    lxvd2x 2, 0, 3
-; LE-64BIT-NEXT:    li 7, 48
+; LE-64BIT-NEXT:    xxlxor 2, 2, 2
+; LE-64BIT-NEXT:    addi 7, 1, -64
+; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
+; LE-64BIT-NEXT:    addi 8, 1, -32
 ; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
-; LE-64BIT-NEXT:    addi 3, 1, -64
-; LE-64BIT-NEXT:    rlwinm 8, 4, 29, 27, 31
-; LE-64BIT-NEXT:    stxvd2x 1, 3, 6
+; LE-64BIT-NEXT:    stxvd2x 2, 7, 6
+; LE-64BIT-NEXT:    li 6, 48
+; LE-64BIT-NEXT:    rlwinm 3, 4, 29, 27, 31
+; LE-64BIT-NEXT:    neg 3, 3
+; LE-64BIT-NEXT:    stxvd2x 0, 7, 6
 ; LE-64BIT-NEXT:    li 6, 32
-; LE-64BIT-NEXT:    stxvd2x 0, 3, 7
-; LE-64BIT-NEXT:    neg 7, 8
-; LE-64BIT-NEXT:    addi 8, 1, -32
-; LE-64BIT-NEXT:    stxvd2x 2, 3, 6
+; LE-64BIT-NEXT:    extsw 3, 3
+; LE-64BIT-NEXT:    stxvd2x 1, 7, 6
+; LE-64BIT-NEXT:    stxvd2x 2, 0, 7
 ; LE-64BIT-NEXT:    li 6, 7
-; LE-64BIT-NEXT:    stxvd2x 1, 0, 3
-; LE-64BIT-NEXT:    extsw 3, 7
-; LE-64BIT-NEXT:    nand 6, 4, 6
-; LE-64BIT-NEXT:    clrlwi 4, 4, 29
 ; LE-64BIT-NEXT:    ldux 3, 8, 3
-; LE-64BIT-NEXT:    clrlwi 6, 6, 26
-; LE-64BIT-NEXT:    subfic 11, 4, 64
 ; LE-64BIT-NEXT:    ld 7, 8(8)
+; LE-64BIT-NEXT:    nand 6, 4, 6
 ; LE-64BIT-NEXT:    ld 9, 16(8)
+; LE-64BIT-NEXT:    clrlwi 4, 4, 29
 ; LE-64BIT-NEXT:    ld 8, 24(8)
+; LE-64BIT-NEXT:    clrlwi 6, 6, 26
 ; LE-64BIT-NEXT:    rldicl 10, 7, 63, 1
 ; LE-64BIT-NEXT:    sld 8, 8, 4
+; LE-64BIT-NEXT:    sld 7, 7, 4
 ; LE-64BIT-NEXT:    srd 6, 10, 6
 ; LE-64BIT-NEXT:    sld 10, 9, 4
-; LE-64BIT-NEXT:    srd 9, 9, 11
-; LE-64BIT-NEXT:    srd 11, 3, 11
 ; LE-64BIT-NEXT:    or 6, 10, 6
-; LE-64BIT-NEXT:    sld 7, 7, 4
-; LE-64BIT-NEXT:    or 8, 8, 9
-; LE-64BIT-NEXT:    std 6, 16(5)
-; LE-64BIT-NEXT:    or 7, 7, 11
+; LE-64BIT-NEXT:    subfic 10, 4, 64
+; LE-64BIT-NEXT:    srd 9, 9, 10
+; LE-64BIT-NEXT:    srd 10, 3, 10
 ; LE-64BIT-NEXT:    sld 3, 3, 4
-; LE-64BIT-NEXT:    std 8, 24(5)
+; LE-64BIT-NEXT:    std 6, 16(5)
+; LE-64BIT-NEXT:    or 7, 7, 10
 ; LE-64BIT-NEXT:    std 3, 0(5)
+; LE-64BIT-NEXT:    or 3, 8, 9
 ; LE-64BIT-NEXT:    std 7, 8(5)
+; LE-64BIT-NEXT:    std 3, 24(5)
 ; LE-64BIT-NEXT:    blr
 ;
 ; BE-LABEL: shl_32bytes:
@@ -812,44 +812,44 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; LE-64BIT-LABEL: ashr_32bytes:
 ; LE-64BIT:       # %bb.0:
-; LE-64BIT-NEXT:    ld 7, 16(3)
-; LE-64BIT-NEXT:    ld 8, 24(3)
 ; LE-64BIT-NEXT:    lxvd2x 0, 0, 3
+; LE-64BIT-NEXT:    ld 6, 24(3)
 ; LE-64BIT-NEXT:    lwz 4, 0(4)
-; LE-64BIT-NEXT:    addi 6, 1, -64
-; LE-64BIT-NEXT:    sradi 3, 8, 63
-; LE-64BIT-NEXT:    std 8, 24(6)
-; LE-64BIT-NEXT:    std 7, 16(6)
-; LE-64BIT-NEXT:    std 3, 56(6)
-; LE-64BIT-NEXT:    rlwinm 7, 4, 29, 27, 31
-; LE-64BIT-NEXT:    std 3, 48(6)
-; LE-64BIT-NEXT:    std 3, 40(6)
-; LE-64BIT-NEXT:    std 3, 32(6)
-; LE-64BIT-NEXT:    stxvd2x 0, 0, 6
-; LE-64BIT-NEXT:    ldux 3, 7, 6
-; LE-64BIT-NEXT:    li 6, 7
-; LE-64BIT-NEXT:    nand 6, 4, 6
+; LE-64BIT-NEXT:    addi 7, 1, -64
+; LE-64BIT-NEXT:    ld 3, 16(3)
+; LE-64BIT-NEXT:    sradi 8, 6, 63
+; LE-64BIT-NEXT:    rlwinm 9, 4, 29, 27, 31
+; LE-64BIT-NEXT:    std 6, 24(7)
+; LE-64BIT-NEXT:    std 3, 16(7)
+; LE-64BIT-NEXT:    li 3, 7
+; LE-64BIT-NEXT:    std 8, 56(7)
+; LE-64BIT-NEXT:    std 8, 48(7)
+; LE-64BIT-NEXT:    std 8, 40(7)
+; LE-64BIT-NEXT:    std 8, 32(7)
+; LE-64BIT-NEXT:    stxvd2x 0, 0, 7
+; LE-64BIT-NEXT:    nand 3, 4, 3
 ; LE-64BIT-NEXT:    clrlwi 4, 4, 29
-; LE-64BIT-NEXT:    clrlwi 6, 6, 26
-; LE-64BIT-NEXT:    subfic 11, 4, 64
-; LE-64BIT-NEXT:    ld 8, 16(7)
-; LE-64BIT-NEXT:    ld 9, 8(7)
-; LE-64BIT-NEXT:    ld 7, 24(7)
-; LE-64BIT-NEXT:    srd 3, 3, 4
-; LE-64BIT-NEXT:    sldi 10, 8, 1
-; LE-64BIT-NEXT:    srd 8, 8, 4
-; LE-64BIT-NEXT:    sld 6, 10, 6
-; LE-64BIT-NEXT:    srd 10, 9, 4
-; LE-64BIT-NEXT:    sld 9, 9, 11
-; LE-64BIT-NEXT:    sld 11, 7, 11
-; LE-64BIT-NEXT:    or 6, 10, 6
-; LE-64BIT-NEXT:    or 8, 11, 8
-; LE-64BIT-NEXT:    or 3, 9, 3
-; LE-64BIT-NEXT:    std 6, 8(5)
-; LE-64BIT-NEXT:    srad 4, 7, 4
-; LE-64BIT-NEXT:    std 3, 0(5)
-; LE-64BIT-NEXT:    std 8, 16(5)
+; LE-64BIT-NEXT:    ldux 6, 9, 7
+; LE-64BIT-NEXT:    ld 7, 16(9)
+; LE-64BIT-NEXT:    ld 8, 8(9)
+; LE-64BIT-NEXT:    clrlwi 3, 3, 26
+; LE-64BIT-NEXT:    ld 9, 24(9)
+; LE-64BIT-NEXT:    srd 6, 6, 4
+; LE-64BIT-NEXT:    sldi 10, 7, 1
+; LE-64BIT-NEXT:    srd 11, 8, 4
+; LE-64BIT-NEXT:    srd 7, 7, 4
+; LE-64BIT-NEXT:    sld 3, 10, 3
+; LE-64BIT-NEXT:    subfic 10, 4, 64
+; LE-64BIT-NEXT:    srad 4, 9, 4
+; LE-64BIT-NEXT:    or 3, 11, 3
+; LE-64BIT-NEXT:    sld 11, 9, 10
+; LE-64BIT-NEXT:    sld 8, 8, 10
 ; LE-64BIT-NEXT:    std 4, 24(5)
+; LE-64BIT-NEXT:    or 6, 8, 6
+; LE-64BIT-NEXT:    or 4, 11, 7
+; LE-64BIT-NEXT:    std 3, 8(5)
+; LE-64BIT-NEXT:    std 6, 0(5)
+; LE-64BIT-NEXT:    std 4, 16(5)
 ; LE-64BIT-NEXT:    blr
 ;
 ; BE-LABEL: ashr_32bytes:


        


More information about the llvm-commits mailing list