[llvm] 67c64d8 - [PowerPC] Implement scheduling model for Power10

Qiu Chaofan via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 18 00:32:55 PDT 2021


Author: Qiu Chaofan
Date: 2021-10-18T15:27:49+08:00
New Revision: 67c64d83378e7e84e30801420ebba453987e2546

URL: https://github.com/llvm/llvm-project/commit/67c64d83378e7e84e30801420ebba453987e2546
DIFF: https://github.com/llvm/llvm-project/commit/67c64d83378e7e84e30801420ebba453987e2546.diff

LOG: [PowerPC] Implement scheduling model for Power10

Reviewed By: jsji

Differential Revision: https://reviews.llvm.org/D110855

Added: 
    llvm/lib/Target/PowerPC/P10InstrResources.td
    llvm/lib/Target/PowerPC/PPCSchedPredicates.td
    llvm/lib/Target/PowerPC/PPCScheduleP10.td

Modified: 
    llvm/lib/Target/PowerPC/PPC.td
    llvm/lib/Target/PowerPC/PPCSchedule.td
    llvm/lib/Target/PowerPC/PPCScheduleP9.td
    llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
    llvm/test/CodeGen/PowerPC/constant-pool.ll
    llvm/test/CodeGen/PowerPC/int128_ldst.ll
    llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
    llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
    llvm/test/CodeGen/PowerPC/mma-outer-product.ll
    llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
    llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
    llvm/test/CodeGen/PowerPC/p10-fi-elim.ll
    llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
    llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
    llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
    llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
    llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
    llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
    llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
    llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
    llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
    llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
    llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll
    llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll
    llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll
    llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll
    llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll
    llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll
    llvm/test/CodeGen/PowerPC/spill-vec-pair.ll
    llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
    llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
new file mode 100644
index 000000000000..5c040f31db02
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -0,0 +1,2075 @@
+//===--- P10InstrResources.td - P10 Scheduling Definitions -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//
+// This file defines the itinerary class data for the POWER10 processor.
+//
+//===----------------------------------------------------------------------===//
+// 22 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_22C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+      (instrs
+    FDIVS,
+    XSDIVSP
+)>;
+
+// 2-way crack instructions
+// 22 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_BF_22C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    FDIVS_rec
+)>;
+
+// 24 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_24C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+      (instrs
+    XVDIVSP
+)>;
+
+// 26 Cycles Binary Floating Point operations, 1 input operand
+def : InstRW<[P10W_BF_26C, P10W_DISP_ANY, P10BF_Read],
+      (instrs
+    FSQRTS,
+    XSSQRTSP
+)>;
+
+// 2-way crack instructions
+// 26 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_BF_26C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    FSQRTS_rec
+)>;
+
+// 27 Cycles Binary Floating Point operations, 1 input operand
+def : InstRW<[P10W_BF_27C, P10W_DISP_ANY, P10BF_Read],
+      (instrs
+    XVSQRTSP
+)>;
+
+// 27 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_27C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+      (instrs
+    FDIV,
+    XSDIVDP,
+    XVDIVDP
+)>;
+
+// 2-way crack instructions
+// 27 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_BF_27C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    FDIV_rec
+)>;
+
+// 36 Cycles Binary Floating Point operations, 1 input operand
+def : InstRW<[P10W_BF_36C, P10W_DISP_ANY, P10BF_Read],
+      (instrs
+    FSQRT,
+    XSSQRTDP,
+    XVSQRTDP
+)>;
+
+// 2-way crack instructions
+// 36 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_BF_36C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    FSQRT_rec
+)>;
+
+// 7 Cycles Binary Floating Point operations, 1 input operand
+def : InstRW<[P10W_BF_7C, P10W_DISP_ANY, P10BF_Read],
+      (instrs
+    FCFID,
+    FCFIDS,
+    FCFIDU,
+    FCFIDUS,
+    FCTID,
+    FCTIDU,
+    FCTIDUZ,
+    FCTIDZ,
+    FCTIW,
+    FCTIWU,
+    FCTIWUZ,
+    FCTIWZ,
+    FRE,
+    FRES,
+    FRIMD, FRIMS,
+    FRIND, FRINS,
+    FRIPD, FRIPS,
+    FRIZD, FRIZS,
+    FRSP,
+    FRSQRTE,
+    FRSQRTES,
+    VCFSX, VCFSX_0,
+    VCFUX, VCFUX_0,
+    VCTSXS, VCTSXS_0,
+    VCTUXS, VCTUXS_0,
+    VLOGEFP,
+    VREFP,
+    VRFIM,
+    VRFIN,
+    VRFIP,
+    VRFIZ,
+    VRSQRTEFP,
+    XSCVDPHP,
+    XSCVDPSP,
+    XSCVDPSPN,
+    XSCVDPSXDS, XSCVDPSXDSs,
+    XSCVDPSXWS, XSCVDPSXWSs,
+    XSCVDPUXDS, XSCVDPUXDSs,
+    XSCVDPUXWS, XSCVDPUXWSs,
+    XSCVSPDP,
+    XSCVSXDDP,
+    XSCVSXDSP,
+    XSCVUXDDP,
+    XSCVUXDSP,
+    XSRDPI,
+    XSRDPIC,
+    XSRDPIM,
+    XSRDPIP,
+    XSRDPIZ,
+    XSREDP,
+    XSRESP,
+    XSRSP,
+    XSRSQRTEDP,
+    XSRSQRTESP,
+    XVCVDPSP,
+    XVCVDPSXDS,
+    XVCVDPSXWS,
+    XVCVDPUXDS,
+    XVCVDPUXWS,
+    XVCVSPBF16,
+    XVCVSPDP,
+    XVCVSPHP,
+    XVCVSPSXDS,
+    XVCVSPSXWS,
+    XVCVSPUXDS,
+    XVCVSPUXWS,
+    XVCVSXDDP,
+    XVCVSXDSP,
+    XVCVSXWDP,
+    XVCVSXWSP,
+    XVCVUXDDP,
+    XVCVUXDSP,
+    XVCVUXWDP,
+    XVCVUXWSP,
+    XVRDPI,
+    XVRDPIC,
+    XVRDPIM,
+    XVRDPIP,
+    XVRDPIZ,
+    XVREDP,
+    XVRESP,
+    XVRSPI,
+    XVRSPIC,
+    XVRSPIM,
+    XVRSPIP,
+    XVRSPIZ,
+    XVRSQRTEDP,
+    XVRSQRTESP
+)>;
+
+// 7 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+      (instrs
+    FADD,
+    FADDS,
+    FMUL,
+    FMULS,
+    FSUB,
+    FSUBS,
+    VADDFP,
+    VSUBFP,
+    XSADDDP,
+    XSADDSP,
+    XSMULDP,
+    XSMULSP,
+    XSSUBDP,
+    XSSUBSP,
+    XVADDDP,
+    XVADDSP,
+    XVMULDP,
+    XVMULSP,
+    XVSUBDP,
+    XVSUBSP
+)>;
+
+// 7 Cycles Binary Floating Point operations, 3 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_ANY, P10BF_Read, P10BF_Read, P10BF_Read],
+      (instrs
+    FMADD,
+    FMADDS,
+    FMSUB,
+    FMSUBS,
+    FNMADD,
+    FNMADDS,
+    FNMSUB,
+    FNMSUBS,
+    FSELD, FSELS,
+    VMADDFP,
+    VNMSUBFP,
+    XSMADDADP,
+    XSMADDASP,
+    XSMADDMDP,
+    XSMADDMSP,
+    XSMSUBADP,
+    XSMSUBASP,
+    XSMSUBMDP,
+    XSMSUBMSP,
+    XSNMADDADP,
+    XSNMADDASP,
+    XSNMADDMDP,
+    XSNMADDMSP,
+    XSNMSUBADP,
+    XSNMSUBASP,
+    XSNMSUBMDP,
+    XSNMSUBMSP,
+    XVMADDADP,
+    XVMADDASP,
+    XVMADDMDP,
+    XVMADDMSP,
+    XVMSUBADP,
+    XVMSUBASP,
+    XVMSUBMDP,
+    XVMSUBMSP,
+    XVNMADDADP,
+    XVNMADDASP,
+    XVNMADDMDP,
+    XVNMADDMSP,
+    XVNMSUBADP,
+    XVNMSUBASP,
+    XVNMSUBMDP,
+    XVNMSUBMSP
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 7 Cycles Binary Floating Point operations, 1 input operand
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_BF_7C, P10W_DISP_ANY, P10BF_Read],
+      (instrs
+    VEXPTEFP
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    FADD_rec,
+    FADDS_rec,
+    FMUL_rec,
+    FMULS_rec,
+    FSUB_rec,
+    FSUBS_rec
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    FCFID_rec,
+    FCFIDS_rec,
+    FCFIDU_rec,
+    FCFIDUS_rec,
+    FCTID_rec,
+    FCTIDU_rec,
+    FCTIDUZ_rec,
+    FCTIDZ_rec,
+    FCTIW_rec,
+    FCTIWU_rec,
+    FCTIWUZ_rec,
+    FCTIWZ_rec,
+    FRE_rec,
+    FRES_rec,
+    FRIMD_rec, FRIMS_rec,
+    FRIND_rec, FRINS_rec,
+    FRIPD_rec, FRIPS_rec,
+    FRIZD_rec, FRIZS_rec,
+    FRSP_rec,
+    FRSQRTE_rec,
+    FRSQRTES_rec
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 3 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    FMADD_rec,
+    FMADDS_rec,
+    FMSUB_rec,
+    FMSUBS_rec,
+    FNMADD_rec,
+    FNMADDS_rec,
+    FNMSUB_rec,
+    FNMSUBS_rec,
+    FSELD_rec, FSELS_rec
+)>;
+
+// 2 Cycles Branch operations, 0 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY],
+      (instrs
+    BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR,
+    BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL,
+    BL, BL8, BL8_NOP, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_TLS, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_TLS
+)>;
+
+// 2 Cycles Branch operations, 1 input operand
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read],
+      (instrs
+    B, BCC, BCCA, BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL, BCCLA, BCCLR, BCCLRL, CTRL_DEP, TAILB, TAILB8,
+    BA, TAILBA, TAILBA8,
+    BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL_LWZinto_toc, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat,
+    BCL, BCLalways, BCLn, BDNZL, BDNZLm, BDNZLp, BDZL, BDZLm, BDZLp, gBCL, gBCLat,
+    BLA, BLA8, BLA8_NOP
+)>;
+
+// 2 Cycles Branch operations, 3 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read],
+      (instrs
+    BCCTR, BCCTR8, BCCTR8n, BCCTRn, gBCCTR,
+    BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, gBCCTRL
+)>;
+
+// 2 Cycles Branch operations, 4 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read, P10BR_Read],
+      (instrs
+    BDNZA, BDNZAm, BDNZAp, BDZA, BDZAm, BDZAp, gBCA, gBCAat,
+    BDNZLA, BDNZLAm, BDNZLAp, BDZLA, BDZLAm, BDZLAp, gBCLA, gBCLAat
+)>;
+
+// 7 Cycles Crypto operations, 1 input operand
+def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read],
+      (instrs
+    VSBOX
+)>;
+
+// 7 Cycles Crypto operations, 2 input operands
+def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read, P10CY_Read],
+      (instrs
+    CFUGED,
+    CNTLZDM,
+    CNTTZDM,
+    PDEPD,
+    PEXTD,
+    VCFUGED,
+    VCIPHER,
+    VCIPHERLAST,
+    VCLZDM,
+    VCTZDM,
+    VGNB,
+    VNCIPHER,
+    VNCIPHERLAST,
+    VPDEPD,
+    VPEXTD,
+    VPMSUMB,
+    VPMSUMD,
+    VPMSUMH,
+    VPMSUMW
+)>;
+
+// 13 Cycles Decimal Floating Point operations, 1 input operand
+def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read],
+      (instrs
+    XSCVDPQP,
+    XSCVQPDP,
+    XSCVQPDPO,
+    XSCVQPSDZ,
+    XSCVQPSQZ,
+    XSCVQPSWZ,
+    XSCVQPUDZ,
+    XSCVQPUQZ,
+    XSCVQPUWZ,
+    XSCVSDQP,
+    XSCVSQQP,
+    XSCVUDQP,
+    XSCVUQQP
+)>;
+
+// 13 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+      (instrs
+    XSADDQP,
+    XSADDQPO,
+    XSSUBQP,
+    XSSUBQPO
+)>;
+
+// 13 Cycles Decimal Floating Point operations, 3 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
+      (instrs
+    BCDSR_rec,
+    XSRQPI,
+    XSRQPIX,
+    XSRQPXP
+)>;
+
+// 2-way crack instructions
+// 13 Cycles Decimal Floating Point operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY],
+      (instrs
+    HASHST,
+    HASHSTP
+)>;
+
+// 24 Cycles Decimal Floating Point operations, 1 input operand
+def : InstRW<[P10W_DF_24C, P10W_DISP_ANY, P10DF_Read],
+      (instrs
+    BCDCTSQ_rec
+)>;
+
+// 25 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_25C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+      (instrs
+    XSMULQP,
+    XSMULQPO
+)>;
+
+// 25 Cycles Decimal Floating Point operations, 3 input operands
+def : InstRW<[P10W_DF_25C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
+      (instrs
+    XSMADDQP,
+    XSMADDQPO,
+    XSMSUBQP,
+    XSMSUBQPO,
+    XSNMADDQP,
+    XSNMADDQPO,
+    XSNMSUBQP,
+    XSNMSUBQPO
+)>;
+
+// 38 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_38C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+      (instrs
+    BCDCFSQ_rec
+)>;
+
+// 59 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_59C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+      (instrs
+    XSDIVQP,
+    XSDIVQPO
+)>;
+
+// 61 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_61C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+      (instrs
+    VDIVESQ,
+    VDIVEUQ,
+    VDIVSQ,
+    VDIVUQ
+)>;
+
+// 68 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_68C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+      (instrs
+    VMODSQ,
+    VMODUQ
+)>;
+
+// 77 Cycles Decimal Floating Point operations, 1 input operand
+def : InstRW<[P10W_DF_77C, P10W_DISP_ANY, P10DF_Read],
+      (instrs
+    XSSQRTQP,
+    XSSQRTQPO
+)>;
+
+// 20 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_20C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    DIVW,
+    DIVWO,
+    DIVWU,
+    DIVWUO,
+    MODSW
+)>;
+
+// 2-way crack instructions
+// 20 Cycles Scalar Fixed-Point Divide operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_DV_20C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    DIVW_rec,
+    DIVWO_rec,
+    DIVWU_rec,
+    DIVWUO_rec
+)>;
+
+// 25 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_25C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    DIVD,
+    DIVDO,
+    DIVDU,
+    DIVDUO,
+    DIVWE,
+    DIVWEO,
+    DIVWEU,
+    DIVWEUO
+)>;
+
+// 2-way crack instructions
+// 25 Cycles Scalar Fixed-Point Divide operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_DV_25C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    DIVD_rec,
+    DIVDO_rec,
+    DIVDU_rec,
+    DIVDUO_rec,
+    DIVWE_rec,
+    DIVWEO_rec,
+    DIVWEU_rec,
+    DIVWEUO_rec
+)>;
+
+// 27 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_27C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    MODSD,
+    MODUD,
+    MODUW
+)>;
+
+// 41 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_41C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    DIVDE,
+    DIVDEO,
+    DIVDEU,
+    DIVDEUO
+)>;
+
+// 2-way crack instructions
+// 41 Cycles Scalar Fixed-Point Divide operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_DV_41C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    DIVDE_rec,
+    DIVDEO_rec,
+    DIVDEU_rec,
+    DIVDEUO_rec
+)>;
+
+// 43 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_43C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    VDIVSD,
+    VDIVUD
+)>;
+
+// 47 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_47C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    VMODSD,
+    VMODUD
+)>;
+
+// 54 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_54C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    VDIVSW,
+    VDIVUW
+)>;
+
+// 60 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_60C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    VMODSW,
+    VMODUW
+)>;
+
+// 75 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_75C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    VDIVESD,
+    VDIVEUD
+)>;
+
+// 83 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_83C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+      (instrs
+    VDIVESW,
+    VDIVEUW
+)>;
+
+// 5 Cycles Fixed-Point and BCD operations, 1 input operand
+def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read],
+      (instrs
+    BCDCTN_rec,
+    VMUL10CUQ,
+    VMUL10UQ,
+    XSXSIGQP
+)>;
+
+// 5 Cycles Fixed-Point and BCD operations, 2 input operands
+def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
+      (instrs
+    BCDCFN_rec,
+    BCDCFZ_rec,
+    BCDCPSGN_rec,
+    BCDCTZ_rec,
+    BCDSETSGN_rec,
+    BCDUS_rec,
+    BCDUTRUNC_rec,
+    VADDCUQ,
+    VADDUQM,
+    VMUL10ECUQ,
+    VMUL10EUQ,
+    VSUBCUQ,
+    VSUBUQM,
+    XSCMPEXPQP,
+    XSCMPOQP,
+    XSCMPUQP,
+    XSTSTDCQP,
+    XXGENPCVBM
+)>;
+
+// 5 Cycles Fixed-Point and BCD operations, 3 input operands
+def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read],
+      (instrs
+    BCDS_rec,
+    BCDTRUNC_rec,
+    VADDECUQ,
+    VADDEUQM,
+    VSUBECUQ,
+    VSUBEUQM
+)>;
+
+// 4 Cycles ALU2 operations, 0 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY],
+      (instrs
+    TRAP, TW
+)>;
+
+// 4 Cycles ALU2 operations, 1 input operand
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
+      (instrs
+    CNTLZD,
+    CNTLZD_rec,
+    CNTLZW, CNTLZW8,
+    CNTLZW8_rec, CNTLZW_rec,
+    CNTTZD,
+    CNTTZD_rec,
+    CNTTZW, CNTTZW8,
+    CNTTZW8_rec, CNTTZW_rec,
+    FTSQRT,
+    MTVSRBM,
+    MTVSRBMI,
+    MTVSRDM,
+    MTVSRHM,
+    MTVSRQM,
+    MTVSRWM,
+    POPCNTB, POPCNTB8,
+    POPCNTD,
+    POPCNTW,
+    VCLZB,
+    VCLZD,
+    VCLZH,
+    VCLZW,
+    VCTZB,
+    VCTZD,
+    VCTZH,
+    VCTZW,
+    VEXPANDBM,
+    VEXPANDDM,
+    VEXPANDHM,
+    VEXPANDQM,
+    VEXPANDWM,
+    VEXTRACTBM,
+    VEXTRACTDM,
+    VEXTRACTHM,
+    VEXTRACTQM,
+    VEXTRACTWM,
+    VPOPCNTB,
+    VPOPCNTD,
+    VPOPCNTH,
+    VPOPCNTW,
+    VPRTYBD,
+    VPRTYBW,
+    XSCVHPDP,
+    XSCVSPDPN,
+    XSTSQRTDP,
+    XVCVHPSP,
+    XVTLSBB,
+    XVTSQRTDP,
+    XVTSQRTSP
+)>;
+
+// 4 Cycles ALU2 operations, 2 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
+      (instrs
+    CMPEQB,
+    EXTSWSLI_32_64_rec, EXTSWSLI_rec,
+    FCMPOD, FCMPOS,
+    FCMPUD, FCMPUS,
+    FTDIV,
+    SLD_rec,
+    SLW8_rec, SLW_rec,
+    SRD_rec,
+    SRW8_rec, SRW_rec,
+    VABSDUB,
+    VABSDUH,
+    VABSDUW,
+    VADDCUW,
+    VADDSBS,
+    VADDSHS,
+    VADDSWS,
+    VADDUBS,
+    VADDUHS,
+    VADDUWS,
+    VAVGSB,
+    VAVGSH,
+    VAVGSW,
+    VAVGUB,
+    VAVGUH,
+    VAVGUW,
+    VCMPBFP,
+    VCMPBFP_rec,
+    VCMPEQFP,
+    VCMPEQFP_rec,
+    VCMPEQUB_rec,
+    VCMPEQUD_rec,
+    VCMPEQUH_rec,
+    VCMPEQUQ,
+    VCMPEQUQ_rec,
+    VCMPEQUW_rec,
+    VCMPGEFP,
+    VCMPGEFP_rec,
+    VCMPGTFP,
+    VCMPGTFP_rec,
+    VCMPGTSB_rec,
+    VCMPGTSD_rec,
+    VCMPGTSH_rec,
+    VCMPGTSQ,
+    VCMPGTSQ_rec,
+    VCMPGTSW_rec,
+    VCMPGTUB_rec,
+    VCMPGTUD_rec,
+    VCMPGTUH_rec,
+    VCMPGTUQ,
+    VCMPGTUQ_rec,
+    VCMPGTUW_rec,
+    VCMPNEB_rec,
+    VCMPNEH_rec,
+    VCMPNEW_rec,
+    VCMPNEZB_rec,
+    VCMPNEZH_rec,
+    VCMPNEZW_rec,
+    VCMPSQ,
+    VCMPUQ,
+    VCNTMBB,
+    VCNTMBD,
+    VCNTMBH,
+    VCNTMBW,
+    VMAXFP,
+    VMINFP,
+    VSUBCUW,
+    VSUBSBS,
+    VSUBSHS,
+    VSUBSWS,
+    VSUBUBS,
+    VSUBUHS,
+    VSUBUWS,
+    XSCMPEQDP,
+    XSCMPEXPDP,
+    XSCMPGEDP,
+    XSCMPGTDP,
+    XSCMPODP,
+    XSCMPUDP,
+    XSMAXCDP,
+    XSMAXDP,
+    XSMAXJDP,
+    XSMINCDP,
+    XSMINDP,
+    XSMINJDP,
+    XSTDIVDP,
+    XSTSTDCDP,
+    XSTSTDCSP,
+    XVCMPEQDP,
+    XVCMPEQDP_rec,
+    XVCMPEQSP,
+    XVCMPEQSP_rec,
+    XVCMPGEDP,
+    XVCMPGEDP_rec,
+    XVCMPGESP,
+    XVCMPGESP_rec,
+    XVCMPGTDP,
+    XVCMPGTDP_rec,
+    XVCMPGTSP,
+    XVCMPGTSP_rec,
+    XVMAXDP,
+    XVMAXSP,
+    XVMINDP,
+    XVMINSP,
+    XVTDIVDP,
+    XVTDIVSP,
+    XVTSTDCDP,
+    XVTSTDCSP
+)>;
+
+// 4 Cycles ALU2 operations, 3 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
+      (instrs
+    CMPRB, CMPRB8,
+    RLDCL_rec,
+    RLDCR_rec,
+    RLDIC_rec,
+    RLDICL_32_rec, RLDICL_rec,
+    RLDICR_rec,
+    TD,
+    TDI,
+    TWI,
+    VSHASIGMAD,
+    VSHASIGMAW
+)>;
+
+// 4 Cycles ALU2 operations, 4 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
+      (instrs
+    RLDIMI_rec,
+    RLWINM8_rec, RLWINM_rec,
+    RLWNM8_rec, RLWNM_rec
+)>;
+
+// 4 Cycles ALU2 operations, 5 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
+      (instrs
+    RLWIMI8_rec, RLWIMI_rec
+)>;
+
+// Single crack instructions
+// 4 Cycles ALU2 operations, 2 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
+      (instrs
+    SRAD_rec,
+    SRADI_rec,
+    SRAW_rec,
+    SRAWI_rec
+)>;
+
+// Single crack instructions
+// 4 Cycles ALU2 operations, 3 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
+      (instrs
+    TABORTDC,
+    TABORTDCI,
+    TABORTWC,
+    TABORTWCI
+)>;
+
+// 2-way crack instructions
+// 4 Cycles ALU2 operations, and 4 Cycles Permute operations, 2 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
+      (instrs
+    VRLQ,
+    VRLQNM,
+    VSLQ,
+    VSRAQ,
+    VSRQ
+)>;
+
+// 2-way crack instructions
+// 4 Cycles ALU2 operations, and 4 Cycles Permute operations, 3 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
+      (instrs
+    VRLQMI
+)>;
+
+// 2-way crack instructions
+// 4 Cycles ALU2 operations, and 4 Cycles ALU2 operations, 0 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_PAIR, P10W_F2_4C],
+      (instrs
+    MFCR, MFCR8
+)>;
+
+// 2 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_2C, P10W_DISP_ANY, P10FX_Read],
+      (instrs
+    MTCTR, MTCTR8, MTCTR8loop, MTCTRloop,
+    MTLR, MTLR8
+)>;
+
+// 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    CR6SET, CREQV, CRSET,
+    DSS, DSSALL,
+    MCRXRX,
+    MFCTR, MFCTR8,
+    MFLR, MFLR8,
+    NOP, NOP_GT_PWR6, NOP_GT_PWR7, ORI, ORI8,
+    VXOR, V_SET0, V_SET0B, V_SET0H,
+    XXLEQV, XXLEQVOnes,
+    XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz
+)>;
+
+// 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
+      (instrs
+    ADDI, ADDI8, ADDIdtprelL32,  ADDItlsldLADDR32,  ADDItocL, LI, LI8,
+    ADDIS, ADDIS8,  ADDISdtprelHA32, ADDIStocHA,  ADDIStocHA8, LIS, LIS8,
+    ADDME, ADDME8,
+    ADDME8O, ADDMEO,
+    ADDZE, ADDZE8,
+    ADDZE8O, ADDZEO,
+    EXTSB, EXTSB8, EXTSB8_32_64,
+    EXTSB8_rec, EXTSB_rec,
+    EXTSH, EXTSH8, EXTSH8_32_64,
+    EXTSH8_rec, EXTSH_rec,
+    EXTSW, EXTSW_32, EXTSW_32_64,
+    EXTSW_32_64_rec, EXTSW_rec,
+    FABSD, FABSS,
+    FMR,
+    FNABSD, FNABSS,
+    FNEGD, FNEGS,
+    MCRF,
+    MFOCRF, MFOCRF8,
+    MFVRD, MFVSRD,
+    MFVRWZ, MFVSRWZ,
+    MTOCRF, MTOCRF8,
+    MTVRD, MTVSRD,
+    MTVRWA, MTVSRWA,
+    MTVRWZ, MTVSRWZ,
+    NEG, NEG8,
+    NEG8_rec, NEG_rec,
+    NEG8O, NEGO,
+    SETB, SETB8,
+    SETBC, SETBC8,
+    SETBCR, SETBCR8,
+    SETNBC, SETNBC8,
+    SETNBCR, SETNBCR8,
+    SUBFME, SUBFME8,
+    SUBFME8O, SUBFMEO,
+    SUBFZE, SUBFZE8,
+    SUBFZE8O, SUBFZEO,
+    VEXTSB2D, VEXTSB2Ds,
+    VEXTSB2W, VEXTSB2Ws,
+    VEXTSD2Q,
+    VEXTSH2D, VEXTSH2Ds,
+    VEXTSH2W, VEXTSH2Ws,
+    VEXTSW2D, VEXTSW2Ds,
+    VNEGD,
+    VNEGW,
+    WAIT,
+    XSABSDP,
+    XSABSQP,
+    XSNABSDP,
+    XSNABSQP,
+    XSNEGDP,
+    XSNEGQP,
+    XSXEXPDP,
+    XSXEXPQP,
+    XSXSIGDP,
+    XVABSDP,
+    XVABSSP,
+    XVNABSDP,
+    XVNABSSP,
+    XVNEGDP,
+    XVNEGSP,
+    XVXEXPDP,
+    XVXEXPSP,
+    XVXSIGDP,
+    XVXSIGSP
+)>;
+
+// 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+      (instrs
+    ADD4, ADD4TLS, ADD8, ADD8TLS, ADD8TLS_,
+    ADD4_rec, ADD8_rec,
+    ADDE, ADDE8,
+    ADDE8O, ADDEO,
+    ADDIC, ADDIC8,
+    ADD4O, ADD8O,
+    AND, AND8,
+    AND8_rec, AND_rec,
+    ANDC, ANDC8,
+    ANDC8_rec, ANDC_rec,
+    ANDI8_rec, ANDI_rec,
+    ANDIS8_rec, ANDIS_rec,
+    CMPD, CMPW,
+    CMPB, CMPB8,
+    CMPDI, CMPWI,
+    CMPLD, CMPLW,
+    CMPLDI, CMPLWI,
+    CRAND,
+    CRANDC,
+    CRNAND,
+    CRNOR,
+    CROR,
+    CRORC,
+    CR6UNSET, CRUNSET, CRXOR,
+    EQV, EQV8,
+    EQV8_rec, EQV_rec,
+    EXTSWSLI, EXTSWSLI_32_64,
+    FCPSGND, FCPSGNS,
+    NAND, NAND8,
+    NAND8_rec, NAND_rec,
+    NOR, NOR8,
+    NOR8_rec, NOR_rec,
+    COPY, OR, OR8,
+    OR8_rec, OR_rec,
+    ORC, ORC8,
+    ORC8_rec, ORC_rec,
+    ORIS, ORIS8,
+    SLD,
+    SLW, SLW8,
+    SRAD,
+    SRADI, SRADI_32,
+    SRAW,
+    SRAWI,
+    SRD,
+    SRW, SRW8,
+    SUBF, SUBF8,
+    SUBF8_rec, SUBF_rec,
+    SUBFE, SUBFE8,
+    SUBFE8O, SUBFEO,
+    SUBFIC, SUBFIC8,
+    SUBF8O, SUBFO,
+    VADDUBM,
+    VADDUDM,
+    VADDUHM,
+    VADDUWM,
+    VAND,
+    VANDC,
+    VCMPEQUB,
+    VCMPEQUD,
+    VCMPEQUH,
+    VCMPEQUW,
+    VCMPGTSB,
+    VCMPGTSD,
+    VCMPGTSH,
+    VCMPGTSW,
+    VCMPGTUB,
+    VCMPGTUD,
+    VCMPGTUH,
+    VCMPGTUW,
+    VCMPNEB,
+    VCMPNEH,
+    VCMPNEW,
+    VCMPNEZB,
+    VCMPNEZH,
+    VCMPNEZW,
+    VEQV,
+    VMAXSB,
+    VMAXSD,
+    VMAXSH,
+    VMAXSW,
+    VMAXUB,
+    VMAXUD,
+    VMAXUH,
+    VMAXUW,
+    VMINSB,
+    VMINSD,
+    VMINSH,
+    VMINSW,
+    VMINUB,
+    VMINUD,
+    VMINUH,
+    VMINUW,
+    VMRGEW,
+    VMRGOW,
+    VNAND,
+    VNOR,
+    VOR,
+    VORC,
+    VRLB,
+    VRLD,
+    VRLDNM,
+    VRLH,
+    VRLW,
+    VRLWNM,
+    VSLB,
+    VSLD,
+    VSLH,
+    VSLW,
+    VSRAB,
+    VSRAD,
+    VSRAH,
+    VSRAW,
+    VSRB,
+    VSRD,
+    VSRH,
+    VSRW,
+    VSUBUBM,
+    VSUBUDM,
+    VSUBUHM,
+    VSUBUWM,
+    XOR, XOR8,
+    XOR8_rec, XOR_rec,
+    XORI, XORI8,
+    XORIS, XORIS8,
+    XSCPSGNDP,
+    XSCPSGNQP,
+    XSIEXPDP,
+    XSIEXPQP,
+    XVCPSGNDP,
+    XVCPSGNSP,
+    XVIEXPDP,
+    XVIEXPSP,
+    XXLAND,
+    XXLANDC,
+    XXLNAND,
+    XXLNOR,
+    XXLOR, XXLORf,
+    XXLORC
+)>;
+
+// 3 Cycles ALU operations, 3 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
+      (instrs
+    ADDEX, ADDEX8,
+    DST, DST64, DSTT, DSTT64,
+    DSTST, DSTST64, DSTSTT, DSTSTT64,
+    ISEL, ISEL8,
+    RLDCL,
+    RLDCR,
+    RLDIC,
+    RLDICL, RLDICL_32, RLDICL_32_64,
+    RLDICR, RLDICR_32,
+    VRLDMI,
+    VRLWMI,
+    VSEL,
+    XXSEL
+)>;
+
+// 3 Cycles ALU operations, 4 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
+      (instrs
+    RLDIMI,
+    RLWINM, RLWINM8,
+    RLWNM, RLWNM8
+)>;
+
+// 3 Cycles ALU operations, 5 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
+      (instrs
+    RLWIMI, RLWIMI8
+)>;
+
+// Single crack instructions
+// 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
+      (instrs
+    MFFS,
+    MFFS_rec,
+    MFFSL,
+    MFVSCR,
+    TRECHKPT
+)>;
+
+// Single crack instructions
+// 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read],
+      (instrs
+    ADDME8_rec, ADDME_rec,
+    ADDME8O_rec, ADDMEO_rec,
+    ADDZE8_rec, ADDZE_rec,
+    ADDZE8O_rec, ADDZEO_rec,
+    MCRFS,
+    MFFSCDRN,
+    MFFSCDRNI,
+    MFFSCRN,
+    MFFSCRNI,
+    MTFSB0,
+    MTVSCR,
+    NEG8O_rec, NEGO_rec,
+    SUBFME8_rec, SUBFME_rec,
+    SUBFME8O_rec, SUBFMEO_rec,
+    SUBFZE8_rec, SUBFZE_rec,
+    SUBFZE8O_rec, SUBFZEO_rec,
+    TABORT,
+    TBEGIN,
+    TRECLAIM,
+    TSR
+)>;
+
+// Single crack instructions
+// 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+      (instrs
+    ADDE8_rec, ADDE_rec,
+    ADDE8O_rec, ADDEO_rec,
+    ADDIC_rec,
+    ADD4O_rec, ADD8O_rec,
+    SUBFE8_rec, SUBFE_rec,
+    SUBFE8O_rec, SUBFEO_rec,
+    SUBF8O_rec, SUBFO_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    HRFID,
+    MFFSCE,
+    RFID,
+    STOP
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
+      (instrs
+    FABSD_rec, FABSS_rec,
+    FMR_rec,
+    FNABSD_rec, FNABSS_rec,
+    FNEGD_rec, FNEGS_rec,
+    MTFSB1,
+    RFEBB,
+    SC
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+      (instrs
+    ADDC, ADDC8,
+    ADDC8_rec, ADDC_rec,
+    ADDC8O, ADDCO,
+    FCPSGND_rec, FCPSGNS_rec,
+    MTFSF, MTFSFb,
+    MTFSFI, MTFSFIb,
+    SUBFC, SUBFC8,
+    SUBFC8_rec, SUBFC_rec,
+    SUBFC8O, SUBFCO
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 3 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
+      (instrs
+    MTFSFI_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 4 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
+      (instrs
+    MTFSF_rec
+)>;
+
+// 4-way crack instructions
+// 3 Cycles ALU operations, 3 Cycles ALU operations, 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+      (instrs
+    ADDC8O_rec, ADDCO_rec,
+    SUBFC8O_rec, SUBFCO_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 4 Cycles Permute operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
+      (instrs
+    VSTRIBL_rec,
+    VSTRIBR_rec,
+    VSTRIHL_rec,
+    VSTRIHR_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, P10FX_Read, P10FX_Read],
+      (instrs
+    MTCRF, MTCRF8
+)>;
+
+// 6 Cycles Load operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
+      (instrs
+    LBZ, LBZ8,
+    LD,  LDtoc,  LDtocBA,  LDtocCPT,  LDtocJTI,  LDtocL, SPILLTOVSR_LD,
+    LDBRX,
+     DFLOADf32, DFLOADf64, LFD,
+    LFDX,  XFLOADf32, XFLOADf64,
+    LFIWAX, LIWAX,
+    LFIWZX, LIWZX,
+    LHA, LHA8,
+    LHAX, LHAX8,
+    LHBRX, LHBRX8,
+    LHZ, LHZ8,
+    LVEBX,
+    LVEHX,
+    LVEWX,
+    LVX,
+    LVXL,
+    LWA, LWA_32,
+    LWAX, LWAX_32,
+    LWBRX, LWBRX8,
+    LWZ, LWZ8,  LWZtoc, LWZtocL,
+    LXSD,
+    LXSDX,
+    LXSIBZX,
+    LXSIHZX,
+    LXSIWAX,
+    LXSIWZX,
+    LXV,
+    LXVB16X,
+    LXVD2X,
+    LXVDSX,
+    LXVH8X,
+    LXVRBX,
+    LXVRDX,
+    LXVRHX,
+    LXVRWX,
+    LXVW4X,
+    LXVWSX,
+    LXVX
+)>;
+
+// 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+      (instrs
+    DCBT,
+    DCBTST,
+    ICBT,
+    LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32,
+    LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX,
+    LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32,
+    LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32,
+    LXVL,
+    LXVLL
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 13 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DF_13C, P10W_DISP_ANY],
+      (instrs
+    HASHCHK,
+    HASHCHKP
+)>;
+
+// Single crack instructions
+// 6 Cycles Load operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY],
+      (instrs
+    SLBIA
+)>;
+
+// Single crack instructions
+// 6 Cycles Load operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read],
+      (instrs
+    DARN,
+    LBARX, LBARXL,
+    LDARX, LDARXL,
+    LHARX, LHARXL,
+    LWARX, LWARXL,
+    SLBFEE_rec,
+    SLBIE,
+    SLBMFEE,
+    SLBMFEV
+)>;
+
+// Single crack instructions
+// 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+      (instrs
+    LBZCIX,
+    LDCIX,
+    LHZCIX,
+    LWZCIX,
+    MTSPR, MTSPR8, MTSR, MTVRSAVE, MTVRSAVEv
+)>;
+
+// Expand instructions
+// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
+      (instrs
+    LMW
+)>;
+
+// Expand instructions
+// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+      (instrs
+    LSWI
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_SX_3C, P10W_DISP_ANY],
+      (instrs
+    LBZU, LBZU8,
+    LBZUX, LBZUX8,
+    LDU,
+    LDUX,
+    LFDU,
+    LFDUX,
+    LHAU, LHAU8,
+    LHAUX, LHAUX8,
+    LHZU, LHZU8,
+    LHZUX, LHZUX8,
+    LWAUX,
+    LWZU, LWZU8,
+    LWZUX, LWZUX8
+)>;
+
+// 6 Cycles Load operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
+      (instrs
+    PLBZ, PLBZ8, PLBZ8pc, PLBZpc,
+    PLD, PLDpc,
+    PLFD, PLFDpc,
+    PLFS, PLFSpc,
+    PLHA, PLHA8, PLHA8pc, PLHApc,
+    PLHZ, PLHZ8, PLHZ8pc, PLHZpc,
+    PLWA, PLWA8, PLWA8pc, PLWApc,
+    PLWZ, PLWZ8, PLWZ8pc, PLWZpc,
+    PLXSD, PLXSDpc,
+    PLXSSP, PLXSSPpc,
+    PLXV, PLXVpc,
+    PLXVP, PLXVPpc
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
+      (instrs
+    LFS,
+    LFSX,
+    LXSSP,
+    LXSSPX
+)>;
+
+// 4-way crack instructions
+// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    LFSU,
+    LFSUX
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
+      (instrs
+    TLBIEL
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read, P10LD_Read],
+      (instrs
+    SLBMTE
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_SX_3C],
+      (instrs
+    LXVP,
+    LXVPX
+)>;
+
+// Single crack instructions
+// 13 Cycles operations on the MFL write resource (the generator labels this
+// pipeline "Unknown"), 1 input operand
+// NOTE(review): the header counts one input operand, but no SchedRead entry is
+// listed below - confirm against the datasheet.
+def : InstRW<[P10W_MFL_13C, P10W_DISP_EVEN, P10W_DISP_ANY],
+      (instrs
+    MFSPR, MFSPR8, MFSR, MFTB8, MFVRSAVE, MFVRSAVEv
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 0 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_ANY],
+      (instrs
+    XXSETACCZ
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 2 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_ANY, P10MM_Read, P10MM_Read],
+      (instrs
+    XVBF16GER2,
+    XVF16GER2,
+    XVF32GER,
+    XVF64GER,
+    XVI16GER2,
+    XVI16GER2S,
+    XVI4GER8,
+    XVI8GER4
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 3 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_ANY, P10MM_Read, P10MM_Read, P10MM_Read],
+      (instrs
+    XVBF16GER2NN,
+    XVBF16GER2NP,
+    XVBF16GER2PN,
+    XVBF16GER2PP,
+    XVF16GER2NN,
+    XVF16GER2NP,
+    XVF16GER2PN,
+    XVF16GER2PP,
+    XVF32GERNN,
+    XVF32GERNP,
+    XVF32GERPN,
+    XVF32GERPP,
+    XVF64GERNN,
+    XVF64GERNP,
+    XVF64GERPN,
+    XVF64GERPP,
+    XVI16GER2PP,
+    XVI16GER2SPP,
+    XVI4GER8PP,
+    XVI8GER4PP,
+    XVI8GER4SPP
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 4 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+      (instrs
+    PMXVF32GER,
+    PMXVF64GER
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 5 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+      (instrs
+    PMXVBF16GER2,
+    PMXVF16GER2,
+    PMXVF32GERNN,
+    PMXVF32GERNP,
+    PMXVF32GERPN,
+    PMXVF32GERPP,
+    PMXVF64GERNN,
+    PMXVF64GERNP,
+    PMXVF64GERPN,
+    PMXVF64GERPP,
+    PMXVI16GER2,
+    PMXVI16GER2S,
+    PMXVI4GER8,
+    PMXVI8GER4
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 6 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+      (instrs
+    PMXVBF16GER2NN,
+    PMXVBF16GER2NP,
+    PMXVBF16GER2PN,
+    PMXVBF16GER2PP,
+    PMXVF16GER2NN,
+    PMXVF16GER2NP,
+    PMXVF16GER2PN,
+    PMXVF16GER2PP,
+    PMXVI16GER2PP,
+    PMXVI16GER2SPP,
+    PMXVI4GER8PP,
+    PMXVI8GER4PP,
+    PMXVI8GER4SPP
+)>;
+
+// 2-way crack instructions
+// 10 Cycles SIMD Matrix Multiply Engine operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C],
+      (instrs
+    XXMTACC
+)>;
+
+// 4-way crack instructions
+// 10 Cycles SIMD Matrix Multiply Engine operations, 3 Cycles ALU operations, 10 Cycles SIMD Matrix Multiply Engine operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C, P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C],
+      (instrs
+    XXMFACC
+)>;
+
+// 5 Cycles GPR Multiply operations, 2 input operands
+def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read],
+      (instrs
+    MULHD,
+    MULHDU,
+    MULHW,
+    MULHWU,
+    MULLD,
+    MULLDO,
+    MULLI, MULLI8,
+    MULLW,
+    MULLWO,
+    VMULHSD,
+    VMULHUD,
+    VMULLD
+)>;
+
+// 5 Cycles GPR Multiply operations, 3 input operands
+def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read, P10MU_Read],
+      (instrs
+    MADDHD,
+    MADDHDU,
+    MADDLD, MADDLD8
+)>;
+
+// 2-way crack instructions
+// 5 Cycles GPR Multiply operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_MU_5C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    MULHD_rec,
+    MULHDU_rec,
+    MULHW_rec,
+    MULHWU_rec,
+    MULLD_rec,
+    MULLDO_rec,
+    MULLW_rec,
+    MULLWO_rec
+)>;
+
+// 4 Cycles Permute operations, 0 input operands
+// NOTE(review): VSPLTISB and VSPLTISH are listed in the 1-input-operand Permute
+// record (with a P10PM_Read), whereas VSPLTISW is listed here with no read -
+// confirm that this grouping is intended.
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY],
+      (instrs
+    VSPLTISW, V_SETALLONES, V_SETALLONESB, V_SETALLONESH
+)>;
+
+// 4 Cycles Permute operations, 1 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read],
+      (instrs
+    LVSL,
+    LVSR,
+    MFVSRLD,
+    MTVSRWS,
+    VCLZLSBB,
+    VCTZLSBB,
+    VGBBD,
+    VPRTYBQ,
+    VSPLTISB,
+    VSPLTISH,
+    VSTRIBL,
+    VSTRIBR,
+    VSTRIHL,
+    VSTRIHR,
+    VUPKHPX,
+    VUPKHSB,
+    VUPKHSH,
+    VUPKHSW,
+    VUPKLPX,
+    VUPKLSB,
+    VUPKLSH,
+    VUPKLSW,
+    XVCVBF16SPN,
+    XXBRD,
+    XXBRH,
+    XXBRQ,
+    XXBRW,
+    XXSPLTIB
+)>;
+
+// 4 Cycles Permute operations, 2 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read],
+      (instrs
+    BPERMD,
+    MTVSRDD,
+    VBPERMD,
+    VBPERMQ,
+    VCLRLB,
+    VCLRRB,
+    VEXTRACTD,
+    VEXTRACTUB,
+    VEXTRACTUH,
+    VEXTRACTUW,
+    VEXTUBLX,
+    VEXTUBRX,
+    VEXTUHLX,
+    VEXTUHRX,
+    VEXTUWLX,
+    VEXTUWRX,
+    VINSERTD,
+    VINSERTW,
+    VMRGHB,
+    VMRGHH,
+    VMRGHW,
+    VMRGLB,
+    VMRGLH,
+    VMRGLW,
+    VPKPX,
+    VPKSDSS,
+    VPKSDUS,
+    VPKSHSS,
+    VPKSHUS,
+    VPKSWSS,
+    VPKSWUS,
+    VPKUDUM,
+    VPKUDUS,
+    VPKUHUM,
+    VPKUHUS,
+    VPKUWUM,
+    VPKUWUS,
+    VSL,
+    VSLO,
+    VSLV,
+    VSPLTB, VSPLTBs,
+    VSPLTH, VSPLTHs,
+    VSPLTW,
+    VSR,
+    VSRO,
+    VSRV,
+    XXEXTRACTUW,
+    XXGENPCVDM,
+    XXGENPCVHM,
+    XXGENPCVWM,
+    XXMRGHW,
+    XXMRGLW,
+    XXPERM,
+    XXPERMDI, XXPERMDIs,
+    XXPERMR,
+    XXSLDWI, XXSLDWIs,
+    XXSPLTW, XXSPLTWs
+)>;
+
+// 4 Cycles Permute operations, 3 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read, P10PM_Read],
+      (instrs
+    VEXTDDVLX,
+    VEXTDDVRX,
+    VEXTDUBVLX,
+    VEXTDUBVRX,
+    VEXTDUHVLX,
+    VEXTDUHVRX,
+    VEXTDUWVLX,
+    VEXTDUWVRX,
+    VINSBLX,
+    VINSBRX,
+    VINSBVLX,
+    VINSBVRX,
+    VINSD,
+    VINSDLX,
+    VINSDRX,
+    VINSERTB,
+    VINSERTH,
+    VINSHLX,
+    VINSHRX,
+    VINSHVLX,
+    VINSHVRX,
+    VINSW,
+    VINSWLX,
+    VINSWRX,
+    VINSWVLX,
+    VINSWVRX,
+    VPERM,
+    VPERMR,
+    VPERMXOR,
+    VSLDBI,
+    VSLDOI,
+    VSRDBI,
+    XXINSERTW
+)>;
+
+// 2-way crack instructions
+// 4 Cycles Permute operations, and 7 Cycles VMX Multiply operations, 2 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_EVEN, P10W_vMU_7C, P10W_DISP_ANY],
+      (instrs
+    VSUMSWS
+)>;
+
+// 4 Cycles Permute operations, 1 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read],
+      (instrs
+    XXSPLTIDP,
+    XXSPLTIW
+)>;
+
+// 4 Cycles Permute operations, 3 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read],
+      (instrs
+    XXBLENDVB,
+    XXBLENDVD,
+    XXBLENDVH,
+    XXBLENDVW,
+    XXSPLTI32DX
+)>;
+
+// 4 Cycles Permute operations, 4 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read, P10PM_Read],
+      (instrs
+    XXEVAL,
+    XXPERMX
+)>;
+
+// 3 Cycles Store operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read],
+      (instrs
+    DCBST,
+    DCBZ,
+    ICBI
+)>;
+
+// 3 Cycles Store operations, 2 input operands
+// NOTE(review): PSTXVP and PSTXVPpc are listed here with P10W_DISP_ANY, while
+// every other PST* opcode that has a "pc" variant is listed in a separate
+// store record that uses P10W_DISP_PAIR - confirm that dispatching these two
+// with P10W_DISP_ANY is intended.
+def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
+      (instrs
+    DCBF,
+    PSTXVP, PSTXVPpc,
+    STB, STB8,
+    STBU, STBU8,
+    STBUX, STBUX8,
+    SPILLTOVSR_ST, STD,
+    STDBRX,
+    STDU,
+    STDUX,
+     DFSTOREf32, DFSTOREf64, STFD,
+    STFDU,
+    STFDUX,
+    STFDX,
+    STFIWX, STIWX,
+    STFS,
+    STFSU,
+    STFSUX,
+    STFSX,
+    STH, STH8,
+    STHBRX,
+    STHU, STHU8,
+    STHUX, STHUX8,
+    STVEBX,
+    STVEHX,
+    STVEWX,
+    STVX,
+    STVXL,
+    STW, STW8,
+    STWBRX,
+    STWU, STWU8,
+    STWUX, STWUX8,
+    STXSD,
+    STXSDX,
+    STXSIBX, STXSIBXv,
+    STXSIHX, STXSIHXv,
+    STXSIWX,
+    STXSSP,
+    STXSSPX,
+    STXV,
+    STXVB16X,
+    STXVD2X,
+    STXVH8X,
+    STXVRBX,
+    STXVRDX,
+    STXVRHX,
+    STXVRWX,
+    STXVW4X,
+    STXVX
+)>;
+
+// 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+      (instrs
+    CP_COPY, CP_COPY8,
+    STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32,
+    SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_,
+    STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32,
+    STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32,
+    STXVL,
+    STXVLL
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 0 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
+      (instrs
+    EnforceIEIO,
+    MSGSYNC,
+    SLBSYNC,
+    TCHECK,
+    TLBSYNC
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read],
+      (instrs
+    TEND
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
+      (instrs
+    SLBIEG,
+    STBCX,
+    STDCX,
+    STHCX,
+    STWCX,
+    TLBIE
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+      (instrs
+    CP_PASTE8_rec, CP_PASTE_rec,
+    STBCIX,
+    STDCIX,
+    STHCIX,
+    STWCIX
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Store operations, and 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    ISYNC
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Store operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    SYNC
+)>;
+
+// Expand instructions
+// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 6 Cycles Load operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY],
+      (instrs
+    LDAT,
+    LWAT
+)>;
+
+// 4-way crack instructions
+// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY],
+      (instrs
+    STDAT,
+    STWAT
+)>;
+
+// Expand instructions
+// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
+      (instrs
+    STMW
+)>;
+
+// Expand instructions
+// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+      (instrs
+    STSWI
+)>;
+
+// 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read, P10ST_Read],
+      (instrs
+    PSTB, PSTB8, PSTB8pc, PSTBpc,
+    PSTD, PSTDpc,
+    PSTFD, PSTFDpc,
+    PSTFS, PSTFSpc,
+    PSTH, PSTH8, PSTH8pc, PSTHpc,
+    PSTW, PSTW8, PSTW8pc, PSTWpc,
+    PSTXSD, PSTXSDpc,
+    PSTXSSP, PSTXSSPpc,
+    PSTXV, PSTXVpc
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read, P10ST_Read],
+      (instrs
+    STXVP,
+    STXVPX
+)>;
+
+// FIXME - Scheduling information is missing from the datasheet.
+// Temporarily set it as 1 Cycle Simple Fixed-point (SFX) operations, 0 input operands
+def : InstRW<[P10W_SX, P10W_DISP_ANY],
+      (instrs
+    ATTN,
+    CP_ABORT,
+    DCBA,
+    DCBI,
+    DCBZL,
+    DCCCI,
+    ICBLC,
+    ICBLQ,
+    ICBTLS,
+    ICCCI,
+    LA,
+    LDMX,
+    MFDCR,
+    MFPMR,
+    MFSRIN,
+    MSYNC,
+    MTDCR,
+    MTPMR,
+    MTSRIN,
+    NAP,
+    TLBIA,
+    TLBLD,
+    TLBLI,
+    TLBRE2,
+    TLBSX2,
+    TLBSX2D,
+    TLBWE2
+)>;
+
+// Single crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, 0 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
+      (instrs
+    CLRBHRB,
+    MFMSR
+)>;
+
+// Single crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read],
+      (instrs
+    MFTB
+)>;
+
+// Single crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read, P10SX_Read],
+      (instrs
+    MFBHRBE,
+    MTMSR,
+    MTMSRD
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    ADDPCIS
+)>;
+
+// 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_PAIR, P10SX_Read],
+      (instrs
+    PADDI, PADDI8, PADDI8pc, PADDIpc, PLI, PLI8
+)>;
+
+// 7 Cycles VMX Multiply operations, 2 input operands
+def : InstRW<[P10W_vMU_7C, P10W_DISP_ANY, P10vMU_Read, P10vMU_Read],
+      (instrs
+    VMULESB,
+    VMULESD,
+    VMULESH,
+    VMULESW,
+    VMULEUB,
+    VMULEUD,
+    VMULEUH,
+    VMULEUW,
+    VMULHSW,
+    VMULHUW,
+    VMULOSB,
+    VMULOSD,
+    VMULOSH,
+    VMULOSW,
+    VMULOUB,
+    VMULOUD,
+    VMULOUH,
+    VMULOUW,
+    VMULUWM,
+    VSUM2SWS,
+    VSUM4SBS,
+    VSUM4SHS,
+    VSUM4UBS
+)>;
+
+// 7 Cycles VMX Multiply operations, 3 input operands
+def : InstRW<[P10W_vMU_7C, P10W_DISP_ANY, P10vMU_Read, P10vMU_Read, P10vMU_Read],
+      (instrs
+    VMHADDSHS,
+    VMHRADDSHS,
+    VMLADDUHM,
+    VMSUMCUD,
+    VMSUMMBM,
+    VMSUMSHM,
+    VMSUMSHS,
+    VMSUMUBM,
+    VMSUMUDM,
+    VMSUMUHM,
+    VMSUMUHS
+)>;

diff  --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index e41e0133a7b9..8e983acb450b 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -590,8 +590,7 @@ def : ProcessorModel<"pwr6x", G5Model,
 def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>;
 def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>;
 def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>;
-// No scheduler model yet.
-def : ProcessorModel<"pwr10", P9Model, ProcessorFeatures.P10Features>;
+def : ProcessorModel<"pwr10", P10Model, ProcessorFeatures.P10Features>;
 // No scheduler model for future CPU.
 def : ProcessorModel<"future", NoSchedModel,
                   ProcessorFeatures.FutureFeatures>;

diff  --git a/llvm/lib/Target/PowerPC/PPCSchedPredicates.td b/llvm/lib/Target/PowerPC/PPCSchedPredicates.td
new file mode 100644
index 000000000000..18f325e99a60
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCSchedPredicates.td
@@ -0,0 +1,294 @@
+//===--- PPCSchedPredicates.td - PowerPC Scheduling Preds -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//
+// This file defines scheduling predicate definitions that are used by the
+// PowerPC subtargets.
+//===----------------------------------------------------------------------===//
+// Identify instructions that write BF pipelines with 7 cycles.
+// The predicate holds when the instruction's opcode is any member of the list
+// below; each opcode is listed exactly once.
+def P10W_BF_7C_Pred : MCSchedPredicate<
+      CheckOpcode<[FADD,
+                   FADDS,
+                   FADDS_rec,
+                   FADD_rec,
+                   FCFID,
+                   FCFIDS,
+                   FCFIDS_rec,
+                   FCFIDU,
+                   FCFIDUS,
+                   FCFIDUS_rec,
+                   FCFIDU_rec,
+                   FCFID_rec,
+                   FCTID,
+                   FCTIDU,
+                   FCTIDUZ,
+                   FCTIDUZ_rec,
+                   FCTIDU_rec,
+                   FCTIDZ,
+                   FCTIDZ_rec,
+                   FCTID_rec,
+                   FCTIW,
+                   FCTIWU,
+                   FCTIWUZ,
+                   FCTIWUZ_rec,
+                   FCTIWU_rec,
+                   FCTIWZ,
+                   FCTIWZ_rec,
+                   FCTIW_rec,
+                   FMADD,
+                   FMADDS,
+                   FMADDS_rec,
+                   FMADD_rec,
+                   FMSUB,
+                   FMSUBS,
+                   FMSUBS_rec,
+                   FMSUB_rec,
+                   FMUL,
+                   FMULS,
+                   FMULS_rec,
+                   FMUL_rec,
+                   FNMADD,
+                   FNMADDS,
+                   FNMADDS_rec,
+                   FNMADD_rec,
+                   FNMSUB,
+                   FNMSUBS,
+                   FNMSUBS_rec,
+                   FNMSUB_rec,
+                   FRE,
+                   FRES,
+                   FRES_rec,
+                   FRE_rec,
+                   FRIMD, FRIMS,
+                   FRIMD_rec, FRIMS_rec,
+                   FRIND, FRINS,
+                   FRIND_rec, FRINS_rec,
+                   FRIPD, FRIPS,
+                   FRIPD_rec, FRIPS_rec,
+                   FRIZD, FRIZS,
+                   FRIZD_rec, FRIZS_rec,
+                   FRSP,
+                   FRSP_rec,
+                   FRSQRTE,
+                   FRSQRTES,
+                   FRSQRTES_rec,
+                   FRSQRTE_rec,
+                   FSELD, FSELS,
+                   FSELD_rec, FSELS_rec,
+                   FSUB,
+                   FSUBS,
+                   FSUBS_rec,
+                   FSUB_rec,
+                   VADDFP,
+                   VCFSX, VCFSX_0,
+                   VCFUX, VCFUX_0,
+                   VCTSXS, VCTSXS_0,
+                   VCTUXS, VCTUXS_0,
+                   VEXPTEFP,
+                   VLOGEFP,
+                   VMADDFP,
+                   VNMSUBFP,
+                   VREFP,
+                   VRFIM,
+                   VRFIN,
+                   VRFIP,
+                   VRFIZ,
+                   VRSQRTEFP,
+                   VSUBFP,
+                   XSADDDP,
+                   XSADDSP,
+                   XSCVDPHP,
+                   XSCVDPSP,
+                   XSCVDPSPN,
+                   XSCVDPSXDS, XSCVDPSXDSs,
+                   XSCVDPSXWS, XSCVDPSXWSs,
+                   XSCVDPUXDS, XSCVDPUXDSs,
+                   XSCVDPUXWS, XSCVDPUXWSs,
+                   XSCVSPDP,
+                   XSCVSXDDP,
+                   XSCVSXDSP,
+                   XSCVUXDDP,
+                   XSCVUXDSP,
+                   XSMADDADP,
+                   XSMADDASP,
+                   XSMADDMDP,
+                   XSMADDMSP,
+                   XSMSUBADP,
+                   XSMSUBASP,
+                   XSMSUBMDP,
+                   XSMSUBMSP,
+                   XSMULDP,
+                   XSMULSP,
+                   XSNMADDADP,
+                   XSNMADDASP,
+                   XSNMADDMDP,
+                   XSNMADDMSP,
+                   XSNMSUBADP,
+                   XSNMSUBASP,
+                   XSNMSUBMDP,
+                   XSNMSUBMSP,
+                   XSRDPI,
+                   XSRDPIC,
+                   XSRDPIM,
+                   XSRDPIP,
+                   XSRDPIZ,
+                   XSREDP,
+                   XSRESP,
+                   XSRSP,
+                   XSRSQRTEDP,
+                   XSRSQRTESP,
+                   XSSUBDP,
+                   XSSUBSP,
+                   XVADDDP,
+                   XVADDSP,
+                   XVCVDPSP,
+                   XVCVDPSXDS,
+                   XVCVDPSXWS,
+                   XVCVDPUXDS,
+                   XVCVDPUXWS,
+                   XVCVSPBF16,
+                   XVCVSPDP,
+                   XVCVSPHP,
+                   XVCVSPSXDS,
+                   XVCVSPSXWS,
+                   XVCVSPUXDS,
+                   XVCVSPUXWS,
+                   XVCVSXDDP,
+                   XVCVSXDSP,
+                   XVCVSXWDP,
+                   XVCVSXWSP,
+                   XVCVUXDDP,
+                   XVCVUXDSP,
+                   XVCVUXWDP,
+                   XVCVUXWSP,
+                   XVMADDADP,
+                   XVMADDASP,
+                   XVMADDMDP,
+                   XVMADDMSP,
+                   XVMSUBADP,
+                   XVMSUBASP,
+                   XVMSUBMDP,
+                   XVMSUBMSP,
+                   XVMULDP,
+                   XVMULSP,
+                   XVNMADDADP,
+                   XVNMADDASP,
+                   XVNMADDMDP,
+                   XVNMADDMSP,
+                   XVNMSUBADP,
+                   XVNMSUBASP,
+                   XVNMSUBMDP,
+                   XVNMSUBMSP,
+                   XVRDPI,
+                   XVRDPIC,
+                   XVRDPIM,
+                   XVRDPIP,
+                   XVRDPIZ,
+                   XVREDP,
+                   XVRESP,
+                   XVRSPI,
+                   XVRSPIC,
+                   XVRSPIM,
+                   XVRSPIP,
+                   XVRSPIZ,
+                   XVRSQRTEDP,
+                   XVRSQRTESP,
+                   XVSUBDP,
+                   XVSUBSP]>
+>;
+
+// Matches the opcodes that write the CY pipelines with a 7-cycle latency.
+def P10W_CY_7C_Pred : MCSchedPredicate<
+      CheckOpcode<[CFUGED, CNTLZDM, CNTTZDM, PDEPD, PEXTD,
+                   VCFUGED, VCIPHER, VCIPHERLAST, VCLZDM, VCTZDM,
+                   VGNB, VNCIPHER, VNCIPHERLAST, VPDEPD, VPEXTD,
+                   VPMSUMB, VPMSUMD, VPMSUMH, VPMSUMW, VSBOX]>
+>;
+
+// Identify instructions that write MM pipelines with 10 cycles.
+// The predicate holds when the instruction's opcode is any member of the list
+// below; each opcode is listed exactly once.
+def P10W_MM_10C_Pred : MCSchedPredicate<
+      CheckOpcode<[PMXVBF16GER2,
+                   PMXVBF16GER2NN,
+                   PMXVBF16GER2NP,
+                   PMXVBF16GER2PN,
+                   PMXVBF16GER2PP,
+                   PMXVF16GER2,
+                   PMXVF16GER2NN,
+                   PMXVF16GER2NP,
+                   PMXVF16GER2PN,
+                   PMXVF16GER2PP,
+                   PMXVF32GER,
+                   PMXVF32GERNN,
+                   PMXVF32GERNP,
+                   PMXVF32GERPN,
+                   PMXVF32GERPP,
+                   PMXVF64GER,
+                   PMXVF64GERNN,
+                   PMXVF64GERNP,
+                   PMXVF64GERPN,
+                   PMXVF64GERPP,
+                   PMXVI16GER2,
+                   PMXVI16GER2PP,
+                   PMXVI16GER2S,
+                   PMXVI16GER2SPP,
+                   PMXVI4GER8,
+                   PMXVI4GER8PP,
+                   PMXVI8GER4,
+                   PMXVI8GER4PP,
+                   PMXVI8GER4SPP,
+                   XVBF16GER2,
+                   XVBF16GER2NN,
+                   XVBF16GER2NP,
+                   XVBF16GER2PN,
+                   XVBF16GER2PP,
+                   XVF16GER2,
+                   XVF16GER2NN,
+                   XVF16GER2NP,
+                   XVF16GER2PN,
+                   XVF16GER2PP,
+                   XVF32GER,
+                   XVF32GERNN,
+                   XVF32GERNP,
+                   XVF32GERPN,
+                   XVF32GERPP,
+                   XVF64GER,
+                   XVF64GERNN,
+                   XVF64GERNP,
+                   XVF64GERPN,
+                   XVF64GERPP,
+                   XVI16GER2,
+                   XVI16GER2PP,
+                   XVI16GER2S,
+                   XVI16GER2SPP,
+                   XVI4GER8,
+                   XVI4GER8PP,
+                   XVI8GER4,
+                   XVI8GER4PP,
+                   XVI8GER4SPP,
+                   XXMFACC,
+                   XXMTACC,
+                   XXSETACCZ]>
+>;

diff  --git a/llvm/lib/Target/PowerPC/PPCSchedule.td b/llvm/lib/Target/PowerPC/PPCSchedule.td
index e378d57d325e..f65dbae16d3a 100644
--- a/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -128,7 +128,9 @@ def IIC_SprMTPMR     : InstrItinClass;
 
 //===----------------------------------------------------------------------===//
 // Processor instruction itineraries.
+include "PPCInstrInfo.td"
 
+include "PPCSchedPredicates.td"
 include "PPCScheduleG3.td"
 include "PPCSchedule440.td"
 include "PPCScheduleG4.td"
@@ -137,6 +139,7 @@ include "PPCScheduleG5.td"
 include "PPCScheduleP7.td"
 include "PPCScheduleP8.td"
 include "PPCScheduleP9.td"
+include "PPCScheduleP10.td"
 include "PPCScheduleA2.td"
 include "PPCScheduleE500.td"
 include "PPCScheduleE500mc.td"

diff  --git a/llvm/lib/Target/PowerPC/PPCScheduleP10.td b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
new file mode 100644
index 000000000000..bf56491f373a
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
@@ -0,0 +1,416 @@
+//===--- PPCScheduleP10.td - P10 Scheduling Definitions -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//
+// This file defines the resources required by P10 instructions.
+//===----------------------------------------------------------------------===//
+// Modeling pipeline forwarding logic.
+def P10BR_Read : SchedRead;
+def P10DF_Read : SchedRead;
+def P10DV_Read : SchedRead;
+def P10DX_Read : SchedRead;
+def P10F2_Read : SchedRead;
+def P10FX_Read : SchedRead;
+def P10LD_Read : SchedRead;
+def P10MU_Read : SchedRead;
+def P10PM_Read : SchedRead;
+def P10ST_Read : SchedRead;
+def P10SX_Read : SchedRead;
+def P10vMU_Read : SchedRead;
+
+def P10Model : SchedMachineModel {
+  let IssueWidth = 8;
+
+  // TODO - Needs to be updated according to the P10 UM.
+  let MicroOpBufferSize = 44;
+
+  // TODO - tune this on real HW once it arrives. For now, we will use the same
+  // value as we do on P9.
+  let LoopMicroOpBufferSize = 60;
+
+  let CompleteModel = 1;
+
+  // SPE (Signal Processing Engine), E500 and BookE are not supported on Power10.
+  let UnsupportedFeatures = [HasSPE, IsE500, IsBookE];
+}
+
+let SchedModel = P10Model in {
+
+  // ***************** Processor Resources *****************
+
+  // Pipeline Groups
+
+  def P10_BF : ProcResource<4>; // Four Binary Floating Point pipelines.
+  def P10_BR : ProcResource<2>; // Two Branch pipelines.
+  def P10_CY : ProcResource<4>; // Four Crypto pipelines.
+  def P10_DF : ProcResource<1>; // One Decimal Floating Point pipeline.
+  def P10_DV : ProcResource<2>; // Two Fixed-point divide (DIV) pipelines.
+  def P10_DX : ProcResource<2>; // Two 128-bit fixed-point and BCD pipelines.
+  def P10_FX : ProcResource<4>; // Four ALU pipelines.
+  def P10_LD : ProcResource<2>; // Two Load pipelines.
+  def P10_MM : ProcResource<2>; // Two 512-bit SIMD matrix multiply engine pipelines.
+  def P10_PM : ProcResource<4>; // Four 128-bit permute (PM) pipelines.
+  def P10_ST : ProcResource<2>; // Two ST-D pipelines.
+  def P10_SX : ProcResource<2>; // Two Simple Fixed-point (SFX) pipelines.
+
+  // Dispatch Groups
+
+  // Dispatch to any slots
+  def P10_ANY_SLOT : ProcResource<8>;
+
+  let Super = P10_ANY_SLOT in {
+
+    // Dispatch to even slots
+    def P10_EVEN_SLOT : ProcResource<4>;
+
+    // Dispatch to odd slots
+    def P10_ODD_SLOT : ProcResource<4>;
+  }
+
+  // Dispatch Rules
+  let NumMicroOps = 0, Latency = 1 in {
+    // Dispatch Rule '-'
+    def P10W_DISP_ANY : SchedWriteRes<[P10_ANY_SLOT]>;
+
+    // Dispatch Rule '-', even slot
+    def P10W_DISP_EVEN : SchedWriteRes<[P10_EVEN_SLOT]>;
+
+    // Dispatch Rule 'P'
+    def P10W_DISP_PAIR : SchedWriteRes<[P10_EVEN_SLOT, P10_ODD_SLOT]>;
+  }
+
+  // ***************** SchedWriteRes Definitions *****************
+
+  // A BF pipeline may take from 7 to 36 cycles to complete.
+  // Some BF operations may keep the pipeline busy for up to 10 cycles.
+  def P10W_BF_7C : SchedWriteRes<[P10_BF]> {
+    let Latency = 7;
+  }
+
+  def P10W_BF_22C : SchedWriteRes<[P10_BF]> {
+    let ResourceCycles = [ 5 ];
+    let Latency = 22;
+  }
+
+  def P10W_BF_24C : SchedWriteRes<[P10_BF]> {
+    let ResourceCycles = [ 8 ];
+    let Latency = 24;
+  }
+
+  def P10W_BF_26C : SchedWriteRes<[P10_BF]> {
+    let ResourceCycles = [ 5 ];
+    let Latency = 26;
+  }
+
+  def P10W_BF_27C : SchedWriteRes<[P10_BF]> {
+    let ResourceCycles = [ 7 ];
+    let Latency = 27;
+  }
+
+  def P10W_BF_36C : SchedWriteRes<[P10_BF]> {
+    let ResourceCycles = [ 10 ];
+    let Latency = 36;
+  }
+
+  // A BR pipeline may take 2 cycles to complete.
+  def P10W_BR_2C : SchedWriteRes<[P10_BR]> {
+    let Latency = 2;
+  }
+
+  // A CY pipeline may take 7 cycles to complete.
+  def P10W_CY_7C : SchedWriteRes<[P10_CY]> {
+    let Latency = 7;
+  }
+
+  // A DF pipeline may take from 13 to 174 cycles to complete.
+  // Some DF operations may keep the pipeline busy for up to 67 cycles.
+  def P10W_DF_13C : SchedWriteRes<[P10_DF]> {
+    let Latency = 13;
+  }
+
+  def P10W_DF_24C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 16 ];
+    let Latency = 24;
+  }
+
+  def P10W_DF_25C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 17 ];
+    let Latency = 25;
+  }
+
+  def P10W_DF_26C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 18 ];
+    let Latency = 26;
+  }
+
+  def P10W_DF_32C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 22 ];
+    let Latency = 32;
+  }
+
+  def P10W_DF_33C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 25 ];
+    let Latency = 33;
+  }
+
+  def P10W_DF_34C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 25 ];
+    let Latency = 34;
+  }
+
+  def P10W_DF_38C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 30 ];
+    let Latency = 38;
+  }
+
+  def P10W_DF_40C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 17 ];
+    let Latency = 40;
+  }
+
+  def P10W_DF_43C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 34 ];
+    let Latency = 43;
+  }
+
+  def P10W_DF_59C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 49 ];
+    let Latency = 59;
+  }
+
+  def P10W_DF_61C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 12 ];
+    let Latency = 61;
+  }
+
+  def P10W_DF_68C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 15 ];
+    let Latency = 68;
+  }
+
+  def P10W_DF_77C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 67 ];
+    let Latency = 77;
+  }
+
+  def P10W_DF_87C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 12 ];
+    let Latency = 87;
+  }
+
+  def P10W_DF_100C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 32 ];
+    let Latency = 100;
+  }
+
+  def P10W_DF_174C : SchedWriteRes<[P10_DF]> {
+    let ResourceCycles = [ 33 ];
+    let Latency = 174;
+  }
+
+  // A DV pipeline may take from 20 to 83 cycles to complete.
+  // Some DV operations may keep the pipeline busy for up to 33 cycles.
+  def P10W_DV_20C : SchedWriteRes<[P10_DV]> {
+    let ResourceCycles = [ 10 ];
+    let Latency = 20;
+  }
+
+  def P10W_DV_25C : SchedWriteRes<[P10_DV]> {
+    let ResourceCycles = [ 10 ];
+    let Latency = 25;
+  }
+
+  def P10W_DV_27C : SchedWriteRes<[P10_DV]> {
+    let ResourceCycles = [ 10 ];
+    let Latency = 27;
+  }
+
+  def P10W_DV_41C : SchedWriteRes<[P10_DV]> {
+    let ResourceCycles = [ 10 ];
+    let Latency = 41;
+  }
+
+  def P10W_DV_43C : SchedWriteRes<[P10_DV]> {
+    let ResourceCycles = [ 21 ];
+    let Latency = 43;
+  }
+
+  def P10W_DV_47C : SchedWriteRes<[P10_DV]> {
+    let ResourceCycles = [ 21 ];
+    let Latency = 47;
+  }
+
+  def P10W_DV_54C : SchedWriteRes<[P10_DV]> {
+    let ResourceCycles = [ 33 ];
+    let Latency = 54;
+  }
+
+  def P10W_DV_60C : SchedWriteRes<[P10_DV]> {
+    let ResourceCycles = [ 33 ];
+    let Latency = 60;
+  }
+
+  def P10W_DV_75C : SchedWriteRes<[P10_DV]> {
+    let ResourceCycles = [ 21 ];
+    let Latency = 75;
+  }
+
+  def P10W_DV_83C : SchedWriteRes<[P10_DV]> {
+    let ResourceCycles = [ 33 ];
+    let Latency = 83;
+  }
+
+  // A DX pipeline may take 5 cycles to complete.
+  def P10W_DX_5C : SchedWriteRes<[P10_DX]> {
+    let Latency = 5;
+  }
+
+  // A F2 pipeline may take 4 cycles to complete.
+  def P10W_F2_4C : SchedWriteRes<[P10_FX]> {
+    let Latency = 4;
+  }
+
+  // A FX pipeline may take from 2 to 3 cycles to complete.
+  def P10W_FX_2C : SchedWriteRes<[P10_FX]> {
+    let Latency = 2;
+  }
+
+  def P10W_FX_3C : SchedWriteRes<[P10_FX]> {
+    let Latency = 3;
+  }
+
+  // A LD pipeline may take 6 cycles to complete.
+  def P10W_LD_6C : SchedWriteRes<[P10_LD]> {
+    let Latency = 6;
+  }
+
+  // A MF pipeline may take 13 cycles to complete.
+  def P10W_MF_13C : SchedWriteRes<[P10_SX]> {
+    let Latency = 13;
+  }
+
+  // A MFL pipeline may take 13 cycles to complete.
+  def P10W_MFL_13C : SchedWriteRes<[P10_SX]> {
+    let Latency = 13;
+  }
+
+  // A MM pipeline may take 10 cycles to complete.
+  def P10W_MM_10C : SchedWriteRes<[P10_MM]> {
+    let Latency = 10;
+  }
+
+  // A MU pipeline may take 5 cycles to complete.
+  def P10W_MU_5C : SchedWriteRes<[P10_BF]> {
+    let Latency = 5;
+  }
+
+  // A PM pipeline may take 4 cycles to complete.
+  def P10W_PM_4C : SchedWriteRes<[P10_PM]> {
+    let Latency = 4;
+  }
+
+  // A ST pipeline may take 3 cycles to complete.
+  def P10W_ST_3C : SchedWriteRes<[P10_ST]> {
+    let Latency = 3;
+  }
+
+  // A SX pipeline may take from 0 to 3 cycles to complete.
+  def P10W_SX : SchedWriteRes<[P10_SX]> {
+    let Latency = 0;
+  }
+
+  def P10W_SX_3C : SchedWriteRes<[P10_SX]> {
+    let Latency = 3;
+  }
+
+  // A vMU pipeline may take 7 cycles to complete.
+  def P10W_vMU_7C : SchedWriteRes<[P10_BF]> {
+    let Latency = 7;
+  }
+
+  // ***************** Read Advance Definitions *****************
+
+  // Modeling pipeline forwarding logic.
+  def P10BF_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+  def P10BF_Read_2C : SchedReadAdvance<2, [P10W_BF_7C]>;
+  def P10BR_Read_1C : SchedReadAdvance<1, [P10W_FX_3C, P10W_F2_4C]>;
+  def P10CY_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_DF_13C, P10W_MM_10C]>;
+  def P10CY_Read_3C : SchedReadAdvance<3, [P10W_CY_7C]>;
+  def P10DF_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+  def P10DV_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+  def P10DX_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+  def P10F2_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+  def P10FX_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+  def P10LD_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C]>;
+  def P10MM_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C]>;
+  def P10MM_Read_6C : SchedReadAdvance<6, [P10W_MM_10C]>;
+  def P10MU_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_DF_13C]>;
+  def P10PM_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+  def P10ST_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+  def P10SX_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C, P10W_MM_10C]>;
+  def P10vMU_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+
+  // Save 1 cycles if pipeline BF reads the data from pipelines DX, MU, vMU, CY, DF, MM.
+  // Save 2 cycles if pipeline BF reads the data from pipelines BF.
+  def P10BF_Read : SchedReadVariant<[
+        SchedVar<P10W_BF_7C_Pred, [P10BF_Read_2C]>,
+        SchedVar<NoSchedPred,     [P10BF_Read_1C]>
+  ]>;
+
+  // Save 1 cycles if pipeline CY reads the data from pipelines DX, MU, vMU, BF, DF, MM.
+  // Save 3 cycles if pipeline CY reads the data from pipelines CY.
+  def P10CY_Read : SchedReadVariant<[
+        SchedVar<P10W_CY_7C_Pred, [P10CY_Read_3C]>,
+        SchedVar<NoSchedPred,     [P10CY_Read_1C]>
+  ]>;
+
+  // Save 1 cycles if pipeline MM reads the data from pipelines DX, MU, vMU, BF, CY, DF.
+  // Save 6 cycles if pipeline MM reads the data from pipelines MM.
+  def P10MM_Read : SchedReadVariant<[
+        SchedVar<P10W_MM_10C_Pred, [P10MM_Read_6C]>,
+        SchedVar<NoSchedPred,     [P10MM_Read_1C]>
+  ]>;
+
+  // Save 1 cycles if pipeline BR reads the data from pipelines FX, F2.
+  def : SchedAlias<P10BR_Read, P10BR_Read_1C>;
+
+  // Save 1 cycles if pipeline DF reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+  def : SchedAlias<P10DF_Read, P10DF_Read_1C>;
+
+  // Save 1 cycles if pipeline DV reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+  def : SchedAlias<P10DV_Read, P10DV_Read_1C>;
+
+  // Save 1 cycles if pipeline DX reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+  def : SchedAlias<P10DX_Read, P10DX_Read_1C>;
+
+  // Save 1 cycles if pipeline F2 reads the data from pipelines ST, SX, FX, F2, PM.
+  def : SchedAlias<P10F2_Read, P10F2_Read_1C>;
+
+  // Save 1 cycles if pipeline FX reads the data from pipelines ST, SX, FX, F2, PM.
+  def : SchedAlias<P10FX_Read, P10FX_Read_1C>;
+
+  // Save 1 cycles if pipeline LD reads the data from pipelines ST, SX, FX, F2.
+  def : SchedAlias<P10LD_Read, P10LD_Read_1C>;
+
+  // Save 1 cycles if pipeline MU reads the data from pipelines DX, MU, DF.
+  def : SchedAlias<P10MU_Read, P10MU_Read_1C>;
+
+  // Save 1 cycles if pipeline PM reads the data from pipelines ST, SX, FX, F2, PM.
+  def : SchedAlias<P10PM_Read, P10PM_Read_1C>;
+
+  // Save 1 cycles if pipeline ST reads the data from pipelines ST, SX, FX, F2, PM.
+  def : SchedAlias<P10ST_Read, P10ST_Read_1C>;
+
+  // Save 1 cycles if pipeline SX reads the data from pipelines ST, SX, FX, F2, PM, MM.
+  def : SchedAlias<P10SX_Read, P10SX_Read_1C>;
+
+  // Save 1 cycles if pipeline vMU reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+  def : SchedAlias<P10vMU_Read, P10vMU_Read_1C>;
+
+  include "P10InstrResources.td"
+}

diff  --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index 571cc219ff2b..3dc069ecad8a 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -9,8 +9,6 @@
 // This file defines the itinerary class data for the POWER9 processor.
 //
 //===----------------------------------------------------------------------===//
-include "PPCInstrInfo.td"
-
 def P9Model : SchedMachineModel {
   // The maximum number of instructions to be issued at the same time.
   // While a value of 8 is technically correct since 8 instructions can be

diff  --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
index 9e8f8d073a1a..d49821148860 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -105,8 +105,8 @@ define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, i16* nocapture %
 ;
 ; CHECK-BE-LABEL: vec_xst_trunc_ss:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    sldi r3, r5, 1
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    stxsihx v2, r6, r3
 ; CHECK-BE-NEXT:    blr
 ;
@@ -136,8 +136,8 @@ define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, i16* nocapture %
 ;
 ; CHECK-BE-LABEL: vec_xst_trunc_us:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    sldi r3, r5, 1
+; CHECK-BE-NEXT:    vsldoi v2, v2, v2, 10
 ; CHECK-BE-NEXT:    stxsihx v2, r6, r3
 ; CHECK-BE-NEXT:    blr
 ;
@@ -167,8 +167,8 @@ define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, i32* nocapture %
 ;
 ; CHECK-BE-LABEL: vec_xst_trunc_si:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
 ; CHECK-BE-NEXT:    sldi r3, r5, 2
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
 ; CHECK-BE-NEXT:    stfiwx f0, r6, r3
 ; CHECK-BE-NEXT:    blr
 ;
@@ -198,8 +198,8 @@ define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, i32* nocapture %
 ;
 ; CHECK-BE-LABEL: vec_xst_trunc_ui:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
 ; CHECK-BE-NEXT:    sldi r3, r5, 2
+; CHECK-BE-NEXT:    xxsldwi vs0, v2, v2, 3
 ; CHECK-BE-NEXT:    stfiwx f0, r6, r3
 ; CHECK-BE-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/constant-pool.ll b/llvm/test/CodeGen/PowerPC/constant-pool.ll
index 697b5eebe432..4185a41b50f2 100644
--- a/llvm/test/CodeGen/PowerPC/constant-pool.ll
+++ b/llvm/test/CodeGen/PowerPC/constant-pool.ll
@@ -364,15 +364,15 @@ define ppc_fp128 @three_constants_ppcf128(ppc_fp128 %a, ppc_fp128 %c) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset v31, -16
-; CHECK-NEXT:    xxlxor f4, f4, f4
 ; CHECK-NEXT:    xxsplti32dx vs3, 0, 1074935889
+; CHECK-NEXT:    xxlxor f4, f4, f4
 ; CHECK-NEXT:    stxv vs63, 32(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    xxsplti32dx vs63, 0, 1074935889
 ; CHECK-NEXT:    xxsplti32dx vs3, 1, -343597384
 ; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
-; CHECK-NEXT:    xxlxor f4, f4, f4
 ; CHECK-NEXT:    xxsplti32dx vs3, 0, 1074935889
+; CHECK-NEXT:    xxlxor f4, f4, f4
 ; CHECK-NEXT:    xxsplti32dx vs3, 1, -1719329096
 ; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc

diff  --git a/llvm/test/CodeGen/PowerPC/int128_ldst.ll b/llvm/test/CodeGen/PowerPC/int128_ldst.ll
index f72c2c062b65..7daef40937fc 100644
--- a/llvm/test/CodeGen/PowerPC/int128_ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/int128_ldst.ll
@@ -420,8 +420,8 @@ define dso_local i128 @ld_disjoint_unalign64___int128___int128(i64 %ptr) {
 ; CHECK-P10-NEXT:    rldicr 4, 3, 0, 23
 ; CHECK-P10-NEXT:    pli 5, 232
 ; CHECK-P10-NEXT:    pli 3, 3567587329
-; CHECK-P10-NEXT:    pli 6, 3567587337
 ; CHECK-P10-NEXT:    rldimi 3, 5, 32, 0
+; CHECK-P10-NEXT:    pli 6, 3567587337
 ; CHECK-P10-NEXT:    rldimi 6, 5, 32, 0
 ; CHECK-P10-NEXT:    ldx 3, 4, 3
 ; CHECK-P10-NEXT:    ldx 4, 4, 6
@@ -465,8 +465,8 @@ define dso_local i128 @ld_disjoint_align64___int128___int128(i64 %ptr) {
 ; CHECK-P10-NEXT:    rldicr 4, 3, 0, 23
 ; CHECK-P10-NEXT:    pli 3, 244140625
 ; CHECK-P10-NEXT:    pli 5, 232
-; CHECK-P10-NEXT:    pli 6, 3567587336
 ; CHECK-P10-NEXT:    rldic 3, 3, 12, 24
+; CHECK-P10-NEXT:    pli 6, 3567587336
 ; CHECK-P10-NEXT:    rldimi 6, 5, 32, 0
 ; CHECK-P10-NEXT:    ldx 3, 4, 3
 ; CHECK-P10-NEXT:    ldx 4, 4, 6
@@ -584,8 +584,8 @@ define dso_local i128 @ld_cst_unalign64___int128___int128() {
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    pli 4, 232
 ; CHECK-P10-NEXT:    pli 3, 3567587329
-; CHECK-P10-NEXT:    pli 5, 3567587337
 ; CHECK-P10-NEXT:    rldimi 3, 4, 32, 0
+; CHECK-P10-NEXT:    pli 5, 3567587337
 ; CHECK-P10-NEXT:    rldimi 5, 4, 32, 0
 ; CHECK-P10-NEXT:    ld 3, 0(3)
 ; CHECK-P10-NEXT:    ld 4, 0(5)

diff  --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
index ae6b146b0254..11a06034e384 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
@@ -26,10 +26,10 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
 ; CHECK-NEXT:    xxlor vs0, v2, v2
 ; CHECK-NEXT:    xxlor vs1, v3, v3
 ; CHECK-NEXT:    stxvp vsp34, 128(r1) # 32-byte Folded Spill
-; CHECK-NEXT:    ld r30, 272(r1)
-; CHECK-NEXT:    stxvp vsp36, 96(r1) # 32-byte Folded Spill
 ; CHECK-NEXT:    xxlor vs2, v4, v4
 ; CHECK-NEXT:    xxlor vs3, v5, v5
+; CHECK-NEXT:    ld r30, 272(r1)
+; CHECK-NEXT:    stxvp vsp36, 96(r1) # 32-byte Folded Spill
 ; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    xvf16ger2pp acc0, v2, v4
 ; CHECK-NEXT:    xxmfacc acc0
@@ -38,9 +38,9 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
 ; CHECK-NEXT:    bl foo@notoc
 ; CHECK-NEXT:    lxvp vsp0, 64(r1)
 ; CHECK-NEXT:    lxvp vsp2, 32(r1)
+; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxvp vsp34, 128(r1) # 32-byte Folded Reload
 ; CHECK-NEXT:    lxvp vsp36, 96(r1) # 32-byte Folded Reload
-; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    xvf16ger2pp acc0, v2, v4
 ; CHECK-NEXT:    xxmfacc acc0
 ; CHECK-NEXT:    stxv vs0, 48(r30)
@@ -69,10 +69,10 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
 ; CHECK-BE-NEXT:    xxlor vs0, v2, v2
 ; CHECK-BE-NEXT:    xxlor vs1, v3, v3
 ; CHECK-BE-NEXT:    stxvp vsp34, 208(r1) # 32-byte Folded Spill
-; CHECK-BE-NEXT:    ld r30, 368(r1)
 ; CHECK-BE-NEXT:    xxlor vs2, v4, v4
 ; CHECK-BE-NEXT:    xxlor vs3, v5, v5
 ; CHECK-BE-NEXT:    stxvp vsp36, 176(r1) # 32-byte Folded Spill
+; CHECK-BE-NEXT:    ld r30, 368(r1)
 ; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    xvf16ger2pp acc0, v2, v4
 ; CHECK-BE-NEXT:    xxmfacc acc0
@@ -82,9 +82,9 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
 ; CHECK-BE-NEXT:    nop
 ; CHECK-BE-NEXT:    lxvp vsp0, 112(r1)
 ; CHECK-BE-NEXT:    lxvp vsp2, 144(r1)
+; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxvp vsp34, 208(r1) # 32-byte Folded Reload
 ; CHECK-BE-NEXT:    lxvp vsp36, 176(r1) # 32-byte Folded Reload
-; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    xvf16ger2pp acc0, v2, v4
 ; CHECK-BE-NEXT:    xxmfacc acc0
 ; CHECK-BE-NEXT:    stxv vs1, 16(r30)

diff  --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
index 77bf4a4eaa96..7f02c9f0215c 100644
--- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -409,29 +409,29 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
 ; CHECK-NEXT:    xvf32gerpp acc2, vs0, vs1
 ; CHECK-NEXT:    lxv vs0, 32(r7)
 ; CHECK-NEXT:    lxv vs1, 48(r7)
-; CHECK-NEXT:    xxmfacc acc2
 ; CHECK-NEXT:    xvf32gerpn acc1, vs0, vs1
 ; CHECK-NEXT:    lxv vs12, 64(r7)
 ; CHECK-NEXT:    lxv vs13, 80(r7)
-; CHECK-NEXT:    rldic r7, r4, 6, 26
 ; CHECK-NEXT:    xxsetaccz acc0
+; CHECK-NEXT:    rldic r7, r4, 6, 26
 ; CHECK-NEXT:    addi r4, r4, 3
-; CHECK-NEXT:    xxmfacc acc1
+; CHECK-NEXT:    add r8, r3, r7
+; CHECK-NEXT:    xxmfacc acc2
 ; CHECK-NEXT:    xvf32gernp acc0, vs12, vs13
 ; CHECK-NEXT:    stxvx vs11, r3, r7
-; CHECK-NEXT:    add r7, r3, r7
+; CHECK-NEXT:    stxv vs8, 48(r8)
+; CHECK-NEXT:    xxmfacc acc1
+; CHECK-NEXT:    stxv vs9, 32(r8)
+; CHECK-NEXT:    stxv vs10, 16(r8)
+; CHECK-NEXT:    stxv vs4, 112(r8)
+; CHECK-NEXT:    stxv vs5, 96(r8)
 ; CHECK-NEXT:    xxmfacc acc0
-; CHECK-NEXT:    stxv vs8, 48(r7)
-; CHECK-NEXT:    stxv vs9, 32(r7)
-; CHECK-NEXT:    stxv vs10, 16(r7)
-; CHECK-NEXT:    stxv vs4, 112(r7)
-; CHECK-NEXT:    stxv vs5, 96(r7)
-; CHECK-NEXT:    stxv vs6, 80(r7)
-; CHECK-NEXT:    stxv vs7, 64(r7)
-; CHECK-NEXT:    stxv vs0, 176(r7)
-; CHECK-NEXT:    stxv vs1, 160(r7)
-; CHECK-NEXT:    stxv vs2, 144(r7)
-; CHECK-NEXT:    stxv vs3, 128(r7)
+; CHECK-NEXT:    stxv vs6, 80(r8)
+; CHECK-NEXT:    stxv vs7, 64(r8)
+; CHECK-NEXT:    stxv vs0, 176(r8)
+; CHECK-NEXT:    stxv vs1, 160(r8)
+; CHECK-NEXT:    stxv vs2, 144(r8)
+; CHECK-NEXT:    stxv vs3, 128(r8)
 ; CHECK-NEXT:    bdnz .LBB9_2
 ; CHECK-NEXT:  # %bb.3: # %for.cond.cleanup
 ; CHECK-NEXT:    blr
@@ -458,29 +458,29 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
 ; CHECK-BE-NEXT:    xvf32gerpp acc2, vs0, vs1
 ; CHECK-BE-NEXT:    lxv vs0, 32(r7)
 ; CHECK-BE-NEXT:    lxv vs1, 48(r7)
-; CHECK-BE-NEXT:    xxmfacc acc2
 ; CHECK-BE-NEXT:    xvf32gerpn acc1, vs0, vs1
 ; CHECK-BE-NEXT:    lxv vs12, 64(r7)
 ; CHECK-BE-NEXT:    lxv vs13, 80(r7)
-; CHECK-BE-NEXT:    rldic r7, r4, 6, 26
 ; CHECK-BE-NEXT:    xxsetaccz acc0
+; CHECK-BE-NEXT:    rldic r7, r4, 6, 26
 ; CHECK-BE-NEXT:    addi r4, r4, 3
-; CHECK-BE-NEXT:    xxmfacc acc1
+; CHECK-BE-NEXT:    add r8, r3, r7
+; CHECK-BE-NEXT:    xxmfacc acc2
 ; CHECK-BE-NEXT:    xvf32gernp acc0, vs12, vs13
 ; CHECK-BE-NEXT:    stxvx vs8, r3, r7
-; CHECK-BE-NEXT:    add r7, r3, r7
+; CHECK-BE-NEXT:    stxv vs9, 16(r8)
+; CHECK-BE-NEXT:    xxmfacc acc1
+; CHECK-BE-NEXT:    stxv vs11, 48(r8)
+; CHECK-BE-NEXT:    stxv vs10, 32(r8)
+; CHECK-BE-NEXT:    stxv vs5, 80(r8)
+; CHECK-BE-NEXT:    stxv vs4, 64(r8)
 ; CHECK-BE-NEXT:    xxmfacc acc0
-; CHECK-BE-NEXT:    stxv vs9, 16(r7)
-; CHECK-BE-NEXT:    stxv vs11, 48(r7)
-; CHECK-BE-NEXT:    stxv vs10, 32(r7)
-; CHECK-BE-NEXT:    stxv vs5, 80(r7)
-; CHECK-BE-NEXT:    stxv vs4, 64(r7)
-; CHECK-BE-NEXT:    stxv vs7, 112(r7)
-; CHECK-BE-NEXT:    stxv vs6, 96(r7)
-; CHECK-BE-NEXT:    stxv vs1, 144(r7)
-; CHECK-BE-NEXT:    stxv vs0, 128(r7)
-; CHECK-BE-NEXT:    stxv vs3, 176(r7)
-; CHECK-BE-NEXT:    stxv vs2, 160(r7)
+; CHECK-BE-NEXT:    stxv vs7, 112(r8)
+; CHECK-BE-NEXT:    stxv vs6, 96(r8)
+; CHECK-BE-NEXT:    stxv vs1, 144(r8)
+; CHECK-BE-NEXT:    stxv vs0, 128(r8)
+; CHECK-BE-NEXT:    stxv vs3, 176(r8)
+; CHECK-BE-NEXT:    stxv vs2, 160(r8)
 ; CHECK-BE-NEXT:    bdnz .LBB9_2
 ; CHECK-BE-NEXT:  # %bb.3: # %for.cond.cleanup
 ; CHECK-BE-NEXT:    blr
@@ -671,8 +671,8 @@ define void @test_ldst_2(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8
 ; CHECK-NEXT:    lxv vs0, 48(r3)
 ; CHECK-NEXT:    lxv vs3, 0(r3)
 ; CHECK-NEXT:    lxv vs2, 16(r3)
-; CHECK-NEXT:    lxvp vsp36, 0(r4)
 ; CHECK-NEXT:    xxmtacc acc0
+; CHECK-NEXT:    lxvp vsp36, 0(r4)
 ; CHECK-NEXT:    xvf64gernp acc0, vsp36, v2
 ; CHECK-NEXT:    xxmfacc acc0
 ; CHECK-NEXT:    stxv vs0, 48(r7)
@@ -687,8 +687,8 @@ define void @test_ldst_2(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    lxvp vsp36, 0(r4)
 ; CHECK-BE-NEXT:    xxmtacc acc0
+; CHECK-BE-NEXT:    lxvp vsp36, 0(r4)
 ; CHECK-BE-NEXT:    xvf64gernp acc0, vsp36, v2
 ; CHECK-BE-NEXT:    xxmfacc acc0
 ; CHECK-BE-NEXT:    stxv vs1, 16(r7)
@@ -715,8 +715,8 @@ define void @test_ldst_3(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %vp
 ; CHECK-NEXT:    lxv vs0, 48(r3)
 ; CHECK-NEXT:    lxv vs3, 0(r3)
 ; CHECK-NEXT:    lxv vs2, 16(r3)
-; CHECK-NEXT:    lxvp vsp36, 0(r5)
 ; CHECK-NEXT:    xxmtacc acc0
+; CHECK-NEXT:    lxvp vsp36, 0(r5)
 ; CHECK-NEXT:    xvf64gernp acc0, vsp36, v2
 ; CHECK-NEXT:    xxmfacc acc0
 ; CHECK-NEXT:    stxv vs0, 48(r9)
@@ -731,8 +731,8 @@ define void @test_ldst_3(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %vp
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    lxvp vsp36, 0(r5)
 ; CHECK-BE-NEXT:    xxmtacc acc0
+; CHECK-BE-NEXT:    lxvp vsp36, 0(r5)
 ; CHECK-BE-NEXT:    xvf64gernp acc0, vsp36, v2
 ; CHECK-BE-NEXT:    xxmfacc acc0
 ; CHECK-BE-NEXT:    stxv vs1, 16(r9)

diff  --git a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
index a2eeceb09977..a9c060ed6df5 100644
--- a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
@@ -13,9 +13,9 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmr v1, v4
 ; CHECK-NEXT:    vmr v4, v3
-; CHECK-NEXT:    ld r3, 96(r1)
 ; CHECK-NEXT:    vmr v0, v2
 ; CHECK-NEXT:    xxlor vs3, v5, v5
+; CHECK-NEXT:    ld r3, 96(r1)
 ; CHECK-NEXT:    xxlor vs0, v0, v0
 ; CHECK-NEXT:    xxlor vs1, v1, v1
 ; CHECK-NEXT:    xxlor vs2, v4, v4
@@ -37,9 +37,9 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
 ; CHECK-BE:       # %bb.0:
 ; CHECK-BE-NEXT:    vmr v1, v4
 ; CHECK-BE-NEXT:    vmr v4, v3
-; CHECK-BE-NEXT:    ld r3, 112(r1)
 ; CHECK-BE-NEXT:    vmr v0, v2
 ; CHECK-BE-NEXT:    xxlor vs3, v5, v5
+; CHECK-BE-NEXT:    ld r3, 112(r1)
 ; CHECK-BE-NEXT:    xxlor vs0, v0, v0
 ; CHECK-BE-NEXT:    xxlor vs1, v1, v1
 ; CHECK-BE-NEXT:    xxlor vs2, v4, v4
@@ -73,9 +73,9 @@ define void @intrinsics2(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    lxv v2, 0(r3)
 ; CHECK-NEXT:    lxv v3, 0(r4)
+; CHECK-NEXT:    xxlor vs0, v2, v2
 ; CHECK-NEXT:    lxv v4, 0(r5)
 ; CHECK-NEXT:    lxv v5, 0(r6)
-; CHECK-NEXT:    xxlor vs0, v2, v2
 ; CHECK-NEXT:    xxlor vs1, v3, v3
 ; CHECK-NEXT:    xxlor vs2, v4, v4
 ; CHECK-NEXT:    xxlor vs3, v5, v5
@@ -97,9 +97,9 @@ define void @intrinsics2(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <
 ; CHECK-BE:       # %bb.0:
 ; CHECK-BE-NEXT:    lxv v2, 0(r3)
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
+; CHECK-BE-NEXT:    xxlor vs0, v2, v2
 ; CHECK-BE-NEXT:    lxv v4, 0(r5)
 ; CHECK-BE-NEXT:    lxv v5, 0(r6)
-; CHECK-BE-NEXT:    xxlor vs0, v2, v2
 ; CHECK-BE-NEXT:    xxlor vs1, v3, v3
 ; CHECK-BE-NEXT:    xxlor vs2, v4, v4
 ; CHECK-BE-NEXT:    xxlor vs3, v5, v5
@@ -1406,8 +1406,8 @@ define void @test34(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-NEXT:    lxv vs0, 48(r3)
 ; CHECK-NEXT:    lxv vs3, 0(r3)
 ; CHECK-NEXT:    lxv vs2, 16(r3)
-; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v5, 0(r4)
+; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v4, 16(r4)
 ; CHECK-NEXT:    xvf64gerpp acc0, vsp36, v2
 ; CHECK-NEXT:    xxmfacc acc0
@@ -1423,8 +1423,8 @@ define void @test34(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v4, 0(r4)
 ; CHECK-BE-NEXT:    xvf64gerpp acc0, vsp36, v2
 ; CHECK-BE-NEXT:    xxmfacc acc0
@@ -1454,8 +1454,8 @@ define void @test35(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-NEXT:    lxv vs0, 48(r3)
 ; CHECK-NEXT:    lxv vs3, 0(r3)
 ; CHECK-NEXT:    lxv vs2, 16(r3)
-; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v5, 0(r4)
+; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v4, 16(r4)
 ; CHECK-NEXT:    xvf64gerpn acc0, vsp36, v2
 ; CHECK-NEXT:    xxmfacc acc0
@@ -1471,8 +1471,8 @@ define void @test35(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v4, 0(r4)
 ; CHECK-BE-NEXT:    xvf64gerpn acc0, vsp36, v2
 ; CHECK-BE-NEXT:    xxmfacc acc0
@@ -1502,8 +1502,8 @@ define void @test36(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-NEXT:    lxv vs0, 48(r3)
 ; CHECK-NEXT:    lxv vs3, 0(r3)
 ; CHECK-NEXT:    lxv vs2, 16(r3)
-; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v5, 0(r4)
+; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v4, 16(r4)
 ; CHECK-NEXT:    xvf64gernp acc0, vsp36, v2
 ; CHECK-NEXT:    xxmfacc acc0
@@ -1519,8 +1519,8 @@ define void @test36(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v4, 0(r4)
 ; CHECK-BE-NEXT:    xvf64gernp acc0, vsp36, v2
 ; CHECK-BE-NEXT:    xxmfacc acc0
@@ -1550,8 +1550,8 @@ define void @test37(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-NEXT:    lxv vs0, 48(r3)
 ; CHECK-NEXT:    lxv vs3, 0(r3)
 ; CHECK-NEXT:    lxv vs2, 16(r3)
-; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v5, 0(r4)
+; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v4, 16(r4)
 ; CHECK-NEXT:    xvf64gernn acc0, vsp36, v2
 ; CHECK-NEXT:    xxmfacc acc0
@@ -1567,8 +1567,8 @@ define void @test37(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v4, 0(r4)
 ; CHECK-BE-NEXT:    xvf64gernn acc0, vsp36, v2
 ; CHECK-BE-NEXT:    xxmfacc acc0
@@ -1634,8 +1634,8 @@ define void @test39(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-NEXT:    lxv vs0, 48(r3)
 ; CHECK-NEXT:    lxv vs3, 0(r3)
 ; CHECK-NEXT:    lxv vs2, 16(r3)
-; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v5, 0(r4)
+; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v4, 16(r4)
 ; CHECK-NEXT:    pmxvf64gerpp acc0, vsp36, v2, 0, 0
 ; CHECK-NEXT:    xxmfacc acc0
@@ -1651,8 +1651,8 @@ define void @test39(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v4, 0(r4)
 ; CHECK-BE-NEXT:    pmxvf64gerpp acc0, vsp36, v2, 0, 0
 ; CHECK-BE-NEXT:    xxmfacc acc0
@@ -1682,8 +1682,8 @@ define void @test40(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-NEXT:    lxv vs0, 48(r3)
 ; CHECK-NEXT:    lxv vs3, 0(r3)
 ; CHECK-NEXT:    lxv vs2, 16(r3)
-; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v5, 0(r4)
+; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v4, 16(r4)
 ; CHECK-NEXT:    pmxvf64gerpn acc0, vsp36, v2, 0, 0
 ; CHECK-NEXT:    xxmfacc acc0
@@ -1699,8 +1699,8 @@ define void @test40(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v4, 0(r4)
 ; CHECK-BE-NEXT:    pmxvf64gerpn acc0, vsp36, v2, 0, 0
 ; CHECK-BE-NEXT:    xxmfacc acc0
@@ -1730,8 +1730,8 @@ define void @test41(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-NEXT:    lxv vs0, 48(r3)
 ; CHECK-NEXT:    lxv vs3, 0(r3)
 ; CHECK-NEXT:    lxv vs2, 16(r3)
-; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v5, 0(r4)
+; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v4, 16(r4)
 ; CHECK-NEXT:    pmxvf64gernp acc0, vsp36, v2, 0, 0
 ; CHECK-NEXT:    xxmfacc acc0
@@ -1747,8 +1747,8 @@ define void @test41(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v4, 0(r4)
 ; CHECK-BE-NEXT:    pmxvf64gernp acc0, vsp36, v2, 0, 0
 ; CHECK-BE-NEXT:    xxmfacc acc0
@@ -1778,8 +1778,8 @@ define void @test42(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-NEXT:    lxv vs0, 48(r3)
 ; CHECK-NEXT:    lxv vs3, 0(r3)
 ; CHECK-NEXT:    lxv vs2, 16(r3)
-; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v5, 0(r4)
+; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    lxv v4, 16(r4)
 ; CHECK-NEXT:    pmxvf64gernn acc0, vsp36, v2, 0, 0
 ; CHECK-NEXT:    xxmfacc acc0
@@ -1795,8 +1795,8 @@ define void @test42(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
 ; CHECK-BE-NEXT:    lxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    lxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    lxv vs2, 32(r3)
-; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v5, 16(r4)
+; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    lxv v4, 0(r4)
 ; CHECK-BE-NEXT:    pmxvf64gernn acc0, vsp36, v2, 0, 0
 ; CHECK-BE-NEXT:    xxmfacc acc0

diff  --git a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
index 26cf24382dc5..25fd19e1f3c7 100644
--- a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
@@ -13,13 +13,13 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble
 define void @testPHI1(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %Len) {
 ; CHECK-LABEL: testPHI1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    cmpwi r5, 3
 ; CHECK-NEXT:    xxsetaccz acc0
+; CHECK-NEXT:    cmpwi r5, 3
 ; CHECK-NEXT:    blt cr0, .LBB0_3
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    clrldi r5, r5, 32
 ; CHECK-NEXT:    lxv v2, 0(r4)
 ; CHECK-NEXT:    lxv v3, 16(r4)
+; CHECK-NEXT:    clrldi r5, r5, 32
 ; CHECK-NEXT:    addi r4, r4, 32
 ; CHECK-NEXT:    addi r5, r5, -2
 ; CHECK-NEXT:    mtctr r5
@@ -40,13 +40,13 @@ define void @testPHI1(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %Len) {
 ;
 ; CHECK-BE-LABEL: testPHI1:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    cmpwi r5, 3
 ; CHECK-BE-NEXT:    xxsetaccz acc0
+; CHECK-BE-NEXT:    cmpwi r5, 3
 ; CHECK-BE-NEXT:    blt cr0, .LBB0_3
 ; CHECK-BE-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-BE-NEXT:    clrldi r5, r5, 32
 ; CHECK-BE-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-NEXT:    lxv v3, 16(r4)
+; CHECK-BE-NEXT:    clrldi r5, r5, 32
 ; CHECK-BE-NEXT:    addi r4, r4, 32
 ; CHECK-BE-NEXT:    addi r5, r5, -2
 ; CHECK-BE-NEXT:    mtctr r5
@@ -110,8 +110,8 @@ define dso_local void @testPHI2(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %L
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lxv v2, 0(r4)
 ; CHECK-NEXT:    lxv v3, 16(r4)
-; CHECK-NEXT:    lxv vs4, 32(r4)
 ; CHECK-NEXT:    cmpwi r5, 4
+; CHECK-NEXT:    lxv vs4, 32(r4)
 ; CHECK-NEXT:    xvf64ger acc0, vsp34, vs4
 ; CHECK-NEXT:    blt cr0, .LBB1_3
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
@@ -138,8 +138,8 @@ define dso_local void @testPHI2(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %L
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-NEXT:    lxv v3, 16(r4)
-; CHECK-BE-NEXT:    lxv vs4, 32(r4)
 ; CHECK-BE-NEXT:    cmpwi r5, 4
+; CHECK-BE-NEXT:    lxv vs4, 32(r4)
 ; CHECK-BE-NEXT:    xvf64ger acc0, vsp34, vs4
 ; CHECK-BE-NEXT:    blt cr0, .LBB1_3
 ; CHECK-BE-NEXT:  # %bb.1: # %for.body.preheader
@@ -273,8 +273,8 @@ define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %coun
 ; CHECK-NEXT:    xvf32gernp acc0, v2, v2
 ; CHECK-NEXT:    bdnz .LBB3_4
 ; CHECK-NEXT:  .LBB3_5: # %for.cond.cleanup
-; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    stxv vs0, 48(r5)
 ; CHECK-NEXT:    stxv vs1, 32(r5)
 ; CHECK-NEXT:    stxv vs2, 16(r5)
@@ -305,8 +305,8 @@ define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %coun
 ; CHECK-BE-NEXT:    xvf32gernp acc0, v2, v2
 ; CHECK-BE-NEXT:    bdnz .LBB3_4
 ; CHECK-BE-NEXT:  .LBB3_5: # %for.cond.cleanup
-; CHECK-BE-NEXT:    li r3, 0
 ; CHECK-BE-NEXT:    xxmfacc acc0
+; CHECK-BE-NEXT:    li r3, 0
 ; CHECK-BE-NEXT:    stxv vs1, 16(r5)
 ; CHECK-BE-NEXT:    stxv vs0, 0(r5)
 ; CHECK-BE-NEXT:    stxv vs3, 48(r5)

diff  --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
index 6ab9642f92fb..0d9662dc1242 100644
--- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
@@ -10,215 +10,220 @@ target triple = "powerpc64le-unknown-linux-gnu"
 define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_type_of_x]* %.x, i32* %.l, <2 x double>* %.vy01, <2 x double>* %.vy02, <2 x double>* %.vy03, <2 x double>* %.vy04, <2 x double>* %.vy05, <2 x double>* %.vy06, <2 x double>* %.vy07, <2 x double>* %.vy08, <2 x double>* %.vy09, <2 x double>* %.vy0a, <2 x double>* %.vy0b, <2 x double>* %.vy0c, <2 x double>* %.vy21, <2 x double>* %.vy22, <2 x double>* %.vy23, <2 x double>* %.vy24, <2 x double>* %.vy25, <2 x double>* %.vy26, <2 x double>* %.vy27, <2 x double>* %.vy28, <2 x double>* %.vy29, <2 x double>* %.vy2a, <2 x double>* %.vy2b, <2 x double>* %.vy2c) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stdu 1, -576(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 576
-; CHECK-NEXT:    .cfi_offset r14, -160
-; CHECK-NEXT:    .cfi_offset r15, -152
-; CHECK-NEXT:    .cfi_offset r16, -144
-; CHECK-NEXT:    .cfi_offset r17, -136
-; CHECK-NEXT:    .cfi_offset r18, -128
-; CHECK-NEXT:    .cfi_offset r19, -120
-; CHECK-NEXT:    .cfi_offset r20, -112
-; CHECK-NEXT:    .cfi_offset r21, -104
-; CHECK-NEXT:    .cfi_offset r22, -96
-; CHECK-NEXT:    .cfi_offset r23, -88
-; CHECK-NEXT:    .cfi_offset r24, -80
-; CHECK-NEXT:    .cfi_offset r25, -72
-; CHECK-NEXT:    .cfi_offset r26, -64
-; CHECK-NEXT:    .cfi_offset r27, -56
-; CHECK-NEXT:    .cfi_offset r28, -48
-; CHECK-NEXT:    .cfi_offset r29, -40
-; CHECK-NEXT:    .cfi_offset r30, -32
-; CHECK-NEXT:    .cfi_offset r31, -24
+; CHECK-NEXT:    stdu 1, -592(1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 592
+; CHECK-NEXT:    .cfi_offset r14, -192
+; CHECK-NEXT:    .cfi_offset r15, -184
+; CHECK-NEXT:    .cfi_offset r16, -176
+; CHECK-NEXT:    .cfi_offset r17, -168
+; CHECK-NEXT:    .cfi_offset r18, -160
+; CHECK-NEXT:    .cfi_offset r19, -152
+; CHECK-NEXT:    .cfi_offset r20, -144
+; CHECK-NEXT:    .cfi_offset r21, -136
+; CHECK-NEXT:    .cfi_offset r22, -128
+; CHECK-NEXT:    .cfi_offset r23, -120
+; CHECK-NEXT:    .cfi_offset r24, -112
+; CHECK-NEXT:    .cfi_offset r25, -104
+; CHECK-NEXT:    .cfi_offset r26, -96
+; CHECK-NEXT:    .cfi_offset r27, -88
+; CHECK-NEXT:    .cfi_offset r28, -80
+; CHECK-NEXT:    .cfi_offset r29, -72
+; CHECK-NEXT:    .cfi_offset r30, -64
+; CHECK-NEXT:    .cfi_offset r31, -56
+; CHECK-NEXT:    .cfi_offset f26, -48
+; CHECK-NEXT:    .cfi_offset f27, -40
+; CHECK-NEXT:    .cfi_offset f28, -32
+; CHECK-NEXT:    .cfi_offset f29, -24
 ; CHECK-NEXT:    .cfi_offset f30, -16
 ; CHECK-NEXT:    .cfi_offset f31, -8
-; CHECK-NEXT:    .cfi_offset v20, -352
-; CHECK-NEXT:    .cfi_offset v21, -336
-; CHECK-NEXT:    .cfi_offset v22, -320
-; CHECK-NEXT:    .cfi_offset v23, -304
-; CHECK-NEXT:    .cfi_offset v24, -288
-; CHECK-NEXT:    .cfi_offset v25, -272
-; CHECK-NEXT:    .cfi_offset v26, -256
-; CHECK-NEXT:    .cfi_offset v27, -240
-; CHECK-NEXT:    .cfi_offset v28, -224
-; CHECK-NEXT:    .cfi_offset v29, -208
-; CHECK-NEXT:    .cfi_offset v30, -192
-; CHECK-NEXT:    .cfi_offset v31, -176
+; CHECK-NEXT:    .cfi_offset v20, -384
+; CHECK-NEXT:    .cfi_offset v21, -368
+; CHECK-NEXT:    .cfi_offset v22, -352
+; CHECK-NEXT:    .cfi_offset v23, -336
+; CHECK-NEXT:    .cfi_offset v24, -320
+; CHECK-NEXT:    .cfi_offset v25, -304
+; CHECK-NEXT:    .cfi_offset v26, -288
+; CHECK-NEXT:    .cfi_offset v27, -272
+; CHECK-NEXT:    .cfi_offset v28, -256
+; CHECK-NEXT:    .cfi_offset v29, -240
+; CHECK-NEXT:    .cfi_offset v30, -224
+; CHECK-NEXT:    .cfi_offset v31, -208
 ; CHECK-NEXT:    lwz 4, 0(4)
-; CHECK-NEXT:    std 14, 416(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 15, 424(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv 52, 224(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 53, 240(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 54, 256(1) # 16-byte Folded Spill
-; CHECK-NEXT:    std 16, 432(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 17, 440(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv 55, 272(1) # 16-byte Folded Spill
-; CHECK-NEXT:    std 18, 448(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 19, 456(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv 56, 288(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 57, 304(1) # 16-byte Folded Spill
-; CHECK-NEXT:    std 20, 464(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 21, 472(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv 58, 320(1) # 16-byte Folded Spill
-; CHECK-NEXT:    std 22, 480(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 23, 488(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv 59, 336(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 60, 352(1) # 16-byte Folded Spill
-; CHECK-NEXT:    std 24, 496(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 25, 504(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv 61, 368(1) # 16-byte Folded Spill
-; CHECK-NEXT:    std 26, 512(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 27, 520(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv 62, 384(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 63, 400(1) # 16-byte Folded Spill
-; CHECK-NEXT:    std 28, 528(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 29, 536(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 14, 400(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 15, 408(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    cmpwi 4, 1
-; CHECK-NEXT:    std 30, 544(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 31, 552(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 30, 560(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 31, 568(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 16, 416(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 17, 424(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 18, 432(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 19, 440(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 20, 448(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 21, 456(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 22, 464(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 23, 472(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 24, 480(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 25, 488(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 26, 496(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 27, 504(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 28, 512(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 29, 520(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 30, 528(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 31, 536(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 26, 544(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 27, 552(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 28, 560(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 29, 568(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 30, 576(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 31, 584(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stxv 52, 208(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 53, 224(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 54, 240(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 55, 256(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 56, 272(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 57, 288(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 58, 304(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 59, 320(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 60, 336(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 61, 352(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 62, 368(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 63, 384(1) # 16-byte Folded Spill
 ; CHECK-NEXT:    blt 0, .LBB0_7
 ; CHECK-NEXT:  # %bb.1: # %_loop_1_do_.lr.ph
-; CHECK-NEXT:    mr 23, 5
+; CHECK-NEXT:    mr 22, 5
 ; CHECK-NEXT:    lwz 5, 0(3)
 ; CHECK-NEXT:    cmpwi 5, 1
 ; CHECK-NEXT:    blt 0, .LBB0_7
 ; CHECK-NEXT:  # %bb.2: # %_loop_1_do_.preheader
+; CHECK-NEXT:    mr 14, 6
+; CHECK-NEXT:    ld 6, 712(1)
+; CHECK-NEXT:    lwa 3, 0(7)
 ; CHECK-NEXT:    addi 5, 5, 1
-; CHECK-NEXT:    li 20, 9
+; CHECK-NEXT:    std 8, 40(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 9, 48(1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr 11, 10
+; CHECK-NEXT:    cmpldi 5, 9
+; CHECK-NEXT:    lxv 4, 0(8)
+; CHECK-NEXT:    ld 8, 696(1)
+; CHECK-NEXT:    ld 10, 736(1)
 ; CHECK-NEXT:    ld 28, 824(1)
-; CHECK-NEXT:    ld 19, 712(1)
-; CHECK-NEXT:    lwa 3, 0(7)
-; CHECK-NEXT:    ld 7, 784(1)
-; CHECK-NEXT:    ld 12, 776(1)
-; CHECK-NEXT:    ld 11, 768(1)
-; CHECK-NEXT:    ld 2, 760(1)
+; CHECK-NEXT:    std 6, 88(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 10, 96(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxv 0, 0(6)
+; CHECK-NEXT:    li 6, 9
+; CHECK-NEXT:    ld 7, 688(1)
+; CHECK-NEXT:    ld 27, 840(1)
 ; CHECK-NEXT:    ld 29, 832(1)
-; CHECK-NEXT:    cmpldi 5, 9
-; CHECK-NEXT:    ld 27, 816(1)
-; CHECK-NEXT:    ld 26, 808(1)
-; CHECK-NEXT:    ld 25, 800(1)
-; CHECK-NEXT:    ld 24, 792(1)
-; CHECK-NEXT:    iselgt 5, 5, 20
-; CHECK-NEXT:    ld 30, 752(1)
-; CHECK-NEXT:    ld 22, 744(1)
-; CHECK-NEXT:    ld 21, 736(1)
-; CHECK-NEXT:    ld 20, 728(1)
-; CHECK-NEXT:    ld 18, 704(1)
-; CHECK-NEXT:    ld 17, 696(1)
-; CHECK-NEXT:    ld 16, 688(1)
-; CHECK-NEXT:    ld 14, 680(1)
-; CHECK-NEXT:    sldi 0, 3, 2
-; CHECK-NEXT:    std 5, 216(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 28, 208(1) # 8-byte Folded Spill
-; CHECK-NEXT:    mr 5, 4
-; CHECK-NEXT:    ld 4, 720(1)
-; CHECK-NEXT:    std 19, 96(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 4, 104(1) # 8-byte Folded Spill
-; CHECK-NEXT:    lxv 11, 0(4)
-; CHECK-NEXT:    mr 4, 5
-; CHECK-NEXT:    ld 5, 216(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 15, 672(1)
-; CHECK-NEXT:    sldi 31, 3, 1
+; CHECK-NEXT:    ld 26, 816(1)
+; CHECK-NEXT:    ld 25, 808(1)
+; CHECK-NEXT:    ld 24, 800(1)
+; CHECK-NEXT:    ld 23, 792(1)
 ; CHECK-NEXT:    std 8, 32(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 9, 40(1) # 8-byte Folded Spill
-; CHECK-NEXT:    lxv 43, 0(8)
-; CHECK-NEXT:    mr 8, 6
+; CHECK-NEXT:    sldi 0, 3, 1
+; CHECK-NEXT:    sldi 31, 3, 2
+; CHECK-NEXT:    std 28, 184(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 29, 192(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 25, 168(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 26, 176(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 23, 152(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 24, 160(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 27, 200(1) # 8-byte Folded Spill
+; CHECK-NEXT:    iselgt 5, 5, 6
 ; CHECK-NEXT:    sldi 6, 3, 3
-; CHECK-NEXT:    std 2, 144(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 11, 152(1) # 8-byte Folded Spill
-; CHECK-NEXT:    lxv 3, 0(2)
-; CHECK-NEXT:    lxv 2, 0(11)
-; CHECK-NEXT:    lxv 0, 0(7)
-; CHECK-NEXT:    add 6, 6, 23
-; CHECK-NEXT:    lxv 7, 0(28)
-; CHECK-NEXT:    add 28, 3, 31
-; CHECK-NEXT:    lxv 42, 0(9)
-; CHECK-NEXT:    lxv 41, 0(10)
-; CHECK-NEXT:    lxv 40, 0(15)
-; CHECK-NEXT:    lxv 39, 0(14)
-; CHECK-NEXT:    lxv 38, 0(16)
-; CHECK-NEXT:    lxv 33, 0(17)
-; CHECK-NEXT:    lxv 37, 0(18)
-; CHECK-NEXT:    lxv 13, 0(19)
-; CHECK-NEXT:    lxv 10, 0(20)
-; CHECK-NEXT:    lxv 8, 0(21)
-; CHECK-NEXT:    lxv 6, 0(22)
-; CHECK-NEXT:    lxv 4, 0(30)
-; CHECK-NEXT:    lxv 1, 0(12)
-; CHECK-NEXT:    lxv 32, 0(24)
-; CHECK-NEXT:    lxv 36, 0(25)
-; CHECK-NEXT:    lxv 12, 0(26)
-; CHECK-NEXT:    lxv 9, 0(27)
-; CHECK-NEXT:    lxv 5, 0(29)
+; CHECK-NEXT:    ld 21, 784(1)
+; CHECK-NEXT:    ld 20, 776(1)
+; CHECK-NEXT:    ld 19, 768(1)
+; CHECK-NEXT:    ld 18, 760(1)
+; CHECK-NEXT:    std 18, 120(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 19, 128(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 20, 136(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 21, 144(1) # 8-byte Folded Spill
+; CHECK-NEXT:    add 2, 6, 22
+; CHECK-NEXT:    ld 17, 752(1)
+; CHECK-NEXT:    ld 16, 744(1)
+; CHECK-NEXT:    lxv 3, 0(9)
+; CHECK-NEXT:    ld 6, 728(1)
 ; CHECK-NEXT:    addi 5, 5, -2
+; CHECK-NEXT:    std 7, 80(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 6, 72(1) # 8-byte Folded Spill
+; CHECK-NEXT:    ld 15, 720(1)
+; CHECK-NEXT:    ld 9, 704(1)
+; CHECK-NEXT:    lxv 43, 0(8)
+; CHECK-NEXT:    ld 8, 848(1)
+; CHECK-NEXT:    std 11, 56(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 15, 64(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxv 2, 0(11)
 ; CHECK-NEXT:    sldi 11, 3, 4
-; CHECK-NEXT:    std 12, 160(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 7, 168(1) # 8-byte Folded Spill
-; CHECK-NEXT:    add 7, 3, 0
-; CHECK-NEXT:    add 12, 11, 23
-; CHECK-NEXT:    addi 11, 6, 32
-; CHECK-NEXT:    addi 12, 12, 32
-; CHECK-NEXT:    std 22, 128(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 30, 136(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 26, 192(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 27, 200(1) # 8-byte Folded Spill
-; CHECK-NEXT:    mulli 26, 3, 48
-; CHECK-NEXT:    mulli 22, 3, 6
-; CHECK-NEXT:    sldi 6, 7, 3
-; CHECK-NEXT:    add 30, 23, 6
-; CHECK-NEXT:    std 29, 216(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 24, 176(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 25, 184(1) # 8-byte Folded Spill
-; CHECK-NEXT:    li 25, 1
-; CHECK-NEXT:    li 24, 0
-; CHECK-NEXT:    std 10, 48(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 15, 56(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 14, 64(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 16, 72(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 17, 80(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 18, 88(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 20, 112(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 21, 120(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    rldicl 5, 5, 61, 3
+; CHECK-NEXT:    lxv 1, 0(7)
+; CHECK-NEXT:    add 7, 3, 31
+; CHECK-NEXT:    add 12, 11, 22
+; CHECK-NEXT:    addi 11, 2, 32
 ; CHECK-NEXT:    addi 2, 5, 1
+; CHECK-NEXT:    lxv 6, 0(28)
 ; CHECK-NEXT:    sldi 5, 3, 5
-; CHECK-NEXT:    add 29, 23, 5
+; CHECK-NEXT:    add 28, 3, 0
+; CHECK-NEXT:    lxv 42, 0(9)
+; CHECK-NEXT:    lxv 41, 0(15)
+; CHECK-NEXT:    lxv 40, 0(6)
+; CHECK-NEXT:    lxv 39, 0(10)
+; CHECK-NEXT:    lxv 38, 0(16)
+; CHECK-NEXT:    sldi 30, 7, 3
+; CHECK-NEXT:    addi 12, 12, 32
+; CHECK-NEXT:    add 30, 22, 30
+; CHECK-NEXT:    std 16, 104(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 17, 112(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxv 33, 0(17)
+; CHECK-NEXT:    lxv 32, 0(18)
+; CHECK-NEXT:    lxv 37, 0(19)
+; CHECK-NEXT:    lxv 36, 0(20)
+; CHECK-NEXT:    lxv 13, 0(21)
+; CHECK-NEXT:    lxv 12, 0(23)
+; CHECK-NEXT:    li 23, 0
+; CHECK-NEXT:    lxv 11, 0(24)
+; CHECK-NEXT:    li 24, 1
+; CHECK-NEXT:    lxv 9, 0(25)
+; CHECK-NEXT:    mulli 25, 3, 6
+; CHECK-NEXT:    lxv 8, 0(26)
+; CHECK-NEXT:    mulli 26, 3, 48
+; CHECK-NEXT:    lxv 5, 0(29)
+; CHECK-NEXT:    add 29, 22, 5
 ; CHECK-NEXT:    sldi 5, 28, 3
-; CHECK-NEXT:    add 27, 23, 5
-; CHECK-NEXT:    mr 5, 23
+; CHECK-NEXT:    lxv 7, 0(27)
+; CHECK-NEXT:    add 27, 22, 5
+; CHECK-NEXT:    mr 5, 22
+; CHECK-NEXT:    lxv 10, 0(8)
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_3: # %_loop_2_do_.lr.ph
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
 ; CHECK-NEXT:    # Child Loop BB0_4 Depth 2
-; CHECK-NEXT:    maddld 6, 22, 24, 7
-; CHECK-NEXT:    maddld 20, 22, 24, 0
+; CHECK-NEXT:    maddld 6, 25, 23, 7
 ; CHECK-NEXT:    mtctr 2
 ; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 21, 23, 6
-; CHECK-NEXT:    sldi 6, 20, 3
-; CHECK-NEXT:    add 20, 23, 6
-; CHECK-NEXT:    maddld 6, 22, 24, 28
+; CHECK-NEXT:    add 21, 22, 6
+; CHECK-NEXT:    maddld 6, 25, 23, 31
+; CHECK-NEXT:    sldi 6, 6, 3
+; CHECK-NEXT:    add 20, 22, 6
+; CHECK-NEXT:    maddld 6, 25, 23, 28
 ; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 19, 23, 6
-; CHECK-NEXT:    maddld 6, 22, 24, 31
+; CHECK-NEXT:    add 19, 22, 6
+; CHECK-NEXT:    maddld 6, 25, 23, 0
 ; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 18, 23, 6
-; CHECK-NEXT:    maddld 6, 22, 24, 3
+; CHECK-NEXT:    add 18, 22, 6
+; CHECK-NEXT:    maddld 6, 25, 23, 3
 ; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 17, 23, 6
-; CHECK-NEXT:    mulld 6, 22, 24
+; CHECK-NEXT:    add 17, 22, 6
+; CHECK-NEXT:    mulld 6, 25, 23
 ; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 16, 23, 6
-; CHECK-NEXT:    mr 6, 8
+; CHECK-NEXT:    add 16, 22, 6
+; CHECK-NEXT:    mr 6, 14
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB0_4: # %_loop_2_do_
 ; CHECK-NEXT:    # Parent Loop BB0_3 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lxvp 34, 0(6)
 ; CHECK-NEXT:    lxvp 44, 0(16)
+; CHECK-NEXT:    xvmaddadp 4, 45, 35
 ; CHECK-NEXT:    lxvp 46, 0(17)
+; CHECK-NEXT:    xvmaddadp 3, 47, 35
 ; CHECK-NEXT:    lxvp 48, 0(18)
 ; CHECK-NEXT:    lxvp 50, 0(19)
 ; CHECK-NEXT:    lxvp 62, 0(20)
@@ -228,135 +233,135 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_ty
 ; CHECK-NEXT:    lxvp 54, 32(17)
 ; CHECK-NEXT:    lxvp 52, 32(18)
 ; CHECK-NEXT:    lxvp 30, 32(19)
+; CHECK-NEXT:    lxvp 28, 32(20)
+; CHECK-NEXT:    lxvp 26, 32(21)
+; CHECK-NEXT:    xvmaddadp 2, 49, 35
+; CHECK-NEXT:    xvmaddadp 1, 51, 35
+; CHECK-NEXT:    xvmaddadp 43, 63, 35
+; CHECK-NEXT:    xvmaddadp 42, 61, 35
+; CHECK-NEXT:    xvmaddadp 0, 44, 34
+; CHECK-NEXT:    xvmaddadp 41, 46, 34
+; CHECK-NEXT:    xvmaddadp 40, 48, 34
+; CHECK-NEXT:    xvmaddadp 39, 50, 34
+; CHECK-NEXT:    xvmaddadp 38, 62, 34
+; CHECK-NEXT:    xvmaddadp 33, 60, 34
+; CHECK-NEXT:    xvmaddadp 32, 57, 59
+; CHECK-NEXT:    xvmaddadp 37, 55, 59
+; CHECK-NEXT:    xvmaddadp 36, 53, 59
+; CHECK-NEXT:    xvmaddadp 13, 31, 59
+; CHECK-NEXT:    xvmaddadp 12, 29, 59
+; CHECK-NEXT:    xvmaddadp 11, 27, 59
+; CHECK-NEXT:    xvmaddadp 9, 56, 58
+; CHECK-NEXT:    xvmaddadp 8, 54, 58
+; CHECK-NEXT:    xvmaddadp 6, 52, 58
+; CHECK-NEXT:    xvmaddadp 5, 30, 58
+; CHECK-NEXT:    xvmaddadp 7, 28, 58
+; CHECK-NEXT:    xvmaddadp 10, 26, 58
 ; CHECK-NEXT:    addi 6, 6, 64
 ; CHECK-NEXT:    addi 16, 16, 64
 ; CHECK-NEXT:    addi 17, 17, 64
 ; CHECK-NEXT:    addi 18, 18, 64
 ; CHECK-NEXT:    addi 19, 19, 64
-; CHECK-NEXT:    xvmaddadp 43, 45, 35
-; CHECK-NEXT:    xvmaddadp 42, 47, 35
-; CHECK-NEXT:    xvmaddadp 41, 49, 35
-; CHECK-NEXT:    xvmaddadp 40, 51, 35
-; CHECK-NEXT:    xvmaddadp 39, 63, 35
-; CHECK-NEXT:    xvmaddadp 38, 61, 35
-; CHECK-NEXT:    xvmaddadp 33, 44, 34
-; CHECK-NEXT:    xvmaddadp 37, 46, 34
-; CHECK-NEXT:    xvmaddadp 13, 48, 34
-; CHECK-NEXT:    xvmaddadp 11, 50, 34
-; CHECK-NEXT:    xvmaddadp 10, 62, 34
-; CHECK-NEXT:    xvmaddadp 8, 60, 34
-; CHECK-NEXT:    lxvp 34, 32(20)
-; CHECK-NEXT:    lxvp 44, 32(21)
 ; CHECK-NEXT:    addi 20, 20, 64
 ; CHECK-NEXT:    addi 21, 21, 64
-; CHECK-NEXT:    xvmaddadp 6, 57, 59
-; CHECK-NEXT:    xvmaddadp 4, 55, 59
-; CHECK-NEXT:    xvmaddadp 3, 53, 59
-; CHECK-NEXT:    xvmaddadp 2, 31, 59
-; CHECK-NEXT:    xvmaddadp 32, 56, 58
-; CHECK-NEXT:    xvmaddadp 36, 54, 58
-; CHECK-NEXT:    xvmaddadp 12, 52, 58
-; CHECK-NEXT:    xvmaddadp 9, 30, 58
-; CHECK-NEXT:    xvmaddadp 1, 35, 59
-; CHECK-NEXT:    xvmaddadp 0, 45, 59
-; CHECK-NEXT:    xvmaddadp 7, 34, 58
-; CHECK-NEXT:    xvmaddadp 5, 44, 58
 ; CHECK-NEXT:    bdnz .LBB0_4
 ; CHECK-NEXT:  # %bb.5: # %_loop_2_endl_
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    addi 25, 25, 6
+; CHECK-NEXT:    addi 24, 24, 6
 ; CHECK-NEXT:    add 5, 5, 26
 ; CHECK-NEXT:    add 11, 11, 26
 ; CHECK-NEXT:    add 30, 30, 26
 ; CHECK-NEXT:    add 12, 12, 26
 ; CHECK-NEXT:    add 29, 29, 26
 ; CHECK-NEXT:    add 27, 27, 26
-; CHECK-NEXT:    addi 24, 24, 1
-; CHECK-NEXT:    cmpld 25, 4
+; CHECK-NEXT:    addi 23, 23, 1
+; CHECK-NEXT:    cmpld 24, 4
 ; CHECK-NEXT:    ble 0, .LBB0_3
 ; CHECK-NEXT:  # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit
-; CHECK-NEXT:    ld 3, 32(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 43, 0(3)
 ; CHECK-NEXT:    ld 3, 40(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 42, 0(3)
+; CHECK-NEXT:    stxv 4, 0(3)
 ; CHECK-NEXT:    ld 3, 48(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 41, 0(3)
+; CHECK-NEXT:    stxv 3, 0(3)
 ; CHECK-NEXT:    ld 3, 56(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 40, 0(3)
-; CHECK-NEXT:    ld 3, 64(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 39, 0(3)
-; CHECK-NEXT:    ld 3, 72(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 38, 0(3)
+; CHECK-NEXT:    stxv 2, 0(3)
 ; CHECK-NEXT:    ld 3, 80(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 33, 0(3)
+; CHECK-NEXT:    stxv 1, 0(3)
+; CHECK-NEXT:    ld 3, 32(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 43, 0(3)
 ; CHECK-NEXT:    ld 3, 88(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 37, 0(3)
+; CHECK-NEXT:    stxv 42, 0(9)
+; CHECK-NEXT:    stxv 0, 0(3)
+; CHECK-NEXT:    ld 3, 64(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 41, 0(3)
+; CHECK-NEXT:    ld 3, 72(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 40, 0(3)
 ; CHECK-NEXT:    ld 3, 96(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 13, 0(3)
+; CHECK-NEXT:    stxv 39, 0(3)
 ; CHECK-NEXT:    ld 3, 104(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 11, 0(3)
+; CHECK-NEXT:    stxv 38, 0(3)
 ; CHECK-NEXT:    ld 3, 112(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 10, 0(3)
+; CHECK-NEXT:    stxv 33, 0(3)
 ; CHECK-NEXT:    ld 3, 120(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 8, 0(3)
+; CHECK-NEXT:    stxv 32, 0(3)
 ; CHECK-NEXT:    ld 3, 128(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 6, 0(3)
+; CHECK-NEXT:    stxv 37, 0(3)
 ; CHECK-NEXT:    ld 3, 136(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 4, 0(3)
+; CHECK-NEXT:    stxv 36, 0(3)
 ; CHECK-NEXT:    ld 3, 144(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 3, 0(3)
+; CHECK-NEXT:    stxv 13, 0(3)
 ; CHECK-NEXT:    ld 3, 152(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 2, 0(3)
+; CHECK-NEXT:    stxv 12, 0(3)
 ; CHECK-NEXT:    ld 3, 160(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 1, 0(3)
+; CHECK-NEXT:    stxv 11, 0(3)
 ; CHECK-NEXT:    ld 3, 168(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 0, 0(3)
+; CHECK-NEXT:    stxv 9, 0(3)
 ; CHECK-NEXT:    ld 3, 176(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 32, 0(3)
+; CHECK-NEXT:    stxv 8, 0(3)
 ; CHECK-NEXT:    ld 3, 184(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 36, 0(3)
+; CHECK-NEXT:    stxv 6, 0(3)
 ; CHECK-NEXT:    ld 3, 192(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 12, 0(3)
+; CHECK-NEXT:    stxv 5, 0(3)
 ; CHECK-NEXT:    ld 3, 200(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 9, 0(3)
-; CHECK-NEXT:    ld 3, 208(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    stxv 7, 0(3)
-; CHECK-NEXT:    ld 3, 216(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 5, 0(3)
+; CHECK-NEXT:    stxv 10, 0(8)
 ; CHECK-NEXT:  .LBB0_7: # %_return_bb
-; CHECK-NEXT:    lxv 63, 400(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 62, 384(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 61, 368(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 60, 352(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 59, 336(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 58, 320(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 57, 304(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 56, 288(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 55, 272(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 54, 256(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 53, 240(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 52, 224(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lfd 31, 568(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 30, 560(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 31, 552(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 30, 544(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 29, 536(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 28, 528(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 27, 520(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 26, 512(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 25, 504(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 24, 496(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 23, 488(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 22, 480(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 21, 472(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 20, 464(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 19, 456(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 18, 448(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 17, 440(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 16, 432(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 15, 424(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 14, 416(1) # 8-byte Folded Reload
-; CHECK-NEXT:    addi 1, 1, 576
+; CHECK-NEXT:    lxv 63, 384(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 62, 368(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 61, 352(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 60, 336(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 59, 320(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 58, 304(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 57, 288(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 56, 272(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 55, 256(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 54, 240(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 53, 224(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 52, 208(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lfd 31, 584(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 30, 576(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 29, 568(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 28, 560(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 27, 552(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 26, 544(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 31, 536(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 30, 528(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 29, 520(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 28, 512(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 27, 504(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 26, 496(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 25, 488(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 24, 480(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 23, 472(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 22, 464(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 21, 456(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 20, 448(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 19, 440(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 18, 432(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 17, 424(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 16, 416(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 15, 408(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 14, 400(1) # 8-byte Folded Reload
+; CHECK-NEXT:    addi 1, 1, 592
 ; CHECK-NEXT:    blr
 entry:
   %_val_l_ = load i32, i32* %.l, align 4

diff  --git a/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll b/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll
index ac922d867354..3616fb6036b7 100644
--- a/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll
@@ -26,34 +26,34 @@ define dso_local signext i32 @test_FI_elim([40 x i8]* noalias nocapture derefere
 ; CHECK-NEXT:    stdu r1, -80(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    lxv v2, 0(r3)
 ; CHECK-NEXT:    mr r9, r6
 ; CHECK-NEXT:    mr r6, r5
-; CHECK-NEXT:    li r5, 3
-; CHECK-NEXT:    li r10, -127
-; CHECK-NEXT:    lxv v2, 0(r3)
-; CHECK-NEXT:    stb r5, 0(0)
-; CHECK-NEXT:    stb r10, 0(r3)
-; CHECK-NEXT:    stb r5, 0(r3)
-; CHECK-NEXT:    lbz r5, 2(r7)
-; CHECK-NEXT:    li r2, 1
-; CHECK-NEXT:    stb r10, 0(r3)
-; CHECK-NEXT:    pstxv v2, 64(r1), 0
-; CHECK-NEXT:    vaddudm v3, v2, v2
-; CHECK-NEXT:    mfvsrd r11, v2
 ; CHECK-NEXT:    li r0, 4
+; CHECK-NEXT:    li r11, 3
+; CHECK-NEXT:    std r0, 0(r3)
+; CHECK-NEXT:    stb r11, 0(0)
+; CHECK-NEXT:    li r12, -127
+; CHECK-NEXT:    stb r12, 0(r3)
+; CHECK-NEXT:    li r2, 1
+; CHECK-NEXT:    stb r11, 0(r3)
+; CHECK-NEXT:    stb r12, 0(r3)
 ; CHECK-NEXT:    stw r2, 0(r3)
+; CHECK-NEXT:    mfvsrd r5, v2
+; CHECK-NEXT:    vaddudm v3, v2, v2
+; CHECK-NEXT:    pstxv v2, 64(r1), 0
+; CHECK-NEXT:    neg r5, r5
+; CHECK-NEXT:    mfvsrd r10, v3
+; CHECK-NEXT:    std r5, 0(r3)
+; CHECK-NEXT:    lbz r5, 2(r7)
 ; CHECK-NEXT:    mr r7, r9
+; CHECK-NEXT:    neg r10, r10
+; CHECK-NEXT:    std r2, 0(r3)
 ; CHECK-NEXT:    std r0, 0(r3)
+; CHECK-NEXT:    std r10, 0(r3)
 ; CHECK-NEXT:    rlwinm r5, r5, 0, 27, 27
-; CHECK-NEXT:    mfvsrd r12, v3
-; CHECK-NEXT:    neg r11, r11
 ; CHECK-NEXT:    stb r5, 0(0)
 ; CHECK-NEXT:    lbz r5, 2(r8)
-; CHECK-NEXT:    neg r12, r12
-; CHECK-NEXT:    std r11, 0(r3)
-; CHECK-NEXT:    std r2, 0(r3)
-; CHECK-NEXT:    std r0, 0(r3)
-; CHECK-NEXT:    std r12, 0(r3)
 ; CHECK-NEXT:    rlwinm r5, r5, 0, 27, 27
 ; CHECK-NEXT:    stb r5, 0(r3)
 ; CHECK-NEXT:    li r5, 2
@@ -74,36 +74,36 @@ define dso_local signext i32 @test_FI_elim([40 x i8]* noalias nocapture derefere
 ; CHECK-BE-NEXT:    .cfi_def_cfa_offset 176
 ; CHECK-BE-NEXT:    .cfi_offset lr, 16
 ; CHECK-BE-NEXT:    .cfi_offset r30, -16
+; CHECK-BE-NEXT:    lxv v2, 0(r3)
 ; CHECK-BE-NEXT:    mr r9, r6
 ; CHECK-BE-NEXT:    mr r6, r5
-; CHECK-BE-NEXT:    li r5, 3
-; CHECK-BE-NEXT:    li r11, -127
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    li r0, 4
+; CHECK-BE-NEXT:    li r11, 3
 ; CHECK-BE-NEXT:    std r30, 160(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    pstxv v2, 144(r1), 0
-; CHECK-BE-NEXT:    stb r5, 0(0)
-; CHECK-BE-NEXT:    stb r11, 0(r3)
-; CHECK-BE-NEXT:    stb r5, 0(r3)
-; CHECK-BE-NEXT:    lbz r5, 2(r7)
-; CHECK-BE-NEXT:    vaddudm v3, v2, v2
-; CHECK-BE-NEXT:    mfvsrld r10, v2
+; CHECK-BE-NEXT:    std r0, 0(r3)
+; CHECK-BE-NEXT:    stb r11, 0(0)
+; CHECK-BE-NEXT:    li r12, -127
+; CHECK-BE-NEXT:    stb r12, 0(r3)
 ; CHECK-BE-NEXT:    li r30, 1
 ; CHECK-BE-NEXT:    stb r11, 0(r3)
-; CHECK-BE-NEXT:    li r0, 4
+; CHECK-BE-NEXT:    stb r12, 0(r3)
+; CHECK-BE-NEXT:    mfvsrld r5, v2
+; CHECK-BE-NEXT:    vaddudm v3, v2, v2
 ; CHECK-BE-NEXT:    stw r30, 0(r3)
+; CHECK-BE-NEXT:    pstxv v2, 144(r1), 0
+; CHECK-BE-NEXT:    mfvsrld r10, v3
+; CHECK-BE-NEXT:    neg r5, r5
+; CHECK-BE-NEXT:    std r5, 0(r3)
+; CHECK-BE-NEXT:    lbz r5, 2(r7)
 ; CHECK-BE-NEXT:    mr r7, r9
+; CHECK-BE-NEXT:    neg r10, r10
+; CHECK-BE-NEXT:    std r30, 0(r3)
 ; CHECK-BE-NEXT:    std r0, 0(r3)
+; CHECK-BE-NEXT:    std r10, 0(r3)
 ; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 27, 27
-; CHECK-BE-NEXT:    mfvsrld r12, v3
 ; CHECK-BE-NEXT:    stb r5, 0(0)
 ; CHECK-BE-NEXT:    lbz r5, 2(r8)
-; CHECK-BE-NEXT:    neg r10, r10
-; CHECK-BE-NEXT:    neg r12, r12
-; CHECK-BE-NEXT:    std r10, 0(r3)
-; CHECK-BE-NEXT:    std r30, 0(r3)
-; CHECK-BE-NEXT:    std r0, 0(r3)
 ; CHECK-BE-NEXT:    rlwinm r5, r5, 0, 27, 27
-; CHECK-BE-NEXT:    std r12, 0(r3)
 ; CHECK-BE-NEXT:    stb r5, 0(r3)
 ; CHECK-BE-NEXT:    li r5, 2
 ; CHECK-BE-NEXT:    stw r5, 0(r3)

diff  --git a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
index cb7bf5124816..27b7d2d47ebe 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
@@ -82,17 +82,17 @@ define dso_local double @P10_Spill_CR_EQ(%2* %arg) local_unnamed_addr #0 {
 ; CHECK-NEXT:  .LBB0_12: # %bb40
 ; CHECK-NEXT:    mcrf cr6, cr4
 ; CHECK-NEXT:    crnot 4*cr4+eq, 4*cr4+eq
-; CHECK-NEXT:    crand 4*cr4+lt, 4*cr7+lt, 4*cr2+un
+; CHECK-NEXT:    crand 4*cr4+gt, 4*cr7+lt, 4*cr2+un
+; CHECK-NEXT:    crand 4*cr4+lt, 4*cr1+lt, 4*cr5+lt
 ; CHECK-NEXT:    # implicit-def: $x6
-; CHECK-NEXT:    crand 4*cr4+gt, 4*cr1+lt, 4*cr5+lt
-; CHECK-NEXT:    bc 4, 4*cr4+gt, .LBB0_14
+; CHECK-NEXT:    bc 4, 4*cr4+lt, .LBB0_14
 ; CHECK-NEXT:  # %bb.13: # %bb48
 ; CHECK-NEXT:    ld r6, 0(r3)
 ; CHECK-NEXT:  .LBB0_14: # %bb50
 ; CHECK-NEXT:    cmpwi r5, -1
 ; CHECK-NEXT:    crand 4*cr4+un, 4*cr3+lt, 4*cr4+eq
 ; CHECK-NEXT:    # implicit-def: $r5
-; CHECK-NEXT:    bc 4, 4*cr4+lt, .LBB0_16
+; CHECK-NEXT:    bc 4, 4*cr4+gt, .LBB0_16
 ; CHECK-NEXT:  # %bb.15: # %bb52
 ; CHECK-NEXT:    lwz r5, 0(r3)
 ; CHECK-NEXT:  .LBB0_16: # %bb54
@@ -111,18 +111,18 @@ define dso_local double @P10_Spill_CR_EQ(%2* %arg) local_unnamed_addr #0 {
 ; CHECK-NEXT:    cmpwi cr1, r5, 1
 ; CHECK-NEXT:    crand lt, gt, 4*cr4+eq
 ; CHECK-NEXT:    # implicit-def: $x5
+; CHECK-NEXT:    crand 4*cr4+eq, 4*cr3+eq, 4*cr4+eq
 ; CHECK-NEXT:    setnbc r8, 4*cr5+gt
 ; CHECK-NEXT:    crand 4*cr5+lt, 4*cr2+eq, 4*cr5+lt
-; CHECK-NEXT:    crand 4*cr4+eq, 4*cr3+eq, 4*cr4+eq
-; CHECK-NEXT:    crand gt, 4*cr1+lt, 4*cr4+lt
+; CHECK-NEXT:    crand gt, 4*cr1+lt, 4*cr4+gt
 ; CHECK-NEXT:    stw r8, -24(r1)
 ; CHECK-NEXT:    setnbc r8, 4*cr5+lt
 ; CHECK-NEXT:    cmpwi cr5, r7, 1
 ; CHECK-NEXT:    stw r8, -28(r1)
-; CHECK-NEXT:    lwz r6, 92(r6)
 ; CHECK-NEXT:    crand eq, 4*cr5+lt, 4*cr4+un
+; CHECK-NEXT:    lwz r6, 92(r6)
 ; CHECK-NEXT:    cmpwi cr6, r6, 1
-; CHECK-NEXT:    crand un, 4*cr6+lt, 4*cr4+gt
+; CHECK-NEXT:    crand un, 4*cr6+lt, 4*cr4+lt
 ; CHECK-NEXT:    bc 4, gt, .LBB0_20
 ; CHECK-NEXT:  # %bb.19: # %bb68
 ; CHECK-NEXT:    ld r5, 0(r3)
@@ -134,8 +134,8 @@ define dso_local double @P10_Spill_CR_EQ(%2* %arg) local_unnamed_addr #0 {
 ; CHECK-NEXT:    rlwimi r6, r7, 12, 20, 20
 ; CHECK-NEXT:    mtocrf 4, r6
 ; CHECK-NEXT:    ld r6, 0(r3)
-; CHECK-NEXT:    crandc 4*cr5+gt, lt, 4*cr3+eq
 ; CHECK-NEXT:    lwz r8, -16(r1)
+; CHECK-NEXT:    crandc 4*cr5+gt, lt, 4*cr3+eq
 ; CHECK-NEXT:    # implicit-def: $cr5eq
 ; CHECK-NEXT:    crandc 4*cr5+lt, 4*cr5+lt, 4*cr7+eq
 ; CHECK-NEXT:    mfocrf r7, 4
@@ -156,32 +156,32 @@ define dso_local double @P10_Spill_CR_EQ(%2* %arg) local_unnamed_addr #0 {
 ; CHECK-NEXT:    setbc r5, 4*cr5+un
 ; CHECK-NEXT:    # implicit-def: $cr5un
 ; CHECK-NEXT:    mfocrf r8, 4
-; CHECK-NEXT:    add r5, r7, r5
 ; CHECK-NEXT:    rlwimi r8, r9, 9, 23, 23
 ; CHECK-NEXT:    lwz r9, -4(r1)
+; CHECK-NEXT:    add r5, r7, r5
 ; CHECK-NEXT:    mtocrf 4, r8
-; CHECK-NEXT:    mtocrf 128, r9
-; CHECK-NEXT:    lwz r9, -8(r1)
 ; CHECK-NEXT:    isel r3, 0, r3, 4*cr5+lt
 ; CHECK-NEXT:    setbc r8, 4*cr5+un
 ; CHECK-NEXT:    isel r6, 0, r6, 4*cr5+gt
 ; CHECK-NEXT:    isel r4, 0, r4, 4*cr5+eq
+; CHECK-NEXT:    mtocrf 128, r9
+; CHECK-NEXT:    lwz r9, -8(r1)
 ; CHECK-NEXT:    add r5, r8, r5
 ; CHECK-NEXT:    iseleq r3, 0, r3
 ; CHECK-NEXT:    mtfprd f0, r5
+; CHECK-NEXT:    xscvsxddp f0, f0
 ; CHECK-NEXT:    mtocrf 128, r9
 ; CHECK-NEXT:    lwz r9, -12(r1)
 ; CHECK-NEXT:    lwz r12, 8(r1)
-; CHECK-NEXT:    xscvsxddp f0, f0
 ; CHECK-NEXT:    iseleq r6, 0, r6
-; CHECK-NEXT:    mtocrf 128, r9
 ; CHECK-NEXT:    add r3, r6, r3
+; CHECK-NEXT:    xsmuldp f0, f0, f2
+; CHECK-NEXT:    mtocrf 128, r9
 ; CHECK-NEXT:    mtocrf 32, r12
 ; CHECK-NEXT:    mtocrf 16, r12
 ; CHECK-NEXT:    mtocrf 8, r12
 ; CHECK-NEXT:    iseleq r4, 0, r4
 ; CHECK-NEXT:    add r3, r4, r3
-; CHECK-NEXT:    xsmuldp f0, f0, f2
 ; CHECK-NEXT:    mtfprd f1, r3
 ; CHECK-NEXT:    xscvsxddp f1, f1
 ; CHECK-NEXT:    xsadddp f1, f0, f1

diff  --git a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
index b4166bde22ab..4c4d9c9a046d 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
@@ -17,10 +17,9 @@
 
 define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
 ; CHECK-LABEL: P10_Spill_CR_GT:
-; CHECK:         .localentry P10_Spill_CR_GT, 1
-; CHECK-NEXT:  # %bb.0: # %bb
-; CHECK-NEXT:    mflr r0
+; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    mfcr r12
+; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stw r12, 8(r1)
 ; CHECK-NEXT:    stdu r1, -64(r1)
@@ -49,8 +48,8 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
 ; CHECK-NEXT:  .LBB0_1: # %bb43
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    bl call_1@notoc
-; CHECK-NEXT:    li r4, 0
 ; CHECK-NEXT:    setnbc r3, 4*cr4+eq
+; CHECK-NEXT:    li r4, 0
 ; CHECK-NEXT:    stb r4, 0(r3)
 ; CHECK-NEXT:    li r4, 0
 ; CHECK-NEXT:    .p2align 4
@@ -210,8 +209,8 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
 ;
 ; CHECK-BE-LABEL: P10_Spill_CR_GT:
 ; CHECK-BE:       # %bb.0: # %bb
-; CHECK-BE-NEXT:    mflr r0
 ; CHECK-BE-NEXT:    mfcr r12
+; CHECK-BE-NEXT:    mflr r0
 ; CHECK-BE-NEXT:    std r0, 16(r1)
 ; CHECK-BE-NEXT:    stw r12, 8(r1)
 ; CHECK-BE-NEXT:    stdu r1, -144(r1)
@@ -242,8 +241,8 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
 ; CHECK-BE-NEXT:    #
 ; CHECK-BE-NEXT:    bl call_1
 ; CHECK-BE-NEXT:    nop
-; CHECK-BE-NEXT:    li r4, 0
 ; CHECK-BE-NEXT:    setnbc r3, 4*cr4+eq
+; CHECK-BE-NEXT:    li r4, 0
 ; CHECK-BE-NEXT:    stb r4, 0(r3)
 ; CHECK-BE-NEXT:    li r4, 0
 ; CHECK-BE-NEXT:    .p2align 4

diff  --git a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
index b19f1903336b..6c6b26bc24e4 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
@@ -25,10 +25,9 @@ declare void @call_4() local_unnamed_addr
 
 define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
 ; CHECK-LABEL: P10_Spill_CR_LT:
-; CHECK:         .localentry P10_Spill_CR_LT, 1
-; CHECK-NEXT:  # %bb.0: # %bb
-; CHECK-NEXT:    mflr r0
+; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    mfcr r12
+; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stw r12, 8(r1)
 ; CHECK-NEXT:    stdu r1, -80(r1)
@@ -90,8 +89,8 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
 ;
 ; CHECK-BE-LABEL: P10_Spill_CR_LT:
 ; CHECK-BE:       # %bb.0: # %bb
-; CHECK-BE-NEXT:    mflr r0
 ; CHECK-BE-NEXT:    mfcr r12
+; CHECK-BE-NEXT:    mflr r0
 ; CHECK-BE-NEXT:    std r0, 16(r1)
 ; CHECK-BE-NEXT:    stw r12, 8(r1)
 ; CHECK-BE-NEXT:    stdu r1, -160(r1)

diff  --git a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
index 91424e82e99f..64570379ea0c 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
@@ -36,8 +36,8 @@ declare i8 @call_6(%1*, i32) local_unnamed_addr
 define dso_local void @P10_Spill_CR_UN(%2* %arg, %1* %arg1, i32 %arg2) local_unnamed_addr {
 ; CHECK-LABEL: P10_Spill_CR_UN:
 ; CHECK:       # %bb.0: # %bb
-; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    mfcr r12
+; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stw r12, 8(r1)
 ; CHECK-NEXT:    stdu r1, -224(r1)
@@ -84,8 +84,8 @@ define dso_local void @P10_Spill_CR_UN(%2* %arg, %1* %arg1, i32 %arg2) local_unn
 ; CHECK-NEXT:  # %bb.4: # %bb37
 ; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB0_14
 ; CHECK-NEXT:  .LBB0_5: # %bb42
-; CHECK-NEXT:    li r4, 0
 ; CHECK-NEXT:    paddi r3, 0, global_1@PCREL, 1
+; CHECK-NEXT:    li r4, 0
 ; CHECK-NEXT:    cmpwi r28, 0
 ; CHECK-NEXT:    isel r3, r3, r4, 4*cr2+gt
 ; CHECK-NEXT:    crnot 4*cr2+lt, eq
@@ -145,19 +145,19 @@ define dso_local void @P10_Spill_CR_UN(%2* %arg, %1* %arg1, i32 %arg2) local_unn
 ; CHECK-NEXT:    # implicit-def: $r3
 ; CHECK-NEXT:  .LBB0_15: # %bb50
 ; CHECK-NEXT:    li r4, 0
+; CHECK-NEXT:    xxspltidp vs3, -1082130432
+; CHECK-NEXT:    xxspltidp vs4, -1082130432
 ; CHECK-NEXT:    extsh r9, r3
 ; CHECK-NEXT:    extsw r6, r28
 ; CHECK-NEXT:    li r5, 0
-; CHECK-NEXT:    xxspltidp vs3, -1082130432
-; CHECK-NEXT:    xxspltidp vs4, -1082130432
+; CHECK-NEXT:    li r7, 0
 ; CHECK-NEXT:    std r30, 104(r1)
 ; CHECK-NEXT:    std r29, 96(r1)
-; CHECK-NEXT:    li r7, 0
 ; CHECK-NEXT:    li r8, 0
 ; CHECK-NEXT:    li r10, 0
+; CHECK-NEXT:    xxlxor f1, f1, f1
 ; CHECK-NEXT:    std r4, 152(r1)
 ; CHECK-NEXT:    li r4, -1
-; CHECK-NEXT:    xxlxor f1, f1, f1
 ; CHECK-NEXT:    std r4, 112(r1)
 ; CHECK-NEXT:    li r4, 1024
 ; CHECK-NEXT:    bl call_4@notoc
@@ -182,8 +182,8 @@ define dso_local void @P10_Spill_CR_UN(%2* %arg, %1* %arg1, i32 %arg2) local_unn
 ;
 ; CHECK-BE-LABEL: P10_Spill_CR_UN:
 ; CHECK-BE:       # %bb.0: # %bb
-; CHECK-BE-NEXT:    mflr r0
 ; CHECK-BE-NEXT:    mfcr r12
+; CHECK-BE-NEXT:    mflr r0
 ; CHECK-BE-NEXT:    std r0, 16(r1)
 ; CHECK-BE-NEXT:    stw r12, 8(r1)
 ; CHECK-BE-NEXT:    stdu r1, -240(r1)
@@ -303,19 +303,19 @@ define dso_local void @P10_Spill_CR_UN(%2* %arg, %1* %arg1, i32 %arg2) local_unn
 ; CHECK-BE-NEXT:    # implicit-def: $r3
 ; CHECK-BE-NEXT:  .LBB0_15: # %bb50
 ; CHECK-BE-NEXT:    li r4, 0
+; CHECK-BE-NEXT:    xxspltidp vs3, -1082130432
+; CHECK-BE-NEXT:    xxspltidp vs4, -1082130432
 ; CHECK-BE-NEXT:    extsh r9, r3
 ; CHECK-BE-NEXT:    extsw r6, r28
 ; CHECK-BE-NEXT:    li r5, 0
-; CHECK-BE-NEXT:    xxspltidp vs3, -1082130432
-; CHECK-BE-NEXT:    xxspltidp vs4, -1082130432
+; CHECK-BE-NEXT:    li r7, 0
 ; CHECK-BE-NEXT:    std r30, 120(r1)
 ; CHECK-BE-NEXT:    std r29, 112(r1)
-; CHECK-BE-NEXT:    li r7, 0
 ; CHECK-BE-NEXT:    li r8, 0
 ; CHECK-BE-NEXT:    li r10, 0
+; CHECK-BE-NEXT:    xxlxor f1, f1, f1
 ; CHECK-BE-NEXT:    std r4, 168(r1)
 ; CHECK-BE-NEXT:    li r4, -1
-; CHECK-BE-NEXT:    xxlxor f1, f1, f1
 ; CHECK-BE-NEXT:    std r4, 128(r1)
 ; CHECK-BE-NEXT:    li r4, 1024
 ; CHECK-BE-NEXT:    bl call_4

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
index ae7db80de199..37cf078f53bf 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
@@ -109,10 +109,10 @@ define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i3
 ; CHECK-S-NEXT:    sub r29, r8, r9
 ; CHECK-S-NEXT:    add r9, r10, r9
 ; CHECK-S-NEXT:    sub r10, r10, r3
+; CHECK-S-NEXT:    mullw r3, r4, r3
 ; CHECK-S-NEXT:    sub r12, r4, r5
 ; CHECK-S-NEXT:    add r0, r6, r5
 ; CHECK-S-NEXT:    sub r2, r6, r7
-; CHECK-S-NEXT:    mullw r3, r4, r3
 ; CHECK-S-NEXT:    add r30, r8, r7
 ; CHECK-S-NEXT:    mullw r3, r3, r11
 ; CHECK-S-NEXT:    mullw r3, r3, r5

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
index 1eb48991db70..32b8a7486a7e 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
@@ -353,9 +353,9 @@ define dso_local signext i32 @IndirectCall3(i32 signext %a, i32 signext %b, i32
 ; CHECK-S-NEXT:    stdu r1, -32(r1)
 ; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-S-NEXT:    .cfi_offset lr, 16
-; CHECK-S-NEXT:    mtctr r5
 ; CHECK-S-NEXT:    add r3, r4, r3
 ; CHECK-S-NEXT:    mr r12, r5
+; CHECK-S-NEXT:    mtctr r5
 ; CHECK-S-NEXT:    extsw r3, r3
 ; CHECK-S-NEXT:    bctrl
 ; CHECK-S-NEXT:    plwz r4, globalVar@PCREL(0), 1
@@ -383,8 +383,8 @@ define dso_local signext i32 @IndirectCallNoGlobal(i32 signext %a, i32 signext %
 ; CHECK-S-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-S-NEXT:    std r0, 16(r1)
 ; CHECK-S-NEXT:    stdu r1, -48(r1)
-; CHECK-S-NEXT:    mtctr r5
 ; CHECK-S-NEXT:    mr r12, r5
+; CHECK-S-NEXT:    mtctr r5
 ; CHECK-S-NEXT:    mr r30, r4
 ; CHECK-S-NEXT:    bctrl
 ; CHECK-S-NEXT:    add r3, r3, r30

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll b/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
index 3a742588d23b..f1a05c105099 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
@@ -20,8 +20,8 @@
 define dso_local signext i32 @jumptable(i32 signext %param) {
 ; CHECK-R-LABEL: jumptable:
 ; CHECK-R:       # %bb.1: # %entry
-; CHECK-R-NEXT:    rldic r4, r4
 ; CHECK-R-NEXT:    paddi r5, 0, .LJTI0_0@PCREL, 1
+; CHECK-R-NEXT:    rldic r4, r4
 ; CHECK-R-NEXT:    lwax r4, r4, r5
 ; CHECK-R-NEXT:    add r4, r4, r5
 ; CHECK-R-NEXT:    mtctr r4
@@ -35,8 +35,8 @@ define dso_local signext i32 @jumptable(i32 signext %param) {
 ; CHECK-A-LE-NEXT:    bctr
 ; CHECK-A-BE-LABEL: jumptable:
 ; CHECK-A-BE:       # %bb.1: # %entry
-; CHECK-A-BE-NEXT:    rldic r4, r4
 ; CHECK-A-BE-NEXT:    paddi r5, 0, .LJTI0_0@PCREL, 1
+; CHECK-A-BE-NEXT:    rldic r4, r4
 ; CHECK-A-BE-NEXT:    lwax r4, r4, r5
 ; CHECK-A-BE-NEXT:    mtctr r4
 ; CHECK-A-BE-NEXT:    bctr

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
index 583e7950b6d5..1982332ffd5c 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
@@ -34,8 +34,7 @@ declare signext i32 @Function(...)
 
 define dso_local void @TailCallLocalFuncPtr() local_unnamed_addr {
 ; CHECK-LABEL: TailCallLocalFuncPtr:
-; CHECK:         .localentry TailCallLocalFuncPtr, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r12, FuncLocal@PCREL(0), 1
 ; CHECK-NEXT:    mtctr r12
 ; CHECK-NEXT:    bctr
@@ -48,8 +47,7 @@ entry:
 
 define dso_local void @TailCallExtrnFuncPtr() local_unnamed_addr {
 ; CHECK-LABEL: TailCallExtrnFuncPtr:
-; CHECK:         .localentry TailCallExtrnFuncPtr, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, Func@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel0:
 ; CHECK-NEXT:    .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
@@ -65,8 +63,7 @@ entry:
 
 define dso_local signext i32 @TailCallParamFuncPtr(i32 (...)* nocapture %passedfunc) local_unnamed_addr {
 ; CHECK-LABEL: TailCallParamFuncPtr:
-; CHECK:         .localentry TailCallParamFuncPtr, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mtctr r3
 ; CHECK-NEXT:    mr r12, r3
 ; CHECK-NEXT:    bctr
@@ -79,8 +76,7 @@ entry:
 
 define dso_local signext i32 @NoTailIndirectCall(i32 (...)* nocapture %passedfunc, i32 signext %a) local_unnamed_addr {
 ; CHECK-LABEL: NoTailIndirectCall:
-; CHECK:         .localentry NoTailIndirectCall, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset lr, 16
@@ -88,8 +84,8 @@ define dso_local signext i32 @NoTailIndirectCall(i32 (...)* nocapture %passedfun
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -48(r1)
-; CHECK-NEXT:    mtctr r3
 ; CHECK-NEXT:    mr r12, r3
+; CHECK-NEXT:    mtctr r3
 ; CHECK-NEXT:    mr r30, r4
 ; CHECK-NEXT:    bctrl
 ; CHECK-NEXT:    add r3, r3, r30
@@ -108,8 +104,7 @@ entry:
 
 define dso_local signext i32 @TailCallDirect() local_unnamed_addr {
 ; CHECK-LABEL: TailCallDirect:
-; CHECK:         .localentry TailCallDirect, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    b Function@notoc
 ; CHECK-NEXT:    #TC_RETURNd8 Function@notoc 0
 entry:
@@ -119,8 +114,7 @@ entry:
 
 define dso_local signext i32 @NoTailCallDirect(i32 signext %a) local_unnamed_addr {
 ; CHECK-LABEL: NoTailCallDirect:
-; CHECK:         .localentry NoTailCallDirect, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset lr, 16
@@ -145,8 +139,7 @@ entry:
 
 define dso_local signext i32 @TailCallDirectLocal() local_unnamed_addr {
 ; CHECK-LABEL: TailCallDirectLocal:
-; CHECK:         .localentry TailCallDirectLocal, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    b LocalFunction@notoc
 ; CHECK-NEXT:    #TC_RETURNd8 LocalFunction@notoc 0
 entry:
@@ -156,8 +149,7 @@ entry:
 
 define dso_local signext i32 @NoTailCallDirectLocal(i32 signext %a) local_unnamed_addr {
 ; CHECK-LABEL: NoTailCallDirectLocal:
-; CHECK:         .localentry NoTailCallDirectLocal, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset lr, 16
@@ -182,8 +174,7 @@ entry:
 
 define dso_local signext i32 @TailCallAbs() local_unnamed_addr {
 ; CHECK-LABEL: TailCallAbs:
-; CHECK:         .localentry TailCallAbs, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    li r3, 400
 ; CHECK-NEXT:    li r12, 400
 ; CHECK-NEXT:    mtctr r3
@@ -196,8 +187,7 @@ entry:
 
 define dso_local signext i32 @NoTailCallAbs(i32 signext %a) local_unnamed_addr {
 ; CHECK-LABEL: NoTailCallAbs:
-; CHECK:         .localentry NoTailCallAbs, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset lr, 16
@@ -227,8 +217,7 @@ entry:
 ; This function should be tail called and not inlined.
 define internal fastcc signext i32 @LocalFunction() unnamed_addr #0 {
 ; CHECK-LABEL: LocalFunction:
-; CHECK:         .localentry LocalFunction, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    li r3, 42
 ; CHECK-NEXT:    #NO_APP

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
index d4942d6ecd0b..49f8b43f7a82 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
@@ -16,11 +16,11 @@ define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unn
 ; CHECK-NEXT:    lxv v4, 0(0)
 ; CHECK-NEXT:    xxlxor v0, v0, v0
 ; CHECK-NEXT:    xxlxor v1, v1, v1
-; CHECK-NEXT:    stfd f14, -144(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f15, -136(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    xxlxor v2, v2, v2
 ; CHECK-NEXT:    li r6, 1
 ; CHECK-NEXT:    li r4, 16
+; CHECK-NEXT:    stfd f14, -144(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd f15, -136(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    extswsli r3, r3, 3
 ; CHECK-NEXT:    xvmaddadp v1, v4, v1
 ; CHECK-NEXT:    lxvdsx v5, 0, r3
@@ -29,30 +29,30 @@ define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unn
 ; CHECK-NEXT:  .LBB0_1: # %bb9
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi r6, r6, 2
-; CHECK-NEXT:    lxv vs1, -64(r5)
-; CHECK-NEXT:    lxv vs2, -16(r5)
 ; CHECK-NEXT:    lxv vs0, 16(0)
+; CHECK-NEXT:    lxv vs1, -64(r5)
+; CHECK-NEXT:    xxlxor v7, v7, v7
 ; CHECK-NEXT:    vmr v9, v0
 ; CHECK-NEXT:    xxlxor v10, v10, v10
-; CHECK-NEXT:    xxlxor v7, v7, v7
 ; CHECK-NEXT:    mulld r6, r6, r3
-; CHECK-NEXT:    xvmaddadp v9, vs1, v2
-; CHECK-NEXT:    xxlxor v8, v8, v8
-; CHECK-NEXT:    xvmaddadp v10, vs2, v10
 ; CHECK-NEXT:    xvmaddadp v7, vs0, v5
 ; CHECK-NEXT:    xvmuldp v6, vs0, v2
+; CHECK-NEXT:    lxv vs0, -16(r5)
+; CHECK-NEXT:    xvmaddadp v9, vs1, v2
+; CHECK-NEXT:    xxlxor v8, v8, v8
 ; CHECK-NEXT:    xvmaddadp v7, v2, v2
 ; CHECK-NEXT:    xvmaddadp v6, v2, v2
 ; CHECK-NEXT:    lxvdsx v14, r6, r4
-; CHECK-NEXT:    xvmaddadp v8, vs1, v8
 ; CHECK-NEXT:    li r6, 0
-; CHECK-NEXT:    xvmuldp v11, vs2, v14
+; CHECK-NEXT:    xvmaddadp v8, vs1, v8
+; CHECK-NEXT:    xvmaddadp v10, vs0, v10
 ; CHECK-NEXT:    xvmuldp v3, vs1, v14
+; CHECK-NEXT:    xvmuldp v11, vs0, v14
 ; CHECK-NEXT:    xvmuldp vs5, v14, v2
 ; CHECK-NEXT:    xvmuldp v13, v4, v14
+; CHECK-NEXT:    xxlor vs0, v2, v2
 ; CHECK-NEXT:    vmr v12, v2
 ; CHECK-NEXT:    xxlor vs14, v10, v10
-; CHECK-NEXT:    xxlor vs0, v2, v2
 ; CHECK-NEXT:    xxlor vs4, v2, v2
 ; CHECK-NEXT:    # kill: def $vsrp2 killed $vsrp2 def $uacc1
 ; CHECK-NEXT:    xxlor vs6, v6, v6
@@ -60,11 +60,11 @@ define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unn
 ; CHECK-NEXT:    xxlor vs8, v12, v12
 ; CHECK-NEXT:    xxlor vs9, v13, v13
 ; CHECK-NEXT:    vmr v12, v1
-; CHECK-NEXT:    xxlor vs15, v11, v11
-; CHECK-NEXT:    vmr v10, v2
 ; CHECK-NEXT:    xxlor vs1, v3, v3
 ; CHECK-NEXT:    xxlor vs2, v8, v8
 ; CHECK-NEXT:    xxlor vs3, v9, v9
+; CHECK-NEXT:    xxlor vs15, v11, v11
+; CHECK-NEXT:    vmr v10, v2
 ; CHECK-NEXT:    xxlor vs10, v12, v12
 ; CHECK-NEXT:    xxlor vs11, v13, v13
 ; CHECK-NEXT:    xxmtacc acc1
@@ -72,8 +72,8 @@ define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unn
 ; CHECK-NEXT:    xxlor vs13, v11, v11
 ; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    xxmtacc acc2
-; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; CHECK-NEXT:    xxmtacc acc3
+; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc1, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc2, vsp34, vs0
 ; CHECK-NEXT:    xvf64gerpp acc3, vsp34, vs0
@@ -117,11 +117,11 @@ define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unn
 ; TRACKLIVE-NEXT:    lxv v4, 0(0)
 ; TRACKLIVE-NEXT:    xxlxor v0, v0, v0
 ; TRACKLIVE-NEXT:    xxlxor v1, v1, v1
-; TRACKLIVE-NEXT:    stfd f14, -144(r1) # 8-byte Folded Spill
-; TRACKLIVE-NEXT:    stfd f15, -136(r1) # 8-byte Folded Spill
 ; TRACKLIVE-NEXT:    xxlxor v2, v2, v2
 ; TRACKLIVE-NEXT:    li r6, 1
 ; TRACKLIVE-NEXT:    li r4, 16
+; TRACKLIVE-NEXT:    stfd f14, -144(r1) # 8-byte Folded Spill
+; TRACKLIVE-NEXT:    stfd f15, -136(r1) # 8-byte Folded Spill
 ; TRACKLIVE-NEXT:    extswsli r3, r3, 3
 ; TRACKLIVE-NEXT:    xvmaddadp v1, v4, v1
 ; TRACKLIVE-NEXT:    lxvdsx v5, 0, r3
@@ -131,33 +131,33 @@ define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unn
 ; TRACKLIVE-NEXT:    #
 ; TRACKLIVE-NEXT:    addi r6, r6, 2
 ; TRACKLIVE-NEXT:    lxv vs0, 16(0)
-; TRACKLIVE-NEXT:    xxlxor vs7, vs7, vs7
 ; TRACKLIVE-NEXT:    lxv vs1, -64(r5)
-; TRACKLIVE-NEXT:    lxv vs4, -16(r5)
-; TRACKLIVE-NEXT:    xxlxor vs12, vs12, vs12
+; TRACKLIVE-NEXT:    xxlxor vs7, vs7, vs7
 ; TRACKLIVE-NEXT:    xxlor vs3, v0, v0
 ; TRACKLIVE-NEXT:    xxlxor vs2, vs2, vs2
+; TRACKLIVE-NEXT:    xxlxor vs12, vs12, vs12
 ; TRACKLIVE-NEXT:    mulld r6, r6, r3
 ; TRACKLIVE-NEXT:    xxlor vs10, v2, v2
+; TRACKLIVE-NEXT:    xxlor vs4, v2, v2
 ; TRACKLIVE-NEXT:    xxlor vs8, vs10, vs10
 ; TRACKLIVE-NEXT:    xxlor vs10, v1, v1
 ; TRACKLIVE-NEXT:    xvmaddadp vs7, vs0, v5
 ; TRACKLIVE-NEXT:    xvmuldp vs6, vs0, v2
-; TRACKLIVE-NEXT:    xvmaddadp vs12, vs4, vs12
+; TRACKLIVE-NEXT:    lxv vs0, -16(r5)
 ; TRACKLIVE-NEXT:    xvmaddadp vs3, vs1, v2
 ; TRACKLIVE-NEXT:    xvmaddadp vs2, vs1, vs2
-; TRACKLIVE-NEXT:    xxlor vs0, v2, v2
 ; TRACKLIVE-NEXT:    lxvdsx v6, r6, r4
 ; TRACKLIVE-NEXT:    li r6, 0
 ; TRACKLIVE-NEXT:    xvmaddadp vs7, v2, v2
 ; TRACKLIVE-NEXT:    xvmaddadp vs6, v2, v2
-; TRACKLIVE-NEXT:    xxlor vs14, vs12, vs12
-; TRACKLIVE-NEXT:    xxlor vs12, v2, v2
+; TRACKLIVE-NEXT:    xvmaddadp vs12, vs0, vs12
 ; TRACKLIVE-NEXT:    xvmuldp v3, vs1, v6
 ; TRACKLIVE-NEXT:    xvmuldp vs11, v4, v6
-; TRACKLIVE-NEXT:    xvmuldp vs13, vs4, v6
+; TRACKLIVE-NEXT:    xvmuldp vs13, vs0, v6
 ; TRACKLIVE-NEXT:    xvmuldp vs5, v6, v2
-; TRACKLIVE-NEXT:    xxlor vs4, v2, v2
+; TRACKLIVE-NEXT:    xxlor vs0, v2, v2
+; TRACKLIVE-NEXT:    xxlor vs14, vs12, vs12
+; TRACKLIVE-NEXT:    xxlor vs12, v2, v2
 ; TRACKLIVE-NEXT:    xxlor vs1, v3, v3
 ; TRACKLIVE-NEXT:    xxlor vs9, vs11, vs11
 ; TRACKLIVE-NEXT:    xxlor vs15, vs13, vs13

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
index 5d108b313f8f..98791c6f2316 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
@@ -66,9 +66,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; LE-P10-NEXT:    clrldi r3, r3, 32
 ; LE-P10-NEXT:    addi r1, r1, 64
 ; LE-P10-NEXT:    ld r0, 16(r1)
-; LE-P10-NEXT:    mtlr r0
 ; LE-P10-NEXT:    hashchk r0, -24(r1)
 ; LE-P10-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    mtlr r0
 ; LE-P10-NEXT:    blr
 ;
 ; LE-P9-LABEL: caller:
@@ -189,8 +189,8 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-P10-NEXT:    clrldi r3, r3, 32
 ; BE-P10-NEXT:    addi r1, r1, 144
 ; BE-P10-NEXT:    ld r0, 16(r1)
-; BE-P10-NEXT:    mtlr r0
 ; BE-P10-NEXT:    hashchk r0, -24(r1)
+; BE-P10-NEXT:    mtlr r0
 ; BE-P10-NEXT:    blr
 ;
 ; BE-P9-LABEL: caller:
@@ -244,9 +244,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; LE-P10-PRIV-NEXT:    clrldi r3, r3, 32
 ; LE-P10-PRIV-NEXT:    addi r1, r1, 64
 ; LE-P10-PRIV-NEXT:    ld r0, 16(r1)
-; LE-P10-PRIV-NEXT:    mtlr r0
 ; LE-P10-PRIV-NEXT:    hashchkp r0, -24(r1)
 ; LE-P10-PRIV-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    mtlr r0
 ; LE-P10-PRIV-NEXT:    blr
 ;
 ; LE-P9-PRIV-LABEL: caller:
@@ -302,8 +302,8 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-P10-PRIV-NEXT:    clrldi r3, r3, 32
 ; BE-P10-PRIV-NEXT:    addi r1, r1, 144
 ; BE-P10-PRIV-NEXT:    ld r0, 16(r1)
-; BE-P10-PRIV-NEXT:    mtlr r0
 ; BE-P10-PRIV-NEXT:    hashchkp r0, -24(r1)
+; BE-P10-PRIV-NEXT:    mtlr r0
 ; BE-P10-PRIV-NEXT:    blr
 ;
 ; BE-P9-PRIV-LABEL: caller:
@@ -365,30 +365,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
 ; LE-P10-NEXT:    lwz r4, 12(r3)
 ; LE-P10-NEXT:    std r14, 256(r1) # 8-byte Folded Spill
 ; LE-P10-NEXT:    std r15, 264(r1) # 8-byte Folded Spill
-; LE-P10-NEXT:    stxv v20, 64(r1) # 16-byte Folded Spill
-; LE-P10-NEXT:    stxv v21, 80(r1) # 16-byte Folded Spill
-; LE-P10-NEXT:    stxv v22, 96(r1) # 16-byte Folded Spill
 ; LE-P10-NEXT:    std r16, 272(r1) # 8-byte Folded Spill
 ; LE-P10-NEXT:    std r17, 280(r1) # 8-byte Folded Spill
-; LE-P10-NEXT:    stxv v23, 112(r1) # 16-byte Folded Spill
 ; LE-P10-NEXT:    std r18, 288(r1) # 8-byte Folded Spill
 ; LE-P10-NEXT:    std r19, 296(r1) # 8-byte Folded Spill
-; LE-P10-NEXT:    stxv v24, 128(r1) # 16-byte Folded Spill
-; LE-P10-NEXT:    stxv v25, 144(r1) # 16-byte Folded Spill
 ; LE-P10-NEXT:    std r20, 304(r1) # 8-byte Folded Spill
 ; LE-P10-NEXT:    std r21, 312(r1) # 8-byte Folded Spill
-; LE-P10-NEXT:    stxv v26, 160(r1) # 16-byte Folded Spill
 ; LE-P10-NEXT:    std r22, 320(r1) # 8-byte Folded Spill
 ; LE-P10-NEXT:    std r23, 328(r1) # 8-byte Folded Spill
-; LE-P10-NEXT:    stxv v27, 176(r1) # 16-byte Folded Spill
-; LE-P10-NEXT:    stxv v28, 192(r1) # 16-byte Folded Spill
 ; LE-P10-NEXT:    std r24, 336(r1) # 8-byte Folded Spill
 ; LE-P10-NEXT:    std r25, 344(r1) # 8-byte Folded Spill
-; LE-P10-NEXT:    stxv v29, 208(r1) # 16-byte Folded Spill
 ; LE-P10-NEXT:    std r26, 352(r1) # 8-byte Folded Spill
 ; LE-P10-NEXT:    std r27, 360(r1) # 8-byte Folded Spill
-; LE-P10-NEXT:    stxv v30, 224(r1) # 16-byte Folded Spill
-; LE-P10-NEXT:    stxv v31, 240(r1) # 16-byte Folded Spill
 ; LE-P10-NEXT:    std r28, 368(r1) # 8-byte Folded Spill
 ; LE-P10-NEXT:    std r29, 376(r1) # 8-byte Folded Spill
 ; LE-P10-NEXT:    std r30, 384(r1) # 8-byte Folded Spill
@@ -411,6 +399,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
 ; LE-P10-NEXT:    stfd f29, 520(r1) # 8-byte Folded Spill
 ; LE-P10-NEXT:    stfd f30, 528(r1) # 8-byte Folded Spill
 ; LE-P10-NEXT:    stfd f31, 536(r1) # 8-byte Folded Spill
+; LE-P10-NEXT:    stxv v20, 64(r1) # 16-byte Folded Spill
+; LE-P10-NEXT:    stxv v21, 80(r1) # 16-byte Folded Spill
+; LE-P10-NEXT:    stxv v22, 96(r1) # 16-byte Folded Spill
+; LE-P10-NEXT:    stxv v23, 112(r1) # 16-byte Folded Spill
+; LE-P10-NEXT:    stxv v24, 128(r1) # 16-byte Folded Spill
+; LE-P10-NEXT:    stxv v25, 144(r1) # 16-byte Folded Spill
+; LE-P10-NEXT:    stxv v26, 160(r1) # 16-byte Folded Spill
+; LE-P10-NEXT:    stxv v27, 176(r1) # 16-byte Folded Spill
+; LE-P10-NEXT:    stxv v28, 192(r1) # 16-byte Folded Spill
+; LE-P10-NEXT:    stxv v29, 208(r1) # 16-byte Folded Spill
+; LE-P10-NEXT:    stxv v30, 224(r1) # 16-byte Folded Spill
+; LE-P10-NEXT:    stxv v31, 240(r1) # 16-byte Folded Spill
 ; LE-P10-NEXT:    std r3, 40(r1) # 8-byte Folded Spill
 ; LE-P10-NEXT:    stw r4, 52(r1)
 ; LE-P10-NEXT:    #APP
@@ -436,40 +436,40 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
 ; LE-P10-NEXT:    lfd f29, 520(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    lfd f28, 512(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    lfd f27, 504(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    lfd f26, 496(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    lfd f25, 488(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    lfd f24, 480(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    lfd f23, 472(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    lfd f22, 464(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    lfd f21, 456(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    lfd f20, 448(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    lfd f19, 440(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    lfd f18, 432(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    lfd f17, 424(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    lfd f16, 416(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    lfd f15, 408(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    lfd f14, 400(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r31, 392(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r30, 384(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r29, 376(r1) # 8-byte Folded Reload
-; LE-P10-NEXT:    lfd f26, 496(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r28, 368(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r27, 360(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r26, 352(r1) # 8-byte Folded Reload
-; LE-P10-NEXT:    lfd f25, 488(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r25, 344(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r24, 336(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r23, 328(r1) # 8-byte Folded Reload
-; LE-P10-NEXT:    lfd f24, 480(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r22, 320(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r21, 312(r1) # 8-byte Folded Reload
-; LE-P10-NEXT:    lwz r4, 16(r4)
-; LE-P10-NEXT:    add r3, r4, r3
-; LE-P10-NEXT:    lfd f23, 472(r1) # 8-byte Folded Reload
-; LE-P10-NEXT:    lfd f22, 464(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r20, 304(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r19, 296(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r18, 288(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r17, 280(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r16, 272(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    ld r15, 264(r1) # 8-byte Folded Reload
-; LE-P10-NEXT:    ld r14, 256(r1) # 8-byte Folded Reload
-; LE-P10-NEXT:    lfd f21, 456(r1) # 8-byte Folded Reload
-; LE-P10-NEXT:    lfd f20, 448(r1) # 8-byte Folded Reload
-; LE-P10-NEXT:    lfd f19, 440(r1) # 8-byte Folded Reload
-; LE-P10-NEXT:    lfd f18, 432(r1) # 8-byte Folded Reload
-; LE-P10-NEXT:    lfd f17, 424(r1) # 8-byte Folded Reload
-; LE-P10-NEXT:    lfd f16, 416(r1) # 8-byte Folded Reload
-; LE-P10-NEXT:    lfd f15, 408(r1) # 8-byte Folded Reload
-; LE-P10-NEXT:    lfd f14, 400(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    lwz r4, 16(r4)
+; LE-P10-NEXT:    add r3, r4, r3
 ; LE-P10-NEXT:    clrldi r3, r3, 32
+; LE-P10-NEXT:    ld r14, 256(r1) # 8-byte Folded Reload
 ; LE-P10-NEXT:    addi r1, r1, 544
 ; LE-P10-NEXT:    ld r0, 16(r1)
 ; LE-P10-NEXT:    lwz r12, 8(r1)
@@ -1177,30 +1177,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
 ; BE-P10-NEXT:    lwz r4, 12(r3)
 ; BE-P10-NEXT:    std r14, 336(r1) # 8-byte Folded Spill
 ; BE-P10-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
-; BE-P10-NEXT:    stxv v20, 144(r1) # 16-byte Folded Spill
-; BE-P10-NEXT:    stxv v21, 160(r1) # 16-byte Folded Spill
-; BE-P10-NEXT:    stxv v22, 176(r1) # 16-byte Folded Spill
 ; BE-P10-NEXT:    std r16, 352(r1) # 8-byte Folded Spill
 ; BE-P10-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
-; BE-P10-NEXT:    stxv v23, 192(r1) # 16-byte Folded Spill
 ; BE-P10-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
 ; BE-P10-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
-; BE-P10-NEXT:    stxv v24, 208(r1) # 16-byte Folded Spill
-; BE-P10-NEXT:    stxv v25, 224(r1) # 16-byte Folded Spill
 ; BE-P10-NEXT:    std r20, 384(r1) # 8-byte Folded Spill
 ; BE-P10-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
-; BE-P10-NEXT:    stxv v26, 240(r1) # 16-byte Folded Spill
 ; BE-P10-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
 ; BE-P10-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
-; BE-P10-NEXT:    stxv v27, 256(r1) # 16-byte Folded Spill
-; BE-P10-NEXT:    stxv v28, 272(r1) # 16-byte Folded Spill
 ; BE-P10-NEXT:    std r24, 416(r1) # 8-byte Folded Spill
 ; BE-P10-NEXT:    std r25, 424(r1) # 8-byte Folded Spill
-; BE-P10-NEXT:    stxv v29, 288(r1) # 16-byte Folded Spill
 ; BE-P10-NEXT:    std r26, 432(r1) # 8-byte Folded Spill
 ; BE-P10-NEXT:    std r27, 440(r1) # 8-byte Folded Spill
-; BE-P10-NEXT:    stxv v30, 304(r1) # 16-byte Folded Spill
-; BE-P10-NEXT:    stxv v31, 320(r1) # 16-byte Folded Spill
 ; BE-P10-NEXT:    std r28, 448(r1) # 8-byte Folded Spill
 ; BE-P10-NEXT:    std r29, 456(r1) # 8-byte Folded Spill
 ; BE-P10-NEXT:    std r30, 464(r1) # 8-byte Folded Spill
@@ -1223,6 +1211,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
 ; BE-P10-NEXT:    stfd f29, 600(r1) # 8-byte Folded Spill
 ; BE-P10-NEXT:    stfd f30, 608(r1) # 8-byte Folded Spill
 ; BE-P10-NEXT:    stfd f31, 616(r1) # 8-byte Folded Spill
+; BE-P10-NEXT:    stxv v20, 144(r1) # 16-byte Folded Spill
+; BE-P10-NEXT:    stxv v21, 160(r1) # 16-byte Folded Spill
+; BE-P10-NEXT:    stxv v22, 176(r1) # 16-byte Folded Spill
+; BE-P10-NEXT:    stxv v23, 192(r1) # 16-byte Folded Spill
+; BE-P10-NEXT:    stxv v24, 208(r1) # 16-byte Folded Spill
+; BE-P10-NEXT:    stxv v25, 224(r1) # 16-byte Folded Spill
+; BE-P10-NEXT:    stxv v26, 240(r1) # 16-byte Folded Spill
+; BE-P10-NEXT:    stxv v27, 256(r1) # 16-byte Folded Spill
+; BE-P10-NEXT:    stxv v28, 272(r1) # 16-byte Folded Spill
+; BE-P10-NEXT:    stxv v29, 288(r1) # 16-byte Folded Spill
+; BE-P10-NEXT:    stxv v30, 304(r1) # 16-byte Folded Spill
+; BE-P10-NEXT:    stxv v31, 320(r1) # 16-byte Folded Spill
 ; BE-P10-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
 ; BE-P10-NEXT:    stw r4, 132(r1)
 ; BE-P10-NEXT:    #APP
@@ -1249,40 +1249,40 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
 ; BE-P10-NEXT:    lfd f29, 600(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    lfd f28, 592(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    lfd f27, 584(r1) # 8-byte Folded Reload
+; BE-P10-NEXT:    lfd f26, 576(r1) # 8-byte Folded Reload
+; BE-P10-NEXT:    lfd f25, 568(r1) # 8-byte Folded Reload
+; BE-P10-NEXT:    lfd f24, 560(r1) # 8-byte Folded Reload
+; BE-P10-NEXT:    lfd f23, 552(r1) # 8-byte Folded Reload
+; BE-P10-NEXT:    lfd f22, 544(r1) # 8-byte Folded Reload
+; BE-P10-NEXT:    lfd f21, 536(r1) # 8-byte Folded Reload
+; BE-P10-NEXT:    lfd f20, 528(r1) # 8-byte Folded Reload
+; BE-P10-NEXT:    lfd f19, 520(r1) # 8-byte Folded Reload
+; BE-P10-NEXT:    lfd f18, 512(r1) # 8-byte Folded Reload
+; BE-P10-NEXT:    lfd f17, 504(r1) # 8-byte Folded Reload
+; BE-P10-NEXT:    lfd f16, 496(r1) # 8-byte Folded Reload
+; BE-P10-NEXT:    lfd f15, 488(r1) # 8-byte Folded Reload
+; BE-P10-NEXT:    lfd f14, 480(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r31, 472(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r30, 464(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r29, 456(r1) # 8-byte Folded Reload
-; BE-P10-NEXT:    lfd f26, 576(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r28, 448(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r27, 440(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r26, 432(r1) # 8-byte Folded Reload
-; BE-P10-NEXT:    lfd f25, 568(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r25, 424(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r24, 416(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r23, 408(r1) # 8-byte Folded Reload
-; BE-P10-NEXT:    lfd f24, 560(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r22, 400(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r21, 392(r1) # 8-byte Folded Reload
-; BE-P10-NEXT:    lwz r4, 16(r4)
-; BE-P10-NEXT:    add r3, r4, r3
-; BE-P10-NEXT:    lfd f23, 552(r1) # 8-byte Folded Reload
-; BE-P10-NEXT:    lfd f22, 544(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r20, 384(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r19, 376(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r18, 368(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r17, 360(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r16, 352(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    ld r15, 344(r1) # 8-byte Folded Reload
-; BE-P10-NEXT:    ld r14, 336(r1) # 8-byte Folded Reload
-; BE-P10-NEXT:    lfd f21, 536(r1) # 8-byte Folded Reload
-; BE-P10-NEXT:    lfd f20, 528(r1) # 8-byte Folded Reload
-; BE-P10-NEXT:    lfd f19, 520(r1) # 8-byte Folded Reload
-; BE-P10-NEXT:    lfd f18, 512(r1) # 8-byte Folded Reload
-; BE-P10-NEXT:    lfd f17, 504(r1) # 8-byte Folded Reload
-; BE-P10-NEXT:    lfd f16, 496(r1) # 8-byte Folded Reload
-; BE-P10-NEXT:    lfd f15, 488(r1) # 8-byte Folded Reload
-; BE-P10-NEXT:    lfd f14, 480(r1) # 8-byte Folded Reload
+; BE-P10-NEXT:    lwz r4, 16(r4)
+; BE-P10-NEXT:    add r3, r4, r3
 ; BE-P10-NEXT:    clrldi r3, r3, 32
+; BE-P10-NEXT:    ld r14, 336(r1) # 8-byte Folded Reload
 ; BE-P10-NEXT:    addi r1, r1, 624
 ; BE-P10-NEXT:    ld r0, 16(r1)
 ; BE-P10-NEXT:    lwz r12, 8(r1)
@@ -1582,30 +1582,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
 ; LE-P10-PRIV-NEXT:    lwz r4, 12(r3)
 ; LE-P10-PRIV-NEXT:    std r14, 256(r1) # 8-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r15, 264(r1) # 8-byte Folded Spill
-; LE-P10-PRIV-NEXT:    stxv v20, 64(r1) # 16-byte Folded Spill
-; LE-P10-PRIV-NEXT:    stxv v21, 80(r1) # 16-byte Folded Spill
-; LE-P10-PRIV-NEXT:    stxv v22, 96(r1) # 16-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r16, 272(r1) # 8-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r17, 280(r1) # 8-byte Folded Spill
-; LE-P10-PRIV-NEXT:    stxv v23, 112(r1) # 16-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r18, 288(r1) # 8-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r19, 296(r1) # 8-byte Folded Spill
-; LE-P10-PRIV-NEXT:    stxv v24, 128(r1) # 16-byte Folded Spill
-; LE-P10-PRIV-NEXT:    stxv v25, 144(r1) # 16-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r20, 304(r1) # 8-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r21, 312(r1) # 8-byte Folded Spill
-; LE-P10-PRIV-NEXT:    stxv v26, 160(r1) # 16-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r22, 320(r1) # 8-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r23, 328(r1) # 8-byte Folded Spill
-; LE-P10-PRIV-NEXT:    stxv v27, 176(r1) # 16-byte Folded Spill
-; LE-P10-PRIV-NEXT:    stxv v28, 192(r1) # 16-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r24, 336(r1) # 8-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r25, 344(r1) # 8-byte Folded Spill
-; LE-P10-PRIV-NEXT:    stxv v29, 208(r1) # 16-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r26, 352(r1) # 8-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r27, 360(r1) # 8-byte Folded Spill
-; LE-P10-PRIV-NEXT:    stxv v30, 224(r1) # 16-byte Folded Spill
-; LE-P10-PRIV-NEXT:    stxv v31, 240(r1) # 16-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r28, 368(r1) # 8-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r29, 376(r1) # 8-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r30, 384(r1) # 8-byte Folded Spill
@@ -1628,6 +1616,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
 ; LE-P10-PRIV-NEXT:    stfd f29, 520(r1) # 8-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    stfd f30, 528(r1) # 8-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    stfd f31, 536(r1) # 8-byte Folded Spill
+; LE-P10-PRIV-NEXT:    stxv v20, 64(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT:    stxv v21, 80(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT:    stxv v22, 96(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT:    stxv v23, 112(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT:    stxv v24, 128(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT:    stxv v25, 144(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT:    stxv v26, 160(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT:    stxv v27, 176(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT:    stxv v28, 192(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT:    stxv v29, 208(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT:    stxv v30, 224(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT:    stxv v31, 240(r1) # 16-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    std r3, 40(r1) # 8-byte Folded Spill
 ; LE-P10-PRIV-NEXT:    stw r4, 52(r1)
 ; LE-P10-PRIV-NEXT:    #APP
@@ -1653,40 +1653,40 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
 ; LE-P10-PRIV-NEXT:    lfd f29, 520(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    lfd f28, 512(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    lfd f27, 504(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    lfd f26, 496(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    lfd f25, 488(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    lfd f24, 480(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    lfd f23, 472(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    lfd f22, 464(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    lfd f21, 456(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    lfd f20, 448(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    lfd f19, 440(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    lfd f18, 432(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    lfd f17, 424(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    lfd f16, 416(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    lfd f15, 408(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    lfd f14, 400(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r31, 392(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r30, 384(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r29, 376(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT:    lfd f26, 496(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r28, 368(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r27, 360(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r26, 352(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT:    lfd f25, 488(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r25, 344(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r24, 336(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r23, 328(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT:    lfd f24, 480(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r22, 320(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r21, 312(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT:    lwz r4, 16(r4)
-; LE-P10-PRIV-NEXT:    add r3, r4, r3
-; LE-P10-PRIV-NEXT:    lfd f23, 472(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT:    lfd f22, 464(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r20, 304(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r19, 296(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r18, 288(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r17, 280(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r16, 272(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    ld r15, 264(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT:    ld r14, 256(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT:    lfd f21, 456(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT:    lfd f20, 448(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT:    lfd f19, 440(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT:    lfd f18, 432(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT:    lfd f17, 424(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT:    lfd f16, 416(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT:    lfd f15, 408(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT:    lfd f14, 400(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    lwz r4, 16(r4)
+; LE-P10-PRIV-NEXT:    add r3, r4, r3
 ; LE-P10-PRIV-NEXT:    clrldi r3, r3, 32
+; LE-P10-PRIV-NEXT:    ld r14, 256(r1) # 8-byte Folded Reload
 ; LE-P10-PRIV-NEXT:    addi r1, r1, 544
 ; LE-P10-PRIV-NEXT:    ld r0, 16(r1)
 ; LE-P10-PRIV-NEXT:    lwz r12, 8(r1)
@@ -1986,30 +1986,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
 ; BE-P10-PRIV-NEXT:    lwz r4, 12(r3)
 ; BE-P10-PRIV-NEXT:    std r14, 336(r1) # 8-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
-; BE-P10-PRIV-NEXT:    stxv v20, 144(r1) # 16-byte Folded Spill
-; BE-P10-PRIV-NEXT:    stxv v21, 160(r1) # 16-byte Folded Spill
-; BE-P10-PRIV-NEXT:    stxv v22, 176(r1) # 16-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r16, 352(r1) # 8-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
-; BE-P10-PRIV-NEXT:    stxv v23, 192(r1) # 16-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
-; BE-P10-PRIV-NEXT:    stxv v24, 208(r1) # 16-byte Folded Spill
-; BE-P10-PRIV-NEXT:    stxv v25, 224(r1) # 16-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r20, 384(r1) # 8-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
-; BE-P10-PRIV-NEXT:    stxv v26, 240(r1) # 16-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
-; BE-P10-PRIV-NEXT:    stxv v27, 256(r1) # 16-byte Folded Spill
-; BE-P10-PRIV-NEXT:    stxv v28, 272(r1) # 16-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r24, 416(r1) # 8-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r25, 424(r1) # 8-byte Folded Spill
-; BE-P10-PRIV-NEXT:    stxv v29, 288(r1) # 16-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r26, 432(r1) # 8-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r27, 440(r1) # 8-byte Folded Spill
-; BE-P10-PRIV-NEXT:    stxv v30, 304(r1) # 16-byte Folded Spill
-; BE-P10-PRIV-NEXT:    stxv v31, 320(r1) # 16-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r28, 448(r1) # 8-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r29, 456(r1) # 8-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r30, 464(r1) # 8-byte Folded Spill
@@ -2032,6 +2020,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
 ; BE-P10-PRIV-NEXT:    stfd f29, 600(r1) # 8-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    stfd f30, 608(r1) # 8-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    stfd f31, 616(r1) # 8-byte Folded Spill
+; BE-P10-PRIV-NEXT:    stxv v20, 144(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT:    stxv v21, 160(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT:    stxv v22, 176(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT:    stxv v23, 192(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT:    stxv v24, 208(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT:    stxv v25, 224(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT:    stxv v26, 240(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT:    stxv v27, 256(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT:    stxv v28, 272(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT:    stxv v29, 288(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT:    stxv v30, 304(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT:    stxv v31, 320(r1) # 16-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
 ; BE-P10-PRIV-NEXT:    stw r4, 132(r1)
 ; BE-P10-PRIV-NEXT:    #APP
@@ -2058,40 +2058,40 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
 ; BE-P10-PRIV-NEXT:    lfd f29, 600(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    lfd f28, 592(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    lfd f27, 584(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT:    lfd f26, 576(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT:    lfd f25, 568(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT:    lfd f24, 560(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT:    lfd f23, 552(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT:    lfd f22, 544(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT:    lfd f21, 536(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT:    lfd f20, 528(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT:    lfd f19, 520(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT:    lfd f18, 512(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT:    lfd f17, 504(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT:    lfd f16, 496(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT:    lfd f15, 488(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT:    lfd f14, 480(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r31, 472(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r30, 464(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r29, 456(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT:    lfd f26, 576(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r28, 448(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r27, 440(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r26, 432(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT:    lfd f25, 568(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r25, 424(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r24, 416(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r23, 408(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT:    lfd f24, 560(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r22, 400(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r21, 392(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT:    lwz r4, 16(r4)
-; BE-P10-PRIV-NEXT:    add r3, r4, r3
-; BE-P10-PRIV-NEXT:    lfd f23, 552(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT:    lfd f22, 544(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r20, 384(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r19, 376(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r18, 368(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r17, 360(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r16, 352(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    ld r15, 344(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT:    ld r14, 336(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT:    lfd f21, 536(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT:    lfd f20, 528(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT:    lfd f19, 520(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT:    lfd f18, 512(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT:    lfd f17, 504(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT:    lfd f16, 496(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT:    lfd f15, 488(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT:    lfd f14, 480(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT:    lwz r4, 16(r4)
+; BE-P10-PRIV-NEXT:    add r3, r4, r3
 ; BE-P10-PRIV-NEXT:    clrldi r3, r3, 32
+; BE-P10-PRIV-NEXT:    ld r14, 336(r1) # 8-byte Folded Reload
 ; BE-P10-PRIV-NEXT:    addi r1, r1, 624
 ; BE-P10-PRIV-NEXT:    ld r0, 16(r1)
 ; BE-P10-PRIV-NEXT:    lwz r12, 8(r1)
@@ -2416,9 +2416,9 @@ define dso_local zeroext i32 @shrinkwrap(i32* readonly %in) #0 {
 ; LE-P10-NEXT:    addi r1, r1, 64
 ; LE-P10-NEXT:    ld r0, 16(r1)
 ; LE-P10-NEXT:    clrldi r3, r3, 32
-; LE-P10-NEXT:    mtlr r0
 ; LE-P10-NEXT:    hashchk r0, -24(r1)
 ; LE-P10-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; LE-P10-NEXT:    mtlr r0
 ; LE-P10-NEXT:    blr
 ; LE-P10-NEXT:  .LBB2_2:
 ; LE-P10-NEXT:    li r3, 0
@@ -2603,8 +2603,8 @@ define dso_local zeroext i32 @shrinkwrap(i32* readonly %in) #0 {
 ; BE-P10-NEXT:    addi r1, r1, 144
 ; BE-P10-NEXT:    ld r0, 16(r1)
 ; BE-P10-NEXT:    clrldi r3, r3, 32
-; BE-P10-NEXT:    mtlr r0
 ; BE-P10-NEXT:    hashchk r0, -24(r1)
+; BE-P10-NEXT:    mtlr r0
 ; BE-P10-NEXT:    blr
 ; BE-P10-NEXT:  .LBB2_2:
 ; BE-P10-NEXT:    li r3, 0
@@ -2688,9 +2688,9 @@ define dso_local zeroext i32 @shrinkwrap(i32* readonly %in) #0 {
 ; LE-P10-PRIV-NEXT:    addi r1, r1, 64
 ; LE-P10-PRIV-NEXT:    ld r0, 16(r1)
 ; LE-P10-PRIV-NEXT:    clrldi r3, r3, 32
-; LE-P10-PRIV-NEXT:    mtlr r0
 ; LE-P10-PRIV-NEXT:    hashchkp r0, -24(r1)
 ; LE-P10-PRIV-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT:    mtlr r0
 ; LE-P10-PRIV-NEXT:    blr
 ; LE-P10-PRIV-NEXT:  .LBB2_2:
 ; LE-P10-PRIV-NEXT:    li r3, 0
@@ -2776,8 +2776,8 @@ define dso_local zeroext i32 @shrinkwrap(i32* readonly %in) #0 {
 ; BE-P10-PRIV-NEXT:    addi r1, r1, 144
 ; BE-P10-PRIV-NEXT:    ld r0, 16(r1)
 ; BE-P10-PRIV-NEXT:    clrldi r3, r3, 32
-; BE-P10-PRIV-NEXT:    mtlr r0
 ; BE-P10-PRIV-NEXT:    hashchkp r0, -24(r1)
+; BE-P10-PRIV-NEXT:    mtlr r0
 ; BE-P10-PRIV-NEXT:    blr
 ; BE-P10-PRIV-NEXT:  .LBB2_2:
 ; BE-P10-PRIV-NEXT:    li r3, 0
@@ -2870,10 +2870,10 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
 ; LE-P10-NEXT:    lis r12, -1
 ; LE-P10-NEXT:    std r30, -16(r1)
 ; LE-P10-NEXT:    mr r30, r1
-; LE-P10-NEXT:    ori r12, r12, 0
 ; LE-P10-NEXT:    std r0, 16(r1)
 ; LE-P10-NEXT:    hashst r0, -32(r1)
 ; LE-P10-NEXT:    clrldi r0, r1, 49
+; LE-P10-NEXT:    ori r12, r12, 0
 ; LE-P10-NEXT:    subc r0, r12, r0
 ; LE-P10-NEXT:    stdux r1, r1, r0
 ; LE-P10-NEXT:    std r29, -24(r30) # 8-byte Folded Spill
@@ -2890,9 +2890,9 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
 ; LE-P10-NEXT:    lwz r3, 20(r29)
 ; LE-P10-NEXT:    lis r4, 0
 ; LE-P10-NEXT:    ori r4, r4, 65500
+; LE-P10-NEXT:    add r4, r1, r4
 ; LE-P10-NEXT:    stw r3, 32764(r1)
 ; LE-P10-NEXT:    lis r3, 0
-; LE-P10-NEXT:    add r4, r1, r4
 ; LE-P10-NEXT:    ori r3, r3, 32768
 ; LE-P10-NEXT:    add r3, r1, r3
 ; LE-P10-NEXT:    bl callee3@notoc
@@ -3133,10 +3133,10 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
 ; BE-P10-NEXT:    lis r12, -1
 ; BE-P10-NEXT:    std r30, -16(r1)
 ; BE-P10-NEXT:    mr r30, r1
-; BE-P10-NEXT:    ori r12, r12, 0
 ; BE-P10-NEXT:    std r0, 16(r1)
 ; BE-P10-NEXT:    hashst r0, -32(r1)
 ; BE-P10-NEXT:    clrldi r0, r1, 49
+; BE-P10-NEXT:    ori r12, r12, 0
 ; BE-P10-NEXT:    subc r0, r12, r0
 ; BE-P10-NEXT:    stdux r1, r1, r0
 ; BE-P10-NEXT:    std r29, -24(r30) # 8-byte Folded Spill
@@ -3153,9 +3153,9 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
 ; BE-P10-NEXT:    lwz r3, 20(r29)
 ; BE-P10-NEXT:    lis r4, 0
 ; BE-P10-NEXT:    ori r4, r4, 65500
+; BE-P10-NEXT:    add r4, r1, r4
 ; BE-P10-NEXT:    stw r3, 32764(r1)
 ; BE-P10-NEXT:    lis r3, 0
-; BE-P10-NEXT:    add r4, r1, r4
 ; BE-P10-NEXT:    ori r3, r3, 32768
 ; BE-P10-NEXT:    add r3, r1, r3
 ; BE-P10-NEXT:    bl callee3
@@ -3265,10 +3265,10 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
 ; LE-P10-PRIV-NEXT:    lis r12, -1
 ; LE-P10-PRIV-NEXT:    std r30, -16(r1)
 ; LE-P10-PRIV-NEXT:    mr r30, r1
-; LE-P10-PRIV-NEXT:    ori r12, r12, 0
 ; LE-P10-PRIV-NEXT:    std r0, 16(r1)
 ; LE-P10-PRIV-NEXT:    hashstp r0, -32(r1)
 ; LE-P10-PRIV-NEXT:    clrldi r0, r1, 49
+; LE-P10-PRIV-NEXT:    ori r12, r12, 0
 ; LE-P10-PRIV-NEXT:    subc r0, r12, r0
 ; LE-P10-PRIV-NEXT:    stdux r1, r1, r0
 ; LE-P10-PRIV-NEXT:    std r29, -24(r30) # 8-byte Folded Spill
@@ -3285,9 +3285,9 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
 ; LE-P10-PRIV-NEXT:    lwz r3, 20(r29)
 ; LE-P10-PRIV-NEXT:    lis r4, 0
 ; LE-P10-PRIV-NEXT:    ori r4, r4, 65500
+; LE-P10-PRIV-NEXT:    add r4, r1, r4
 ; LE-P10-PRIV-NEXT:    stw r3, 32764(r1)
 ; LE-P10-PRIV-NEXT:    lis r3, 0
-; LE-P10-PRIV-NEXT:    add r4, r1, r4
 ; LE-P10-PRIV-NEXT:    ori r3, r3, 32768
 ; LE-P10-PRIV-NEXT:    add r3, r1, r3
 ; LE-P10-PRIV-NEXT:    bl callee3@notoc
@@ -3396,10 +3396,10 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
 ; BE-P10-PRIV-NEXT:    lis r12, -1
 ; BE-P10-PRIV-NEXT:    std r30, -16(r1)
 ; BE-P10-PRIV-NEXT:    mr r30, r1
-; BE-P10-PRIV-NEXT:    ori r12, r12, 0
 ; BE-P10-PRIV-NEXT:    std r0, 16(r1)
 ; BE-P10-PRIV-NEXT:    hashstp r0, -32(r1)
 ; BE-P10-PRIV-NEXT:    clrldi r0, r1, 49
+; BE-P10-PRIV-NEXT:    ori r12, r12, 0
 ; BE-P10-PRIV-NEXT:    subc r0, r12, r0
 ; BE-P10-PRIV-NEXT:    stdux r1, r1, r0
 ; BE-P10-PRIV-NEXT:    std r29, -24(r30) # 8-byte Folded Spill
@@ -3416,9 +3416,9 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
 ; BE-P10-PRIV-NEXT:    lwz r3, 20(r29)
 ; BE-P10-PRIV-NEXT:    lis r4, 0
 ; BE-P10-PRIV-NEXT:    ori r4, r4, 65500
+; BE-P10-PRIV-NEXT:    add r4, r1, r4
 ; BE-P10-PRIV-NEXT:    stw r3, 32764(r1)
 ; BE-P10-PRIV-NEXT:    lis r3, 0
-; BE-P10-PRIV-NEXT:    add r4, r1, r4
 ; BE-P10-PRIV-NEXT:    ori r3, r3, 32768
 ; BE-P10-PRIV-NEXT:    add r3, r1, r3
 ; BE-P10-PRIV-NEXT:    bl callee3

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll
index e0a12a2b14b1..173560f830fb 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll
@@ -3932,9 +3932,9 @@ entry:
 define dso_local void @st_not_disjoint64_double_uint8_t(i64 %ptr, double %str) {
 ; CHECK-P10-LABEL: st_not_disjoint64_double_uint8_t:
 ; CHECK-P10:       # %bb.0: # %entry
-; CHECK-P10-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P10-NEXT:    pli r4, 232
 ; CHECK-P10-NEXT:    pli r5, 3567587329
+; CHECK-P10-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P10-NEXT:    rldimi r5, r4, 32, 0
 ; CHECK-P10-NEXT:    or r3, r3, r5
 ; CHECK-P10-NEXT:    stxsibx f0, 0, r3
@@ -4371,9 +4371,9 @@ entry:
 define dso_local void @st_not_disjoint64_double_int8_t(i64 %ptr, double %str) {
 ; CHECK-P10-LABEL: st_not_disjoint64_double_int8_t:
 ; CHECK-P10:       # %bb.0: # %entry
-; CHECK-P10-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P10-NEXT:    pli r4, 232
 ; CHECK-P10-NEXT:    pli r5, 3567587329
+; CHECK-P10-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P10-NEXT:    rldimi r5, r4, 32, 0
 ; CHECK-P10-NEXT:    or r3, r3, r5
 ; CHECK-P10-NEXT:    stxsibx f0, 0, r3
@@ -4814,9 +4814,9 @@ entry:
 define dso_local void @st_not_disjoint64_double_uint16_t(i64 %ptr, double %str) {
 ; CHECK-P10-LABEL: st_not_disjoint64_double_uint16_t:
 ; CHECK-P10:       # %bb.0: # %entry
-; CHECK-P10-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P10-NEXT:    pli r4, 232
 ; CHECK-P10-NEXT:    pli r5, 3567587329
+; CHECK-P10-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P10-NEXT:    rldimi r5, r4, 32, 0
 ; CHECK-P10-NEXT:    or r3, r3, r5
 ; CHECK-P10-NEXT:    stxsihx f0, 0, r3
@@ -5257,9 +5257,9 @@ entry:
 define dso_local void @st_not_disjoint64_double_int16_t(i64 %ptr, double %str) {
 ; CHECK-P10-LABEL: st_not_disjoint64_double_int16_t:
 ; CHECK-P10:       # %bb.0: # %entry
-; CHECK-P10-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P10-NEXT:    pli r4, 232
 ; CHECK-P10-NEXT:    pli r5, 3567587329
+; CHECK-P10-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P10-NEXT:    rldimi r5, r4, 32, 0
 ; CHECK-P10-NEXT:    or r3, r3, r5
 ; CHECK-P10-NEXT:    stxsihx f0, 0, r3
@@ -5626,9 +5626,9 @@ entry:
 define dso_local void @st_not_disjoint64_double_uint32_t(i64 %ptr, double %str) {
 ; CHECK-P10-LABEL: st_not_disjoint64_double_uint32_t:
 ; CHECK-P10:       # %bb.0: # %entry
-; CHECK-P10-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P10-NEXT:    pli r4, 232
 ; CHECK-P10-NEXT:    pli r5, 3567587329
+; CHECK-P10-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P10-NEXT:    rldimi r5, r4, 32, 0
 ; CHECK-P10-NEXT:    or r3, r3, r5
 ; CHECK-P10-NEXT:    stfiwx f0, 0, r3
@@ -5958,9 +5958,9 @@ entry:
 define dso_local void @st_not_disjoint64_double_int32_t(i64 %ptr, double %str) {
 ; CHECK-P10-LABEL: st_not_disjoint64_double_int32_t:
 ; CHECK-P10:       # %bb.0: # %entry
-; CHECK-P10-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P10-NEXT:    pli r4, 232
 ; CHECK-P10-NEXT:    pli r5, 3567587329
+; CHECK-P10-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P10-NEXT:    rldimi r5, r4, 32, 0
 ; CHECK-P10-NEXT:    or r3, r3, r5
 ; CHECK-P10-NEXT:    stfiwx f0, 0, r3

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll
index b6a5ccb93d03..b2c2109e930c 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll
@@ -3954,9 +3954,9 @@ entry:
 define dso_local void @st_not_disjoint64_float_uint8_t(i64 %ptr, float %str) {
 ; CHECK-P10-LABEL: st_not_disjoint64_float_uint8_t:
 ; CHECK-P10:       # %bb.0: # %entry
-; CHECK-P10-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P10-NEXT:    pli r4, 232
 ; CHECK-P10-NEXT:    pli r5, 3567587329
+; CHECK-P10-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P10-NEXT:    rldimi r5, r4, 32, 0
 ; CHECK-P10-NEXT:    or r3, r3, r5
 ; CHECK-P10-NEXT:    stxsibx f0, 0, r3
@@ -4393,9 +4393,9 @@ entry:
 define dso_local void @st_not_disjoint64_float_int8_t(i64 %ptr, float %str) {
 ; CHECK-P10-LABEL: st_not_disjoint64_float_int8_t:
 ; CHECK-P10:       # %bb.0: # %entry
-; CHECK-P10-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P10-NEXT:    pli r4, 232
 ; CHECK-P10-NEXT:    pli r5, 3567587329
+; CHECK-P10-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P10-NEXT:    rldimi r5, r4, 32, 0
 ; CHECK-P10-NEXT:    or r3, r3, r5
 ; CHECK-P10-NEXT:    stxsibx f0, 0, r3
@@ -4836,9 +4836,9 @@ entry:
 define dso_local void @st_not_disjoint64_float_uint16_t(i64 %ptr, float %str) {
 ; CHECK-P10-LABEL: st_not_disjoint64_float_uint16_t:
 ; CHECK-P10:       # %bb.0: # %entry
-; CHECK-P10-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P10-NEXT:    pli r4, 232
 ; CHECK-P10-NEXT:    pli r5, 3567587329
+; CHECK-P10-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P10-NEXT:    rldimi r5, r4, 32, 0
 ; CHECK-P10-NEXT:    or r3, r3, r5
 ; CHECK-P10-NEXT:    stxsihx f0, 0, r3
@@ -5279,9 +5279,9 @@ entry:
 define dso_local void @st_not_disjoint64_float_int16_t(i64 %ptr, float %str) {
 ; CHECK-P10-LABEL: st_not_disjoint64_float_int16_t:
 ; CHECK-P10:       # %bb.0: # %entry
-; CHECK-P10-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P10-NEXT:    pli r4, 232
 ; CHECK-P10-NEXT:    pli r5, 3567587329
+; CHECK-P10-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P10-NEXT:    rldimi r5, r4, 32, 0
 ; CHECK-P10-NEXT:    or r3, r3, r5
 ; CHECK-P10-NEXT:    stxsihx f0, 0, r3
@@ -5648,9 +5648,9 @@ entry:
 define dso_local void @st_not_disjoint64_float_uint32_t(i64 %ptr, float %str) {
 ; CHECK-P10-LABEL: st_not_disjoint64_float_uint32_t:
 ; CHECK-P10:       # %bb.0: # %entry
-; CHECK-P10-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P10-NEXT:    pli r4, 232
 ; CHECK-P10-NEXT:    pli r5, 3567587329
+; CHECK-P10-NEXT:    xscvdpuxws f0, f1
 ; CHECK-P10-NEXT:    rldimi r5, r4, 32, 0
 ; CHECK-P10-NEXT:    or r3, r3, r5
 ; CHECK-P10-NEXT:    stfiwx f0, 0, r3
@@ -5980,9 +5980,9 @@ entry:
 define dso_local void @st_not_disjoint64_float_int32_t(i64 %ptr, float %str) {
 ; CHECK-P10-LABEL: st_not_disjoint64_float_int32_t:
 ; CHECK-P10:       # %bb.0: # %entry
-; CHECK-P10-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P10-NEXT:    pli r4, 232
 ; CHECK-P10-NEXT:    pli r5, 3567587329
+; CHECK-P10-NEXT:    xscvdpsxws f0, f1
 ; CHECK-P10-NEXT:    rldimi r5, r4, 32, 0
 ; CHECK-P10-NEXT:    or r3, r3, r5
 ; CHECK-P10-NEXT:    stfiwx f0, 0, r3

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll
index ce17ad3fea3b..03b7219f7458 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll
@@ -6304,8 +6304,8 @@ define dso_local void @st_not_disjoint32_uint16_t_float(i64 %ptr, i16 zeroext %s
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwz f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -6340,8 +6340,8 @@ define dso_local void @st_disjoint_align32_uint16_t_float(i64 %ptr, i16 zeroext
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwz f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    pstfs f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
@@ -6692,8 +6692,8 @@ define dso_local void @st_not_disjoint32_uint16_t_double(i64 %ptr, i16 zeroext %
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwz f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -6728,8 +6728,8 @@ define dso_local void @st_disjoint_align32_uint16_t_double(i64 %ptr, i16 zeroext
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwz f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    pstfd f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
@@ -7652,8 +7652,8 @@ define dso_local void @st_not_disjoint32_int16_t_float(i64 %ptr, i16 signext %st
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwa f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -7688,8 +7688,8 @@ define dso_local void @st_disjoint_align32_int16_t_float(i64 %ptr, i16 signext %
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwa f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    pstfs f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
@@ -8041,8 +8041,8 @@ define dso_local void @st_not_disjoint32_int16_t_double(i64 %ptr, i16 signext %s
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwa f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -8077,8 +8077,8 @@ define dso_local void @st_disjoint_align32_int16_t_double(i64 %ptr, i16 signext
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwa f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    pstfd f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll
index bbab8a76627f..a13152e8c7ea 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll
@@ -6368,8 +6368,8 @@ define dso_local void @st_not_disjoint32_uint32_t_float(i64 %ptr, i32 zeroext %s
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwz f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -6404,8 +6404,8 @@ define dso_local void @st_disjoint_align32_uint32_t_float(i64 %ptr, i32 zeroext
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwz f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    pstfs f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
@@ -6756,8 +6756,8 @@ define dso_local void @st_not_disjoint32_uint32_t_double(i64 %ptr, i32 zeroext %
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwz f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -6792,8 +6792,8 @@ define dso_local void @st_disjoint_align32_uint32_t_double(i64 %ptr, i32 zeroext
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwz f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    pstfd f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
@@ -7430,8 +7430,8 @@ define dso_local void @st_not_disjoint32_int32_t_float(i64 %ptr, i32 signext %st
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwa f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -7466,8 +7466,8 @@ define dso_local void @st_disjoint_align32_int32_t_float(i64 %ptr, i32 signext %
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwa f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    pstfs f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
@@ -7818,8 +7818,8 @@ define dso_local void @st_not_disjoint32_int32_t_double(i64 %ptr, i32 signext %s
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwa f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -7854,8 +7854,8 @@ define dso_local void @st_disjoint_align32_int32_t_double(i64 %ptr, i32 signext
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwa f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    pstfd f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll
index 3577d714eef0..a9f0bc31ab18 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll
@@ -7006,8 +7006,8 @@ define dso_local void @st_not_disjoint32_uint64_t_float(i64 %ptr, i64 %str) {
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprd f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -7042,8 +7042,8 @@ define dso_local void @st_disjoint_align32_uint64_t_float(i64 %ptr, i64 %str) {
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprd f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    pstfs f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
@@ -7413,8 +7413,8 @@ define dso_local void @st_not_disjoint32_uint64_t_double(i64 %ptr, i64 %str) {
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprd f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -7449,8 +7449,8 @@ define dso_local void @st_disjoint_align32_uint64_t_double(i64 %ptr, i64 %str) {
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprd f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    pstfd f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
@@ -7820,8 +7820,8 @@ define dso_local void @st_not_disjoint32_int64_t_float(i64 %ptr, i64 %str) {
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprd f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -7856,8 +7856,8 @@ define dso_local void @st_disjoint_align32_int64_t_float(i64 %ptr, i64 %str) {
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprd f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    pstfs f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
@@ -8227,8 +8227,8 @@ define dso_local void @st_not_disjoint32_int64_t_double(i64 %ptr, i64 %str) {
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprd f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -8263,8 +8263,8 @@ define dso_local void @st_disjoint_align32_int64_t_double(i64 %ptr, i64 %str) {
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprd f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    pstfd f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll
index 9bbf7f79d4ff..333718e402bf 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll
@@ -7484,8 +7484,8 @@ define dso_local void @st_not_disjoint32_uint8_t_float(i64 %ptr, i8 zeroext %str
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwz f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -7520,8 +7520,8 @@ define dso_local void @st_disjoint_align32_uint8_t_float(i64 %ptr, i8 zeroext %s
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwz f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P10-NEXT:    pstfs f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
@@ -7872,8 +7872,8 @@ define dso_local void @st_not_disjoint32_uint8_t_double(i64 %ptr, i8 zeroext %st
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwz f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -7908,8 +7908,8 @@ define dso_local void @st_disjoint_align32_uint8_t_double(i64 %ptr, i8 zeroext %
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwz f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvuxddp f0, f0
 ; CHECK-P10-NEXT:    pstfd f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
@@ -9118,8 +9118,8 @@ define dso_local void @st_not_disjoint32_int8_t_float(i64 %ptr, i8 signext %str)
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwa f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    stfs f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -9154,8 +9154,8 @@ define dso_local void @st_disjoint_align32_int8_t_float(i64 %ptr, i8 signext %st
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwa f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P10-NEXT:    pstfs f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;
@@ -9506,8 +9506,8 @@ define dso_local void @st_not_disjoint32_int8_t_double(i64 %ptr, i8 signext %str
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwa f0, r4
 ; CHECK-P10-NEXT:    ori r3, r3, 34463
-; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    oris r3, r3, 1
+; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    stfd f0, 0(r3)
 ; CHECK-P10-NEXT:    blr
 ;
@@ -9542,8 +9542,8 @@ define dso_local void @st_disjoint_align32_int8_t_double(i64 %ptr, i8 signext %s
 ; CHECK-P10:       # %bb.0: # %entry
 ; CHECK-P10-NEXT:    mtfprwa f0, r4
 ; CHECK-P10-NEXT:    lis r5, -15264
-; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    and r3, r3, r5
+; CHECK-P10-NEXT:    xscvsxddp f0, f0
 ; CHECK-P10-NEXT:    pstfd f0, 999990000(r3), 0
 ; CHECK-P10-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/spill-vec-pair.ll b/llvm/test/CodeGen/PowerPC/spill-vec-pair.ll
index dcede94054cb..562f2dde467d 100644
--- a/llvm/test/CodeGen/PowerPC/spill-vec-pair.ll
+++ b/llvm/test/CodeGen/PowerPC/spill-vec-pair.ll
@@ -11,34 +11,34 @@ define dso_local void @test(<256 x i1>* %vpp, <256 x i1>* %vp2) local_unnamed_ad
 ; CHECK-NEXT:    stdu r1, -400(r1)
 ; CHECK-NEXT:    stfd f14, 256(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f15, 264(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv v20, 64(r1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv v21, 80(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stfd f16, 272(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f17, 280(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv v22, 96(r1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv v23, 112(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stfd f18, 288(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f19, 296(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv v24, 128(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stfd f20, 304(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f21, 312(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv v25, 144(r1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv v26, 160(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stfd f22, 320(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f23, 328(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv v27, 176(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stfd f24, 336(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f25, 344(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv v28, 192(r1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv v29, 208(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stfd f26, 352(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f27, 360(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv v30, 224(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stfd f28, 368(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f29, 376(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv v31, 240(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stfd f30, 384(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f31, 392(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    stxv v20, 64(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v21, 80(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v22, 96(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v23, 112(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v24, 128(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v25, 144(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v26, 160(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v27, 176(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v28, 192(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v29, 208(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v30, 224(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v31, 240(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    lxvp vsp34, 0(r3)
 ; CHECK-NEXT:    stxvp vsp34, 32(r1) # 32-byte Folded Spill
 ; CHECK-NEXT:    #APP
@@ -84,34 +84,34 @@ define dso_local void @test(<256 x i1>* %vpp, <256 x i1>* %vp2) local_unnamed_ad
 ; CHECK-BE-NEXT:    stdu r1, -416(r1)
 ; CHECK-BE-NEXT:    stfd f14, 272(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f15, 280(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stxv v20, 80(r1) # 16-byte Folded Spill
-; CHECK-BE-NEXT:    stxv v21, 96(r1) # 16-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f16, 288(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f17, 296(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stxv v22, 112(r1) # 16-byte Folded Spill
-; CHECK-BE-NEXT:    stxv v23, 128(r1) # 16-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f18, 304(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f19, 312(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stxv v24, 144(r1) # 16-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f20, 320(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f21, 328(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stxv v25, 160(r1) # 16-byte Folded Spill
-; CHECK-BE-NEXT:    stxv v26, 176(r1) # 16-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f22, 336(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f23, 344(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stxv v27, 192(r1) # 16-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f24, 352(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f25, 360(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stxv v28, 208(r1) # 16-byte Folded Spill
-; CHECK-BE-NEXT:    stxv v29, 224(r1) # 16-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f26, 368(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f27, 376(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stxv v30, 240(r1) # 16-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f28, 384(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f29, 392(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT:    stxv v31, 256(r1) # 16-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f30, 400(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    stfd f31, 408(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT:    stxv v20, 80(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT:    stxv v21, 96(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT:    stxv v22, 112(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT:    stxv v23, 128(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT:    stxv v24, 144(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT:    stxv v25, 160(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT:    stxv v26, 176(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT:    stxv v27, 192(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT:    stxv v28, 208(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT:    stxv v29, 224(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT:    stxv v30, 240(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT:    stxv v31, 256(r1) # 16-byte Folded Spill
 ; CHECK-BE-NEXT:    lxvp vsp34, 0(r3)
 ; CHECK-BE-NEXT:    stxvp vsp34, 48(r1) # 32-byte Folded Spill
 ; CHECK-BE-NEXT:    #APP

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
index 4ecaf27c8886..b9777df02c61 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
@@ -70,12 +70,11 @@ define dso_local i32 @testCompare1(%struct.tree_common* nocapture readonly %arg1
 ; LE-NEXT:    blr
 ;
 ; CHECK-P10-LE-LABEL: testCompare1:
-; CHECK-P10-LE:         .localentry testCompare1, 1
-; CHECK-P10-LE-NEXT:  # %bb.0: # %entry
+; CHECK-P10-LE:       # %bb.0: # %entry
 ; CHECK-P10-LE-NEXT:    plbz r4, testCompare1@PCREL(0), 1
 ; CHECK-P10-LE-NEXT:    lbz r3, 0(r3)
-; CHECK-P10-LE-NEXT:    clrlwi r3, r3, 31
 ; CHECK-P10-LE-NEXT:    clrlwi r4, r4, 31
+; CHECK-P10-LE-NEXT:    clrlwi r3, r3, 31
 ; CHECK-P10-LE-NEXT:    cmplw r4, r3
 ; CHECK-P10-LE-NEXT:    setbc r3, gt
 ; CHECK-P10-LE-NEXT:    b fn2@notoc
@@ -122,14 +121,13 @@ define dso_local i32 @testCompare1(%struct.tree_common* nocapture readonly %arg1
 ; CHECK-P10-CMP-LE-NEXT:    blr
 ;
 ; CHECK-P10-CMP-BE-LABEL: testCompare1:
-; CHECK-P10-CMP-BE:         .localentry testCompare1, 1
-; CHECK-P10-CMP-BE-NEXT:  # %bb.0: # %entry
+; CHECK-P10-CMP-BE:       # %bb.0: # %entry
 ; CHECK-P10-CMP-BE-NEXT:    plbz r4, testCompare1@PCREL(0), 1
 ; CHECK-P10-CMP-BE-NEXT:    lbz r3, 0(r3)
-; CHECK-P10-CMP-BE-NEXT:    clrlwi r3, r3, 31
 ; CHECK-P10-CMP-BE-NEXT:    clrlwi r4, r4, 31
-; CHECK-P10-CMP-BE-NEXT:    clrldi r3, r3, 32
+; CHECK-P10-CMP-BE-NEXT:    clrlwi r3, r3, 31
 ; CHECK-P10-CMP-BE-NEXT:    clrldi r4, r4, 32
+; CHECK-P10-CMP-BE-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-CMP-BE-NEXT:    sub r3, r3, r4
 ; CHECK-P10-CMP-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-P10-CMP-BE-NEXT:    b fn2@notoc

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
index 0ecd1a12c9aa..e609a71962e1 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
@@ -72,12 +72,11 @@ define dso_local i32 @testCompare1(%struct.tree_common* nocapture readonly %arg1
 ; LE-NEXT:    blr
 ;
 ; CHECK-P10-LE-LABEL: testCompare1:
-; CHECK-P10-LE:         .localentry testCompare1, 1
-; CHECK-P10-LE-NEXT:  # %bb.0: # %entry
+; CHECK-P10-LE:       # %bb.0: # %entry
 ; CHECK-P10-LE-NEXT:    plbz r4, testCompare1@PCREL(0), 1
 ; CHECK-P10-LE-NEXT:    lbz r3, 0(r3)
-; CHECK-P10-LE-NEXT:    clrlwi r3, r3, 31
 ; CHECK-P10-LE-NEXT:    clrlwi r4, r4, 31
+; CHECK-P10-LE-NEXT:    clrlwi r3, r3, 31
 ; CHECK-P10-LE-NEXT:    cmplw r4, r3
 ; CHECK-P10-LE-NEXT:    setbc r3, lt
 ; CHECK-P10-LE-NEXT:    b fn2@notoc
@@ -124,14 +123,13 @@ define dso_local i32 @testCompare1(%struct.tree_common* nocapture readonly %arg1
 ; CHECK-P10-CMP-LE-NEXT:    blr
 ;
 ; CHECK-P10-CMP-BE-LABEL: testCompare1:
-; CHECK-P10-CMP-BE:         .localentry testCompare1, 1
-; CHECK-P10-CMP-BE-NEXT:  # %bb.0: # %entry
+; CHECK-P10-CMP-BE:       # %bb.0: # %entry
 ; CHECK-P10-CMP-BE-NEXT:    plbz r4, testCompare1@PCREL(0), 1
 ; CHECK-P10-CMP-BE-NEXT:    lbz r3, 0(r3)
-; CHECK-P10-CMP-BE-NEXT:    clrlwi r3, r3, 31
 ; CHECK-P10-CMP-BE-NEXT:    clrlwi r4, r4, 31
-; CHECK-P10-CMP-BE-NEXT:    clrldi r3, r3, 32
+; CHECK-P10-CMP-BE-NEXT:    clrlwi r3, r3, 31
 ; CHECK-P10-CMP-BE-NEXT:    clrldi r4, r4, 32
+; CHECK-P10-CMP-BE-NEXT:    clrldi r3, r3, 32
 ; CHECK-P10-CMP-BE-NEXT:    sub r3, r4, r3
 ; CHECK-P10-CMP-BE-NEXT:    rldicl r3, r3, 1, 63
 ; CHECK-P10-CMP-BE-NEXT:    b fn2@notoc


        


More information about the llvm-commits mailing list