[llvm] 67c64d8 - [PowerPC] Implement scheduling model for Power10
Qiu Chaofan via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 18 00:32:55 PDT 2021
Author: Qiu Chaofan
Date: 2021-10-18T15:27:49+08:00
New Revision: 67c64d83378e7e84e30801420ebba453987e2546
URL: https://github.com/llvm/llvm-project/commit/67c64d83378e7e84e30801420ebba453987e2546
DIFF: https://github.com/llvm/llvm-project/commit/67c64d83378e7e84e30801420ebba453987e2546.diff
LOG: [PowerPC] Implement scheduling model for Power10
Reviewed By: jsji
Differential Revision: https://reviews.llvm.org/D110855
Added:
llvm/lib/Target/PowerPC/P10InstrResources.td
llvm/lib/Target/PowerPC/PPCSchedPredicates.td
llvm/lib/Target/PowerPC/PPCScheduleP10.td
Modified:
llvm/lib/Target/PowerPC/PPC.td
llvm/lib/Target/PowerPC/PPCSchedule.td
llvm/lib/Target/PowerPC/PPCScheduleP9.td
llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
llvm/test/CodeGen/PowerPC/constant-pool.ll
llvm/test/CodeGen/PowerPC/int128_ldst.ll
llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
llvm/test/CodeGen/PowerPC/mma-outer-product.ll
llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
llvm/test/CodeGen/PowerPC/p10-fi-elim.ll
llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll
llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll
llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll
llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll
llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll
llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll
llvm/test/CodeGen/PowerPC/spill-vec-pair.ll
llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
new file mode 100644
index 000000000000..5c040f31db02
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -0,0 +1,2075 @@
+//===--- P10InstrResources.td - P10 Scheduling Definitions -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//
+// This file defines the itinerary class data for the POWER10 processor.
+//
+//===----------------------------------------------------------------------===//
+// 22 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_22C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+ (instrs
+ FDIVS,
+ XSDIVSP
+)>;
+
+// 2-way crack instructions
+// 22 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_BF_22C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FDIVS_rec
+)>;
+
+// 24 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_24C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+ (instrs
+ XVDIVSP
+)>;
+
+// 26 Cycles Binary Floating Point operations, 1 input operand
+def : InstRW<[P10W_BF_26C, P10W_DISP_ANY, P10BF_Read],
+ (instrs
+ FSQRTS,
+ XSSQRTSP
+)>;
+
+// 2-way crack instructions
+// 26 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_BF_26C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FSQRTS_rec
+)>;
+
+// 27 Cycles Binary Floating Point operations, 1 input operand
+def : InstRW<[P10W_BF_27C, P10W_DISP_ANY, P10BF_Read],
+ (instrs
+ XVSQRTSP
+)>;
+
+// 27 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_27C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+ (instrs
+ FDIV,
+ XSDIVDP,
+ XVDIVDP
+)>;
+
+// 2-way crack instructions
+// 27 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_BF_27C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FDIV_rec
+)>;
+
+// 36 Cycles Binary Floating Point operations, 1 input operand
+def : InstRW<[P10W_BF_36C, P10W_DISP_ANY, P10BF_Read],
+ (instrs
+ FSQRT,
+ XSSQRTDP,
+ XVSQRTDP
+)>;
+
+// 2-way crack instructions
+// 36 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_BF_36C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FSQRT_rec
+)>;
+
+// 7 Cycles Binary Floating Point operations, 1 input operand
+def : InstRW<[P10W_BF_7C, P10W_DISP_ANY, P10BF_Read],
+ (instrs
+ FCFID,
+ FCFIDS,
+ FCFIDU,
+ FCFIDUS,
+ FCTID,
+ FCTIDU,
+ FCTIDUZ,
+ FCTIDZ,
+ FCTIW,
+ FCTIWU,
+ FCTIWUZ,
+ FCTIWZ,
+ FRE,
+ FRES,
+ FRIMD, FRIMS,
+ FRIND, FRINS,
+ FRIPD, FRIPS,
+ FRIZD, FRIZS,
+ FRSP,
+ FRSQRTE,
+ FRSQRTES,
+ VCFSX, VCFSX_0,
+ VCFUX, VCFUX_0,
+ VCTSXS, VCTSXS_0,
+ VCTUXS, VCTUXS_0,
+ VLOGEFP,
+ VREFP,
+ VRFIM,
+ VRFIN,
+ VRFIP,
+ VRFIZ,
+ VRSQRTEFP,
+ XSCVDPHP,
+ XSCVDPSP,
+ XSCVDPSPN,
+ XSCVDPSXDS, XSCVDPSXDSs,
+ XSCVDPSXWS, XSCVDPSXWSs,
+ XSCVDPUXDS, XSCVDPUXDSs,
+ XSCVDPUXWS, XSCVDPUXWSs,
+ XSCVSPDP,
+ XSCVSXDDP,
+ XSCVSXDSP,
+ XSCVUXDDP,
+ XSCVUXDSP,
+ XSRDPI,
+ XSRDPIC,
+ XSRDPIM,
+ XSRDPIP,
+ XSRDPIZ,
+ XSREDP,
+ XSRESP,
+ XSRSP,
+ XSRSQRTEDP,
+ XSRSQRTESP,
+ XVCVDPSP,
+ XVCVDPSXDS,
+ XVCVDPSXWS,
+ XVCVDPUXDS,
+ XVCVDPUXWS,
+ XVCVSPBF16,
+ XVCVSPDP,
+ XVCVSPHP,
+ XVCVSPSXDS,
+ XVCVSPSXWS,
+ XVCVSPUXDS,
+ XVCVSPUXWS,
+ XVCVSXDDP,
+ XVCVSXDSP,
+ XVCVSXWDP,
+ XVCVSXWSP,
+ XVCVUXDDP,
+ XVCVUXDSP,
+ XVCVUXWDP,
+ XVCVUXWSP,
+ XVRDPI,
+ XVRDPIC,
+ XVRDPIM,
+ XVRDPIP,
+ XVRDPIZ,
+ XVREDP,
+ XVRESP,
+ XVRSPI,
+ XVRSPIC,
+ XVRSPIM,
+ XVRSPIP,
+ XVRSPIZ,
+ XVRSQRTEDP,
+ XVRSQRTESP
+)>;
+
+// 7 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+ (instrs
+ FADD,
+ FADDS,
+ FMUL,
+ FMULS,
+ FSUB,
+ FSUBS,
+ VADDFP,
+ VSUBFP,
+ XSADDDP,
+ XSADDSP,
+ XSMULDP,
+ XSMULSP,
+ XSSUBDP,
+ XSSUBSP,
+ XVADDDP,
+ XVADDSP,
+ XVMULDP,
+ XVMULSP,
+ XVSUBDP,
+ XVSUBSP
+)>;
+
+// 7 Cycles Binary Floating Point operations, 3 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_ANY, P10BF_Read, P10BF_Read, P10BF_Read],
+ (instrs
+ FMADD,
+ FMADDS,
+ FMSUB,
+ FMSUBS,
+ FNMADD,
+ FNMADDS,
+ FNMSUB,
+ FNMSUBS,
+ FSELD, FSELS,
+ VMADDFP,
+ VNMSUBFP,
+ XSMADDADP,
+ XSMADDASP,
+ XSMADDMDP,
+ XSMADDMSP,
+ XSMSUBADP,
+ XSMSUBASP,
+ XSMSUBMDP,
+ XSMSUBMSP,
+ XSNMADDADP,
+ XSNMADDASP,
+ XSNMADDMDP,
+ XSNMADDMSP,
+ XSNMSUBADP,
+ XSNMSUBASP,
+ XSNMSUBMDP,
+ XSNMSUBMSP,
+ XVMADDADP,
+ XVMADDASP,
+ XVMADDMDP,
+ XVMADDMSP,
+ XVMSUBADP,
+ XVMSUBASP,
+ XVMSUBMDP,
+ XVMSUBMSP,
+ XVNMADDADP,
+ XVNMADDASP,
+ XVNMADDMDP,
+ XVNMADDMSP,
+ XVNMSUBADP,
+ XVNMSUBASP,
+ XVNMSUBMDP,
+ XVNMSUBMSP
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 7 Cycles Binary Floating Point operations, 1 input operand
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_BF_7C, P10W_DISP_ANY, P10BF_Read],
+ (instrs
+ VEXPTEFP
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FADD_rec,
+ FADDS_rec,
+ FMUL_rec,
+ FMULS_rec,
+ FSUB_rec,
+ FSUBS_rec
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FCFID_rec,
+ FCFIDS_rec,
+ FCFIDU_rec,
+ FCFIDUS_rec,
+ FCTID_rec,
+ FCTIDU_rec,
+ FCTIDUZ_rec,
+ FCTIDZ_rec,
+ FCTIW_rec,
+ FCTIWU_rec,
+ FCTIWUZ_rec,
+ FCTIWZ_rec,
+ FRE_rec,
+ FRES_rec,
+ FRIMD_rec, FRIMS_rec,
+ FRIND_rec, FRINS_rec,
+ FRIPD_rec, FRIPS_rec,
+ FRIZD_rec, FRIZS_rec,
+ FRSP_rec,
+ FRSQRTE_rec,
+ FRSQRTES_rec
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 3 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FMADD_rec,
+ FMADDS_rec,
+ FMSUB_rec,
+ FMSUBS_rec,
+ FNMADD_rec,
+ FNMADDS_rec,
+ FNMSUB_rec,
+ FNMSUBS_rec,
+ FSELD_rec, FSELS_rec
+)>;
+
+// 2 Cycles Branch operations, 0 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY],
+ (instrs
+ BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR,
+ BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL,
+ BL, BL8, BL8_NOP, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_TLS, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_TLS
+)>;
+
+// 2 Cycles Branch operations, 1 input operand
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read],
+ (instrs
+ B, BCC, BCCA, BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL, BCCLA, BCCLR, BCCLRL, CTRL_DEP, TAILB, TAILB8,
+ BA, TAILBA, TAILBA8,
+ BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL_LWZinto_toc, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat,
+ BCL, BCLalways, BCLn, BDNZL, BDNZLm, BDNZLp, BDZL, BDZLm, BDZLp, gBCL, gBCLat,
+ BLA, BLA8, BLA8_NOP
+)>;
+
+// 2 Cycles Branch operations, 3 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read],
+ (instrs
+ BCCTR, BCCTR8, BCCTR8n, BCCTRn, gBCCTR,
+ BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, gBCCTRL
+)>;
+
+// 2 Cycles Branch operations, 4 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read, P10BR_Read],
+ (instrs
+ BDNZA, BDNZAm, BDNZAp, BDZA, BDZAm, BDZAp, gBCA, gBCAat,
+ BDNZLA, BDNZLAm, BDNZLAp, BDZLA, BDZLAm, BDZLAp, gBCLA, gBCLAat
+)>;
+
+// 7 Cycles Crypto operations, 1 input operand
+def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read],
+ (instrs
+ VSBOX
+)>;
+
+// 7 Cycles Crypto operations, 2 input operands
+def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read, P10CY_Read],
+ (instrs
+ CFUGED,
+ CNTLZDM,
+ CNTTZDM,
+ PDEPD,
+ PEXTD,
+ VCFUGED,
+ VCIPHER,
+ VCIPHERLAST,
+ VCLZDM,
+ VCTZDM,
+ VGNB,
+ VNCIPHER,
+ VNCIPHERLAST,
+ VPDEPD,
+ VPEXTD,
+ VPMSUMB,
+ VPMSUMD,
+ VPMSUMH,
+ VPMSUMW
+)>;
+
+// 13 Cycles Decimal Floating Point operations, 1 input operand
+def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read],
+ (instrs
+ XSCVDPQP,
+ XSCVQPDP,
+ XSCVQPDPO,
+ XSCVQPSDZ,
+ XSCVQPSQZ,
+ XSCVQPSWZ,
+ XSCVQPUDZ,
+ XSCVQPUQZ,
+ XSCVQPUWZ,
+ XSCVSDQP,
+ XSCVSQQP,
+ XSCVUDQP,
+ XSCVUQQP
+)>;
+
+// 13 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ XSADDQP,
+ XSADDQPO,
+ XSSUBQP,
+ XSSUBQPO
+)>;
+
+// 13 Cycles Decimal Floating Point operations, 3 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
+ (instrs
+ BCDSR_rec,
+ XSRQPI,
+ XSRQPIX,
+ XSRQPXP
+)>;
+
+// 2-way crack instructions
+// 13 Cycles Decimal Floating Point operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY],
+ (instrs
+ HASHST,
+ HASHSTP
+)>;
+
+// 24 Cycles Decimal Floating Point operations, 1 input operand
+def : InstRW<[P10W_DF_24C, P10W_DISP_ANY, P10DF_Read],
+ (instrs
+ BCDCTSQ_rec
+)>;
+
+// 25 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_25C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ XSMULQP,
+ XSMULQPO
+)>;
+
+// 25 Cycles Decimal Floating Point operations, 3 input operands
+def : InstRW<[P10W_DF_25C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
+ (instrs
+ XSMADDQP,
+ XSMADDQPO,
+ XSMSUBQP,
+ XSMSUBQPO,
+ XSNMADDQP,
+ XSNMADDQPO,
+ XSNMSUBQP,
+ XSNMSUBQPO
+)>;
+
+// 38 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_38C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ BCDCFSQ_rec
+)>;
+
+// 59 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_59C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ XSDIVQP,
+ XSDIVQPO
+)>;
+
+// 61 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_61C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ VDIVESQ,
+ VDIVEUQ,
+ VDIVSQ,
+ VDIVUQ
+)>;
+
+// 68 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_68C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ VMODSQ,
+ VMODUQ
+)>;
+
+// 77 Cycles Decimal Floating Point operations, 1 input operand
+def : InstRW<[P10W_DF_77C, P10W_DISP_ANY, P10DF_Read],
+ (instrs
+ XSSQRTQP,
+ XSSQRTQPO
+)>;
+
+// 20 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_20C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ DIVW,
+ DIVWO,
+ DIVWU,
+ DIVWUO,
+ MODSW
+)>;
+
+// 2-way crack instructions
+// 20 Cycles Scalar Fixed-Point Divide operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_DV_20C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ DIVW_rec,
+ DIVWO_rec,
+ DIVWU_rec,
+ DIVWUO_rec
+)>;
+
+// 25 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_25C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ DIVD,
+ DIVDO,
+ DIVDU,
+ DIVDUO,
+ DIVWE,
+ DIVWEO,
+ DIVWEU,
+ DIVWEUO
+)>;
+
+// 2-way crack instructions
+// 25 Cycles Scalar Fixed-Point Divide operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_DV_25C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ DIVD_rec,
+ DIVDO_rec,
+ DIVDU_rec,
+ DIVDUO_rec,
+ DIVWE_rec,
+ DIVWEO_rec,
+ DIVWEU_rec,
+ DIVWEUO_rec
+)>;
+
+// 27 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_27C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ MODSD,
+ MODUD,
+ MODUW
+)>;
+
+// 41 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_41C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ DIVDE,
+ DIVDEO,
+ DIVDEU,
+ DIVDEUO
+)>;
+
+// 2-way crack instructions
+// 41 Cycles Scalar Fixed-Point Divide operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_DV_41C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ DIVDE_rec,
+ DIVDEO_rec,
+ DIVDEU_rec,
+ DIVDEUO_rec
+)>;
+
+// 43 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_43C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VDIVSD,
+ VDIVUD
+)>;
+
+// 47 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_47C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VMODSD,
+ VMODUD
+)>;
+
+// 54 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_54C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VDIVSW,
+ VDIVUW
+)>;
+
+// 60 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_60C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VMODSW,
+ VMODUW
+)>;
+
+// 75 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_75C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VDIVESD,
+ VDIVEUD
+)>;
+
+// 83 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_83C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VDIVESW,
+ VDIVEUW
+)>;
+
+// 5 Cycles Fixed-Point and BCD operations, 1 input operand
+def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read],
+ (instrs
+ BCDCTN_rec,
+ VMUL10CUQ,
+ VMUL10UQ,
+ XSXSIGQP
+)>;
+
+// 5 Cycles Fixed-Point and BCD operations, 2 input operands
+def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
+ (instrs
+ BCDCFN_rec,
+ BCDCFZ_rec,
+ BCDCPSGN_rec,
+ BCDCTZ_rec,
+ BCDSETSGN_rec,
+ BCDUS_rec,
+ BCDUTRUNC_rec,
+ VADDCUQ,
+ VADDUQM,
+ VMUL10ECUQ,
+ VMUL10EUQ,
+ VSUBCUQ,
+ VSUBUQM,
+ XSCMPEXPQP,
+ XSCMPOQP,
+ XSCMPUQP,
+ XSTSTDCQP,
+ XXGENPCVBM
+)>;
+
+// 5 Cycles Fixed-Point and BCD operations, 3 input operands
+def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read],
+ (instrs
+ BCDS_rec,
+ BCDTRUNC_rec,
+ VADDECUQ,
+ VADDEUQM,
+ VSUBECUQ,
+ VSUBEUQM
+)>;
+
+// 4 Cycles ALU2 operations, 0 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY],
+ (instrs
+ TRAP, TW
+)>;
+
+// 4 Cycles ALU2 operations, 1 input operand
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
+ (instrs
+ CNTLZD,
+ CNTLZD_rec,
+ CNTLZW, CNTLZW8,
+ CNTLZW8_rec, CNTLZW_rec,
+ CNTTZD,
+ CNTTZD_rec,
+ CNTTZW, CNTTZW8,
+ CNTTZW8_rec, CNTTZW_rec,
+ FTSQRT,
+ MTVSRBM,
+ MTVSRBMI,
+ MTVSRDM,
+ MTVSRHM,
+ MTVSRQM,
+ MTVSRWM,
+ POPCNTB, POPCNTB8,
+ POPCNTD,
+ POPCNTW,
+ VCLZB,
+ VCLZD,
+ VCLZH,
+ VCLZW,
+ VCTZB,
+ VCTZD,
+ VCTZH,
+ VCTZW,
+ VEXPANDBM,
+ VEXPANDDM,
+ VEXPANDHM,
+ VEXPANDQM,
+ VEXPANDWM,
+ VEXTRACTBM,
+ VEXTRACTDM,
+ VEXTRACTHM,
+ VEXTRACTQM,
+ VEXTRACTWM,
+ VPOPCNTB,
+ VPOPCNTD,
+ VPOPCNTH,
+ VPOPCNTW,
+ VPRTYBD,
+ VPRTYBW,
+ XSCVHPDP,
+ XSCVSPDPN,
+ XSTSQRTDP,
+ XVCVHPSP,
+ XVTLSBB,
+ XVTSQRTDP,
+ XVTSQRTSP
+)>;
+
+// 4 Cycles ALU2 operations, 2 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
+ (instrs
+ CMPEQB,
+ EXTSWSLI_32_64_rec, EXTSWSLI_rec,
+ FCMPOD, FCMPOS,
+ FCMPUD, FCMPUS,
+ FTDIV,
+ SLD_rec,
+ SLW8_rec, SLW_rec,
+ SRD_rec,
+ SRW8_rec, SRW_rec,
+ VABSDUB,
+ VABSDUH,
+ VABSDUW,
+ VADDCUW,
+ VADDSBS,
+ VADDSHS,
+ VADDSWS,
+ VADDUBS,
+ VADDUHS,
+ VADDUWS,
+ VAVGSB,
+ VAVGSH,
+ VAVGSW,
+ VAVGUB,
+ VAVGUH,
+ VAVGUW,
+ VCMPBFP,
+ VCMPBFP_rec,
+ VCMPEQFP,
+ VCMPEQFP_rec,
+ VCMPEQUB_rec,
+ VCMPEQUD_rec,
+ VCMPEQUH_rec,
+ VCMPEQUQ,
+ VCMPEQUQ_rec,
+ VCMPEQUW_rec,
+ VCMPGEFP,
+ VCMPGEFP_rec,
+ VCMPGTFP,
+ VCMPGTFP_rec,
+ VCMPGTSB_rec,
+ VCMPGTSD_rec,
+ VCMPGTSH_rec,
+ VCMPGTSQ,
+ VCMPGTSQ_rec,
+ VCMPGTSW_rec,
+ VCMPGTUB_rec,
+ VCMPGTUD_rec,
+ VCMPGTUH_rec,
+ VCMPGTUQ,
+ VCMPGTUQ_rec,
+ VCMPGTUW_rec,
+ VCMPNEB_rec,
+ VCMPNEH_rec,
+ VCMPNEW_rec,
+ VCMPNEZB_rec,
+ VCMPNEZH_rec,
+ VCMPNEZW_rec,
+ VCMPSQ,
+ VCMPUQ,
+ VCNTMBB,
+ VCNTMBD,
+ VCNTMBH,
+ VCNTMBW,
+ VMAXFP,
+ VMINFP,
+ VSUBCUW,
+ VSUBSBS,
+ VSUBSHS,
+ VSUBSWS,
+ VSUBUBS,
+ VSUBUHS,
+ VSUBUWS,
+ XSCMPEQDP,
+ XSCMPEXPDP,
+ XSCMPGEDP,
+ XSCMPGTDP,
+ XSCMPODP,
+ XSCMPUDP,
+ XSMAXCDP,
+ XSMAXDP,
+ XSMAXJDP,
+ XSMINCDP,
+ XSMINDP,
+ XSMINJDP,
+ XSTDIVDP,
+ XSTSTDCDP,
+ XSTSTDCSP,
+ XVCMPEQDP,
+ XVCMPEQDP_rec,
+ XVCMPEQSP,
+ XVCMPEQSP_rec,
+ XVCMPGEDP,
+ XVCMPGEDP_rec,
+ XVCMPGESP,
+ XVCMPGESP_rec,
+ XVCMPGTDP,
+ XVCMPGTDP_rec,
+ XVCMPGTSP,
+ XVCMPGTSP_rec,
+ XVMAXDP,
+ XVMAXSP,
+ XVMINDP,
+ XVMINSP,
+ XVTDIVDP,
+ XVTDIVSP,
+ XVTSTDCDP,
+ XVTSTDCSP
+)>;
+
+// 4 Cycles ALU2 operations, 3 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
+ (instrs
+ CMPRB, CMPRB8,
+ RLDCL_rec,
+ RLDCR_rec,
+ RLDIC_rec,
+ RLDICL_32_rec, RLDICL_rec,
+ RLDICR_rec,
+ TD,
+ TDI,
+ TWI,
+ VSHASIGMAD,
+ VSHASIGMAW
+)>;
+
+// 4 Cycles ALU2 operations, 4 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
+ (instrs
+ RLDIMI_rec,
+ RLWINM8_rec, RLWINM_rec,
+ RLWNM8_rec, RLWNM_rec
+)>;
+
+// 4 Cycles ALU2 operations, 5 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
+ (instrs
+ RLWIMI8_rec, RLWIMI_rec
+)>;
+
+// Single crack instructions
+// 4 Cycles ALU2 operations, 2 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
+ (instrs
+ SRAD_rec,
+ SRADI_rec,
+ SRAW_rec,
+ SRAWI_rec
+)>;
+
+// Single crack instructions
+// 4 Cycles ALU2 operations, 3 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
+ (instrs
+ TABORTDC,
+ TABORTDCI,
+ TABORTWC,
+ TABORTWCI
+)>;
+
+// 2-way crack instructions
+// 4 Cycles ALU2 operations, and 4 Cycles Permute operations, 2 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
+ (instrs
+ VRLQ,
+ VRLQNM,
+ VSLQ,
+ VSRAQ,
+ VSRQ
+)>;
+
+// 2-way crack instructions
+// 4 Cycles ALU2 operations, and 4 Cycles Permute operations, 3 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
+ (instrs
+ VRLQMI
+)>;
+
+// 2-way crack instructions
+// 4 Cycles ALU2 operations, and 4 Cycles ALU2 operations, 0 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_PAIR, P10W_F2_4C],
+ (instrs
+ MFCR, MFCR8
+)>;
+
+// 2 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_2C, P10W_DISP_ANY, P10FX_Read],
+ (instrs
+ MTCTR, MTCTR8, MTCTR8loop, MTCTRloop,
+ MTLR, MTLR8
+)>;
+
+// 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ CR6SET, CREQV, CRSET,
+ DSS, DSSALL,
+ MCRXRX,
+ MFCTR, MFCTR8,
+ MFLR, MFLR8,
+ NOP, NOP_GT_PWR6, NOP_GT_PWR7, ORI, ORI8,
+ VXOR, V_SET0, V_SET0B, V_SET0H,
+ XXLEQV, XXLEQVOnes,
+ XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz
+)>;
+
+// 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
+ (instrs
+ ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8,
+ ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8,
+ ADDME, ADDME8,
+ ADDME8O, ADDMEO,
+ ADDZE, ADDZE8,
+ ADDZE8O, ADDZEO,
+ EXTSB, EXTSB8, EXTSB8_32_64,
+ EXTSB8_rec, EXTSB_rec,
+ EXTSH, EXTSH8, EXTSH8_32_64,
+ EXTSH8_rec, EXTSH_rec,
+ EXTSW, EXTSW_32, EXTSW_32_64,
+ EXTSW_32_64_rec, EXTSW_rec,
+ FABSD, FABSS,
+ FMR,
+ FNABSD, FNABSS,
+ FNEGD, FNEGS,
+ MCRF,
+ MFOCRF, MFOCRF8,
+ MFVRD, MFVSRD,
+ MFVRWZ, MFVSRWZ,
+ MTOCRF, MTOCRF8,
+ MTVRD, MTVSRD,
+ MTVRWA, MTVSRWA,
+ MTVRWZ, MTVSRWZ,
+ NEG, NEG8,
+ NEG8_rec, NEG_rec,
+ NEG8O, NEGO,
+ SETB, SETB8,
+ SETBC, SETBC8,
+ SETBCR, SETBCR8,
+ SETNBC, SETNBC8,
+ SETNBCR, SETNBCR8,
+ SUBFME, SUBFME8,
+ SUBFME8O, SUBFMEO,
+ SUBFZE, SUBFZE8,
+ SUBFZE8O, SUBFZEO,
+ VEXTSB2D, VEXTSB2Ds,
+ VEXTSB2W, VEXTSB2Ws,
+ VEXTSD2Q,
+ VEXTSH2D, VEXTSH2Ds,
+ VEXTSH2W, VEXTSH2Ws,
+ VEXTSW2D, VEXTSW2Ds,
+ VNEGD,
+ VNEGW,
+ WAIT,
+ XSABSDP,
+ XSABSQP,
+ XSNABSDP,
+ XSNABSQP,
+ XSNEGDP,
+ XSNEGQP,
+ XSXEXPDP,
+ XSXEXPQP,
+ XSXSIGDP,
+ XVABSDP,
+ XVABSSP,
+ XVNABSDP,
+ XVNABSSP,
+ XVNEGDP,
+ XVNEGSP,
+ XVXEXPDP,
+ XVXEXPSP,
+ XVXSIGDP,
+ XVXSIGSP
+)>;
+
+// 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+ (instrs
+ ADD4, ADD4TLS, ADD8, ADD8TLS, ADD8TLS_,
+ ADD4_rec, ADD8_rec,
+ ADDE, ADDE8,
+ ADDE8O, ADDEO,
+ ADDIC, ADDIC8,
+ ADD4O, ADD8O,
+ AND, AND8,
+ AND8_rec, AND_rec,
+ ANDC, ANDC8,
+ ANDC8_rec, ANDC_rec,
+ ANDI8_rec, ANDI_rec,
+ ANDIS8_rec, ANDIS_rec,
+ CMPD, CMPW,
+ CMPB, CMPB8,
+ CMPDI, CMPWI,
+ CMPLD, CMPLW,
+ CMPLDI, CMPLWI,
+ CRAND,
+ CRANDC,
+ CRNAND,
+ CRNOR,
+ CROR,
+ CRORC,
+ CR6UNSET, CRUNSET, CRXOR,
+ EQV, EQV8,
+ EQV8_rec, EQV_rec,
+ EXTSWSLI, EXTSWSLI_32_64,
+ FCPSGND, FCPSGNS,
+ NAND, NAND8,
+ NAND8_rec, NAND_rec,
+ NOR, NOR8,
+ NOR8_rec, NOR_rec,
+ COPY, OR, OR8,
+ OR8_rec, OR_rec,
+ ORC, ORC8,
+ ORC8_rec, ORC_rec,
+ ORIS, ORIS8,
+ SLD,
+ SLW, SLW8,
+ SRAD,
+ SRADI, SRADI_32,
+ SRAW,
+ SRAWI,
+ SRD,
+ SRW, SRW8,
+ SUBF, SUBF8,
+ SUBF8_rec, SUBF_rec,
+ SUBFE, SUBFE8,
+ SUBFE8O, SUBFEO,
+ SUBFIC, SUBFIC8,
+ SUBF8O, SUBFO,
+ VADDUBM,
+ VADDUDM,
+ VADDUHM,
+ VADDUWM,
+ VAND,
+ VANDC,
+ VCMPEQUB,
+ VCMPEQUD,
+ VCMPEQUH,
+ VCMPEQUW,
+ VCMPGTSB,
+ VCMPGTSD,
+ VCMPGTSH,
+ VCMPGTSW,
+ VCMPGTUB,
+ VCMPGTUD,
+ VCMPGTUH,
+ VCMPGTUW,
+ VCMPNEB,
+ VCMPNEH,
+ VCMPNEW,
+ VCMPNEZB,
+ VCMPNEZH,
+ VCMPNEZW,
+ VEQV,
+ VMAXSB,
+ VMAXSD,
+ VMAXSH,
+ VMAXSW,
+ VMAXUB,
+ VMAXUD,
+ VMAXUH,
+ VMAXUW,
+ VMINSB,
+ VMINSD,
+ VMINSH,
+ VMINSW,
+ VMINUB,
+ VMINUD,
+ VMINUH,
+ VMINUW,
+ VMRGEW,
+ VMRGOW,
+ VNAND,
+ VNOR,
+ VOR,
+ VORC,
+ VRLB,
+ VRLD,
+ VRLDNM,
+ VRLH,
+ VRLW,
+ VRLWNM,
+ VSLB,
+ VSLD,
+ VSLH,
+ VSLW,
+ VSRAB,
+ VSRAD,
+ VSRAH,
+ VSRAW,
+ VSRB,
+ VSRD,
+ VSRH,
+ VSRW,
+ VSUBUBM,
+ VSUBUDM,
+ VSUBUHM,
+ VSUBUWM,
+ XOR, XOR8,
+ XOR8_rec, XOR_rec,
+ XORI, XORI8,
+ XORIS, XORIS8,
+ XSCPSGNDP,
+ XSCPSGNQP,
+ XSIEXPDP,
+ XSIEXPQP,
+ XVCPSGNDP,
+ XVCPSGNSP,
+ XVIEXPDP,
+ XVIEXPSP,
+ XXLAND,
+ XXLANDC,
+ XXLNAND,
+ XXLNOR,
+ XXLOR, XXLORf,
+ XXLORC
+)>;
+
+// 3 Cycles ALU operations, 3 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
+ (instrs
+ ADDEX, ADDEX8,
+ DST, DST64, DSTT, DSTT64,
+ DSTST, DSTST64, DSTSTT, DSTSTT64,
+ ISEL, ISEL8,
+ RLDCL,
+ RLDCR,
+ RLDIC,
+ RLDICL, RLDICL_32, RLDICL_32_64,
+ RLDICR, RLDICR_32,
+ VRLDMI,
+ VRLWMI,
+ VSEL,
+ XXSEL
+)>;
+
+// 3 Cycles ALU operations, 4 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
+ (instrs
+ RLDIMI,
+ RLWINM, RLWINM8,
+ RLWNM, RLWNM8
+)>;
+
+// 3 Cycles ALU operations, 5 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
+ (instrs
+ RLWIMI, RLWIMI8
+)>;
+
+// Single crack instructions
+// 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
+ (instrs
+ MFFS,
+ MFFS_rec,
+ MFFSL,
+ MFVSCR,
+ TRECHKPT
+)>;
+
+// Single crack instructions
+// 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read],
+ (instrs
+ ADDME8_rec, ADDME_rec,
+ ADDME8O_rec, ADDMEO_rec,
+ ADDZE8_rec, ADDZE_rec,
+ ADDZE8O_rec, ADDZEO_rec,
+ MCRFS,
+ MFFSCDRN,
+ MFFSCDRNI,
+ MFFSCRN,
+ MFFSCRNI,
+ MTFSB0,
+ MTVSCR,
+ NEG8O_rec, NEGO_rec,
+ SUBFME8_rec, SUBFME_rec,
+ SUBFME8O_rec, SUBFMEO_rec,
+ SUBFZE8_rec, SUBFZE_rec,
+ SUBFZE8O_rec, SUBFZEO_rec,
+ TABORT,
+ TBEGIN,
+ TRECLAIM,
+ TSR
+)>;
+
+// Single crack instructions
+// 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+ (instrs
+ ADDE8_rec, ADDE_rec,
+ ADDE8O_rec, ADDEO_rec,
+ ADDIC_rec,
+ ADD4O_rec, ADD8O_rec,
+ SUBFE8_rec, SUBFE_rec,
+ SUBFE8O_rec, SUBFEO_rec,
+ SUBF8O_rec, SUBFO_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ HRFID,
+ MFFSCE,
+ RFID,
+ STOP
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
+ (instrs
+ FABSD_rec, FABSS_rec,
+ FMR_rec,
+ FNABSD_rec, FNABSS_rec,
+ FNEGD_rec, FNEGS_rec,
+ MTFSB1,
+ RFEBB,
+ SC
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+ (instrs
+ ADDC, ADDC8,
+ ADDC8_rec, ADDC_rec,
+ ADDC8O, ADDCO,
+ FCPSGND_rec, FCPSGNS_rec,
+ MTFSF, MTFSFb,
+ MTFSFI, MTFSFIb,
+ SUBFC, SUBFC8,
+ SUBFC8_rec, SUBFC_rec,
+ SUBFC8O, SUBFCO
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 3 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
+ (instrs
+ MTFSFI_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 4 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
+ (instrs
+ MTFSF_rec
+)>;
+
+// 4-way crack instructions
+// 3 Cycles ALU operations, 3 Cycles ALU operations, 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+ (instrs
+ ADDC8O_rec, ADDCO_rec,
+ SUBFC8O_rec, SUBFCO_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 4 Cycles Permute operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
+ (instrs
+ VSTRIBL_rec,
+ VSTRIBR_rec,
+ VSTRIHL_rec,
+ VSTRIHR_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, P10FX_Read, P10FX_Read],
+ (instrs
+ MTCRF, MTCRF8
+)>;
+
+// 6 Cycles Load operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
+ (instrs
+ LBZ, LBZ8,
+ LD, LDtoc, LDtocBA, LDtocCPT, LDtocJTI, LDtocL, SPILLTOVSR_LD,
+ LDBRX,
+ DFLOADf32, DFLOADf64, LFD,
+ LFDX, XFLOADf32, XFLOADf64,
+ LFIWAX, LIWAX,
+ LFIWZX, LIWZX,
+ LHA, LHA8,
+ LHAX, LHAX8,
+ LHBRX, LHBRX8,
+ LHZ, LHZ8,
+ LVEBX,
+ LVEHX,
+ LVEWX,
+ LVX,
+ LVXL,
+ LWA, LWA_32,
+ LWAX, LWAX_32,
+ LWBRX, LWBRX8,
+ LWZ, LWZ8, LWZtoc, LWZtocL,
+ LXSD,
+ LXSDX,
+ LXSIBZX,
+ LXSIHZX,
+ LXSIWAX,
+ LXSIWZX,
+ LXV,
+ LXVB16X,
+ LXVD2X,
+ LXVDSX,
+ LXVH8X,
+ LXVRBX,
+ LXVRDX,
+ LXVRHX,
+ LXVRWX,
+ LXVW4X,
+ LXVWSX,
+ LXVX
+)>;
+
+// 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+ (instrs
+ DCBT,
+ DCBTST,
+ ICBT,
+ LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32,
+ LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX,
+ LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32,
+ LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32,
+ LXVL,
+ LXVLL
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 13 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DF_13C, P10W_DISP_ANY],
+ (instrs
+ HASHCHK,
+ HASHCHKP
+)>;
+
+// Single crack instructions
+// 6 Cycles Load operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY],
+ (instrs
+ SLBIA
+)>;
+
+// Single crack instructions
+// 6 Cycles Load operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read],
+ (instrs
+ DARN,
+ LBARX, LBARXL,
+ LDARX, LDARXL,
+ LHARX, LHARXL,
+ LWARX, LWARXL,
+ SLBFEE_rec,
+ SLBIE,
+ SLBMFEE,
+ SLBMFEV
+)>;
+
+// Single crack instructions
+// 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+ (instrs
+ LBZCIX,
+ LDCIX,
+ LHZCIX,
+ LWZCIX,
+ MTSPR, MTSPR8, MTSR, MTVRSAVE, MTVRSAVEv
+)>;
+
+// Expand instructions
+// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
+ (instrs
+ LMW
+)>;
+
+// Expand instructions
+// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+ (instrs
+ LSWI
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_SX_3C, P10W_DISP_ANY],
+ (instrs
+ LBZU, LBZU8,
+ LBZUX, LBZUX8,
+ LDU,
+ LDUX,
+ LFDU,
+ LFDUX,
+ LHAU, LHAU8,
+ LHAUX, LHAUX8,
+ LHZU, LHZU8,
+ LHZUX, LHZUX8,
+ LWAUX,
+ LWZU, LWZU8,
+ LWZUX, LWZUX8
+)>;
+
+// 6 Cycles Load operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
+ (instrs
+ PLBZ, PLBZ8, PLBZ8pc, PLBZpc,
+ PLD, PLDpc,
+ PLFD, PLFDpc,
+ PLFS, PLFSpc,
+ PLHA, PLHA8, PLHA8pc, PLHApc,
+ PLHZ, PLHZ8, PLHZ8pc, PLHZpc,
+ PLWA, PLWA8, PLWA8pc, PLWApc,
+ PLWZ, PLWZ8, PLWZ8pc, PLWZpc,
+ PLXSD, PLXSDpc,
+ PLXSSP, PLXSSPpc,
+ PLXV, PLXVpc,
+ PLXVP, PLXVPpc
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
+ (instrs
+ LFS,
+ LFSX,
+ LXSSP,
+ LXSSPX
+)>;
+
+// 4-way crack instructions
+// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ LFSU,
+ LFSUX
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
+ (instrs
+ TLBIEL
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read, P10LD_Read],
+ (instrs
+ SLBMTE
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_SX_3C],
+ (instrs
+ LXVP,
+ LXVPX
+)>;
+
+// Single crack instructions
+// 13 Cycles Unknown operations, 1 input operands
+def : InstRW<[P10W_MFL_13C, P10W_DISP_EVEN, P10W_DISP_ANY],
+ (instrs
+ MFSPR, MFSPR8, MFSR, MFTB8, MFVRSAVE, MFVRSAVEv
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 0 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_ANY],
+ (instrs
+ XXSETACCZ
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 2 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_ANY, P10MM_Read, P10MM_Read],
+ (instrs
+ XVBF16GER2,
+ XVF16GER2,
+ XVF32GER,
+ XVF64GER,
+ XVI16GER2,
+ XVI16GER2S,
+ XVI4GER8,
+ XVI8GER4
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 3 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_ANY, P10MM_Read, P10MM_Read, P10MM_Read],
+ (instrs
+ XVBF16GER2NN,
+ XVBF16GER2NP,
+ XVBF16GER2PN,
+ XVBF16GER2PP,
+ XVF16GER2NN,
+ XVF16GER2NP,
+ XVF16GER2PN,
+ XVF16GER2PP,
+ XVF32GERNN,
+ XVF32GERNP,
+ XVF32GERPN,
+ XVF32GERPP,
+ XVF64GERNN,
+ XVF64GERNP,
+ XVF64GERPN,
+ XVF64GERPP,
+ XVI16GER2PP,
+ XVI16GER2SPP,
+ XVI4GER8PP,
+ XVI8GER4PP,
+ XVI8GER4SPP
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 4 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+ (instrs
+ PMXVF32GER,
+ PMXVF64GER
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 5 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+ (instrs
+ PMXVBF16GER2,
+ PMXVF16GER2,
+ PMXVF32GERNN,
+ PMXVF32GERNP,
+ PMXVF32GERPN,
+ PMXVF32GERPP,
+ PMXVF64GERNN,
+ PMXVF64GERNP,
+ PMXVF64GERPN,
+ PMXVF64GERPP,
+ PMXVI16GER2,
+ PMXVI16GER2S,
+ PMXVI4GER8,
+ PMXVI8GER4
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 6 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+ (instrs
+ PMXVBF16GER2NN,
+ PMXVBF16GER2NP,
+ PMXVBF16GER2PN,
+ PMXVBF16GER2PP,
+ PMXVF16GER2NN,
+ PMXVF16GER2NP,
+ PMXVF16GER2PN,
+ PMXVF16GER2PP,
+ PMXVI16GER2PP,
+ PMXVI16GER2SPP,
+ PMXVI4GER8PP,
+ PMXVI8GER4PP,
+ PMXVI8GER4SPP
+)>;
+
+// 2-way crack instructions
+// 10 Cycles SIMD Matrix Multiply Engine operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C],
+ (instrs
+ XXMTACC
+)>;
+
+// 4-way crack instructions
+// 10 Cycles SIMD Matrix Multiply Engine operations, 3 Cycles ALU operations, 10 Cycles SIMD Matrix Multiply Engine operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C, P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C],
+ (instrs
+ XXMFACC
+)>;
+
+// 5 Cycles GPR Multiply operations, 2 input operands
+def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read],
+ (instrs
+ MULHD,
+ MULHDU,
+ MULHW,
+ MULHWU,
+ MULLD,
+ MULLDO,
+ MULLI, MULLI8,
+ MULLW,
+ MULLWO,
+ VMULHSD,
+ VMULHUD,
+ VMULLD
+)>;
+
+// 5 Cycles GPR Multiply operations, 3 input operands
+def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read, P10MU_Read],
+ (instrs
+ MADDHD,
+ MADDHDU,
+ MADDLD, MADDLD8
+)>;
+
+// 2-way crack instructions
+// 5 Cycles GPR Multiply operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_MU_5C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ MULHD_rec,
+ MULHDU_rec,
+ MULHW_rec,
+ MULHWU_rec,
+ MULLD_rec,
+ MULLDO_rec,
+ MULLW_rec,
+ MULLWO_rec
+)>;
+
+// 4 Cycles Permute operations, 0 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY],
+ (instrs
+ VSPLTISW, V_SETALLONES, V_SETALLONESB, V_SETALLONESH
+)>;
+
+// 4 Cycles Permute operations, 1 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read],
+ (instrs
+ LVSL,
+ LVSR,
+ MFVSRLD,
+ MTVSRWS,
+ VCLZLSBB,
+ VCTZLSBB,
+ VGBBD,
+ VPRTYBQ,
+ VSPLTISB,
+ VSPLTISH,
+ VSTRIBL,
+ VSTRIBR,
+ VSTRIHL,
+ VSTRIHR,
+ VUPKHPX,
+ VUPKHSB,
+ VUPKHSH,
+ VUPKHSW,
+ VUPKLPX,
+ VUPKLSB,
+ VUPKLSH,
+ VUPKLSW,
+ XVCVBF16SPN,
+ XXBRD,
+ XXBRH,
+ XXBRQ,
+ XXBRW,
+ XXSPLTIB
+)>;
+
+// 4 Cycles Permute operations, 2 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read],
+ (instrs
+ BPERMD,
+ MTVSRDD,
+ VBPERMD,
+ VBPERMQ,
+ VCLRLB,
+ VCLRRB,
+ VEXTRACTD,
+ VEXTRACTUB,
+ VEXTRACTUH,
+ VEXTRACTUW,
+ VEXTUBLX,
+ VEXTUBRX,
+ VEXTUHLX,
+ VEXTUHRX,
+ VEXTUWLX,
+ VEXTUWRX,
+ VINSERTD,
+ VINSERTW,
+ VMRGHB,
+ VMRGHH,
+ VMRGHW,
+ VMRGLB,
+ VMRGLH,
+ VMRGLW,
+ VPKPX,
+ VPKSDSS,
+ VPKSDUS,
+ VPKSHSS,
+ VPKSHUS,
+ VPKSWSS,
+ VPKSWUS,
+ VPKUDUM,
+ VPKUDUS,
+ VPKUHUM,
+ VPKUHUS,
+ VPKUWUM,
+ VPKUWUS,
+ VSL,
+ VSLO,
+ VSLV,
+ VSPLTB, VSPLTBs,
+ VSPLTH, VSPLTHs,
+ VSPLTW,
+ VSR,
+ VSRO,
+ VSRV,
+ XXEXTRACTUW,
+ XXGENPCVDM,
+ XXGENPCVHM,
+ XXGENPCVWM,
+ XXMRGHW,
+ XXMRGLW,
+ XXPERM,
+ XXPERMDI, XXPERMDIs,
+ XXPERMR,
+ XXSLDWI, XXSLDWIs,
+ XXSPLTW, XXSPLTWs
+)>;
+
+// 4 Cycles Permute operations, 3 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read, P10PM_Read],
+ (instrs
+ VEXTDDVLX,
+ VEXTDDVRX,
+ VEXTDUBVLX,
+ VEXTDUBVRX,
+ VEXTDUHVLX,
+ VEXTDUHVRX,
+ VEXTDUWVLX,
+ VEXTDUWVRX,
+ VINSBLX,
+ VINSBRX,
+ VINSBVLX,
+ VINSBVRX,
+ VINSD,
+ VINSDLX,
+ VINSDRX,
+ VINSERTB,
+ VINSERTH,
+ VINSHLX,
+ VINSHRX,
+ VINSHVLX,
+ VINSHVRX,
+ VINSW,
+ VINSWLX,
+ VINSWRX,
+ VINSWVLX,
+ VINSWVRX,
+ VPERM,
+ VPERMR,
+ VPERMXOR,
+ VSLDBI,
+ VSLDOI,
+ VSRDBI,
+ XXINSERTW
+)>;
+
+// 2-way crack instructions
+// 4 Cycles Permute operations, and 7 Cycles VMX Multiply operations, 2 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_EVEN, P10W_vMU_7C, P10W_DISP_ANY],
+ (instrs
+ VSUMSWS
+)>;
+
+// 4 Cycles Permute operations, 1 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read],
+ (instrs
+ XXSPLTIDP,
+ XXSPLTIW
+)>;
+
+// 4 Cycles Permute operations, 3 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read],
+ (instrs
+ XXBLENDVB,
+ XXBLENDVD,
+ XXBLENDVH,
+ XXBLENDVW,
+ XXSPLTI32DX
+)>;
+
+// 4 Cycles Permute operations, 4 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read, P10PM_Read],
+ (instrs
+ XXEVAL,
+ XXPERMX
+)>;
+
+// 3 Cycles Store operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read],
+ (instrs
+ DCBST,
+ DCBZ,
+ ICBI
+)>;
+
+// 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
+ (instrs
+ DCBF,
+ PSTXVP, PSTXVPpc,
+ STB, STB8,
+ STBU, STBU8,
+ STBUX, STBUX8,
+ SPILLTOVSR_ST, STD,
+ STDBRX,
+ STDU,
+ STDUX,
+ DFSTOREf32, DFSTOREf64, STFD,
+ STFDU,
+ STFDUX,
+ STFDX,
+ STFIWX, STIWX,
+ STFS,
+ STFSU,
+ STFSUX,
+ STFSX,
+ STH, STH8,
+ STHBRX,
+ STHU, STHU8,
+ STHUX, STHUX8,
+ STVEBX,
+ STVEHX,
+ STVEWX,
+ STVX,
+ STVXL,
+ STW, STW8,
+ STWBRX,
+ STWU, STWU8,
+ STWUX, STWUX8,
+ STXSD,
+ STXSDX,
+ STXSIBX, STXSIBXv,
+ STXSIHX, STXSIHXv,
+ STXSIWX,
+ STXSSP,
+ STXSSPX,
+ STXV,
+ STXVB16X,
+ STXVD2X,
+ STXVH8X,
+ STXVRBX,
+ STXVRDX,
+ STXVRHX,
+ STXVRWX,
+ STXVW4X,
+ STXVX
+)>;
+
+// 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+ (instrs
+ CP_COPY, CP_COPY8,
+ STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32,
+ SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_,
+ STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32,
+ STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32,
+ STXVL,
+ STXVLL
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 0 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
+ (instrs
+ EnforceIEIO,
+ MSGSYNC,
+ SLBSYNC,
+ TCHECK,
+ TLBSYNC
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read],
+ (instrs
+ TEND
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
+ (instrs
+ SLBIEG,
+ STBCX,
+ STDCX,
+ STHCX,
+ STWCX,
+ TLBIE
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+ (instrs
+ CP_PASTE8_rec, CP_PASTE_rec,
+ STBCIX,
+ STDCIX,
+ STHCIX,
+ STWCIX
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Store operations, and 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ ISYNC
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Store operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ SYNC
+)>;
+
+// Expand instructions
+// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 6 Cycles Load operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY],
+ (instrs
+ LDAT,
+ LWAT
+)>;
+
+// 4-way crack instructions
+// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY],
+ (instrs
+ STDAT,
+ STWAT
+)>;
+
+// Expand instructions
+// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
+ (instrs
+ STMW
+)>;
+
+// Expand instructions
+// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+ (instrs
+ STSWI
+)>;
+
+// 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read, P10ST_Read],
+ (instrs
+ PSTB, PSTB8, PSTB8pc, PSTBpc,
+ PSTD, PSTDpc,
+ PSTFD, PSTFDpc,
+ PSTFS, PSTFSpc,
+ PSTH, PSTH8, PSTH8pc, PSTHpc,
+ PSTW, PSTW8, PSTW8pc, PSTWpc,
+ PSTXSD, PSTXSDpc,
+ PSTXSSP, PSTXSSPpc,
+ PSTXV, PSTXVpc
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read, P10ST_Read],
+ (instrs
+ STXVP,
+ STXVPX
+)>;
+
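+// FIXME - Scheduling information is missing from the datasheet.
+// Temporarily set as 1 Cycle Simple Fixed-point (SFX) operations, 0 input operands
+// NOTE(review): P10W_SX is defined with Latency = 0, not 1 - confirm the intended value.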
+def : InstRW<[P10W_SX, P10W_DISP_ANY],
+ (instrs
+ ATTN,
+ CP_ABORT,
+ DCBA,
+ DCBI,
+ DCBZL,
+ DCCCI,
+ ICBLC,
+ ICBLQ,
+ ICBTLS,
+ ICCCI,
+ LA,
+ LDMX,
+ MFDCR,
+ MFPMR,
+ MFSRIN,
+ MSYNC,
+ MTDCR,
+ MTPMR,
+ MTSRIN,
+ NAP,
+ TLBIA,
+ TLBLD,
+ TLBLI,
+ TLBRE2,
+ TLBSX2,
+ TLBSX2D,
+ TLBWE2
+)>;
+
+// Single crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, 0 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
+ (instrs
+ CLRBHRB,
+ MFMSR
+)>;
+
+// Single crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read],
+ (instrs
+ MFTB
+)>;
+
+// Single crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read, P10SX_Read],
+ (instrs
+ MFBHRBE,
+ MTMSR,
+ MTMSRD
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ ADDPCIS
+)>;
+
+// 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_PAIR, P10SX_Read],
+ (instrs
+ PADDI, PADDI8, PADDI8pc, PADDIpc, PLI, PLI8
+)>;
+
+// 7 Cycles VMX Multiply operations, 2 input operands
+def : InstRW<[P10W_vMU_7C, P10W_DISP_ANY, P10vMU_Read, P10vMU_Read],
+ (instrs
+ VMULESB,
+ VMULESD,
+ VMULESH,
+ VMULESW,
+ VMULEUB,
+ VMULEUD,
+ VMULEUH,
+ VMULEUW,
+ VMULHSW,
+ VMULHUW,
+ VMULOSB,
+ VMULOSD,
+ VMULOSH,
+ VMULOSW,
+ VMULOUB,
+ VMULOUD,
+ VMULOUH,
+ VMULOUW,
+ VMULUWM,
+ VSUM2SWS,
+ VSUM4SBS,
+ VSUM4SHS,
+ VSUM4UBS
+)>;
+
+// 7 Cycles VMX Multiply operations, 3 input operands
+def : InstRW<[P10W_vMU_7C, P10W_DISP_ANY, P10vMU_Read, P10vMU_Read, P10vMU_Read],
+ (instrs
+ VMHADDSHS,
+ VMHRADDSHS,
+ VMLADDUHM,
+ VMSUMCUD,
+ VMSUMMBM,
+ VMSUMSHM,
+ VMSUMSHS,
+ VMSUMUBM,
+ VMSUMUDM,
+ VMSUMUHM,
+ VMSUMUHS
+)>;
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index e41e0133a7b9..8e983acb450b 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -590,8 +590,7 @@ def : ProcessorModel<"pwr6x", G5Model,
def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>;
def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>;
-// No scheduler model yet.
-def : ProcessorModel<"pwr10", P9Model, ProcessorFeatures.P10Features>;
+def : ProcessorModel<"pwr10", P10Model, ProcessorFeatures.P10Features>;
// No scheduler model for future CPU.
def : ProcessorModel<"future", NoSchedModel,
ProcessorFeatures.FutureFeatures>;
diff --git a/llvm/lib/Target/PowerPC/PPCSchedPredicates.td b/llvm/lib/Target/PowerPC/PPCSchedPredicates.td
new file mode 100644
index 000000000000..18f325e99a60
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCSchedPredicates.td
@@ -0,0 +1,294 @@
+//===--- PPCSchedPredicates.td - PowerPC Scheduling Preds -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//
+// This file defines scheduling predicate definitions that are used by the
+// PowerPC subtargets.
+//===----------------------------------------------------------------------===//
+// Identify instructions that write BF pipelines with 7 cycles.
+// Used as the SchedVar predicate of P10BF_Read (PPCScheduleP10.td) to select
+// P10BF_Read_2C instead of the default P10BF_Read_1C.
+// Each opcode is listed exactly once.
+def P10W_BF_7C_Pred : MCSchedPredicate<
+ CheckOpcode<[FADD,
+ FADDS,
+ FADDS_rec,
+ FADD_rec,
+ FCFID,
+ FCFIDS,
+ FCFIDS_rec,
+ FCFIDU,
+ FCFIDUS,
+ FCFIDUS_rec,
+ FCFIDU_rec,
+ FCFID_rec,
+ FCTID,
+ FCTIDU,
+ FCTIDUZ,
+ FCTIDUZ_rec,
+ FCTIDU_rec,
+ FCTIDZ,
+ FCTIDZ_rec,
+ FCTID_rec,
+ FCTIW,
+ FCTIWU,
+ FCTIWUZ,
+ FCTIWUZ_rec,
+ FCTIWU_rec,
+ FCTIWZ,
+ FCTIWZ_rec,
+ FCTIW_rec,
+ FMADD,
+ FMADDS,
+ FMADDS_rec,
+ FMADD_rec,
+ FMSUB,
+ FMSUBS,
+ FMSUBS_rec,
+ FMSUB_rec,
+ FMUL,
+ FMULS,
+ FMULS_rec,
+ FMUL_rec,
+ FNMADD,
+ FNMADDS,
+ FNMADDS_rec,
+ FNMADD_rec,
+ FNMSUB,
+ FNMSUBS,
+ FNMSUBS_rec,
+ FNMSUB_rec,
+ FRE,
+ FRES,
+ FRES_rec,
+ FRE_rec,
+ FRIMD, FRIMS,
+ FRIMD_rec, FRIMS_rec,
+ FRIND, FRINS,
+ FRIND_rec, FRINS_rec,
+ FRIPD, FRIPS,
+ FRIPD_rec, FRIPS_rec,
+ FRIZD, FRIZS,
+ FRIZD_rec, FRIZS_rec,
+ FRSP,
+ FRSP_rec,
+ FRSQRTE,
+ FRSQRTES,
+ FRSQRTES_rec,
+ FRSQRTE_rec,
+ FSELD, FSELS,
+ FSELD_rec, FSELS_rec,
+ FSUB,
+ FSUBS,
+ FSUBS_rec,
+ FSUB_rec,
+ VADDFP,
+ VCFSX, VCFSX_0,
+ VCFUX, VCFUX_0,
+ VCTSXS, VCTSXS_0,
+ VCTUXS, VCTUXS_0,
+ VEXPTEFP,
+ VLOGEFP,
+ VMADDFP,
+ VNMSUBFP,
+ VREFP,
+ VRFIM,
+ VRFIN,
+ VRFIP,
+ VRFIZ,
+ VRSQRTEFP,
+ VSUBFP,
+ XSADDDP,
+ XSADDSP,
+ XSCVDPHP,
+ XSCVDPSP,
+ XSCVDPSPN,
+ XSCVDPSXDS, XSCVDPSXDSs,
+ XSCVDPSXWS, XSCVDPSXWSs,
+ XSCVDPUXDS, XSCVDPUXDSs,
+ XSCVDPUXWS, XSCVDPUXWSs,
+ XSCVSPDP,
+ XSCVSXDDP,
+ XSCVSXDSP,
+ XSCVUXDDP,
+ XSCVUXDSP,
+ XSMADDADP,
+ XSMADDASP,
+ XSMADDMDP,
+ XSMADDMSP,
+ XSMSUBADP,
+ XSMSUBASP,
+ XSMSUBMDP,
+ XSMSUBMSP,
+ XSMULDP,
+ XSMULSP,
+ XSNMADDADP,
+ XSNMADDASP,
+ XSNMADDMDP,
+ XSNMADDMSP,
+ XSNMSUBADP,
+ XSNMSUBASP,
+ XSNMSUBMDP,
+ XSNMSUBMSP,
+ XSRDPI,
+ XSRDPIC,
+ XSRDPIM,
+ XSRDPIP,
+ XSRDPIZ,
+ XSREDP,
+ XSRESP,
+ XSRSP,
+ XSRSQRTEDP,
+ XSRSQRTESP,
+ XSSUBDP,
+ XSSUBSP,
+ XVADDDP,
+ XVADDSP,
+ XVCVDPSP,
+ XVCVDPSXDS,
+ XVCVDPSXWS,
+ XVCVDPUXDS,
+ XVCVDPUXWS,
+ XVCVSPBF16,
+ XVCVSPDP,
+ XVCVSPHP,
+ XVCVSPSXDS,
+ XVCVSPSXWS,
+ XVCVSPUXDS,
+ XVCVSPUXWS,
+ XVCVSXDDP,
+ XVCVSXDSP,
+ XVCVSXWDP,
+ XVCVSXWSP,
+ XVCVUXDDP,
+ XVCVUXDSP,
+ XVCVUXWDP,
+ XVCVUXWSP,
+ XVMADDADP,
+ XVMADDASP,
+ XVMADDMDP,
+ XVMADDMSP,
+ XVMSUBADP,
+ XVMSUBASP,
+ XVMSUBMDP,
+ XVMSUBMSP,
+ XVMULDP,
+ XVMULSP,
+ XVNMADDADP,
+ XVNMADDASP,
+ XVNMADDMDP,
+ XVNMADDMSP,
+ XVNMSUBADP,
+ XVNMSUBASP,
+ XVNMSUBMDP,
+ XVNMSUBMSP,
+ XVRDPI,
+ XVRDPIC,
+ XVRDPIM,
+ XVRDPIP,
+ XVRDPIZ,
+ XVREDP,
+ XVRESP,
+ XVRSPI,
+ XVRSPIC,
+ XVRSPIM,
+ XVRSPIP,
+ XVRSPIZ,
+ XVRSQRTEDP,
+ XVRSQRTESP,
+ XVSUBDP,
+ XVSUBSP]>
+>;
+
+// Identify instructions that write CY pipelines with 7 cycles.
+// Used as the SchedVar predicate of P10CY_Read (PPCScheduleP10.td) to select
+// P10CY_Read_3C instead of the default P10CY_Read_1C.
+def P10W_CY_7C_Pred : MCSchedPredicate<
+ CheckOpcode<[CFUGED,
+ CNTLZDM,
+ CNTTZDM,
+ PDEPD,
+ PEXTD,
+ VCFUGED,
+ VCIPHER,
+ VCIPHERLAST,
+ VCLZDM,
+ VCTZDM,
+ VGNB,
+ VNCIPHER,
+ VNCIPHERLAST,
+ VPDEPD,
+ VPEXTD,
+ VPMSUMB,
+ VPMSUMD,
+ VPMSUMH,
+ VPMSUMW,
+ VSBOX]>
+>;
+
+// Identify instructions that write MM pipelines with 10 cycles.
+// Used as the SchedVar predicate of P10MM_Read (PPCScheduleP10.td) to select
+// P10MM_Read_6C instead of the default P10MM_Read_1C.
+// Each opcode is listed exactly once.
+def P10W_MM_10C_Pred : MCSchedPredicate<
+ CheckOpcode<[PMXVBF16GER2,
+ PMXVBF16GER2NN,
+ PMXVBF16GER2NP,
+ PMXVBF16GER2PN,
+ PMXVBF16GER2PP,
+ PMXVF16GER2,
+ PMXVF16GER2NN,
+ PMXVF16GER2NP,
+ PMXVF16GER2PN,
+ PMXVF16GER2PP,
+ PMXVF32GER,
+ PMXVF32GERNN,
+ PMXVF32GERNP,
+ PMXVF32GERPN,
+ PMXVF32GERPP,
+ PMXVF64GER,
+ PMXVF64GERNN,
+ PMXVF64GERNP,
+ PMXVF64GERPN,
+ PMXVF64GERPP,
+ PMXVI16GER2,
+ PMXVI16GER2PP,
+ PMXVI16GER2S,
+ PMXVI16GER2SPP,
+ PMXVI4GER8,
+ PMXVI4GER8PP,
+ PMXVI8GER4,
+ PMXVI8GER4PP,
+ PMXVI8GER4SPP,
+ XVBF16GER2,
+ XVBF16GER2NN,
+ XVBF16GER2NP,
+ XVBF16GER2PN,
+ XVBF16GER2PP,
+ XVF16GER2,
+ XVF16GER2NN,
+ XVF16GER2NP,
+ XVF16GER2PN,
+ XVF16GER2PP,
+ XVF32GER,
+ XVF32GERNN,
+ XVF32GERNP,
+ XVF32GERPN,
+ XVF32GERPP,
+ XVF64GER,
+ XVF64GERNN,
+ XVF64GERNP,
+ XVF64GERPN,
+ XVF64GERPP,
+ XVI16GER2,
+ XVI16GER2PP,
+ XVI16GER2S,
+ XVI16GER2SPP,
+ XVI4GER8,
+ XVI4GER8PP,
+ XVI8GER4,
+ XVI8GER4PP,
+ XVI8GER4SPP,
+ XXMFACC,
+ XXMTACC,
+ XXSETACCZ]>
+>;
diff --git a/llvm/lib/Target/PowerPC/PPCSchedule.td b/llvm/lib/Target/PowerPC/PPCSchedule.td
index e378d57d325e..f65dbae16d3a 100644
--- a/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -128,7 +128,9 @@ def IIC_SprMTPMR : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
+include "PPCInstrInfo.td"
+include "PPCSchedPredicates.td"
include "PPCScheduleG3.td"
include "PPCSchedule440.td"
include "PPCScheduleG4.td"
@@ -137,6 +139,7 @@ include "PPCScheduleG5.td"
include "PPCScheduleP7.td"
include "PPCScheduleP8.td"
include "PPCScheduleP9.td"
+include "PPCScheduleP10.td"
include "PPCScheduleA2.td"
include "PPCScheduleE500.td"
include "PPCScheduleE500mc.td"
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP10.td b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
new file mode 100644
index 000000000000..bf56491f373a
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
@@ -0,0 +1,416 @@
+//===--- PPCScheduleP10.td - P10 Scheduling Definitions -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//
+// This file defines the resources required by P10 instructions.
+//===----------------------------------------------------------------------===//
+// Modeling pipeline forwarding logic.
+def P10BR_Read : SchedRead;
+def P10DF_Read : SchedRead;
+def P10DV_Read : SchedRead;
+def P10DX_Read : SchedRead;
+def P10F2_Read : SchedRead;
+def P10FX_Read : SchedRead;
+def P10LD_Read : SchedRead;
+def P10MU_Read : SchedRead;
+def P10PM_Read : SchedRead;
+def P10ST_Read : SchedRead;
+def P10SX_Read : SchedRead;
+def P10vMU_Read : SchedRead;
+
+// Machine model for Power10; the resources and write/read definitions below
+// are attached to it via `let SchedModel = P10Model`.
+def P10Model : SchedMachineModel {
+ let IssueWidth = 8;
+
+ // TODO - Needs to be updated according to the P10 UM.
+ let MicroOpBufferSize = 44;
+
+ // TODO - tune this on real HW once it arrives. For now, we will use the same
+ // value as we do on P9.
+ let LoopMicroOpBufferSize = 60;
+
+ let CompleteModel = 1;
+
+ // Do not support SPE (Signal Processing Engine) on Power 10.
+ let UnsupportedFeatures = [HasSPE, IsE500, IsBookE];
+}
+
+let SchedModel = P10Model in {
+
+ // ***************** Processor Resources *****************
+
+ // Pipeline Groups
+
+ def P10_BF : ProcResource<4>; // Four Binary Floating Point pipelines.
+ def P10_BR : ProcResource<2>; // Two Branch pipelines.
+ def P10_CY : ProcResource<4>; // Four Crypto pipelines.
+ def P10_DF : ProcResource<1>; // One Decimal Floating Point pipeline.
+ def P10_DV : ProcResource<2>; // Two Fixed-point divide (DIV) pipelines.
+ def P10_DX : ProcResource<2>; // Two 128-bit fixed-point and BCD pipelines.
+ def P10_FX : ProcResource<4>; // Four ALU pipelines.
+ def P10_LD : ProcResource<2>; // Two Load pipelines.
+ def P10_MM : ProcResource<2>; // Two 512-bit SIMD matrix multiply engine pipelines.
+ def P10_PM : ProcResource<4>; // Four 128-bit permute (PM) pipelines.
+ def P10_ST : ProcResource<2>; // Two ST-D pipelines.
+ def P10_SX : ProcResource<2>; // Two Simple Fixed-point (SFX) pipelines.
+
+ // Dispatch Groups
+
+ // Dispatch to any slots
+ def P10_ANY_SLOT : ProcResource<8>;
+
+ let Super = P10_ANY_SLOT in {
+
+ // Dispatch to even slots
+ def P10_EVEN_SLOT : ProcResource<4>;
+
+ // Dispatch to odd slots
+ def P10_ODD_SLOT : ProcResource<4>;
+ }
+
+ // Dispatch Rules
+ let NumMicroOps = 0, Latency = 1 in {
+ // Dispatch Rule '-'
+ def P10W_DISP_ANY : SchedWriteRes<[P10_ANY_SLOT]>;
+
+ // Dispatch Rule '-', even slot
+ def P10W_DISP_EVEN : SchedWriteRes<[P10_EVEN_SLOT]>;
+
+ // Dispatch Rule 'P'
+ def P10W_DISP_PAIR : SchedWriteRes<[P10_EVEN_SLOT, P10_ODD_SLOT]>;
+ }
+
+ // ***************** SchedWriteRes Definitions *****************
+
+ // A BF pipeline may take from 7 to 36 cycles to complete.
+ // Some BF operations may keep the pipeline busy for up to 10 cycles.
+ def P10W_BF_7C : SchedWriteRes<[P10_BF]> {
+ let Latency = 7;
+ }
+
+ def P10W_BF_22C : SchedWriteRes<[P10_BF]> {
+ let ResourceCycles = [ 5 ];
+ let Latency = 22;
+ }
+
+ def P10W_BF_24C : SchedWriteRes<[P10_BF]> {
+ let ResourceCycles = [ 8 ];
+ let Latency = 24;
+ }
+
+ def P10W_BF_26C : SchedWriteRes<[P10_BF]> {
+ let ResourceCycles = [ 5 ];
+ let Latency = 26;
+ }
+
+ def P10W_BF_27C : SchedWriteRes<[P10_BF]> {
+ let ResourceCycles = [ 7 ];
+ let Latency = 27;
+ }
+
+ def P10W_BF_36C : SchedWriteRes<[P10_BF]> {
+ let ResourceCycles = [ 10 ];
+ let Latency = 36;
+ }
+
+ // A BR pipeline may take 2 cycles to complete.
+ def P10W_BR_2C : SchedWriteRes<[P10_BR]> {
+ let Latency = 2;
+ }
+
+ // A CY pipeline may take 7 cycles to complete.
+ def P10W_CY_7C : SchedWriteRes<[P10_CY]> {
+ let Latency = 7;
+ }
+
+ // A DF pipeline may take from 13 to 174 cycles to complete.
+ // Some DF operations may keep the pipeline busy for up to 67 cycles.
+ def P10W_DF_13C : SchedWriteRes<[P10_DF]> {
+ let Latency = 13;
+ }
+
+ def P10W_DF_24C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 16 ];
+ let Latency = 24;
+ }
+
+ def P10W_DF_25C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 17 ];
+ let Latency = 25;
+ }
+
+ def P10W_DF_26C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 18 ];
+ let Latency = 26;
+ }
+
+ def P10W_DF_32C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 22 ];
+ let Latency = 32;
+ }
+
+ def P10W_DF_33C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 25 ];
+ let Latency = 33;
+ }
+
+ def P10W_DF_34C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 25 ];
+ let Latency = 34;
+ }
+
+ def P10W_DF_38C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 30 ];
+ let Latency = 38;
+ }
+
+ def P10W_DF_40C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 17 ];
+ let Latency = 40;
+ }
+
+ def P10W_DF_43C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 34 ];
+ let Latency = 43;
+ }
+
+ def P10W_DF_59C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 49 ];
+ let Latency = 59;
+ }
+
+ def P10W_DF_61C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 12 ];
+ let Latency = 61;
+ }
+
+ def P10W_DF_68C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 15 ];
+ let Latency = 68;
+ }
+
+ def P10W_DF_77C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 67 ];
+ let Latency = 77;
+ }
+
+ def P10W_DF_87C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 12 ];
+ let Latency = 87;
+ }
+
+ def P10W_DF_100C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 32 ];
+ let Latency = 100;
+ }
+
+ def P10W_DF_174C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 33 ];
+ let Latency = 174;
+ }
+
+ // A DV pipeline may take from 20 to 83 cycles to complete.
+ // Some DV operations may keep the pipeline busy for up to 33 cycles.
+ def P10W_DV_20C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 10 ];
+ let Latency = 20;
+ }
+
+ def P10W_DV_25C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 10 ];
+ let Latency = 25;
+ }
+
+ def P10W_DV_27C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 10 ];
+ let Latency = 27;
+ }
+
+ def P10W_DV_41C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 10 ];
+ let Latency = 41;
+ }
+
+ def P10W_DV_43C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 21 ];
+ let Latency = 43;
+ }
+
+ def P10W_DV_47C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 21 ];
+ let Latency = 47;
+ }
+
+ def P10W_DV_54C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 33 ];
+ let Latency = 54;
+ }
+
+ def P10W_DV_60C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 33 ];
+ let Latency = 60;
+ }
+
+ def P10W_DV_75C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 21 ];
+ let Latency = 75;
+ }
+
+ def P10W_DV_83C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 33 ];
+ let Latency = 83;
+ }
+
+ // A DX pipeline may take 5 cycles to complete.
+ def P10W_DX_5C : SchedWriteRes<[P10_DX]> {
+ let Latency = 5;
+ }
+
+ // A F2 pipeline may take 4 cycles to complete.
+ def P10W_F2_4C : SchedWriteRes<[P10_FX]> {
+ let Latency = 4;
+ }
+
+ // A FX pipeline may take from 2 to 3 cycles to complete.
+ def P10W_FX_2C : SchedWriteRes<[P10_FX]> {
+ let Latency = 2;
+ }
+
+ def P10W_FX_3C : SchedWriteRes<[P10_FX]> {
+ let Latency = 3;
+ }
+
+ // A LD pipeline may take 6 cycles to complete.
+ def P10W_LD_6C : SchedWriteRes<[P10_LD]> {
+ let Latency = 6;
+ }
+
+ // A MF pipeline may take 13 cycles to complete.
+ def P10W_MF_13C : SchedWriteRes<[P10_SX]> {
+ let Latency = 13;
+ }
+
+ // A MFL pipeline may take 13 cycles to complete.
+ def P10W_MFL_13C : SchedWriteRes<[P10_SX]> {
+ let Latency = 13;
+ }
+
+ // A MM pipeline may take 10 cycles to complete.
+ def P10W_MM_10C : SchedWriteRes<[P10_MM]> {
+ let Latency = 10;
+ }
+
+ // A MU pipeline may take 5 cycles to complete.
+ def P10W_MU_5C : SchedWriteRes<[P10_BF]> {
+ let Latency = 5;
+ }
+
+ // A PM pipeline may take 4 cycles to complete.
+ def P10W_PM_4C : SchedWriteRes<[P10_PM]> {
+ let Latency = 4;
+ }
+
+ // A ST pipeline may take 3 cycles to complete.
+ def P10W_ST_3C : SchedWriteRes<[P10_ST]> {
+ let Latency = 3;
+ }
+
+ // A SX pipeline may take from 0 to 3 cycles to complete.
+ def P10W_SX : SchedWriteRes<[P10_SX]> {
+ let Latency = 0;
+ }
+
+ def P10W_SX_3C : SchedWriteRes<[P10_SX]> {
+ let Latency = 3;
+ }
+
+ // A vMU pipeline may take 7 cycles to complete.
+ def P10W_vMU_7C : SchedWriteRes<[P10_BF]> {
+ let Latency = 7;
+ }
+
+ // ***************** Read Advance Definitions *****************
+
+ // Modeling pipeline forwarding logic.
+ def P10BF_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+ def P10BF_Read_2C : SchedReadAdvance<2, [P10W_BF_7C]>;
+ def P10BR_Read_1C : SchedReadAdvance<1, [P10W_FX_3C, P10W_F2_4C]>;
+ def P10CY_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_DF_13C, P10W_MM_10C]>;
+ def P10CY_Read_3C : SchedReadAdvance<3, [P10W_CY_7C]>;
+ def P10DF_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+ def P10DV_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+ def P10DX_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+ def P10F2_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+ def P10FX_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+ def P10LD_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C]>;
+ def P10MM_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C]>;
+ def P10MM_Read_6C : SchedReadAdvance<6, [P10W_MM_10C]>;
+ def P10MU_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_DF_13C]>;
+ def P10PM_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+ def P10ST_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+ def P10SX_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C, P10W_MM_10C]>;
+ def P10vMU_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+
+ // Save 1 cycles if pipeline BF reads the data from pipelines DX, MU, vMU, CY, DF, MM.
+ // Save 2 cycles if pipeline BF reads the data from pipelines BF.
+ def P10BF_Read : SchedReadVariant<[
+ SchedVar<P10W_BF_7C_Pred, [P10BF_Read_2C]>,
+ SchedVar<NoSchedPred, [P10BF_Read_1C]>
+ ]>;
+
+ // Save 1 cycles if pipeline CY reads the data from pipelines DX, MU, vMU, BF, DF, MM.
+ // Save 3 cycles if pipeline CY reads the data from pipelines CY.
+ def P10CY_Read : SchedReadVariant<[
+ SchedVar<P10W_CY_7C_Pred, [P10CY_Read_3C]>,
+ SchedVar<NoSchedPred, [P10CY_Read_1C]>
+ ]>;
+
+ // Save 1 cycles if pipeline MM reads the data from pipelines DX, MU, vMU, BF, CY, DF.
+ // Save 6 cycles if pipeline MM reads the data from pipelines MM.
+ def P10MM_Read : SchedReadVariant<[
+ SchedVar<P10W_MM_10C_Pred, [P10MM_Read_6C]>,
+ SchedVar<NoSchedPred, [P10MM_Read_1C]>
+ ]>;
+
+ // Save 1 cycles if pipeline BR reads the data from pipelines FX, F2.
+ def : SchedAlias<P10BR_Read, P10BR_Read_1C>;
+
+ // Save 1 cycles if pipeline DF reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+ def : SchedAlias<P10DF_Read, P10DF_Read_1C>;
+
+ // Save 1 cycles if pipeline DV reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+ def : SchedAlias<P10DV_Read, P10DV_Read_1C>;
+
+ // Save 1 cycles if pipeline DX reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+ def : SchedAlias<P10DX_Read, P10DX_Read_1C>;
+
+ // Save 1 cycles if pipeline F2 reads the data from pipelines ST, SX, FX, F2, PM.
+ def : SchedAlias<P10F2_Read, P10F2_Read_1C>;
+
+ // Save 1 cycles if pipeline FX reads the data from pipelines ST, SX, FX, F2, PM.
+ def : SchedAlias<P10FX_Read, P10FX_Read_1C>;
+
+ // Save 1 cycles if pipeline LD reads the data from pipelines ST, SX, FX, F2.
+ def : SchedAlias<P10LD_Read, P10LD_Read_1C>;
+
+ // Save 1 cycles if pipeline MU reads the data from pipelines DX, MU, DF.
+ def : SchedAlias<P10MU_Read, P10MU_Read_1C>;
+
+ // Save 1 cycles if pipeline PM reads the data from pipelines ST, SX, FX, F2, PM.
+ def : SchedAlias<P10PM_Read, P10PM_Read_1C>;
+
+ // Save 1 cycles if pipeline ST reads the data from pipelines ST, SX, FX, F2, PM.
+ def : SchedAlias<P10ST_Read, P10ST_Read_1C>;
+
+ // Save 1 cycles if pipeline SX reads the data from pipelines ST, SX, FX, F2, PM, MM.
+ def : SchedAlias<P10SX_Read, P10SX_Read_1C>;
+
+ // Save 1 cycles if pipeline vMU reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+ def : SchedAlias<P10vMU_Read, P10vMU_Read_1C>;
+
+ include "P10InstrResources.td"
+}
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index 571cc219ff2b..3dc069ecad8a 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -9,8 +9,6 @@
// This file defines the itinerary class data for the POWER9 processor.
//
//===----------------------------------------------------------------------===//
-include "PPCInstrInfo.td"
-
def P9Model : SchedMachineModel {
// The maximum number of instructions to be issued at the same time.
// While a value of 8 is technically correct since 8 instructions can be
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
index 9e8f8d073a1a..d49821148860 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -105,8 +105,8 @@ define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, i16* nocapture %
;
; CHECK-BE-LABEL: vec_xst_trunc_ss:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10
; CHECK-BE-NEXT: sldi r3, r5, 1
+; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10
; CHECK-BE-NEXT: stxsihx v2, r6, r3
; CHECK-BE-NEXT: blr
;
@@ -136,8 +136,8 @@ define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, i16* nocapture %
;
; CHECK-BE-LABEL: vec_xst_trunc_us:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10
; CHECK-BE-NEXT: sldi r3, r5, 1
+; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10
; CHECK-BE-NEXT: stxsihx v2, r6, r3
; CHECK-BE-NEXT: blr
;
@@ -167,8 +167,8 @@ define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, i32* nocapture %
;
; CHECK-BE-LABEL: vec_xst_trunc_si:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-BE-NEXT: sldi r3, r5, 2
+; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-BE-NEXT: stfiwx f0, r6, r3
; CHECK-BE-NEXT: blr
;
@@ -198,8 +198,8 @@ define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, i32* nocapture %
;
; CHECK-BE-LABEL: vec_xst_trunc_ui:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-BE-NEXT: sldi r3, r5, 2
+; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-BE-NEXT: stfiwx f0, r6, r3
; CHECK-BE-NEXT: blr
;
diff --git a/llvm/test/CodeGen/PowerPC/constant-pool.ll b/llvm/test/CodeGen/PowerPC/constant-pool.ll
index 697b5eebe432..4185a41b50f2 100644
--- a/llvm/test/CodeGen/PowerPC/constant-pool.ll
+++ b/llvm/test/CodeGen/PowerPC/constant-pool.ll
@@ -364,15 +364,15 @@ define ppc_fp128 @three_constants_ppcf128(ppc_fp128 %a, ppc_fp128 %c) {
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset v31, -16
-; CHECK-NEXT: xxlxor f4, f4, f4
; CHECK-NEXT: xxsplti32dx vs3, 0, 1074935889
+; CHECK-NEXT: xxlxor f4, f4, f4
; CHECK-NEXT: stxv vs63, 32(r1) # 16-byte Folded Spill
; CHECK-NEXT: xxsplti32dx vs63, 0, 1074935889
; CHECK-NEXT: xxsplti32dx vs3, 1, -343597384
; CHECK-NEXT: # kill: def $f3 killed $f3 killed $vsl3
; CHECK-NEXT: bl __gcc_qadd@notoc
-; CHECK-NEXT: xxlxor f4, f4, f4
; CHECK-NEXT: xxsplti32dx vs3, 0, 1074935889
+; CHECK-NEXT: xxlxor f4, f4, f4
; CHECK-NEXT: xxsplti32dx vs3, 1, -1719329096
; CHECK-NEXT: # kill: def $f3 killed $f3 killed $vsl3
; CHECK-NEXT: bl __gcc_qadd@notoc
diff --git a/llvm/test/CodeGen/PowerPC/int128_ldst.ll b/llvm/test/CodeGen/PowerPC/int128_ldst.ll
index f72c2c062b65..7daef40937fc 100644
--- a/llvm/test/CodeGen/PowerPC/int128_ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/int128_ldst.ll
@@ -420,8 +420,8 @@ define dso_local i128 @ld_disjoint_unalign64___int128___int128(i64 %ptr) {
; CHECK-P10-NEXT: rldicr 4, 3, 0, 23
; CHECK-P10-NEXT: pli 5, 232
; CHECK-P10-NEXT: pli 3, 3567587329
-; CHECK-P10-NEXT: pli 6, 3567587337
; CHECK-P10-NEXT: rldimi 3, 5, 32, 0
+; CHECK-P10-NEXT: pli 6, 3567587337
; CHECK-P10-NEXT: rldimi 6, 5, 32, 0
; CHECK-P10-NEXT: ldx 3, 4, 3
; CHECK-P10-NEXT: ldx 4, 4, 6
@@ -465,8 +465,8 @@ define dso_local i128 @ld_disjoint_align64___int128___int128(i64 %ptr) {
; CHECK-P10-NEXT: rldicr 4, 3, 0, 23
; CHECK-P10-NEXT: pli 3, 244140625
; CHECK-P10-NEXT: pli 5, 232
-; CHECK-P10-NEXT: pli 6, 3567587336
; CHECK-P10-NEXT: rldic 3, 3, 12, 24
+; CHECK-P10-NEXT: pli 6, 3567587336
; CHECK-P10-NEXT: rldimi 6, 5, 32, 0
; CHECK-P10-NEXT: ldx 3, 4, 3
; CHECK-P10-NEXT: ldx 4, 4, 6
@@ -584,8 +584,8 @@ define dso_local i128 @ld_cst_unalign64___int128___int128() {
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: pli 4, 232
; CHECK-P10-NEXT: pli 3, 3567587329
-; CHECK-P10-NEXT: pli 5, 3567587337
; CHECK-P10-NEXT: rldimi 3, 4, 32, 0
+; CHECK-P10-NEXT: pli 5, 3567587337
; CHECK-P10-NEXT: rldimi 5, 4, 32, 0
; CHECK-P10-NEXT: ld 3, 0(3)
; CHECK-P10-NEXT: ld 4, 0(5)
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
index ae6b146b0254..11a06034e384 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
@@ -26,10 +26,10 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-NEXT: xxlor vs0, v2, v2
; CHECK-NEXT: xxlor vs1, v3, v3
; CHECK-NEXT: stxvp vsp34, 128(r1) # 32-byte Folded Spill
-; CHECK-NEXT: ld r30, 272(r1)
-; CHECK-NEXT: stxvp vsp36, 96(r1) # 32-byte Folded Spill
; CHECK-NEXT: xxlor vs2, v4, v4
; CHECK-NEXT: xxlor vs3, v5, v5
+; CHECK-NEXT: ld r30, 272(r1)
+; CHECK-NEXT: stxvp vsp36, 96(r1) # 32-byte Folded Spill
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xvf16ger2pp acc0, v2, v4
; CHECK-NEXT: xxmfacc acc0
@@ -38,9 +38,9 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-NEXT: bl foo@notoc
; CHECK-NEXT: lxvp vsp0, 64(r1)
; CHECK-NEXT: lxvp vsp2, 32(r1)
+; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxvp vsp34, 128(r1) # 32-byte Folded Reload
; CHECK-NEXT: lxvp vsp36, 96(r1) # 32-byte Folded Reload
-; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xvf16ger2pp acc0, v2, v4
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r30)
@@ -69,10 +69,10 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-BE-NEXT: xxlor vs0, v2, v2
; CHECK-BE-NEXT: xxlor vs1, v3, v3
; CHECK-BE-NEXT: stxvp vsp34, 208(r1) # 32-byte Folded Spill
-; CHECK-BE-NEXT: ld r30, 368(r1)
; CHECK-BE-NEXT: xxlor vs2, v4, v4
; CHECK-BE-NEXT: xxlor vs3, v5, v5
; CHECK-BE-NEXT: stxvp vsp36, 176(r1) # 32-byte Folded Spill
+; CHECK-BE-NEXT: ld r30, 368(r1)
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4
; CHECK-BE-NEXT: xxmfacc acc0
@@ -82,9 +82,9 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-BE-NEXT: nop
; CHECK-BE-NEXT: lxvp vsp0, 112(r1)
; CHECK-BE-NEXT: lxvp vsp2, 144(r1)
+; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxvp vsp34, 208(r1) # 32-byte Folded Reload
; CHECK-BE-NEXT: lxvp vsp36, 176(r1) # 32-byte Folded Reload
-; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r30)
diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
index 77bf4a4eaa96..7f02c9f0215c 100644
--- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -409,29 +409,29 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
; CHECK-NEXT: xvf32gerpp acc2, vs0, vs1
; CHECK-NEXT: lxv vs0, 32(r7)
; CHECK-NEXT: lxv vs1, 48(r7)
-; CHECK-NEXT: xxmfacc acc2
; CHECK-NEXT: xvf32gerpn acc1, vs0, vs1
; CHECK-NEXT: lxv vs12, 64(r7)
; CHECK-NEXT: lxv vs13, 80(r7)
-; CHECK-NEXT: rldic r7, r4, 6, 26
; CHECK-NEXT: xxsetaccz acc0
+; CHECK-NEXT: rldic r7, r4, 6, 26
; CHECK-NEXT: addi r4, r4, 3
-; CHECK-NEXT: xxmfacc acc1
+; CHECK-NEXT: add r8, r3, r7
+; CHECK-NEXT: xxmfacc acc2
; CHECK-NEXT: xvf32gernp acc0, vs12, vs13
; CHECK-NEXT: stxvx vs11, r3, r7
-; CHECK-NEXT: add r7, r3, r7
+; CHECK-NEXT: stxv vs8, 48(r8)
+; CHECK-NEXT: xxmfacc acc1
+; CHECK-NEXT: stxv vs9, 32(r8)
+; CHECK-NEXT: stxv vs10, 16(r8)
+; CHECK-NEXT: stxv vs4, 112(r8)
+; CHECK-NEXT: stxv vs5, 96(r8)
; CHECK-NEXT: xxmfacc acc0
-; CHECK-NEXT: stxv vs8, 48(r7)
-; CHECK-NEXT: stxv vs9, 32(r7)
-; CHECK-NEXT: stxv vs10, 16(r7)
-; CHECK-NEXT: stxv vs4, 112(r7)
-; CHECK-NEXT: stxv vs5, 96(r7)
-; CHECK-NEXT: stxv vs6, 80(r7)
-; CHECK-NEXT: stxv vs7, 64(r7)
-; CHECK-NEXT: stxv vs0, 176(r7)
-; CHECK-NEXT: stxv vs1, 160(r7)
-; CHECK-NEXT: stxv vs2, 144(r7)
-; CHECK-NEXT: stxv vs3, 128(r7)
+; CHECK-NEXT: stxv vs6, 80(r8)
+; CHECK-NEXT: stxv vs7, 64(r8)
+; CHECK-NEXT: stxv vs0, 176(r8)
+; CHECK-NEXT: stxv vs1, 160(r8)
+; CHECK-NEXT: stxv vs2, 144(r8)
+; CHECK-NEXT: stxv vs3, 128(r8)
; CHECK-NEXT: bdnz .LBB9_2
; CHECK-NEXT: # %bb.3: # %for.cond.cleanup
; CHECK-NEXT: blr
@@ -458,29 +458,29 @@ define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
; CHECK-BE-NEXT: xvf32gerpp acc2, vs0, vs1
; CHECK-BE-NEXT: lxv vs0, 32(r7)
; CHECK-BE-NEXT: lxv vs1, 48(r7)
-; CHECK-BE-NEXT: xxmfacc acc2
; CHECK-BE-NEXT: xvf32gerpn acc1, vs0, vs1
; CHECK-BE-NEXT: lxv vs12, 64(r7)
; CHECK-BE-NEXT: lxv vs13, 80(r7)
-; CHECK-BE-NEXT: rldic r7, r4, 6, 26
; CHECK-BE-NEXT: xxsetaccz acc0
+; CHECK-BE-NEXT: rldic r7, r4, 6, 26
; CHECK-BE-NEXT: addi r4, r4, 3
-; CHECK-BE-NEXT: xxmfacc acc1
+; CHECK-BE-NEXT: add r8, r3, r7
+; CHECK-BE-NEXT: xxmfacc acc2
; CHECK-BE-NEXT: xvf32gernp acc0, vs12, vs13
; CHECK-BE-NEXT: stxvx vs8, r3, r7
-; CHECK-BE-NEXT: add r7, r3, r7
+; CHECK-BE-NEXT: stxv vs9, 16(r8)
+; CHECK-BE-NEXT: xxmfacc acc1
+; CHECK-BE-NEXT: stxv vs11, 48(r8)
+; CHECK-BE-NEXT: stxv vs10, 32(r8)
+; CHECK-BE-NEXT: stxv vs5, 80(r8)
+; CHECK-BE-NEXT: stxv vs4, 64(r8)
; CHECK-BE-NEXT: xxmfacc acc0
-; CHECK-BE-NEXT: stxv vs9, 16(r7)
-; CHECK-BE-NEXT: stxv vs11, 48(r7)
-; CHECK-BE-NEXT: stxv vs10, 32(r7)
-; CHECK-BE-NEXT: stxv vs5, 80(r7)
-; CHECK-BE-NEXT: stxv vs4, 64(r7)
-; CHECK-BE-NEXT: stxv vs7, 112(r7)
-; CHECK-BE-NEXT: stxv vs6, 96(r7)
-; CHECK-BE-NEXT: stxv vs1, 144(r7)
-; CHECK-BE-NEXT: stxv vs0, 128(r7)
-; CHECK-BE-NEXT: stxv vs3, 176(r7)
-; CHECK-BE-NEXT: stxv vs2, 160(r7)
+; CHECK-BE-NEXT: stxv vs7, 112(r8)
+; CHECK-BE-NEXT: stxv vs6, 96(r8)
+; CHECK-BE-NEXT: stxv vs1, 144(r8)
+; CHECK-BE-NEXT: stxv vs0, 128(r8)
+; CHECK-BE-NEXT: stxv vs3, 176(r8)
+; CHECK-BE-NEXT: stxv vs2, 160(r8)
; CHECK-BE-NEXT: bdnz .LBB9_2
; CHECK-BE-NEXT: # %bb.3: # %for.cond.cleanup
; CHECK-BE-NEXT: blr
@@ -671,8 +671,8 @@ define void @test_ldst_2(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8
; CHECK-NEXT: lxv vs0, 48(r3)
; CHECK-NEXT: lxv vs3, 0(r3)
; CHECK-NEXT: lxv vs2, 16(r3)
-; CHECK-NEXT: lxvp vsp36, 0(r4)
; CHECK-NEXT: xxmtacc acc0
+; CHECK-NEXT: lxvp vsp36, 0(r4)
; CHECK-NEXT: xvf64gernp acc0, vsp36, v2
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r7)
@@ -687,8 +687,8 @@ define void @test_ldst_2(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
; CHECK-BE-NEXT: lxv vs2, 32(r3)
-; CHECK-BE-NEXT: lxvp vsp36, 0(r4)
; CHECK-BE-NEXT: xxmtacc acc0
+; CHECK-BE-NEXT: lxvp vsp36, 0(r4)
; CHECK-BE-NEXT: xvf64gernp acc0, vsp36, v2
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r7)
@@ -715,8 +715,8 @@ define void @test_ldst_3(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %vp
; CHECK-NEXT: lxv vs0, 48(r3)
; CHECK-NEXT: lxv vs3, 0(r3)
; CHECK-NEXT: lxv vs2, 16(r3)
-; CHECK-NEXT: lxvp vsp36, 0(r5)
; CHECK-NEXT: xxmtacc acc0
+; CHECK-NEXT: lxvp vsp36, 0(r5)
; CHECK-NEXT: xvf64gernp acc0, vsp36, v2
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r9)
@@ -731,8 +731,8 @@ define void @test_ldst_3(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %vp
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
; CHECK-BE-NEXT: lxv vs2, 32(r3)
-; CHECK-BE-NEXT: lxvp vsp36, 0(r5)
; CHECK-BE-NEXT: xxmtacc acc0
+; CHECK-BE-NEXT: lxvp vsp36, 0(r5)
; CHECK-BE-NEXT: xvf64gernp acc0, vsp36, v2
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r9)
diff --git a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
index a2eeceb09977..a9c060ed6df5 100644
--- a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
@@ -13,9 +13,9 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK: # %bb.0:
; CHECK-NEXT: vmr v1, v4
; CHECK-NEXT: vmr v4, v3
-; CHECK-NEXT: ld r3, 96(r1)
; CHECK-NEXT: vmr v0, v2
; CHECK-NEXT: xxlor vs3, v5, v5
+; CHECK-NEXT: ld r3, 96(r1)
; CHECK-NEXT: xxlor vs0, v0, v0
; CHECK-NEXT: xxlor vs1, v1, v1
; CHECK-NEXT: xxlor vs2, v4, v4
@@ -37,9 +37,9 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: vmr v1, v4
; CHECK-BE-NEXT: vmr v4, v3
-; CHECK-BE-NEXT: ld r3, 112(r1)
; CHECK-BE-NEXT: vmr v0, v2
; CHECK-BE-NEXT: xxlor vs3, v5, v5
+; CHECK-BE-NEXT: ld r3, 112(r1)
; CHECK-BE-NEXT: xxlor vs0, v0, v0
; CHECK-BE-NEXT: xxlor vs1, v1, v1
; CHECK-BE-NEXT: xxlor vs2, v4, v4
@@ -73,9 +73,9 @@ define void @intrinsics2(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <
; CHECK: # %bb.0:
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
+; CHECK-NEXT: xxlor vs0, v2, v2
; CHECK-NEXT: lxv v4, 0(r5)
; CHECK-NEXT: lxv v5, 0(r6)
-; CHECK-NEXT: xxlor vs0, v2, v2
; CHECK-NEXT: xxlor vs1, v3, v3
; CHECK-NEXT: xxlor vs2, v4, v4
; CHECK-NEXT: xxlor vs3, v5, v5
@@ -97,9 +97,9 @@ define void @intrinsics2(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lxv v2, 0(r3)
; CHECK-BE-NEXT: lxv v3, 0(r4)
+; CHECK-BE-NEXT: xxlor vs0, v2, v2
; CHECK-BE-NEXT: lxv v4, 0(r5)
; CHECK-BE-NEXT: lxv v5, 0(r6)
-; CHECK-BE-NEXT: xxlor vs0, v2, v2
; CHECK-BE-NEXT: xxlor vs1, v3, v3
; CHECK-BE-NEXT: xxlor vs2, v4, v4
; CHECK-BE-NEXT: xxlor vs3, v5, v5
@@ -1406,8 +1406,8 @@ define void @test34(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-NEXT: lxv vs0, 48(r3)
; CHECK-NEXT: lxv vs3, 0(r3)
; CHECK-NEXT: lxv vs2, 16(r3)
-; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v5, 0(r4)
+; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v4, 16(r4)
; CHECK-NEXT: xvf64gerpp acc0, vsp36, v2
; CHECK-NEXT: xxmfacc acc0
@@ -1423,8 +1423,8 @@ define void @test34(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
; CHECK-BE-NEXT: lxv vs2, 32(r3)
-; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v5, 16(r4)
+; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v4, 0(r4)
; CHECK-BE-NEXT: xvf64gerpp acc0, vsp36, v2
; CHECK-BE-NEXT: xxmfacc acc0
@@ -1454,8 +1454,8 @@ define void @test35(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-NEXT: lxv vs0, 48(r3)
; CHECK-NEXT: lxv vs3, 0(r3)
; CHECK-NEXT: lxv vs2, 16(r3)
-; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v5, 0(r4)
+; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v4, 16(r4)
; CHECK-NEXT: xvf64gerpn acc0, vsp36, v2
; CHECK-NEXT: xxmfacc acc0
@@ -1471,8 +1471,8 @@ define void @test35(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
; CHECK-BE-NEXT: lxv vs2, 32(r3)
-; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v5, 16(r4)
+; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v4, 0(r4)
; CHECK-BE-NEXT: xvf64gerpn acc0, vsp36, v2
; CHECK-BE-NEXT: xxmfacc acc0
@@ -1502,8 +1502,8 @@ define void @test36(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-NEXT: lxv vs0, 48(r3)
; CHECK-NEXT: lxv vs3, 0(r3)
; CHECK-NEXT: lxv vs2, 16(r3)
-; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v5, 0(r4)
+; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v4, 16(r4)
; CHECK-NEXT: xvf64gernp acc0, vsp36, v2
; CHECK-NEXT: xxmfacc acc0
@@ -1519,8 +1519,8 @@ define void @test36(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
; CHECK-BE-NEXT: lxv vs2, 32(r3)
-; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v5, 16(r4)
+; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v4, 0(r4)
; CHECK-BE-NEXT: xvf64gernp acc0, vsp36, v2
; CHECK-BE-NEXT: xxmfacc acc0
@@ -1550,8 +1550,8 @@ define void @test37(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-NEXT: lxv vs0, 48(r3)
; CHECK-NEXT: lxv vs3, 0(r3)
; CHECK-NEXT: lxv vs2, 16(r3)
-; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v5, 0(r4)
+; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v4, 16(r4)
; CHECK-NEXT: xvf64gernn acc0, vsp36, v2
; CHECK-NEXT: xxmfacc acc0
@@ -1567,8 +1567,8 @@ define void @test37(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
; CHECK-BE-NEXT: lxv vs2, 32(r3)
-; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v5, 16(r4)
+; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v4, 0(r4)
; CHECK-BE-NEXT: xvf64gernn acc0, vsp36, v2
; CHECK-BE-NEXT: xxmfacc acc0
@@ -1634,8 +1634,8 @@ define void @test39(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-NEXT: lxv vs0, 48(r3)
; CHECK-NEXT: lxv vs3, 0(r3)
; CHECK-NEXT: lxv vs2, 16(r3)
-; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v5, 0(r4)
+; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v4, 16(r4)
; CHECK-NEXT: pmxvf64gerpp acc0, vsp36, v2, 0, 0
; CHECK-NEXT: xxmfacc acc0
@@ -1651,8 +1651,8 @@ define void @test39(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
; CHECK-BE-NEXT: lxv vs2, 32(r3)
-; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v5, 16(r4)
+; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v4, 0(r4)
; CHECK-BE-NEXT: pmxvf64gerpp acc0, vsp36, v2, 0, 0
; CHECK-BE-NEXT: xxmfacc acc0
@@ -1682,8 +1682,8 @@ define void @test40(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-NEXT: lxv vs0, 48(r3)
; CHECK-NEXT: lxv vs3, 0(r3)
; CHECK-NEXT: lxv vs2, 16(r3)
-; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v5, 0(r4)
+; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v4, 16(r4)
; CHECK-NEXT: pmxvf64gerpn acc0, vsp36, v2, 0, 0
; CHECK-NEXT: xxmfacc acc0
@@ -1699,8 +1699,8 @@ define void @test40(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
; CHECK-BE-NEXT: lxv vs2, 32(r3)
-; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v5, 16(r4)
+; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v4, 0(r4)
; CHECK-BE-NEXT: pmxvf64gerpn acc0, vsp36, v2, 0, 0
; CHECK-BE-NEXT: xxmfacc acc0
@@ -1730,8 +1730,8 @@ define void @test41(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-NEXT: lxv vs0, 48(r3)
; CHECK-NEXT: lxv vs3, 0(r3)
; CHECK-NEXT: lxv vs2, 16(r3)
-; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v5, 0(r4)
+; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v4, 16(r4)
; CHECK-NEXT: pmxvf64gernp acc0, vsp36, v2, 0, 0
; CHECK-NEXT: xxmfacc acc0
@@ -1747,8 +1747,8 @@ define void @test41(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
; CHECK-BE-NEXT: lxv vs2, 32(r3)
-; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v5, 16(r4)
+; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v4, 0(r4)
; CHECK-BE-NEXT: pmxvf64gernp acc0, vsp36, v2, 0, 0
; CHECK-BE-NEXT: xxmfacc acc0
@@ -1778,8 +1778,8 @@ define void @test42(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-NEXT: lxv vs0, 48(r3)
; CHECK-NEXT: lxv vs3, 0(r3)
; CHECK-NEXT: lxv vs2, 16(r3)
-; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v5, 0(r4)
+; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxv v4, 16(r4)
; CHECK-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0
; CHECK-NEXT: xxmfacc acc0
@@ -1795,8 +1795,8 @@ define void @test42(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
; CHECK-BE-NEXT: lxv vs0, 0(r3)
; CHECK-BE-NEXT: lxv vs3, 48(r3)
; CHECK-BE-NEXT: lxv vs2, 32(r3)
-; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v5, 16(r4)
+; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxv v4, 0(r4)
; CHECK-BE-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0
; CHECK-BE-NEXT: xxmfacc acc0
diff --git a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
index 26cf24382dc5..25fd19e1f3c7 100644
--- a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll
@@ -13,13 +13,13 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble
define void @testPHI1(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %Len) {
; CHECK-LABEL: testPHI1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: cmpwi r5, 3
; CHECK-NEXT: xxsetaccz acc0
+; CHECK-NEXT: cmpwi r5, 3
; CHECK-NEXT: blt cr0, .LBB0_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: clrldi r5, r5, 32
; CHECK-NEXT: lxv v2, 0(r4)
; CHECK-NEXT: lxv v3, 16(r4)
+; CHECK-NEXT: clrldi r5, r5, 32
; CHECK-NEXT: addi r4, r4, 32
; CHECK-NEXT: addi r5, r5, -2
; CHECK-NEXT: mtctr r5
@@ -40,13 +40,13 @@ define void @testPHI1(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %Len) {
;
; CHECK-BE-LABEL: testPHI1:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: cmpwi r5, 3
; CHECK-BE-NEXT: xxsetaccz acc0
+; CHECK-BE-NEXT: cmpwi r5, 3
; CHECK-BE-NEXT: blt cr0, .LBB0_3
; CHECK-BE-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-BE-NEXT: clrldi r5, r5, 32
; CHECK-BE-NEXT: lxv v2, 0(r4)
; CHECK-BE-NEXT: lxv v3, 16(r4)
+; CHECK-BE-NEXT: clrldi r5, r5, 32
; CHECK-BE-NEXT: addi r4, r4, 32
; CHECK-BE-NEXT: addi r5, r5, -2
; CHECK-BE-NEXT: mtctr r5
@@ -110,8 +110,8 @@ define dso_local void @testPHI2(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %L
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r4)
; CHECK-NEXT: lxv v3, 16(r4)
-; CHECK-NEXT: lxv vs4, 32(r4)
; CHECK-NEXT: cmpwi r5, 4
+; CHECK-NEXT: lxv vs4, 32(r4)
; CHECK-NEXT: xvf64ger acc0, vsp34, vs4
; CHECK-NEXT: blt cr0, .LBB1_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
@@ -138,8 +138,8 @@ define dso_local void @testPHI2(<16 x i8>* %Dst, <16 x i8>* %Src, i32 signext %L
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lxv v2, 0(r4)
; CHECK-BE-NEXT: lxv v3, 16(r4)
-; CHECK-BE-NEXT: lxv vs4, 32(r4)
; CHECK-BE-NEXT: cmpwi r5, 4
+; CHECK-BE-NEXT: lxv vs4, 32(r4)
; CHECK-BE-NEXT: xvf64ger acc0, vsp34, vs4
; CHECK-BE-NEXT: blt cr0, .LBB1_3
; CHECK-BE-NEXT: # %bb.1: # %for.body.preheader
@@ -273,8 +273,8 @@ define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %coun
; CHECK-NEXT: xvf32gernp acc0, v2, v2
; CHECK-NEXT: bdnz .LBB3_4
; CHECK-NEXT: .LBB3_5: # %for.cond.cleanup
-; CHECK-NEXT: li r3, 0
; CHECK-NEXT: xxmfacc acc0
+; CHECK-NEXT: li r3, 0
; CHECK-NEXT: stxv vs0, 48(r5)
; CHECK-NEXT: stxv vs1, 32(r5)
; CHECK-NEXT: stxv vs2, 16(r5)
@@ -305,8 +305,8 @@ define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %coun
; CHECK-BE-NEXT: xvf32gernp acc0, v2, v2
; CHECK-BE-NEXT: bdnz .LBB3_4
; CHECK-BE-NEXT: .LBB3_5: # %for.cond.cleanup
-; CHECK-BE-NEXT: li r3, 0
; CHECK-BE-NEXT: xxmfacc acc0
+; CHECK-BE-NEXT: li r3, 0
; CHECK-BE-NEXT: stxv vs1, 16(r5)
; CHECK-BE-NEXT: stxv vs0, 0(r5)
; CHECK-BE-NEXT: stxv vs3, 48(r5)
diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
index 6ab9642f92fb..0d9662dc1242 100644
--- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
@@ -10,215 +10,220 @@ target triple = "powerpc64le-unknown-linux-gnu"
define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_type_of_x]* %.x, i32* %.l, <2 x double>* %.vy01, <2 x double>* %.vy02, <2 x double>* %.vy03, <2 x double>* %.vy04, <2 x double>* %.vy05, <2 x double>* %.vy06, <2 x double>* %.vy07, <2 x double>* %.vy08, <2 x double>* %.vy09, <2 x double>* %.vy0a, <2 x double>* %.vy0b, <2 x double>* %.vy0c, <2 x double>* %.vy21, <2 x double>* %.vy22, <2 x double>* %.vy23, <2 x double>* %.vy24, <2 x double>* %.vy25, <2 x double>* %.vy26, <2 x double>* %.vy27, <2 x double>* %.vy28, <2 x double>* %.vy29, <2 x double>* %.vy2a, <2 x double>* %.vy2b, <2 x double>* %.vy2c) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: stdu 1, -576(1)
-; CHECK-NEXT: .cfi_def_cfa_offset 576
-; CHECK-NEXT: .cfi_offset r14, -160
-; CHECK-NEXT: .cfi_offset r15, -152
-; CHECK-NEXT: .cfi_offset r16, -144
-; CHECK-NEXT: .cfi_offset r17, -136
-; CHECK-NEXT: .cfi_offset r18, -128
-; CHECK-NEXT: .cfi_offset r19, -120
-; CHECK-NEXT: .cfi_offset r20, -112
-; CHECK-NEXT: .cfi_offset r21, -104
-; CHECK-NEXT: .cfi_offset r22, -96
-; CHECK-NEXT: .cfi_offset r23, -88
-; CHECK-NEXT: .cfi_offset r24, -80
-; CHECK-NEXT: .cfi_offset r25, -72
-; CHECK-NEXT: .cfi_offset r26, -64
-; CHECK-NEXT: .cfi_offset r27, -56
-; CHECK-NEXT: .cfi_offset r28, -48
-; CHECK-NEXT: .cfi_offset r29, -40
-; CHECK-NEXT: .cfi_offset r30, -32
-; CHECK-NEXT: .cfi_offset r31, -24
+; CHECK-NEXT: stdu 1, -592(1)
+; CHECK-NEXT: .cfi_def_cfa_offset 592
+; CHECK-NEXT: .cfi_offset r14, -192
+; CHECK-NEXT: .cfi_offset r15, -184
+; CHECK-NEXT: .cfi_offset r16, -176
+; CHECK-NEXT: .cfi_offset r17, -168
+; CHECK-NEXT: .cfi_offset r18, -160
+; CHECK-NEXT: .cfi_offset r19, -152
+; CHECK-NEXT: .cfi_offset r20, -144
+; CHECK-NEXT: .cfi_offset r21, -136
+; CHECK-NEXT: .cfi_offset r22, -128
+; CHECK-NEXT: .cfi_offset r23, -120
+; CHECK-NEXT: .cfi_offset r24, -112
+; CHECK-NEXT: .cfi_offset r25, -104
+; CHECK-NEXT: .cfi_offset r26, -96
+; CHECK-NEXT: .cfi_offset r27, -88
+; CHECK-NEXT: .cfi_offset r28, -80
+; CHECK-NEXT: .cfi_offset r29, -72
+; CHECK-NEXT: .cfi_offset r30, -64
+; CHECK-NEXT: .cfi_offset r31, -56
+; CHECK-NEXT: .cfi_offset f26, -48
+; CHECK-NEXT: .cfi_offset f27, -40
+; CHECK-NEXT: .cfi_offset f28, -32
+; CHECK-NEXT: .cfi_offset f29, -24
; CHECK-NEXT: .cfi_offset f30, -16
; CHECK-NEXT: .cfi_offset f31, -8
-; CHECK-NEXT: .cfi_offset v20, -352
-; CHECK-NEXT: .cfi_offset v21, -336
-; CHECK-NEXT: .cfi_offset v22, -320
-; CHECK-NEXT: .cfi_offset v23, -304
-; CHECK-NEXT: .cfi_offset v24, -288
-; CHECK-NEXT: .cfi_offset v25, -272
-; CHECK-NEXT: .cfi_offset v26, -256
-; CHECK-NEXT: .cfi_offset v27, -240
-; CHECK-NEXT: .cfi_offset v28, -224
-; CHECK-NEXT: .cfi_offset v29, -208
-; CHECK-NEXT: .cfi_offset v30, -192
-; CHECK-NEXT: .cfi_offset v31, -176
+; CHECK-NEXT: .cfi_offset v20, -384
+; CHECK-NEXT: .cfi_offset v21, -368
+; CHECK-NEXT: .cfi_offset v22, -352
+; CHECK-NEXT: .cfi_offset v23, -336
+; CHECK-NEXT: .cfi_offset v24, -320
+; CHECK-NEXT: .cfi_offset v25, -304
+; CHECK-NEXT: .cfi_offset v26, -288
+; CHECK-NEXT: .cfi_offset v27, -272
+; CHECK-NEXT: .cfi_offset v28, -256
+; CHECK-NEXT: .cfi_offset v29, -240
+; CHECK-NEXT: .cfi_offset v30, -224
+; CHECK-NEXT: .cfi_offset v31, -208
; CHECK-NEXT: lwz 4, 0(4)
-; CHECK-NEXT: std 14, 416(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 15, 424(1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv 52, 224(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 53, 240(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 54, 256(1) # 16-byte Folded Spill
-; CHECK-NEXT: std 16, 432(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 17, 440(1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv 55, 272(1) # 16-byte Folded Spill
-; CHECK-NEXT: std 18, 448(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 19, 456(1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv 56, 288(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 57, 304(1) # 16-byte Folded Spill
-; CHECK-NEXT: std 20, 464(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 21, 472(1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv 58, 320(1) # 16-byte Folded Spill
-; CHECK-NEXT: std 22, 480(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 23, 488(1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv 59, 336(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 60, 352(1) # 16-byte Folded Spill
-; CHECK-NEXT: std 24, 496(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 25, 504(1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv 61, 368(1) # 16-byte Folded Spill
-; CHECK-NEXT: std 26, 512(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 27, 520(1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv 62, 384(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 63, 400(1) # 16-byte Folded Spill
-; CHECK-NEXT: std 28, 528(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 29, 536(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill
; CHECK-NEXT: cmpwi 4, 1
-; CHECK-NEXT: std 30, 544(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 31, 552(1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd 30, 560(1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd 31, 568(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 18, 432(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 19, 440(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 21, 456(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 22, 464(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 23, 472(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 24, 480(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 25, 488(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 27, 504(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 29, 520(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 30, 528(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 31, 536(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 28, 560(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 29, 568(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 30, 576(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 31, 584(1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv 52, 208(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 53, 224(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 54, 240(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 55, 256(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 56, 272(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 57, 288(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 58, 304(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 59, 320(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 60, 336(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 61, 352(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 62, 368(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 63, 384(1) # 16-byte Folded Spill
; CHECK-NEXT: blt 0, .LBB0_7
; CHECK-NEXT: # %bb.1: # %_loop_1_do_.lr.ph
-; CHECK-NEXT: mr 23, 5
+; CHECK-NEXT: mr 22, 5
; CHECK-NEXT: lwz 5, 0(3)
; CHECK-NEXT: cmpwi 5, 1
; CHECK-NEXT: blt 0, .LBB0_7
; CHECK-NEXT: # %bb.2: # %_loop_1_do_.preheader
+; CHECK-NEXT: mr 14, 6
+; CHECK-NEXT: ld 6, 712(1)
+; CHECK-NEXT: lwa 3, 0(7)
; CHECK-NEXT: addi 5, 5, 1
-; CHECK-NEXT: li 20, 9
+; CHECK-NEXT: std 8, 40(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 9, 48(1) # 8-byte Folded Spill
+; CHECK-NEXT: mr 11, 10
+; CHECK-NEXT: cmpldi 5, 9
+; CHECK-NEXT: lxv 4, 0(8)
+; CHECK-NEXT: ld 8, 696(1)
+; CHECK-NEXT: ld 10, 736(1)
; CHECK-NEXT: ld 28, 824(1)
-; CHECK-NEXT: ld 19, 712(1)
-; CHECK-NEXT: lwa 3, 0(7)
-; CHECK-NEXT: ld 7, 784(1)
-; CHECK-NEXT: ld 12, 776(1)
-; CHECK-NEXT: ld 11, 768(1)
-; CHECK-NEXT: ld 2, 760(1)
+; CHECK-NEXT: std 6, 88(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 10, 96(1) # 8-byte Folded Spill
+; CHECK-NEXT: lxv 0, 0(6)
+; CHECK-NEXT: li 6, 9
+; CHECK-NEXT: ld 7, 688(1)
+; CHECK-NEXT: ld 27, 840(1)
; CHECK-NEXT: ld 29, 832(1)
-; CHECK-NEXT: cmpldi 5, 9
-; CHECK-NEXT: ld 27, 816(1)
-; CHECK-NEXT: ld 26, 808(1)
-; CHECK-NEXT: ld 25, 800(1)
-; CHECK-NEXT: ld 24, 792(1)
-; CHECK-NEXT: iselgt 5, 5, 20
-; CHECK-NEXT: ld 30, 752(1)
-; CHECK-NEXT: ld 22, 744(1)
-; CHECK-NEXT: ld 21, 736(1)
-; CHECK-NEXT: ld 20, 728(1)
-; CHECK-NEXT: ld 18, 704(1)
-; CHECK-NEXT: ld 17, 696(1)
-; CHECK-NEXT: ld 16, 688(1)
-; CHECK-NEXT: ld 14, 680(1)
-; CHECK-NEXT: sldi 0, 3, 2
-; CHECK-NEXT: std 5, 216(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 28, 208(1) # 8-byte Folded Spill
-; CHECK-NEXT: mr 5, 4
-; CHECK-NEXT: ld 4, 720(1)
-; CHECK-NEXT: std 19, 96(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 4, 104(1) # 8-byte Folded Spill
-; CHECK-NEXT: lxv 11, 0(4)
-; CHECK-NEXT: mr 4, 5
-; CHECK-NEXT: ld 5, 216(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 15, 672(1)
-; CHECK-NEXT: sldi 31, 3, 1
+; CHECK-NEXT: ld 26, 816(1)
+; CHECK-NEXT: ld 25, 808(1)
+; CHECK-NEXT: ld 24, 800(1)
+; CHECK-NEXT: ld 23, 792(1)
; CHECK-NEXT: std 8, 32(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 9, 40(1) # 8-byte Folded Spill
-; CHECK-NEXT: lxv 43, 0(8)
-; CHECK-NEXT: mr 8, 6
+; CHECK-NEXT: sldi 0, 3, 1
+; CHECK-NEXT: sldi 31, 3, 2
+; CHECK-NEXT: std 28, 184(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 29, 192(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 25, 168(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 26, 176(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 23, 152(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 24, 160(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 27, 200(1) # 8-byte Folded Spill
+; CHECK-NEXT: iselgt 5, 5, 6
; CHECK-NEXT: sldi 6, 3, 3
-; CHECK-NEXT: std 2, 144(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 11, 152(1) # 8-byte Folded Spill
-; CHECK-NEXT: lxv 3, 0(2)
-; CHECK-NEXT: lxv 2, 0(11)
-; CHECK-NEXT: lxv 0, 0(7)
-; CHECK-NEXT: add 6, 6, 23
-; CHECK-NEXT: lxv 7, 0(28)
-; CHECK-NEXT: add 28, 3, 31
-; CHECK-NEXT: lxv 42, 0(9)
-; CHECK-NEXT: lxv 41, 0(10)
-; CHECK-NEXT: lxv 40, 0(15)
-; CHECK-NEXT: lxv 39, 0(14)
-; CHECK-NEXT: lxv 38, 0(16)
-; CHECK-NEXT: lxv 33, 0(17)
-; CHECK-NEXT: lxv 37, 0(18)
-; CHECK-NEXT: lxv 13, 0(19)
-; CHECK-NEXT: lxv 10, 0(20)
-; CHECK-NEXT: lxv 8, 0(21)
-; CHECK-NEXT: lxv 6, 0(22)
-; CHECK-NEXT: lxv 4, 0(30)
-; CHECK-NEXT: lxv 1, 0(12)
-; CHECK-NEXT: lxv 32, 0(24)
-; CHECK-NEXT: lxv 36, 0(25)
-; CHECK-NEXT: lxv 12, 0(26)
-; CHECK-NEXT: lxv 9, 0(27)
-; CHECK-NEXT: lxv 5, 0(29)
+; CHECK-NEXT: ld 21, 784(1)
+; CHECK-NEXT: ld 20, 776(1)
+; CHECK-NEXT: ld 19, 768(1)
+; CHECK-NEXT: ld 18, 760(1)
+; CHECK-NEXT: std 18, 120(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 19, 128(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 20, 136(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 21, 144(1) # 8-byte Folded Spill
+; CHECK-NEXT: add 2, 6, 22
+; CHECK-NEXT: ld 17, 752(1)
+; CHECK-NEXT: ld 16, 744(1)
+; CHECK-NEXT: lxv 3, 0(9)
+; CHECK-NEXT: ld 6, 728(1)
; CHECK-NEXT: addi 5, 5, -2
+; CHECK-NEXT: std 7, 80(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 6, 72(1) # 8-byte Folded Spill
+; CHECK-NEXT: ld 15, 720(1)
+; CHECK-NEXT: ld 9, 704(1)
+; CHECK-NEXT: lxv 43, 0(8)
+; CHECK-NEXT: ld 8, 848(1)
+; CHECK-NEXT: std 11, 56(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 15, 64(1) # 8-byte Folded Spill
+; CHECK-NEXT: lxv 2, 0(11)
; CHECK-NEXT: sldi 11, 3, 4
-; CHECK-NEXT: std 12, 160(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 7, 168(1) # 8-byte Folded Spill
-; CHECK-NEXT: add 7, 3, 0
-; CHECK-NEXT: add 12, 11, 23
-; CHECK-NEXT: addi 11, 6, 32
-; CHECK-NEXT: addi 12, 12, 32
-; CHECK-NEXT: std 22, 128(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 30, 136(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 26, 192(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 27, 200(1) # 8-byte Folded Spill
-; CHECK-NEXT: mulli 26, 3, 48
-; CHECK-NEXT: mulli 22, 3, 6
-; CHECK-NEXT: sldi 6, 7, 3
-; CHECK-NEXT: add 30, 23, 6
-; CHECK-NEXT: std 29, 216(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 24, 176(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 25, 184(1) # 8-byte Folded Spill
-; CHECK-NEXT: li 25, 1
-; CHECK-NEXT: li 24, 0
-; CHECK-NEXT: std 10, 48(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 15, 56(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 14, 64(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 16, 72(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 17, 80(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 18, 88(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 20, 112(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 21, 120(1) # 8-byte Folded Spill
; CHECK-NEXT: rldicl 5, 5, 61, 3
+; CHECK-NEXT: lxv 1, 0(7)
+; CHECK-NEXT: add 7, 3, 31
+; CHECK-NEXT: add 12, 11, 22
+; CHECK-NEXT: addi 11, 2, 32
; CHECK-NEXT: addi 2, 5, 1
+; CHECK-NEXT: lxv 6, 0(28)
; CHECK-NEXT: sldi 5, 3, 5
-; CHECK-NEXT: add 29, 23, 5
+; CHECK-NEXT: add 28, 3, 0
+; CHECK-NEXT: lxv 42, 0(9)
+; CHECK-NEXT: lxv 41, 0(15)
+; CHECK-NEXT: lxv 40, 0(6)
+; CHECK-NEXT: lxv 39, 0(10)
+; CHECK-NEXT: lxv 38, 0(16)
+; CHECK-NEXT: sldi 30, 7, 3
+; CHECK-NEXT: addi 12, 12, 32
+; CHECK-NEXT: add 30, 22, 30
+; CHECK-NEXT: std 16, 104(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 17, 112(1) # 8-byte Folded Spill
+; CHECK-NEXT: lxv 33, 0(17)
+; CHECK-NEXT: lxv 32, 0(18)
+; CHECK-NEXT: lxv 37, 0(19)
+; CHECK-NEXT: lxv 36, 0(20)
+; CHECK-NEXT: lxv 13, 0(21)
+; CHECK-NEXT: lxv 12, 0(23)
+; CHECK-NEXT: li 23, 0
+; CHECK-NEXT: lxv 11, 0(24)
+; CHECK-NEXT: li 24, 1
+; CHECK-NEXT: lxv 9, 0(25)
+; CHECK-NEXT: mulli 25, 3, 6
+; CHECK-NEXT: lxv 8, 0(26)
+; CHECK-NEXT: mulli 26, 3, 48
+; CHECK-NEXT: lxv 5, 0(29)
+; CHECK-NEXT: add 29, 22, 5
; CHECK-NEXT: sldi 5, 28, 3
-; CHECK-NEXT: add 27, 23, 5
-; CHECK-NEXT: mr 5, 23
+; CHECK-NEXT: lxv 7, 0(27)
+; CHECK-NEXT: add 27, 22, 5
+; CHECK-NEXT: mr 5, 22
+; CHECK-NEXT: lxv 10, 0(8)
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_3: # %_loop_2_do_.lr.ph
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_4 Depth 2
-; CHECK-NEXT: maddld 6, 22, 24, 7
-; CHECK-NEXT: maddld 20, 22, 24, 0
+; CHECK-NEXT: maddld 6, 25, 23, 7
; CHECK-NEXT: mtctr 2
; CHECK-NEXT: sldi 6, 6, 3
-; CHECK-NEXT: add 21, 23, 6
-; CHECK-NEXT: sldi 6, 20, 3
-; CHECK-NEXT: add 20, 23, 6
-; CHECK-NEXT: maddld 6, 22, 24, 28
+; CHECK-NEXT: add 21, 22, 6
+; CHECK-NEXT: maddld 6, 25, 23, 31
+; CHECK-NEXT: sldi 6, 6, 3
+; CHECK-NEXT: add 20, 22, 6
+; CHECK-NEXT: maddld 6, 25, 23, 28
; CHECK-NEXT: sldi 6, 6, 3
-; CHECK-NEXT: add 19, 23, 6
-; CHECK-NEXT: maddld 6, 22, 24, 31
+; CHECK-NEXT: add 19, 22, 6
+; CHECK-NEXT: maddld 6, 25, 23, 0
; CHECK-NEXT: sldi 6, 6, 3
-; CHECK-NEXT: add 18, 23, 6
-; CHECK-NEXT: maddld 6, 22, 24, 3
+; CHECK-NEXT: add 18, 22, 6
+; CHECK-NEXT: maddld 6, 25, 23, 3
; CHECK-NEXT: sldi 6, 6, 3
-; CHECK-NEXT: add 17, 23, 6
-; CHECK-NEXT: mulld 6, 22, 24
+; CHECK-NEXT: add 17, 22, 6
+; CHECK-NEXT: mulld 6, 25, 23
; CHECK-NEXT: sldi 6, 6, 3
-; CHECK-NEXT: add 16, 23, 6
-; CHECK-NEXT: mr 6, 8
+; CHECK-NEXT: add 16, 22, 6
+; CHECK-NEXT: mr 6, 14
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_4: # %_loop_2_do_
; CHECK-NEXT: # Parent Loop BB0_3 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lxvp 34, 0(6)
; CHECK-NEXT: lxvp 44, 0(16)
+; CHECK-NEXT: xvmaddadp 4, 45, 35
; CHECK-NEXT: lxvp 46, 0(17)
+; CHECK-NEXT: xvmaddadp 3, 47, 35
; CHECK-NEXT: lxvp 48, 0(18)
; CHECK-NEXT: lxvp 50, 0(19)
; CHECK-NEXT: lxvp 62, 0(20)
@@ -228,135 +233,135 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_ty
; CHECK-NEXT: lxvp 54, 32(17)
; CHECK-NEXT: lxvp 52, 32(18)
; CHECK-NEXT: lxvp 30, 32(19)
+; CHECK-NEXT: lxvp 28, 32(20)
+; CHECK-NEXT: lxvp 26, 32(21)
+; CHECK-NEXT: xvmaddadp 2, 49, 35
+; CHECK-NEXT: xvmaddadp 1, 51, 35
+; CHECK-NEXT: xvmaddadp 43, 63, 35
+; CHECK-NEXT: xvmaddadp 42, 61, 35
+; CHECK-NEXT: xvmaddadp 0, 44, 34
+; CHECK-NEXT: xvmaddadp 41, 46, 34
+; CHECK-NEXT: xvmaddadp 40, 48, 34
+; CHECK-NEXT: xvmaddadp 39, 50, 34
+; CHECK-NEXT: xvmaddadp 38, 62, 34
+; CHECK-NEXT: xvmaddadp 33, 60, 34
+; CHECK-NEXT: xvmaddadp 32, 57, 59
+; CHECK-NEXT: xvmaddadp 37, 55, 59
+; CHECK-NEXT: xvmaddadp 36, 53, 59
+; CHECK-NEXT: xvmaddadp 13, 31, 59
+; CHECK-NEXT: xvmaddadp 12, 29, 59
+; CHECK-NEXT: xvmaddadp 11, 27, 59
+; CHECK-NEXT: xvmaddadp 9, 56, 58
+; CHECK-NEXT: xvmaddadp 8, 54, 58
+; CHECK-NEXT: xvmaddadp 6, 52, 58
+; CHECK-NEXT: xvmaddadp 5, 30, 58
+; CHECK-NEXT: xvmaddadp 7, 28, 58
+; CHECK-NEXT: xvmaddadp 10, 26, 58
; CHECK-NEXT: addi 6, 6, 64
; CHECK-NEXT: addi 16, 16, 64
; CHECK-NEXT: addi 17, 17, 64
; CHECK-NEXT: addi 18, 18, 64
; CHECK-NEXT: addi 19, 19, 64
-; CHECK-NEXT: xvmaddadp 43, 45, 35
-; CHECK-NEXT: xvmaddadp 42, 47, 35
-; CHECK-NEXT: xvmaddadp 41, 49, 35
-; CHECK-NEXT: xvmaddadp 40, 51, 35
-; CHECK-NEXT: xvmaddadp 39, 63, 35
-; CHECK-NEXT: xvmaddadp 38, 61, 35
-; CHECK-NEXT: xvmaddadp 33, 44, 34
-; CHECK-NEXT: xvmaddadp 37, 46, 34
-; CHECK-NEXT: xvmaddadp 13, 48, 34
-; CHECK-NEXT: xvmaddadp 11, 50, 34
-; CHECK-NEXT: xvmaddadp 10, 62, 34
-; CHECK-NEXT: xvmaddadp 8, 60, 34
-; CHECK-NEXT: lxvp 34, 32(20)
-; CHECK-NEXT: lxvp 44, 32(21)
; CHECK-NEXT: addi 20, 20, 64
; CHECK-NEXT: addi 21, 21, 64
-; CHECK-NEXT: xvmaddadp 6, 57, 59
-; CHECK-NEXT: xvmaddadp 4, 55, 59
-; CHECK-NEXT: xvmaddadp 3, 53, 59
-; CHECK-NEXT: xvmaddadp 2, 31, 59
-; CHECK-NEXT: xvmaddadp 32, 56, 58
-; CHECK-NEXT: xvmaddadp 36, 54, 58
-; CHECK-NEXT: xvmaddadp 12, 52, 58
-; CHECK-NEXT: xvmaddadp 9, 30, 58
-; CHECK-NEXT: xvmaddadp 1, 35, 59
-; CHECK-NEXT: xvmaddadp 0, 45, 59
-; CHECK-NEXT: xvmaddadp 7, 34, 58
-; CHECK-NEXT: xvmaddadp 5, 44, 58
; CHECK-NEXT: bdnz .LBB0_4
; CHECK-NEXT: # %bb.5: # %_loop_2_endl_
; CHECK-NEXT: #
-; CHECK-NEXT: addi 25, 25, 6
+; CHECK-NEXT: addi 24, 24, 6
; CHECK-NEXT: add 5, 5, 26
; CHECK-NEXT: add 11, 11, 26
; CHECK-NEXT: add 30, 30, 26
; CHECK-NEXT: add 12, 12, 26
; CHECK-NEXT: add 29, 29, 26
; CHECK-NEXT: add 27, 27, 26
-; CHECK-NEXT: addi 24, 24, 1
-; CHECK-NEXT: cmpld 25, 4
+; CHECK-NEXT: addi 23, 23, 1
+; CHECK-NEXT: cmpld 24, 4
; CHECK-NEXT: ble 0, .LBB0_3
; CHECK-NEXT: # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit
-; CHECK-NEXT: ld 3, 32(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 43, 0(3)
; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 42, 0(3)
+; CHECK-NEXT: stxv 4, 0(3)
; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 41, 0(3)
+; CHECK-NEXT: stxv 3, 0(3)
; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 40, 0(3)
-; CHECK-NEXT: ld 3, 64(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 39, 0(3)
-; CHECK-NEXT: ld 3, 72(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 38, 0(3)
+; CHECK-NEXT: stxv 2, 0(3)
; CHECK-NEXT: ld 3, 80(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 33, 0(3)
+; CHECK-NEXT: stxv 1, 0(3)
+; CHECK-NEXT: ld 3, 32(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 43, 0(3)
; CHECK-NEXT: ld 3, 88(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 37, 0(3)
+; CHECK-NEXT: stxv 42, 0(9)
+; CHECK-NEXT: stxv 0, 0(3)
+; CHECK-NEXT: ld 3, 64(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 41, 0(3)
+; CHECK-NEXT: ld 3, 72(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 40, 0(3)
; CHECK-NEXT: ld 3, 96(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 13, 0(3)
+; CHECK-NEXT: stxv 39, 0(3)
; CHECK-NEXT: ld 3, 104(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 11, 0(3)
+; CHECK-NEXT: stxv 38, 0(3)
; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 10, 0(3)
+; CHECK-NEXT: stxv 33, 0(3)
; CHECK-NEXT: ld 3, 120(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 8, 0(3)
+; CHECK-NEXT: stxv 32, 0(3)
; CHECK-NEXT: ld 3, 128(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 6, 0(3)
+; CHECK-NEXT: stxv 37, 0(3)
; CHECK-NEXT: ld 3, 136(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 4, 0(3)
+; CHECK-NEXT: stxv 36, 0(3)
; CHECK-NEXT: ld 3, 144(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 3, 0(3)
+; CHECK-NEXT: stxv 13, 0(3)
; CHECK-NEXT: ld 3, 152(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 2, 0(3)
+; CHECK-NEXT: stxv 12, 0(3)
; CHECK-NEXT: ld 3, 160(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 1, 0(3)
+; CHECK-NEXT: stxv 11, 0(3)
; CHECK-NEXT: ld 3, 168(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 0, 0(3)
+; CHECK-NEXT: stxv 9, 0(3)
; CHECK-NEXT: ld 3, 176(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 32, 0(3)
+; CHECK-NEXT: stxv 8, 0(3)
; CHECK-NEXT: ld 3, 184(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 36, 0(3)
+; CHECK-NEXT: stxv 6, 0(3)
; CHECK-NEXT: ld 3, 192(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 12, 0(3)
+; CHECK-NEXT: stxv 5, 0(3)
; CHECK-NEXT: ld 3, 200(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 9, 0(3)
-; CHECK-NEXT: ld 3, 208(1) # 8-byte Folded Reload
; CHECK-NEXT: stxv 7, 0(3)
-; CHECK-NEXT: ld 3, 216(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 5, 0(3)
+; CHECK-NEXT: stxv 10, 0(8)
; CHECK-NEXT: .LBB0_7: # %_return_bb
-; CHECK-NEXT: lxv 63, 400(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 62, 384(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 61, 368(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 60, 352(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 59, 336(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 58, 320(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 57, 304(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 56, 288(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 55, 272(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 54, 256(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 53, 240(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 52, 224(1) # 16-byte Folded Reload
-; CHECK-NEXT: lfd 31, 568(1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd 30, 560(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 31, 552(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 30, 544(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 29, 536(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 28, 528(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 27, 520(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 26, 512(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 25, 504(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 24, 496(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 23, 488(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 22, 480(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 21, 472(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 20, 464(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 19, 456(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 18, 448(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 17, 440(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 16, 432(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 15, 424(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 14, 416(1) # 8-byte Folded Reload
-; CHECK-NEXT: addi 1, 1, 576
+; CHECK-NEXT: lxv 63, 384(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 62, 368(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 61, 352(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 60, 336(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 59, 320(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 58, 304(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 57, 288(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 56, 272(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 55, 256(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 54, 240(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 53, 224(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 52, 208(1) # 16-byte Folded Reload
+; CHECK-NEXT: lfd 31, 584(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 30, 576(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 29, 568(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 28, 560(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 27, 552(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 26, 544(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 31, 536(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 30, 528(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 29, 520(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 28, 512(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 27, 504(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 26, 496(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 25, 488(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 24, 480(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 23, 472(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 22, 464(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 21, 456(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 20, 448(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 19, 440(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 18, 432(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 17, 424(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 16, 416(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 15, 408(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 14, 400(1) # 8-byte Folded Reload
+; CHECK-NEXT: addi 1, 1, 592
; CHECK-NEXT: blr
entry:
%_val_l_ = load i32, i32* %.l, align 4
diff --git a/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll b/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll
index ac922d867354..3616fb6036b7 100644
--- a/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll
@@ -26,34 +26,34 @@ define dso_local signext i32 @test_FI_elim([40 x i8]* noalias nocapture derefere
; CHECK-NEXT: stdu r1, -80(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 80
; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: mr r9, r6
; CHECK-NEXT: mr r6, r5
-; CHECK-NEXT: li r5, 3
-; CHECK-NEXT: li r10, -127
-; CHECK-NEXT: lxv v2, 0(r3)
-; CHECK-NEXT: stb r5, 0(0)
-; CHECK-NEXT: stb r10, 0(r3)
-; CHECK-NEXT: stb r5, 0(r3)
-; CHECK-NEXT: lbz r5, 2(r7)
-; CHECK-NEXT: li r2, 1
-; CHECK-NEXT: stb r10, 0(r3)
-; CHECK-NEXT: pstxv v2, 64(r1), 0
-; CHECK-NEXT: vaddudm v3, v2, v2
-; CHECK-NEXT: mfvsrd r11, v2
; CHECK-NEXT: li r0, 4
+; CHECK-NEXT: li r11, 3
+; CHECK-NEXT: std r0, 0(r3)
+; CHECK-NEXT: stb r11, 0(0)
+; CHECK-NEXT: li r12, -127
+; CHECK-NEXT: stb r12, 0(r3)
+; CHECK-NEXT: li r2, 1
+; CHECK-NEXT: stb r11, 0(r3)
+; CHECK-NEXT: stb r12, 0(r3)
; CHECK-NEXT: stw r2, 0(r3)
+; CHECK-NEXT: mfvsrd r5, v2
+; CHECK-NEXT: vaddudm v3, v2, v2
+; CHECK-NEXT: pstxv v2, 64(r1), 0
+; CHECK-NEXT: neg r5, r5
+; CHECK-NEXT: mfvsrd r10, v3
+; CHECK-NEXT: std r5, 0(r3)
+; CHECK-NEXT: lbz r5, 2(r7)
; CHECK-NEXT: mr r7, r9
+; CHECK-NEXT: neg r10, r10
+; CHECK-NEXT: std r2, 0(r3)
; CHECK-NEXT: std r0, 0(r3)
+; CHECK-NEXT: std r10, 0(r3)
; CHECK-NEXT: rlwinm r5, r5, 0, 27, 27
-; CHECK-NEXT: mfvsrd r12, v3
-; CHECK-NEXT: neg r11, r11
; CHECK-NEXT: stb r5, 0(0)
; CHECK-NEXT: lbz r5, 2(r8)
-; CHECK-NEXT: neg r12, r12
-; CHECK-NEXT: std r11, 0(r3)
-; CHECK-NEXT: std r2, 0(r3)
-; CHECK-NEXT: std r0, 0(r3)
-; CHECK-NEXT: std r12, 0(r3)
; CHECK-NEXT: rlwinm r5, r5, 0, 27, 27
; CHECK-NEXT: stb r5, 0(r3)
; CHECK-NEXT: li r5, 2
@@ -74,36 +74,36 @@ define dso_local signext i32 @test_FI_elim([40 x i8]* noalias nocapture derefere
; CHECK-BE-NEXT: .cfi_def_cfa_offset 176
; CHECK-BE-NEXT: .cfi_offset lr, 16
; CHECK-BE-NEXT: .cfi_offset r30, -16
+; CHECK-BE-NEXT: lxv v2, 0(r3)
; CHECK-BE-NEXT: mr r9, r6
; CHECK-BE-NEXT: mr r6, r5
-; CHECK-BE-NEXT: li r5, 3
-; CHECK-BE-NEXT: li r11, -127
-; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: li r0, 4
+; CHECK-BE-NEXT: li r11, 3
; CHECK-BE-NEXT: std r30, 160(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: pstxv v2, 144(r1), 0
-; CHECK-BE-NEXT: stb r5, 0(0)
-; CHECK-BE-NEXT: stb r11, 0(r3)
-; CHECK-BE-NEXT: stb r5, 0(r3)
-; CHECK-BE-NEXT: lbz r5, 2(r7)
-; CHECK-BE-NEXT: vaddudm v3, v2, v2
-; CHECK-BE-NEXT: mfvsrld r10, v2
+; CHECK-BE-NEXT: std r0, 0(r3)
+; CHECK-BE-NEXT: stb r11, 0(0)
+; CHECK-BE-NEXT: li r12, -127
+; CHECK-BE-NEXT: stb r12, 0(r3)
; CHECK-BE-NEXT: li r30, 1
; CHECK-BE-NEXT: stb r11, 0(r3)
-; CHECK-BE-NEXT: li r0, 4
+; CHECK-BE-NEXT: stb r12, 0(r3)
+; CHECK-BE-NEXT: mfvsrld r5, v2
+; CHECK-BE-NEXT: vaddudm v3, v2, v2
; CHECK-BE-NEXT: stw r30, 0(r3)
+; CHECK-BE-NEXT: pstxv v2, 144(r1), 0
+; CHECK-BE-NEXT: mfvsrld r10, v3
+; CHECK-BE-NEXT: neg r5, r5
+; CHECK-BE-NEXT: std r5, 0(r3)
+; CHECK-BE-NEXT: lbz r5, 2(r7)
; CHECK-BE-NEXT: mr r7, r9
+; CHECK-BE-NEXT: neg r10, r10
+; CHECK-BE-NEXT: std r30, 0(r3)
; CHECK-BE-NEXT: std r0, 0(r3)
+; CHECK-BE-NEXT: std r10, 0(r3)
; CHECK-BE-NEXT: rlwinm r5, r5, 0, 27, 27
-; CHECK-BE-NEXT: mfvsrld r12, v3
; CHECK-BE-NEXT: stb r5, 0(0)
; CHECK-BE-NEXT: lbz r5, 2(r8)
-; CHECK-BE-NEXT: neg r10, r10
-; CHECK-BE-NEXT: neg r12, r12
-; CHECK-BE-NEXT: std r10, 0(r3)
-; CHECK-BE-NEXT: std r30, 0(r3)
-; CHECK-BE-NEXT: std r0, 0(r3)
; CHECK-BE-NEXT: rlwinm r5, r5, 0, 27, 27
-; CHECK-BE-NEXT: std r12, 0(r3)
; CHECK-BE-NEXT: stb r5, 0(r3)
; CHECK-BE-NEXT: li r5, 2
; CHECK-BE-NEXT: stw r5, 0(r3)
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
index cb7bf5124816..27b7d2d47ebe 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
@@ -82,17 +82,17 @@ define dso_local double @P10_Spill_CR_EQ(%2* %arg) local_unnamed_addr #0 {
; CHECK-NEXT: .LBB0_12: # %bb40
; CHECK-NEXT: mcrf cr6, cr4
; CHECK-NEXT: crnot 4*cr4+eq, 4*cr4+eq
-; CHECK-NEXT: crand 4*cr4+lt, 4*cr7+lt, 4*cr2+un
+; CHECK-NEXT: crand 4*cr4+gt, 4*cr7+lt, 4*cr2+un
+; CHECK-NEXT: crand 4*cr4+lt, 4*cr1+lt, 4*cr5+lt
; CHECK-NEXT: # implicit-def: $x6
-; CHECK-NEXT: crand 4*cr4+gt, 4*cr1+lt, 4*cr5+lt
-; CHECK-NEXT: bc 4, 4*cr4+gt, .LBB0_14
+; CHECK-NEXT: bc 4, 4*cr4+lt, .LBB0_14
; CHECK-NEXT: # %bb.13: # %bb48
; CHECK-NEXT: ld r6, 0(r3)
; CHECK-NEXT: .LBB0_14: # %bb50
; CHECK-NEXT: cmpwi r5, -1
; CHECK-NEXT: crand 4*cr4+un, 4*cr3+lt, 4*cr4+eq
; CHECK-NEXT: # implicit-def: $r5
-; CHECK-NEXT: bc 4, 4*cr4+lt, .LBB0_16
+; CHECK-NEXT: bc 4, 4*cr4+gt, .LBB0_16
; CHECK-NEXT: # %bb.15: # %bb52
; CHECK-NEXT: lwz r5, 0(r3)
; CHECK-NEXT: .LBB0_16: # %bb54
@@ -111,18 +111,18 @@ define dso_local double @P10_Spill_CR_EQ(%2* %arg) local_unnamed_addr #0 {
; CHECK-NEXT: cmpwi cr1, r5, 1
; CHECK-NEXT: crand lt, gt, 4*cr4+eq
; CHECK-NEXT: # implicit-def: $x5
+; CHECK-NEXT: crand 4*cr4+eq, 4*cr3+eq, 4*cr4+eq
; CHECK-NEXT: setnbc r8, 4*cr5+gt
; CHECK-NEXT: crand 4*cr5+lt, 4*cr2+eq, 4*cr5+lt
-; CHECK-NEXT: crand 4*cr4+eq, 4*cr3+eq, 4*cr4+eq
-; CHECK-NEXT: crand gt, 4*cr1+lt, 4*cr4+lt
+; CHECK-NEXT: crand gt, 4*cr1+lt, 4*cr4+gt
; CHECK-NEXT: stw r8, -24(r1)
; CHECK-NEXT: setnbc r8, 4*cr5+lt
; CHECK-NEXT: cmpwi cr5, r7, 1
; CHECK-NEXT: stw r8, -28(r1)
-; CHECK-NEXT: lwz r6, 92(r6)
; CHECK-NEXT: crand eq, 4*cr5+lt, 4*cr4+un
+; CHECK-NEXT: lwz r6, 92(r6)
; CHECK-NEXT: cmpwi cr6, r6, 1
-; CHECK-NEXT: crand un, 4*cr6+lt, 4*cr4+gt
+; CHECK-NEXT: crand un, 4*cr6+lt, 4*cr4+lt
; CHECK-NEXT: bc 4, gt, .LBB0_20
; CHECK-NEXT: # %bb.19: # %bb68
; CHECK-NEXT: ld r5, 0(r3)
@@ -134,8 +134,8 @@ define dso_local double @P10_Spill_CR_EQ(%2* %arg) local_unnamed_addr #0 {
; CHECK-NEXT: rlwimi r6, r7, 12, 20, 20
; CHECK-NEXT: mtocrf 4, r6
; CHECK-NEXT: ld r6, 0(r3)
-; CHECK-NEXT: crandc 4*cr5+gt, lt, 4*cr3+eq
; CHECK-NEXT: lwz r8, -16(r1)
+; CHECK-NEXT: crandc 4*cr5+gt, lt, 4*cr3+eq
; CHECK-NEXT: # implicit-def: $cr5eq
; CHECK-NEXT: crandc 4*cr5+lt, 4*cr5+lt, 4*cr7+eq
; CHECK-NEXT: mfocrf r7, 4
@@ -156,32 +156,32 @@ define dso_local double @P10_Spill_CR_EQ(%2* %arg) local_unnamed_addr #0 {
; CHECK-NEXT: setbc r5, 4*cr5+un
; CHECK-NEXT: # implicit-def: $cr5un
; CHECK-NEXT: mfocrf r8, 4
-; CHECK-NEXT: add r5, r7, r5
; CHECK-NEXT: rlwimi r8, r9, 9, 23, 23
; CHECK-NEXT: lwz r9, -4(r1)
+; CHECK-NEXT: add r5, r7, r5
; CHECK-NEXT: mtocrf 4, r8
-; CHECK-NEXT: mtocrf 128, r9
-; CHECK-NEXT: lwz r9, -8(r1)
; CHECK-NEXT: isel r3, 0, r3, 4*cr5+lt
; CHECK-NEXT: setbc r8, 4*cr5+un
; CHECK-NEXT: isel r6, 0, r6, 4*cr5+gt
; CHECK-NEXT: isel r4, 0, r4, 4*cr5+eq
+; CHECK-NEXT: mtocrf 128, r9
+; CHECK-NEXT: lwz r9, -8(r1)
; CHECK-NEXT: add r5, r8, r5
; CHECK-NEXT: iseleq r3, 0, r3
; CHECK-NEXT: mtfprd f0, r5
+; CHECK-NEXT: xscvsxddp f0, f0
; CHECK-NEXT: mtocrf 128, r9
; CHECK-NEXT: lwz r9, -12(r1)
; CHECK-NEXT: lwz r12, 8(r1)
-; CHECK-NEXT: xscvsxddp f0, f0
; CHECK-NEXT: iseleq r6, 0, r6
-; CHECK-NEXT: mtocrf 128, r9
; CHECK-NEXT: add r3, r6, r3
+; CHECK-NEXT: xsmuldp f0, f0, f2
+; CHECK-NEXT: mtocrf 128, r9
; CHECK-NEXT: mtocrf 32, r12
; CHECK-NEXT: mtocrf 16, r12
; CHECK-NEXT: mtocrf 8, r12
; CHECK-NEXT: iseleq r4, 0, r4
; CHECK-NEXT: add r3, r4, r3
-; CHECK-NEXT: xsmuldp f0, f0, f2
; CHECK-NEXT: mtfprd f1, r3
; CHECK-NEXT: xscvsxddp f1, f1
; CHECK-NEXT: xsadddp f1, f0, f1
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
index b4166bde22ab..4c4d9c9a046d 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
@@ -17,10 +17,9 @@
define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
; CHECK-LABEL: P10_Spill_CR_GT:
-; CHECK: .localentry P10_Spill_CR_GT, 1
-; CHECK-NEXT: # %bb.0: # %bb
-; CHECK-NEXT: mflr r0
+; CHECK: # %bb.0: # %bb
; CHECK-NEXT: mfcr r12
+; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stw r12, 8(r1)
; CHECK-NEXT: stdu r1, -64(r1)
@@ -49,8 +48,8 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
; CHECK-NEXT: .LBB0_1: # %bb43
; CHECK-NEXT: #
; CHECK-NEXT: bl call_1@notoc
-; CHECK-NEXT: li r4, 0
; CHECK-NEXT: setnbc r3, 4*cr4+eq
+; CHECK-NEXT: li r4, 0
; CHECK-NEXT: stb r4, 0(r3)
; CHECK-NEXT: li r4, 0
; CHECK-NEXT: .p2align 4
@@ -210,8 +209,8 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
;
; CHECK-BE-LABEL: P10_Spill_CR_GT:
; CHECK-BE: # %bb.0: # %bb
-; CHECK-BE-NEXT: mflr r0
; CHECK-BE-NEXT: mfcr r12
+; CHECK-BE-NEXT: mflr r0
; CHECK-BE-NEXT: std r0, 16(r1)
; CHECK-BE-NEXT: stw r12, 8(r1)
; CHECK-BE-NEXT: stdu r1, -144(r1)
@@ -242,8 +241,8 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
; CHECK-BE-NEXT: #
; CHECK-BE-NEXT: bl call_1
; CHECK-BE-NEXT: nop
-; CHECK-BE-NEXT: li r4, 0
; CHECK-BE-NEXT: setnbc r3, 4*cr4+eq
+; CHECK-BE-NEXT: li r4, 0
; CHECK-BE-NEXT: stb r4, 0(r3)
; CHECK-BE-NEXT: li r4, 0
; CHECK-BE-NEXT: .p2align 4
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
index b19f1903336b..6c6b26bc24e4 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
@@ -25,10 +25,9 @@ declare void @call_4() local_unnamed_addr
define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
; CHECK-LABEL: P10_Spill_CR_LT:
-; CHECK: .localentry P10_Spill_CR_LT, 1
-; CHECK-NEXT: # %bb.0: # %bb
-; CHECK-NEXT: mflr r0
+; CHECK: # %bb.0: # %bb
; CHECK-NEXT: mfcr r12
+; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stw r12, 8(r1)
; CHECK-NEXT: stdu r1, -80(r1)
@@ -90,8 +89,8 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
;
; CHECK-BE-LABEL: P10_Spill_CR_LT:
; CHECK-BE: # %bb.0: # %bb
-; CHECK-BE-NEXT: mflr r0
; CHECK-BE-NEXT: mfcr r12
+; CHECK-BE-NEXT: mflr r0
; CHECK-BE-NEXT: std r0, 16(r1)
; CHECK-BE-NEXT: stw r12, 8(r1)
; CHECK-BE-NEXT: stdu r1, -160(r1)
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
index 91424e82e99f..64570379ea0c 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
@@ -36,8 +36,8 @@ declare i8 @call_6(%1*, i32) local_unnamed_addr
define dso_local void @P10_Spill_CR_UN(%2* %arg, %1* %arg1, i32 %arg2) local_unnamed_addr {
; CHECK-LABEL: P10_Spill_CR_UN:
; CHECK: # %bb.0: # %bb
-; CHECK-NEXT: mflr r0
; CHECK-NEXT: mfcr r12
+; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stw r12, 8(r1)
; CHECK-NEXT: stdu r1, -224(r1)
@@ -84,8 +84,8 @@ define dso_local void @P10_Spill_CR_UN(%2* %arg, %1* %arg1, i32 %arg2) local_unn
; CHECK-NEXT: # %bb.4: # %bb37
; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_14
; CHECK-NEXT: .LBB0_5: # %bb42
-; CHECK-NEXT: li r4, 0
; CHECK-NEXT: paddi r3, 0, global_1@PCREL, 1
+; CHECK-NEXT: li r4, 0
; CHECK-NEXT: cmpwi r28, 0
; CHECK-NEXT: isel r3, r3, r4, 4*cr2+gt
; CHECK-NEXT: crnot 4*cr2+lt, eq
@@ -145,19 +145,19 @@ define dso_local void @P10_Spill_CR_UN(%2* %arg, %1* %arg1, i32 %arg2) local_unn
; CHECK-NEXT: # implicit-def: $r3
; CHECK-NEXT: .LBB0_15: # %bb50
; CHECK-NEXT: li r4, 0
+; CHECK-NEXT: xxspltidp vs3, -1082130432
+; CHECK-NEXT: xxspltidp vs4, -1082130432
; CHECK-NEXT: extsh r9, r3
; CHECK-NEXT: extsw r6, r28
; CHECK-NEXT: li r5, 0
-; CHECK-NEXT: xxspltidp vs3, -1082130432
-; CHECK-NEXT: xxspltidp vs4, -1082130432
+; CHECK-NEXT: li r7, 0
; CHECK-NEXT: std r30, 104(r1)
; CHECK-NEXT: std r29, 96(r1)
-; CHECK-NEXT: li r7, 0
; CHECK-NEXT: li r8, 0
; CHECK-NEXT: li r10, 0
+; CHECK-NEXT: xxlxor f1, f1, f1
; CHECK-NEXT: std r4, 152(r1)
; CHECK-NEXT: li r4, -1
-; CHECK-NEXT: xxlxor f1, f1, f1
; CHECK-NEXT: std r4, 112(r1)
; CHECK-NEXT: li r4, 1024
; CHECK-NEXT: bl call_4@notoc
@@ -182,8 +182,8 @@ define dso_local void @P10_Spill_CR_UN(%2* %arg, %1* %arg1, i32 %arg2) local_unn
;
; CHECK-BE-LABEL: P10_Spill_CR_UN:
; CHECK-BE: # %bb.0: # %bb
-; CHECK-BE-NEXT: mflr r0
; CHECK-BE-NEXT: mfcr r12
+; CHECK-BE-NEXT: mflr r0
; CHECK-BE-NEXT: std r0, 16(r1)
; CHECK-BE-NEXT: stw r12, 8(r1)
; CHECK-BE-NEXT: stdu r1, -240(r1)
@@ -303,19 +303,19 @@ define dso_local void @P10_Spill_CR_UN(%2* %arg, %1* %arg1, i32 %arg2) local_unn
; CHECK-BE-NEXT: # implicit-def: $r3
; CHECK-BE-NEXT: .LBB0_15: # %bb50
; CHECK-BE-NEXT: li r4, 0
+; CHECK-BE-NEXT: xxspltidp vs3, -1082130432
+; CHECK-BE-NEXT: xxspltidp vs4, -1082130432
; CHECK-BE-NEXT: extsh r9, r3
; CHECK-BE-NEXT: extsw r6, r28
; CHECK-BE-NEXT: li r5, 0
-; CHECK-BE-NEXT: xxspltidp vs3, -1082130432
-; CHECK-BE-NEXT: xxspltidp vs4, -1082130432
+; CHECK-BE-NEXT: li r7, 0
; CHECK-BE-NEXT: std r30, 120(r1)
; CHECK-BE-NEXT: std r29, 112(r1)
-; CHECK-BE-NEXT: li r7, 0
; CHECK-BE-NEXT: li r8, 0
; CHECK-BE-NEXT: li r10, 0
+; CHECK-BE-NEXT: xxlxor f1, f1, f1
; CHECK-BE-NEXT: std r4, 168(r1)
; CHECK-BE-NEXT: li r4, -1
-; CHECK-BE-NEXT: xxlxor f1, f1, f1
; CHECK-BE-NEXT: std r4, 128(r1)
; CHECK-BE-NEXT: li r4, 1024
; CHECK-BE-NEXT: bl call_4
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
index ae7db80de199..37cf078f53bf 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
@@ -109,10 +109,10 @@ define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i3
; CHECK-S-NEXT: sub r29, r8, r9
; CHECK-S-NEXT: add r9, r10, r9
; CHECK-S-NEXT: sub r10, r10, r3
+; CHECK-S-NEXT: mullw r3, r4, r3
; CHECK-S-NEXT: sub r12, r4, r5
; CHECK-S-NEXT: add r0, r6, r5
; CHECK-S-NEXT: sub r2, r6, r7
-; CHECK-S-NEXT: mullw r3, r4, r3
; CHECK-S-NEXT: add r30, r8, r7
; CHECK-S-NEXT: mullw r3, r3, r11
; CHECK-S-NEXT: mullw r3, r3, r5
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
index 1eb48991db70..32b8a7486a7e 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
@@ -353,9 +353,9 @@ define dso_local signext i32 @IndirectCall3(i32 signext %a, i32 signext %b, i32
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
-; CHECK-S-NEXT: mtctr r5
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: mr r12, r5
+; CHECK-S-NEXT: mtctr r5
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: bctrl
; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1
@@ -383,8 +383,8 @@ define dso_local signext i32 @IndirectCallNoGlobal(i32 signext %a, i32 signext %
; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -48(r1)
-; CHECK-S-NEXT: mtctr r5
; CHECK-S-NEXT: mr r12, r5
+; CHECK-S-NEXT: mtctr r5
; CHECK-S-NEXT: mr r30, r4
; CHECK-S-NEXT: bctrl
; CHECK-S-NEXT: add r3, r3, r30
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll b/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
index 3a742588d23b..f1a05c105099 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
@@ -20,8 +20,8 @@
define dso_local signext i32 @jumptable(i32 signext %param) {
; CHECK-R-LABEL: jumptable:
; CHECK-R: # %bb.1: # %entry
-; CHECK-R-NEXT: rldic r4, r4
; CHECK-R-NEXT: paddi r5, 0, .LJTI0_0@PCREL, 1
+; CHECK-R-NEXT: rldic r4, r4
; CHECK-R-NEXT: lwax r4, r4, r5
; CHECK-R-NEXT: add r4, r4, r5
; CHECK-R-NEXT: mtctr r4
@@ -35,8 +35,8 @@ define dso_local signext i32 @jumptable(i32 signext %param) {
; CHECK-A-LE-NEXT: bctr
; CHECK-A-BE-LABEL: jumptable:
; CHECK-A-BE: # %bb.1: # %entry
-; CHECK-A-BE-NEXT: rldic r4, r4
; CHECK-A-BE-NEXT: paddi r5, 0, .LJTI0_0@PCREL, 1
+; CHECK-A-BE-NEXT: rldic r4, r4
; CHECK-A-BE-NEXT: lwax r4, r4, r5
; CHECK-A-BE-NEXT: mtctr r4
; CHECK-A-BE-NEXT: bctr
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
index 583e7950b6d5..1982332ffd5c 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
@@ -34,8 +34,7 @@ declare signext i32 @Function(...)
define dso_local void @TailCallLocalFuncPtr() local_unnamed_addr {
; CHECK-LABEL: TailCallLocalFuncPtr:
-; CHECK: .localentry TailCallLocalFuncPtr, 1
-; CHECK-NEXT: # %bb.0: # %entry
+; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r12, FuncLocal@PCREL(0), 1
; CHECK-NEXT: mtctr r12
; CHECK-NEXT: bctr
@@ -48,8 +47,7 @@ entry:
define dso_local void @TailCallExtrnFuncPtr() local_unnamed_addr {
; CHECK-LABEL: TailCallExtrnFuncPtr:
-; CHECK: .localentry TailCallExtrnFuncPtr, 1
-; CHECK-NEXT: # %bb.0: # %entry
+; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, Func@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel0:
; CHECK-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
@@ -65,8 +63,7 @@ entry:
define dso_local signext i32 @TailCallParamFuncPtr(i32 (...)* nocapture %passedfunc) local_unnamed_addr {
; CHECK-LABEL: TailCallParamFuncPtr:
-; CHECK: .localentry TailCallParamFuncPtr, 1
-; CHECK-NEXT: # %bb.0: # %entry
+; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mtctr r3
; CHECK-NEXT: mr r12, r3
; CHECK-NEXT: bctr
@@ -79,8 +76,7 @@ entry:
define dso_local signext i32 @NoTailIndirectCall(i32 (...)* nocapture %passedfunc, i32 signext %a) local_unnamed_addr {
; CHECK-LABEL: NoTailIndirectCall:
-; CHECK: .localentry NoTailIndirectCall, 1
-; CHECK-NEXT: # %bb.0: # %entry
+; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
@@ -88,8 +84,8 @@ define dso_local signext i32 @NoTailIndirectCall(i32 (...)* nocapture %passedfun
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
-; CHECK-NEXT: mtctr r3
; CHECK-NEXT: mr r12, r3
+; CHECK-NEXT: mtctr r3
; CHECK-NEXT: mr r30, r4
; CHECK-NEXT: bctrl
; CHECK-NEXT: add r3, r3, r30
@@ -108,8 +104,7 @@ entry:
define dso_local signext i32 @TailCallDirect() local_unnamed_addr {
; CHECK-LABEL: TailCallDirect:
-; CHECK: .localentry TailCallDirect, 1
-; CHECK-NEXT: # %bb.0: # %entry
+; CHECK: # %bb.0: # %entry
; CHECK-NEXT: b Function@notoc
; CHECK-NEXT: #TC_RETURNd8 Function@notoc 0
entry:
@@ -119,8 +114,7 @@ entry:
define dso_local signext i32 @NoTailCallDirect(i32 signext %a) local_unnamed_addr {
; CHECK-LABEL: NoTailCallDirect:
-; CHECK: .localentry NoTailCallDirect, 1
-; CHECK-NEXT: # %bb.0: # %entry
+; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
@@ -145,8 +139,7 @@ entry:
define dso_local signext i32 @TailCallDirectLocal() local_unnamed_addr {
; CHECK-LABEL: TailCallDirectLocal:
-; CHECK: .localentry TailCallDirectLocal, 1
-; CHECK-NEXT: # %bb.0: # %entry
+; CHECK: # %bb.0: # %entry
; CHECK-NEXT: b LocalFunction@notoc
; CHECK-NEXT: #TC_RETURNd8 LocalFunction@notoc 0
entry:
@@ -156,8 +149,7 @@ entry:
define dso_local signext i32 @NoTailCallDirectLocal(i32 signext %a) local_unnamed_addr {
; CHECK-LABEL: NoTailCallDirectLocal:
-; CHECK: .localentry NoTailCallDirectLocal, 1
-; CHECK-NEXT: # %bb.0: # %entry
+; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
@@ -182,8 +174,7 @@ entry:
define dso_local signext i32 @TailCallAbs() local_unnamed_addr {
; CHECK-LABEL: TailCallAbs:
-; CHECK: .localentry TailCallAbs, 1
-; CHECK-NEXT: # %bb.0: # %entry
+; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li r3, 400
; CHECK-NEXT: li r12, 400
; CHECK-NEXT: mtctr r3
@@ -196,8 +187,7 @@ entry:
define dso_local signext i32 @NoTailCallAbs(i32 signext %a) local_unnamed_addr {
; CHECK-LABEL: NoTailCallAbs:
-; CHECK: .localentry NoTailCallAbs, 1
-; CHECK-NEXT: # %bb.0: # %entry
+; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
@@ -227,8 +217,7 @@ entry:
; This function should be tail called and not inlined.
define internal fastcc signext i32 @LocalFunction() unnamed_addr #0 {
; CHECK-LABEL: LocalFunction:
-; CHECK: .localentry LocalFunction, 1
-; CHECK-NEXT: # %bb.0: # %entry
+; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: li r3, 42
; CHECK-NEXT: #NO_APP
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
index d4942d6ecd0b..49f8b43f7a82 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll
@@ -16,11 +16,11 @@ define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unn
; CHECK-NEXT: lxv v4, 0(0)
; CHECK-NEXT: xxlxor v0, v0, v0
; CHECK-NEXT: xxlxor v1, v1, v1
-; CHECK-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill
; CHECK-NEXT: xxlxor v2, v2, v2
; CHECK-NEXT: li r6, 1
; CHECK-NEXT: li r4, 16
+; CHECK-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill
; CHECK-NEXT: extswsli r3, r3, 3
; CHECK-NEXT: xvmaddadp v1, v4, v1
; CHECK-NEXT: lxvdsx v5, 0, r3
@@ -29,30 +29,30 @@ define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unn
; CHECK-NEXT: .LBB0_1: # %bb9
; CHECK-NEXT: #
; CHECK-NEXT: addi r6, r6, 2
-; CHECK-NEXT: lxv vs1, -64(r5)
-; CHECK-NEXT: lxv vs2, -16(r5)
; CHECK-NEXT: lxv vs0, 16(0)
+; CHECK-NEXT: lxv vs1, -64(r5)
+; CHECK-NEXT: xxlxor v7, v7, v7
; CHECK-NEXT: vmr v9, v0
; CHECK-NEXT: xxlxor v10, v10, v10
-; CHECK-NEXT: xxlxor v7, v7, v7
; CHECK-NEXT: mulld r6, r6, r3
-; CHECK-NEXT: xvmaddadp v9, vs1, v2
-; CHECK-NEXT: xxlxor v8, v8, v8
-; CHECK-NEXT: xvmaddadp v10, vs2, v10
; CHECK-NEXT: xvmaddadp v7, vs0, v5
; CHECK-NEXT: xvmuldp v6, vs0, v2
+; CHECK-NEXT: lxv vs0, -16(r5)
+; CHECK-NEXT: xvmaddadp v9, vs1, v2
+; CHECK-NEXT: xxlxor v8, v8, v8
; CHECK-NEXT: xvmaddadp v7, v2, v2
; CHECK-NEXT: xvmaddadp v6, v2, v2
; CHECK-NEXT: lxvdsx v14, r6, r4
-; CHECK-NEXT: xvmaddadp v8, vs1, v8
; CHECK-NEXT: li r6, 0
-; CHECK-NEXT: xvmuldp v11, vs2, v14
+; CHECK-NEXT: xvmaddadp v8, vs1, v8
+; CHECK-NEXT: xvmaddadp v10, vs0, v10
; CHECK-NEXT: xvmuldp v3, vs1, v14
+; CHECK-NEXT: xvmuldp v11, vs0, v14
; CHECK-NEXT: xvmuldp vs5, v14, v2
; CHECK-NEXT: xvmuldp v13, v4, v14
+; CHECK-NEXT: xxlor vs0, v2, v2
; CHECK-NEXT: vmr v12, v2
; CHECK-NEXT: xxlor vs14, v10, v10
-; CHECK-NEXT: xxlor vs0, v2, v2
; CHECK-NEXT: xxlor vs4, v2, v2
; CHECK-NEXT: # kill: def $vsrp2 killed $vsrp2 def $uacc1
; CHECK-NEXT: xxlor vs6, v6, v6
@@ -60,11 +60,11 @@ define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unn
; CHECK-NEXT: xxlor vs8, v12, v12
; CHECK-NEXT: xxlor vs9, v13, v13
; CHECK-NEXT: vmr v12, v1
-; CHECK-NEXT: xxlor vs15, v11, v11
-; CHECK-NEXT: vmr v10, v2
; CHECK-NEXT: xxlor vs1, v3, v3
; CHECK-NEXT: xxlor vs2, v8, v8
; CHECK-NEXT: xxlor vs3, v9, v9
+; CHECK-NEXT: xxlor vs15, v11, v11
+; CHECK-NEXT: vmr v10, v2
; CHECK-NEXT: xxlor vs10, v12, v12
; CHECK-NEXT: xxlor vs11, v13, v13
; CHECK-NEXT: xxmtacc acc1
@@ -72,8 +72,8 @@ define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unn
; CHECK-NEXT: xxlor vs13, v11, v11
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xxmtacc acc2
-; CHECK-NEXT: xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT: xxmtacc acc3
+; CHECK-NEXT: xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
@@ -117,11 +117,11 @@ define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unn
; TRACKLIVE-NEXT: lxv v4, 0(0)
; TRACKLIVE-NEXT: xxlxor v0, v0, v0
; TRACKLIVE-NEXT: xxlxor v1, v1, v1
-; TRACKLIVE-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill
-; TRACKLIVE-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill
; TRACKLIVE-NEXT: xxlxor v2, v2, v2
; TRACKLIVE-NEXT: li r6, 1
; TRACKLIVE-NEXT: li r4, 16
+; TRACKLIVE-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill
+; TRACKLIVE-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill
; TRACKLIVE-NEXT: extswsli r3, r3, 3
; TRACKLIVE-NEXT: xvmaddadp v1, v4, v1
; TRACKLIVE-NEXT: lxvdsx v5, 0, r3
@@ -131,33 +131,33 @@ define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unn
; TRACKLIVE-NEXT: #
; TRACKLIVE-NEXT: addi r6, r6, 2
; TRACKLIVE-NEXT: lxv vs0, 16(0)
-; TRACKLIVE-NEXT: xxlxor vs7, vs7, vs7
; TRACKLIVE-NEXT: lxv vs1, -64(r5)
-; TRACKLIVE-NEXT: lxv vs4, -16(r5)
-; TRACKLIVE-NEXT: xxlxor vs12, vs12, vs12
+; TRACKLIVE-NEXT: xxlxor vs7, vs7, vs7
; TRACKLIVE-NEXT: xxlor vs3, v0, v0
; TRACKLIVE-NEXT: xxlxor vs2, vs2, vs2
+; TRACKLIVE-NEXT: xxlxor vs12, vs12, vs12
; TRACKLIVE-NEXT: mulld r6, r6, r3
; TRACKLIVE-NEXT: xxlor vs10, v2, v2
+; TRACKLIVE-NEXT: xxlor vs4, v2, v2
; TRACKLIVE-NEXT: xxlor vs8, vs10, vs10
; TRACKLIVE-NEXT: xxlor vs10, v1, v1
; TRACKLIVE-NEXT: xvmaddadp vs7, vs0, v5
; TRACKLIVE-NEXT: xvmuldp vs6, vs0, v2
-; TRACKLIVE-NEXT: xvmaddadp vs12, vs4, vs12
+; TRACKLIVE-NEXT: lxv vs0, -16(r5)
; TRACKLIVE-NEXT: xvmaddadp vs3, vs1, v2
; TRACKLIVE-NEXT: xvmaddadp vs2, vs1, vs2
-; TRACKLIVE-NEXT: xxlor vs0, v2, v2
; TRACKLIVE-NEXT: lxvdsx v6, r6, r4
; TRACKLIVE-NEXT: li r6, 0
; TRACKLIVE-NEXT: xvmaddadp vs7, v2, v2
; TRACKLIVE-NEXT: xvmaddadp vs6, v2, v2
-; TRACKLIVE-NEXT: xxlor vs14, vs12, vs12
-; TRACKLIVE-NEXT: xxlor vs12, v2, v2
+; TRACKLIVE-NEXT: xvmaddadp vs12, vs0, vs12
; TRACKLIVE-NEXT: xvmuldp v3, vs1, v6
; TRACKLIVE-NEXT: xvmuldp vs11, v4, v6
-; TRACKLIVE-NEXT: xvmuldp vs13, vs4, v6
+; TRACKLIVE-NEXT: xvmuldp vs13, vs0, v6
; TRACKLIVE-NEXT: xvmuldp vs5, v6, v2
-; TRACKLIVE-NEXT: xxlor vs4, v2, v2
+; TRACKLIVE-NEXT: xxlor vs0, v2, v2
+; TRACKLIVE-NEXT: xxlor vs14, vs12, vs12
+; TRACKLIVE-NEXT: xxlor vs12, v2, v2
; TRACKLIVE-NEXT: xxlor vs1, v3, v3
; TRACKLIVE-NEXT: xxlor vs9, vs11, vs11
; TRACKLIVE-NEXT: xxlor vs15, vs13, vs13
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
index 5d108b313f8f..98791c6f2316 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
@@ -66,9 +66,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
; LE-P10-NEXT: clrldi r3, r3, 32
; LE-P10-NEXT: addi r1, r1, 64
; LE-P10-NEXT: ld r0, 16(r1)
-; LE-P10-NEXT: mtlr r0
; LE-P10-NEXT: hashchk r0, -24(r1)
; LE-P10-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: mtlr r0
; LE-P10-NEXT: blr
;
; LE-P9-LABEL: caller:
@@ -189,8 +189,8 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
; BE-P10-NEXT: clrldi r3, r3, 32
; BE-P10-NEXT: addi r1, r1, 144
; BE-P10-NEXT: ld r0, 16(r1)
-; BE-P10-NEXT: mtlr r0
; BE-P10-NEXT: hashchk r0, -24(r1)
+; BE-P10-NEXT: mtlr r0
; BE-P10-NEXT: blr
;
; BE-P9-LABEL: caller:
@@ -244,9 +244,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
; LE-P10-PRIV-NEXT: clrldi r3, r3, 32
; LE-P10-PRIV-NEXT: addi r1, r1, 64
; LE-P10-PRIV-NEXT: ld r0, 16(r1)
-; LE-P10-PRIV-NEXT: mtlr r0
; LE-P10-PRIV-NEXT: hashchkp r0, -24(r1)
; LE-P10-PRIV-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: mtlr r0
; LE-P10-PRIV-NEXT: blr
;
; LE-P9-PRIV-LABEL: caller:
@@ -302,8 +302,8 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
; BE-P10-PRIV-NEXT: clrldi r3, r3, 32
; BE-P10-PRIV-NEXT: addi r1, r1, 144
; BE-P10-PRIV-NEXT: ld r0, 16(r1)
-; BE-P10-PRIV-NEXT: mtlr r0
; BE-P10-PRIV-NEXT: hashchkp r0, -24(r1)
+; BE-P10-PRIV-NEXT: mtlr r0
; BE-P10-PRIV-NEXT: blr
;
; BE-P9-PRIV-LABEL: caller:
@@ -365,30 +365,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
; LE-P10-NEXT: lwz r4, 12(r3)
; LE-P10-NEXT: std r14, 256(r1) # 8-byte Folded Spill
; LE-P10-NEXT: std r15, 264(r1) # 8-byte Folded Spill
-; LE-P10-NEXT: stxv v20, 64(r1) # 16-byte Folded Spill
-; LE-P10-NEXT: stxv v21, 80(r1) # 16-byte Folded Spill
-; LE-P10-NEXT: stxv v22, 96(r1) # 16-byte Folded Spill
; LE-P10-NEXT: std r16, 272(r1) # 8-byte Folded Spill
; LE-P10-NEXT: std r17, 280(r1) # 8-byte Folded Spill
-; LE-P10-NEXT: stxv v23, 112(r1) # 16-byte Folded Spill
; LE-P10-NEXT: std r18, 288(r1) # 8-byte Folded Spill
; LE-P10-NEXT: std r19, 296(r1) # 8-byte Folded Spill
-; LE-P10-NEXT: stxv v24, 128(r1) # 16-byte Folded Spill
-; LE-P10-NEXT: stxv v25, 144(r1) # 16-byte Folded Spill
; LE-P10-NEXT: std r20, 304(r1) # 8-byte Folded Spill
; LE-P10-NEXT: std r21, 312(r1) # 8-byte Folded Spill
-; LE-P10-NEXT: stxv v26, 160(r1) # 16-byte Folded Spill
; LE-P10-NEXT: std r22, 320(r1) # 8-byte Folded Spill
; LE-P10-NEXT: std r23, 328(r1) # 8-byte Folded Spill
-; LE-P10-NEXT: stxv v27, 176(r1) # 16-byte Folded Spill
-; LE-P10-NEXT: stxv v28, 192(r1) # 16-byte Folded Spill
; LE-P10-NEXT: std r24, 336(r1) # 8-byte Folded Spill
; LE-P10-NEXT: std r25, 344(r1) # 8-byte Folded Spill
-; LE-P10-NEXT: stxv v29, 208(r1) # 16-byte Folded Spill
; LE-P10-NEXT: std r26, 352(r1) # 8-byte Folded Spill
; LE-P10-NEXT: std r27, 360(r1) # 8-byte Folded Spill
-; LE-P10-NEXT: stxv v30, 224(r1) # 16-byte Folded Spill
-; LE-P10-NEXT: stxv v31, 240(r1) # 16-byte Folded Spill
; LE-P10-NEXT: std r28, 368(r1) # 8-byte Folded Spill
; LE-P10-NEXT: std r29, 376(r1) # 8-byte Folded Spill
; LE-P10-NEXT: std r30, 384(r1) # 8-byte Folded Spill
@@ -411,6 +399,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
; LE-P10-NEXT: stfd f29, 520(r1) # 8-byte Folded Spill
; LE-P10-NEXT: stfd f30, 528(r1) # 8-byte Folded Spill
; LE-P10-NEXT: stfd f31, 536(r1) # 8-byte Folded Spill
+; LE-P10-NEXT: stxv v20, 64(r1) # 16-byte Folded Spill
+; LE-P10-NEXT: stxv v21, 80(r1) # 16-byte Folded Spill
+; LE-P10-NEXT: stxv v22, 96(r1) # 16-byte Folded Spill
+; LE-P10-NEXT: stxv v23, 112(r1) # 16-byte Folded Spill
+; LE-P10-NEXT: stxv v24, 128(r1) # 16-byte Folded Spill
+; LE-P10-NEXT: stxv v25, 144(r1) # 16-byte Folded Spill
+; LE-P10-NEXT: stxv v26, 160(r1) # 16-byte Folded Spill
+; LE-P10-NEXT: stxv v27, 176(r1) # 16-byte Folded Spill
+; LE-P10-NEXT: stxv v28, 192(r1) # 16-byte Folded Spill
+; LE-P10-NEXT: stxv v29, 208(r1) # 16-byte Folded Spill
+; LE-P10-NEXT: stxv v30, 224(r1) # 16-byte Folded Spill
+; LE-P10-NEXT: stxv v31, 240(r1) # 16-byte Folded Spill
; LE-P10-NEXT: std r3, 40(r1) # 8-byte Folded Spill
; LE-P10-NEXT: stw r4, 52(r1)
; LE-P10-NEXT: #APP
@@ -436,40 +436,40 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
; LE-P10-NEXT: lfd f29, 520(r1) # 8-byte Folded Reload
; LE-P10-NEXT: lfd f28, 512(r1) # 8-byte Folded Reload
; LE-P10-NEXT: lfd f27, 504(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: lfd f26, 496(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: lfd f25, 488(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: lfd f24, 480(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: lfd f23, 472(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: lfd f22, 464(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: lfd f21, 456(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: lfd f20, 448(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: lfd f19, 440(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: lfd f18, 432(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: lfd f17, 424(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: lfd f16, 416(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: lfd f15, 408(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: lfd f14, 400(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r31, 392(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r30, 384(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r29, 376(r1) # 8-byte Folded Reload
-; LE-P10-NEXT: lfd f26, 496(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r28, 368(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r27, 360(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r26, 352(r1) # 8-byte Folded Reload
-; LE-P10-NEXT: lfd f25, 488(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r25, 344(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r24, 336(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r23, 328(r1) # 8-byte Folded Reload
-; LE-P10-NEXT: lfd f24, 480(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r22, 320(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r21, 312(r1) # 8-byte Folded Reload
-; LE-P10-NEXT: lwz r4, 16(r4)
-; LE-P10-NEXT: add r3, r4, r3
-; LE-P10-NEXT: lfd f23, 472(r1) # 8-byte Folded Reload
-; LE-P10-NEXT: lfd f22, 464(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r20, 304(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r19, 296(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r18, 288(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r17, 280(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r16, 272(r1) # 8-byte Folded Reload
; LE-P10-NEXT: ld r15, 264(r1) # 8-byte Folded Reload
-; LE-P10-NEXT: ld r14, 256(r1) # 8-byte Folded Reload
-; LE-P10-NEXT: lfd f21, 456(r1) # 8-byte Folded Reload
-; LE-P10-NEXT: lfd f20, 448(r1) # 8-byte Folded Reload
-; LE-P10-NEXT: lfd f19, 440(r1) # 8-byte Folded Reload
-; LE-P10-NEXT: lfd f18, 432(r1) # 8-byte Folded Reload
-; LE-P10-NEXT: lfd f17, 424(r1) # 8-byte Folded Reload
-; LE-P10-NEXT: lfd f16, 416(r1) # 8-byte Folded Reload
-; LE-P10-NEXT: lfd f15, 408(r1) # 8-byte Folded Reload
-; LE-P10-NEXT: lfd f14, 400(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: lwz r4, 16(r4)
+; LE-P10-NEXT: add r3, r4, r3
; LE-P10-NEXT: clrldi r3, r3, 32
+; LE-P10-NEXT: ld r14, 256(r1) # 8-byte Folded Reload
; LE-P10-NEXT: addi r1, r1, 544
; LE-P10-NEXT: ld r0, 16(r1)
; LE-P10-NEXT: lwz r12, 8(r1)
@@ -1177,30 +1177,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
; BE-P10-NEXT: lwz r4, 12(r3)
; BE-P10-NEXT: std r14, 336(r1) # 8-byte Folded Spill
; BE-P10-NEXT: std r15, 344(r1) # 8-byte Folded Spill
-; BE-P10-NEXT: stxv v20, 144(r1) # 16-byte Folded Spill
-; BE-P10-NEXT: stxv v21, 160(r1) # 16-byte Folded Spill
-; BE-P10-NEXT: stxv v22, 176(r1) # 16-byte Folded Spill
; BE-P10-NEXT: std r16, 352(r1) # 8-byte Folded Spill
; BE-P10-NEXT: std r17, 360(r1) # 8-byte Folded Spill
-; BE-P10-NEXT: stxv v23, 192(r1) # 16-byte Folded Spill
; BE-P10-NEXT: std r18, 368(r1) # 8-byte Folded Spill
; BE-P10-NEXT: std r19, 376(r1) # 8-byte Folded Spill
-; BE-P10-NEXT: stxv v24, 208(r1) # 16-byte Folded Spill
-; BE-P10-NEXT: stxv v25, 224(r1) # 16-byte Folded Spill
; BE-P10-NEXT: std r20, 384(r1) # 8-byte Folded Spill
; BE-P10-NEXT: std r21, 392(r1) # 8-byte Folded Spill
-; BE-P10-NEXT: stxv v26, 240(r1) # 16-byte Folded Spill
; BE-P10-NEXT: std r22, 400(r1) # 8-byte Folded Spill
; BE-P10-NEXT: std r23, 408(r1) # 8-byte Folded Spill
-; BE-P10-NEXT: stxv v27, 256(r1) # 16-byte Folded Spill
-; BE-P10-NEXT: stxv v28, 272(r1) # 16-byte Folded Spill
; BE-P10-NEXT: std r24, 416(r1) # 8-byte Folded Spill
; BE-P10-NEXT: std r25, 424(r1) # 8-byte Folded Spill
-; BE-P10-NEXT: stxv v29, 288(r1) # 16-byte Folded Spill
; BE-P10-NEXT: std r26, 432(r1) # 8-byte Folded Spill
; BE-P10-NEXT: std r27, 440(r1) # 8-byte Folded Spill
-; BE-P10-NEXT: stxv v30, 304(r1) # 16-byte Folded Spill
-; BE-P10-NEXT: stxv v31, 320(r1) # 16-byte Folded Spill
; BE-P10-NEXT: std r28, 448(r1) # 8-byte Folded Spill
; BE-P10-NEXT: std r29, 456(r1) # 8-byte Folded Spill
; BE-P10-NEXT: std r30, 464(r1) # 8-byte Folded Spill
@@ -1223,6 +1211,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
; BE-P10-NEXT: stfd f29, 600(r1) # 8-byte Folded Spill
; BE-P10-NEXT: stfd f30, 608(r1) # 8-byte Folded Spill
; BE-P10-NEXT: stfd f31, 616(r1) # 8-byte Folded Spill
+; BE-P10-NEXT: stxv v20, 144(r1) # 16-byte Folded Spill
+; BE-P10-NEXT: stxv v21, 160(r1) # 16-byte Folded Spill
+; BE-P10-NEXT: stxv v22, 176(r1) # 16-byte Folded Spill
+; BE-P10-NEXT: stxv v23, 192(r1) # 16-byte Folded Spill
+; BE-P10-NEXT: stxv v24, 208(r1) # 16-byte Folded Spill
+; BE-P10-NEXT: stxv v25, 224(r1) # 16-byte Folded Spill
+; BE-P10-NEXT: stxv v26, 240(r1) # 16-byte Folded Spill
+; BE-P10-NEXT: stxv v27, 256(r1) # 16-byte Folded Spill
+; BE-P10-NEXT: stxv v28, 272(r1) # 16-byte Folded Spill
+; BE-P10-NEXT: stxv v29, 288(r1) # 16-byte Folded Spill
+; BE-P10-NEXT: stxv v30, 304(r1) # 16-byte Folded Spill
+; BE-P10-NEXT: stxv v31, 320(r1) # 16-byte Folded Spill
; BE-P10-NEXT: std r3, 120(r1) # 8-byte Folded Spill
; BE-P10-NEXT: stw r4, 132(r1)
; BE-P10-NEXT: #APP
@@ -1249,40 +1249,40 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
; BE-P10-NEXT: lfd f29, 600(r1) # 8-byte Folded Reload
; BE-P10-NEXT: lfd f28, 592(r1) # 8-byte Folded Reload
; BE-P10-NEXT: lfd f27, 584(r1) # 8-byte Folded Reload
+; BE-P10-NEXT: lfd f26, 576(r1) # 8-byte Folded Reload
+; BE-P10-NEXT: lfd f25, 568(r1) # 8-byte Folded Reload
+; BE-P10-NEXT: lfd f24, 560(r1) # 8-byte Folded Reload
+; BE-P10-NEXT: lfd f23, 552(r1) # 8-byte Folded Reload
+; BE-P10-NEXT: lfd f22, 544(r1) # 8-byte Folded Reload
+; BE-P10-NEXT: lfd f21, 536(r1) # 8-byte Folded Reload
+; BE-P10-NEXT: lfd f20, 528(r1) # 8-byte Folded Reload
+; BE-P10-NEXT: lfd f19, 520(r1) # 8-byte Folded Reload
+; BE-P10-NEXT: lfd f18, 512(r1) # 8-byte Folded Reload
+; BE-P10-NEXT: lfd f17, 504(r1) # 8-byte Folded Reload
+; BE-P10-NEXT: lfd f16, 496(r1) # 8-byte Folded Reload
+; BE-P10-NEXT: lfd f15, 488(r1) # 8-byte Folded Reload
+; BE-P10-NEXT: lfd f14, 480(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r31, 472(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r30, 464(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r29, 456(r1) # 8-byte Folded Reload
-; BE-P10-NEXT: lfd f26, 576(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r28, 448(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r27, 440(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r26, 432(r1) # 8-byte Folded Reload
-; BE-P10-NEXT: lfd f25, 568(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r25, 424(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r24, 416(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r23, 408(r1) # 8-byte Folded Reload
-; BE-P10-NEXT: lfd f24, 560(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r22, 400(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r21, 392(r1) # 8-byte Folded Reload
-; BE-P10-NEXT: lwz r4, 16(r4)
-; BE-P10-NEXT: add r3, r4, r3
-; BE-P10-NEXT: lfd f23, 552(r1) # 8-byte Folded Reload
-; BE-P10-NEXT: lfd f22, 544(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r20, 384(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r19, 376(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r18, 368(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r17, 360(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r16, 352(r1) # 8-byte Folded Reload
; BE-P10-NEXT: ld r15, 344(r1) # 8-byte Folded Reload
-; BE-P10-NEXT: ld r14, 336(r1) # 8-byte Folded Reload
-; BE-P10-NEXT: lfd f21, 536(r1) # 8-byte Folded Reload
-; BE-P10-NEXT: lfd f20, 528(r1) # 8-byte Folded Reload
-; BE-P10-NEXT: lfd f19, 520(r1) # 8-byte Folded Reload
-; BE-P10-NEXT: lfd f18, 512(r1) # 8-byte Folded Reload
-; BE-P10-NEXT: lfd f17, 504(r1) # 8-byte Folded Reload
-; BE-P10-NEXT: lfd f16, 496(r1) # 8-byte Folded Reload
-; BE-P10-NEXT: lfd f15, 488(r1) # 8-byte Folded Reload
-; BE-P10-NEXT: lfd f14, 480(r1) # 8-byte Folded Reload
+; BE-P10-NEXT: lwz r4, 16(r4)
+; BE-P10-NEXT: add r3, r4, r3
; BE-P10-NEXT: clrldi r3, r3, 32
+; BE-P10-NEXT: ld r14, 336(r1) # 8-byte Folded Reload
; BE-P10-NEXT: addi r1, r1, 624
; BE-P10-NEXT: ld r0, 16(r1)
; BE-P10-NEXT: lwz r12, 8(r1)
@@ -1582,30 +1582,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
; LE-P10-PRIV-NEXT: lwz r4, 12(r3)
; LE-P10-PRIV-NEXT: std r14, 256(r1) # 8-byte Folded Spill
; LE-P10-PRIV-NEXT: std r15, 264(r1) # 8-byte Folded Spill
-; LE-P10-PRIV-NEXT: stxv v20, 64(r1) # 16-byte Folded Spill
-; LE-P10-PRIV-NEXT: stxv v21, 80(r1) # 16-byte Folded Spill
-; LE-P10-PRIV-NEXT: stxv v22, 96(r1) # 16-byte Folded Spill
; LE-P10-PRIV-NEXT: std r16, 272(r1) # 8-byte Folded Spill
; LE-P10-PRIV-NEXT: std r17, 280(r1) # 8-byte Folded Spill
-; LE-P10-PRIV-NEXT: stxv v23, 112(r1) # 16-byte Folded Spill
; LE-P10-PRIV-NEXT: std r18, 288(r1) # 8-byte Folded Spill
; LE-P10-PRIV-NEXT: std r19, 296(r1) # 8-byte Folded Spill
-; LE-P10-PRIV-NEXT: stxv v24, 128(r1) # 16-byte Folded Spill
-; LE-P10-PRIV-NEXT: stxv v25, 144(r1) # 16-byte Folded Spill
; LE-P10-PRIV-NEXT: std r20, 304(r1) # 8-byte Folded Spill
; LE-P10-PRIV-NEXT: std r21, 312(r1) # 8-byte Folded Spill
-; LE-P10-PRIV-NEXT: stxv v26, 160(r1) # 16-byte Folded Spill
; LE-P10-PRIV-NEXT: std r22, 320(r1) # 8-byte Folded Spill
; LE-P10-PRIV-NEXT: std r23, 328(r1) # 8-byte Folded Spill
-; LE-P10-PRIV-NEXT: stxv v27, 176(r1) # 16-byte Folded Spill
-; LE-P10-PRIV-NEXT: stxv v28, 192(r1) # 16-byte Folded Spill
; LE-P10-PRIV-NEXT: std r24, 336(r1) # 8-byte Folded Spill
; LE-P10-PRIV-NEXT: std r25, 344(r1) # 8-byte Folded Spill
-; LE-P10-PRIV-NEXT: stxv v29, 208(r1) # 16-byte Folded Spill
; LE-P10-PRIV-NEXT: std r26, 352(r1) # 8-byte Folded Spill
; LE-P10-PRIV-NEXT: std r27, 360(r1) # 8-byte Folded Spill
-; LE-P10-PRIV-NEXT: stxv v30, 224(r1) # 16-byte Folded Spill
-; LE-P10-PRIV-NEXT: stxv v31, 240(r1) # 16-byte Folded Spill
; LE-P10-PRIV-NEXT: std r28, 368(r1) # 8-byte Folded Spill
; LE-P10-PRIV-NEXT: std r29, 376(r1) # 8-byte Folded Spill
; LE-P10-PRIV-NEXT: std r30, 384(r1) # 8-byte Folded Spill
@@ -1628,6 +1616,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
; LE-P10-PRIV-NEXT: stfd f29, 520(r1) # 8-byte Folded Spill
; LE-P10-PRIV-NEXT: stfd f30, 528(r1) # 8-byte Folded Spill
; LE-P10-PRIV-NEXT: stfd f31, 536(r1) # 8-byte Folded Spill
+; LE-P10-PRIV-NEXT: stxv v20, 64(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT: stxv v21, 80(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT: stxv v22, 96(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT: stxv v23, 112(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT: stxv v24, 128(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT: stxv v25, 144(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT: stxv v26, 160(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT: stxv v27, 176(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT: stxv v28, 192(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT: stxv v29, 208(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT: stxv v30, 224(r1) # 16-byte Folded Spill
+; LE-P10-PRIV-NEXT: stxv v31, 240(r1) # 16-byte Folded Spill
; LE-P10-PRIV-NEXT: std r3, 40(r1) # 8-byte Folded Spill
; LE-P10-PRIV-NEXT: stw r4, 52(r1)
; LE-P10-PRIV-NEXT: #APP
@@ -1653,40 +1653,40 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
; LE-P10-PRIV-NEXT: lfd f29, 520(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: lfd f28, 512(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: lfd f27, 504(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: lfd f26, 496(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: lfd f25, 488(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: lfd f24, 480(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: lfd f23, 472(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: lfd f22, 464(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: lfd f21, 456(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: lfd f20, 448(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: lfd f19, 440(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: lfd f18, 432(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: lfd f17, 424(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: lfd f16, 416(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: lfd f15, 408(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: lfd f14, 400(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r31, 392(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r30, 384(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r29, 376(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT: lfd f26, 496(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r28, 368(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r27, 360(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r26, 352(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT: lfd f25, 488(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r25, 344(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r24, 336(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r23, 328(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT: lfd f24, 480(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r22, 320(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r21, 312(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT: lwz r4, 16(r4)
-; LE-P10-PRIV-NEXT: add r3, r4, r3
-; LE-P10-PRIV-NEXT: lfd f23, 472(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT: lfd f22, 464(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r20, 304(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r19, 296(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r18, 288(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r17, 280(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r16, 272(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: ld r15, 264(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT: ld r14, 256(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT: lfd f21, 456(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT: lfd f20, 448(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT: lfd f19, 440(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT: lfd f18, 432(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT: lfd f17, 424(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT: lfd f16, 416(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT: lfd f15, 408(r1) # 8-byte Folded Reload
-; LE-P10-PRIV-NEXT: lfd f14, 400(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: lwz r4, 16(r4)
+; LE-P10-PRIV-NEXT: add r3, r4, r3
; LE-P10-PRIV-NEXT: clrldi r3, r3, 32
+; LE-P10-PRIV-NEXT: ld r14, 256(r1) # 8-byte Folded Reload
; LE-P10-PRIV-NEXT: addi r1, r1, 544
; LE-P10-PRIV-NEXT: ld r0, 16(r1)
; LE-P10-PRIV-NEXT: lwz r12, 8(r1)
@@ -1986,30 +1986,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
; BE-P10-PRIV-NEXT: lwz r4, 12(r3)
; BE-P10-PRIV-NEXT: std r14, 336(r1) # 8-byte Folded Spill
; BE-P10-PRIV-NEXT: std r15, 344(r1) # 8-byte Folded Spill
-; BE-P10-PRIV-NEXT: stxv v20, 144(r1) # 16-byte Folded Spill
-; BE-P10-PRIV-NEXT: stxv v21, 160(r1) # 16-byte Folded Spill
-; BE-P10-PRIV-NEXT: stxv v22, 176(r1) # 16-byte Folded Spill
; BE-P10-PRIV-NEXT: std r16, 352(r1) # 8-byte Folded Spill
; BE-P10-PRIV-NEXT: std r17, 360(r1) # 8-byte Folded Spill
-; BE-P10-PRIV-NEXT: stxv v23, 192(r1) # 16-byte Folded Spill
; BE-P10-PRIV-NEXT: std r18, 368(r1) # 8-byte Folded Spill
; BE-P10-PRIV-NEXT: std r19, 376(r1) # 8-byte Folded Spill
-; BE-P10-PRIV-NEXT: stxv v24, 208(r1) # 16-byte Folded Spill
-; BE-P10-PRIV-NEXT: stxv v25, 224(r1) # 16-byte Folded Spill
; BE-P10-PRIV-NEXT: std r20, 384(r1) # 8-byte Folded Spill
; BE-P10-PRIV-NEXT: std r21, 392(r1) # 8-byte Folded Spill
-; BE-P10-PRIV-NEXT: stxv v26, 240(r1) # 16-byte Folded Spill
; BE-P10-PRIV-NEXT: std r22, 400(r1) # 8-byte Folded Spill
; BE-P10-PRIV-NEXT: std r23, 408(r1) # 8-byte Folded Spill
-; BE-P10-PRIV-NEXT: stxv v27, 256(r1) # 16-byte Folded Spill
-; BE-P10-PRIV-NEXT: stxv v28, 272(r1) # 16-byte Folded Spill
; BE-P10-PRIV-NEXT: std r24, 416(r1) # 8-byte Folded Spill
; BE-P10-PRIV-NEXT: std r25, 424(r1) # 8-byte Folded Spill
-; BE-P10-PRIV-NEXT: stxv v29, 288(r1) # 16-byte Folded Spill
; BE-P10-PRIV-NEXT: std r26, 432(r1) # 8-byte Folded Spill
; BE-P10-PRIV-NEXT: std r27, 440(r1) # 8-byte Folded Spill
-; BE-P10-PRIV-NEXT: stxv v30, 304(r1) # 16-byte Folded Spill
-; BE-P10-PRIV-NEXT: stxv v31, 320(r1) # 16-byte Folded Spill
; BE-P10-PRIV-NEXT: std r28, 448(r1) # 8-byte Folded Spill
; BE-P10-PRIV-NEXT: std r29, 456(r1) # 8-byte Folded Spill
; BE-P10-PRIV-NEXT: std r30, 464(r1) # 8-byte Folded Spill
@@ -2032,6 +2020,18 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
; BE-P10-PRIV-NEXT: stfd f29, 600(r1) # 8-byte Folded Spill
; BE-P10-PRIV-NEXT: stfd f30, 608(r1) # 8-byte Folded Spill
; BE-P10-PRIV-NEXT: stfd f31, 616(r1) # 8-byte Folded Spill
+; BE-P10-PRIV-NEXT: stxv v20, 144(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT: stxv v21, 160(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT: stxv v22, 176(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT: stxv v23, 192(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT: stxv v24, 208(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT: stxv v25, 224(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT: stxv v26, 240(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT: stxv v27, 256(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT: stxv v28, 272(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT: stxv v29, 288(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT: stxv v30, 304(r1) # 16-byte Folded Spill
+; BE-P10-PRIV-NEXT: stxv v31, 320(r1) # 16-byte Folded Spill
; BE-P10-PRIV-NEXT: std r3, 120(r1) # 8-byte Folded Spill
; BE-P10-PRIV-NEXT: stw r4, 132(r1)
; BE-P10-PRIV-NEXT: #APP
@@ -2058,40 +2058,40 @@ define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 {
; BE-P10-PRIV-NEXT: lfd f29, 600(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: lfd f28, 592(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: lfd f27, 584(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT: lfd f26, 576(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT: lfd f25, 568(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT: lfd f24, 560(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT: lfd f23, 552(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT: lfd f22, 544(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT: lfd f21, 536(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT: lfd f20, 528(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT: lfd f19, 520(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT: lfd f18, 512(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT: lfd f17, 504(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT: lfd f16, 496(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT: lfd f15, 488(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT: lfd f14, 480(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r31, 472(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r30, 464(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r29, 456(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT: lfd f26, 576(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r28, 448(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r27, 440(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r26, 432(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT: lfd f25, 568(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r25, 424(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r24, 416(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r23, 408(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT: lfd f24, 560(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r22, 400(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r21, 392(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT: lwz r4, 16(r4)
-; BE-P10-PRIV-NEXT: add r3, r4, r3
-; BE-P10-PRIV-NEXT: lfd f23, 552(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT: lfd f22, 544(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r20, 384(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r19, 376(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r18, 368(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r17, 360(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r16, 352(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: ld r15, 344(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT: ld r14, 336(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT: lfd f21, 536(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT: lfd f20, 528(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT: lfd f19, 520(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT: lfd f18, 512(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT: lfd f17, 504(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT: lfd f16, 496(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT: lfd f15, 488(r1) # 8-byte Folded Reload
-; BE-P10-PRIV-NEXT: lfd f14, 480(r1) # 8-byte Folded Reload
+; BE-P10-PRIV-NEXT: lwz r4, 16(r4)
+; BE-P10-PRIV-NEXT: add r3, r4, r3
; BE-P10-PRIV-NEXT: clrldi r3, r3, 32
+; BE-P10-PRIV-NEXT: ld r14, 336(r1) # 8-byte Folded Reload
; BE-P10-PRIV-NEXT: addi r1, r1, 624
; BE-P10-PRIV-NEXT: ld r0, 16(r1)
; BE-P10-PRIV-NEXT: lwz r12, 8(r1)
@@ -2416,9 +2416,9 @@ define dso_local zeroext i32 @shrinkwrap(i32* readonly %in) #0 {
; LE-P10-NEXT: addi r1, r1, 64
; LE-P10-NEXT: ld r0, 16(r1)
; LE-P10-NEXT: clrldi r3, r3, 32
-; LE-P10-NEXT: mtlr r0
; LE-P10-NEXT: hashchk r0, -24(r1)
; LE-P10-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; LE-P10-NEXT: mtlr r0
; LE-P10-NEXT: blr
; LE-P10-NEXT: .LBB2_2:
; LE-P10-NEXT: li r3, 0
@@ -2603,8 +2603,8 @@ define dso_local zeroext i32 @shrinkwrap(i32* readonly %in) #0 {
; BE-P10-NEXT: addi r1, r1, 144
; BE-P10-NEXT: ld r0, 16(r1)
; BE-P10-NEXT: clrldi r3, r3, 32
-; BE-P10-NEXT: mtlr r0
; BE-P10-NEXT: hashchk r0, -24(r1)
+; BE-P10-NEXT: mtlr r0
; BE-P10-NEXT: blr
; BE-P10-NEXT: .LBB2_2:
; BE-P10-NEXT: li r3, 0
@@ -2688,9 +2688,9 @@ define dso_local zeroext i32 @shrinkwrap(i32* readonly %in) #0 {
; LE-P10-PRIV-NEXT: addi r1, r1, 64
; LE-P10-PRIV-NEXT: ld r0, 16(r1)
; LE-P10-PRIV-NEXT: clrldi r3, r3, 32
-; LE-P10-PRIV-NEXT: mtlr r0
; LE-P10-PRIV-NEXT: hashchkp r0, -24(r1)
; LE-P10-PRIV-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; LE-P10-PRIV-NEXT: mtlr r0
; LE-P10-PRIV-NEXT: blr
; LE-P10-PRIV-NEXT: .LBB2_2:
; LE-P10-PRIV-NEXT: li r3, 0
@@ -2776,8 +2776,8 @@ define dso_local zeroext i32 @shrinkwrap(i32* readonly %in) #0 {
; BE-P10-PRIV-NEXT: addi r1, r1, 144
; BE-P10-PRIV-NEXT: ld r0, 16(r1)
; BE-P10-PRIV-NEXT: clrldi r3, r3, 32
-; BE-P10-PRIV-NEXT: mtlr r0
; BE-P10-PRIV-NEXT: hashchkp r0, -24(r1)
+; BE-P10-PRIV-NEXT: mtlr r0
; BE-P10-PRIV-NEXT: blr
; BE-P10-PRIV-NEXT: .LBB2_2:
; BE-P10-PRIV-NEXT: li r3, 0
@@ -2870,10 +2870,10 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
; LE-P10-NEXT: lis r12, -1
; LE-P10-NEXT: std r30, -16(r1)
; LE-P10-NEXT: mr r30, r1
-; LE-P10-NEXT: ori r12, r12, 0
; LE-P10-NEXT: std r0, 16(r1)
; LE-P10-NEXT: hashst r0, -32(r1)
; LE-P10-NEXT: clrldi r0, r1, 49
+; LE-P10-NEXT: ori r12, r12, 0
; LE-P10-NEXT: subc r0, r12, r0
; LE-P10-NEXT: stdux r1, r1, r0
; LE-P10-NEXT: std r29, -24(r30) # 8-byte Folded Spill
@@ -2890,9 +2890,9 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
; LE-P10-NEXT: lwz r3, 20(r29)
; LE-P10-NEXT: lis r4, 0
; LE-P10-NEXT: ori r4, r4, 65500
+; LE-P10-NEXT: add r4, r1, r4
; LE-P10-NEXT: stw r3, 32764(r1)
; LE-P10-NEXT: lis r3, 0
-; LE-P10-NEXT: add r4, r1, r4
; LE-P10-NEXT: ori r3, r3, 32768
; LE-P10-NEXT: add r3, r1, r3
; LE-P10-NEXT: bl callee3@notoc
@@ -3133,10 +3133,10 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
; BE-P10-NEXT: lis r12, -1
; BE-P10-NEXT: std r30, -16(r1)
; BE-P10-NEXT: mr r30, r1
-; BE-P10-NEXT: ori r12, r12, 0
; BE-P10-NEXT: std r0, 16(r1)
; BE-P10-NEXT: hashst r0, -32(r1)
; BE-P10-NEXT: clrldi r0, r1, 49
+; BE-P10-NEXT: ori r12, r12, 0
; BE-P10-NEXT: subc r0, r12, r0
; BE-P10-NEXT: stdux r1, r1, r0
; BE-P10-NEXT: std r29, -24(r30) # 8-byte Folded Spill
@@ -3153,9 +3153,9 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
; BE-P10-NEXT: lwz r3, 20(r29)
; BE-P10-NEXT: lis r4, 0
; BE-P10-NEXT: ori r4, r4, 65500
+; BE-P10-NEXT: add r4, r1, r4
; BE-P10-NEXT: stw r3, 32764(r1)
; BE-P10-NEXT: lis r3, 0
-; BE-P10-NEXT: add r4, r1, r4
; BE-P10-NEXT: ori r3, r3, 32768
; BE-P10-NEXT: add r3, r1, r3
; BE-P10-NEXT: bl callee3
@@ -3265,10 +3265,10 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
; LE-P10-PRIV-NEXT: lis r12, -1
; LE-P10-PRIV-NEXT: std r30, -16(r1)
; LE-P10-PRIV-NEXT: mr r30, r1
-; LE-P10-PRIV-NEXT: ori r12, r12, 0
; LE-P10-PRIV-NEXT: std r0, 16(r1)
; LE-P10-PRIV-NEXT: hashstp r0, -32(r1)
; LE-P10-PRIV-NEXT: clrldi r0, r1, 49
+; LE-P10-PRIV-NEXT: ori r12, r12, 0
; LE-P10-PRIV-NEXT: subc r0, r12, r0
; LE-P10-PRIV-NEXT: stdux r1, r1, r0
; LE-P10-PRIV-NEXT: std r29, -24(r30) # 8-byte Folded Spill
@@ -3285,9 +3285,9 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
; LE-P10-PRIV-NEXT: lwz r3, 20(r29)
; LE-P10-PRIV-NEXT: lis r4, 0
; LE-P10-PRIV-NEXT: ori r4, r4, 65500
+; LE-P10-PRIV-NEXT: add r4, r1, r4
; LE-P10-PRIV-NEXT: stw r3, 32764(r1)
; LE-P10-PRIV-NEXT: lis r3, 0
-; LE-P10-PRIV-NEXT: add r4, r1, r4
; LE-P10-PRIV-NEXT: ori r3, r3, 32768
; LE-P10-PRIV-NEXT: add r3, r1, r3
; LE-P10-PRIV-NEXT: bl callee3@notoc
@@ -3396,10 +3396,10 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
; BE-P10-PRIV-NEXT: lis r12, -1
; BE-P10-PRIV-NEXT: std r30, -16(r1)
; BE-P10-PRIV-NEXT: mr r30, r1
-; BE-P10-PRIV-NEXT: ori r12, r12, 0
; BE-P10-PRIV-NEXT: std r0, 16(r1)
; BE-P10-PRIV-NEXT: hashstp r0, -32(r1)
; BE-P10-PRIV-NEXT: clrldi r0, r1, 49
+; BE-P10-PRIV-NEXT: ori r12, r12, 0
; BE-P10-PRIV-NEXT: subc r0, r12, r0
; BE-P10-PRIV-NEXT: stdux r1, r1, r0
; BE-P10-PRIV-NEXT: std r29, -24(r30) # 8-byte Folded Spill
@@ -3416,9 +3416,9 @@ define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 {
; BE-P10-PRIV-NEXT: lwz r3, 20(r29)
; BE-P10-PRIV-NEXT: lis r4, 0
; BE-P10-PRIV-NEXT: ori r4, r4, 65500
+; BE-P10-PRIV-NEXT: add r4, r1, r4
; BE-P10-PRIV-NEXT: stw r3, 32764(r1)
; BE-P10-PRIV-NEXT: lis r3, 0
-; BE-P10-PRIV-NEXT: add r4, r1, r4
; BE-P10-PRIV-NEXT: ori r3, r3, 32768
; BE-P10-PRIV-NEXT: add r3, r1, r3
; BE-P10-PRIV-NEXT: bl callee3
diff --git a/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll
index e0a12a2b14b1..173560f830fb 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-double-ldst.ll
@@ -3932,9 +3932,9 @@ entry:
define dso_local void @st_not_disjoint64_double_uint8_t(i64 %ptr, double %str) {
; CHECK-P10-LABEL: st_not_disjoint64_double_uint8_t:
; CHECK-P10: # %bb.0: # %entry
-; CHECK-P10-NEXT: xscvdpuxws f0, f1
; CHECK-P10-NEXT: pli r4, 232
; CHECK-P10-NEXT: pli r5, 3567587329
+; CHECK-P10-NEXT: xscvdpuxws f0, f1
; CHECK-P10-NEXT: rldimi r5, r4, 32, 0
; CHECK-P10-NEXT: or r3, r3, r5
; CHECK-P10-NEXT: stxsibx f0, 0, r3
@@ -4371,9 +4371,9 @@ entry:
define dso_local void @st_not_disjoint64_double_int8_t(i64 %ptr, double %str) {
; CHECK-P10-LABEL: st_not_disjoint64_double_int8_t:
; CHECK-P10: # %bb.0: # %entry
-; CHECK-P10-NEXT: xscvdpsxws f0, f1
; CHECK-P10-NEXT: pli r4, 232
; CHECK-P10-NEXT: pli r5, 3567587329
+; CHECK-P10-NEXT: xscvdpsxws f0, f1
; CHECK-P10-NEXT: rldimi r5, r4, 32, 0
; CHECK-P10-NEXT: or r3, r3, r5
; CHECK-P10-NEXT: stxsibx f0, 0, r3
@@ -4814,9 +4814,9 @@ entry:
define dso_local void @st_not_disjoint64_double_uint16_t(i64 %ptr, double %str) {
; CHECK-P10-LABEL: st_not_disjoint64_double_uint16_t:
; CHECK-P10: # %bb.0: # %entry
-; CHECK-P10-NEXT: xscvdpuxws f0, f1
; CHECK-P10-NEXT: pli r4, 232
; CHECK-P10-NEXT: pli r5, 3567587329
+; CHECK-P10-NEXT: xscvdpuxws f0, f1
; CHECK-P10-NEXT: rldimi r5, r4, 32, 0
; CHECK-P10-NEXT: or r3, r3, r5
; CHECK-P10-NEXT: stxsihx f0, 0, r3
@@ -5257,9 +5257,9 @@ entry:
define dso_local void @st_not_disjoint64_double_int16_t(i64 %ptr, double %str) {
; CHECK-P10-LABEL: st_not_disjoint64_double_int16_t:
; CHECK-P10: # %bb.0: # %entry
-; CHECK-P10-NEXT: xscvdpsxws f0, f1
; CHECK-P10-NEXT: pli r4, 232
; CHECK-P10-NEXT: pli r5, 3567587329
+; CHECK-P10-NEXT: xscvdpsxws f0, f1
; CHECK-P10-NEXT: rldimi r5, r4, 32, 0
; CHECK-P10-NEXT: or r3, r3, r5
; CHECK-P10-NEXT: stxsihx f0, 0, r3
@@ -5626,9 +5626,9 @@ entry:
define dso_local void @st_not_disjoint64_double_uint32_t(i64 %ptr, double %str) {
; CHECK-P10-LABEL: st_not_disjoint64_double_uint32_t:
; CHECK-P10: # %bb.0: # %entry
-; CHECK-P10-NEXT: xscvdpuxws f0, f1
; CHECK-P10-NEXT: pli r4, 232
; CHECK-P10-NEXT: pli r5, 3567587329
+; CHECK-P10-NEXT: xscvdpuxws f0, f1
; CHECK-P10-NEXT: rldimi r5, r4, 32, 0
; CHECK-P10-NEXT: or r3, r3, r5
; CHECK-P10-NEXT: stfiwx f0, 0, r3
@@ -5958,9 +5958,9 @@ entry:
define dso_local void @st_not_disjoint64_double_int32_t(i64 %ptr, double %str) {
; CHECK-P10-LABEL: st_not_disjoint64_double_int32_t:
; CHECK-P10: # %bb.0: # %entry
-; CHECK-P10-NEXT: xscvdpsxws f0, f1
; CHECK-P10-NEXT: pli r4, 232
; CHECK-P10-NEXT: pli r5, 3567587329
+; CHECK-P10-NEXT: xscvdpsxws f0, f1
; CHECK-P10-NEXT: rldimi r5, r4, 32, 0
; CHECK-P10-NEXT: or r3, r3, r5
; CHECK-P10-NEXT: stfiwx f0, 0, r3
diff --git a/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll
index b6a5ccb93d03..b2c2109e930c 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-float-ldst.ll
@@ -3954,9 +3954,9 @@ entry:
define dso_local void @st_not_disjoint64_float_uint8_t(i64 %ptr, float %str) {
; CHECK-P10-LABEL: st_not_disjoint64_float_uint8_t:
; CHECK-P10: # %bb.0: # %entry
-; CHECK-P10-NEXT: xscvdpuxws f0, f1
; CHECK-P10-NEXT: pli r4, 232
; CHECK-P10-NEXT: pli r5, 3567587329
+; CHECK-P10-NEXT: xscvdpuxws f0, f1
; CHECK-P10-NEXT: rldimi r5, r4, 32, 0
; CHECK-P10-NEXT: or r3, r3, r5
; CHECK-P10-NEXT: stxsibx f0, 0, r3
@@ -4393,9 +4393,9 @@ entry:
define dso_local void @st_not_disjoint64_float_int8_t(i64 %ptr, float %str) {
; CHECK-P10-LABEL: st_not_disjoint64_float_int8_t:
; CHECK-P10: # %bb.0: # %entry
-; CHECK-P10-NEXT: xscvdpsxws f0, f1
; CHECK-P10-NEXT: pli r4, 232
; CHECK-P10-NEXT: pli r5, 3567587329
+; CHECK-P10-NEXT: xscvdpsxws f0, f1
; CHECK-P10-NEXT: rldimi r5, r4, 32, 0
; CHECK-P10-NEXT: or r3, r3, r5
; CHECK-P10-NEXT: stxsibx f0, 0, r3
@@ -4836,9 +4836,9 @@ entry:
define dso_local void @st_not_disjoint64_float_uint16_t(i64 %ptr, float %str) {
; CHECK-P10-LABEL: st_not_disjoint64_float_uint16_t:
; CHECK-P10: # %bb.0: # %entry
-; CHECK-P10-NEXT: xscvdpuxws f0, f1
; CHECK-P10-NEXT: pli r4, 232
; CHECK-P10-NEXT: pli r5, 3567587329
+; CHECK-P10-NEXT: xscvdpuxws f0, f1
; CHECK-P10-NEXT: rldimi r5, r4, 32, 0
; CHECK-P10-NEXT: or r3, r3, r5
; CHECK-P10-NEXT: stxsihx f0, 0, r3
@@ -5279,9 +5279,9 @@ entry:
define dso_local void @st_not_disjoint64_float_int16_t(i64 %ptr, float %str) {
; CHECK-P10-LABEL: st_not_disjoint64_float_int16_t:
; CHECK-P10: # %bb.0: # %entry
-; CHECK-P10-NEXT: xscvdpsxws f0, f1
; CHECK-P10-NEXT: pli r4, 232
; CHECK-P10-NEXT: pli r5, 3567587329
+; CHECK-P10-NEXT: xscvdpsxws f0, f1
; CHECK-P10-NEXT: rldimi r5, r4, 32, 0
; CHECK-P10-NEXT: or r3, r3, r5
; CHECK-P10-NEXT: stxsihx f0, 0, r3
@@ -5648,9 +5648,9 @@ entry:
define dso_local void @st_not_disjoint64_float_uint32_t(i64 %ptr, float %str) {
; CHECK-P10-LABEL: st_not_disjoint64_float_uint32_t:
; CHECK-P10: # %bb.0: # %entry
-; CHECK-P10-NEXT: xscvdpuxws f0, f1
; CHECK-P10-NEXT: pli r4, 232
; CHECK-P10-NEXT: pli r5, 3567587329
+; CHECK-P10-NEXT: xscvdpuxws f0, f1
; CHECK-P10-NEXT: rldimi r5, r4, 32, 0
; CHECK-P10-NEXT: or r3, r3, r5
; CHECK-P10-NEXT: stfiwx f0, 0, r3
@@ -5980,9 +5980,9 @@ entry:
define dso_local void @st_not_disjoint64_float_int32_t(i64 %ptr, float %str) {
; CHECK-P10-LABEL: st_not_disjoint64_float_int32_t:
; CHECK-P10: # %bb.0: # %entry
-; CHECK-P10-NEXT: xscvdpsxws f0, f1
; CHECK-P10-NEXT: pli r4, 232
; CHECK-P10-NEXT: pli r5, 3567587329
+; CHECK-P10-NEXT: xscvdpsxws f0, f1
; CHECK-P10-NEXT: rldimi r5, r4, 32, 0
; CHECK-P10-NEXT: or r3, r3, r5
; CHECK-P10-NEXT: stfiwx f0, 0, r3
diff --git a/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll
index ce17ad3fea3b..03b7219f7458 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-i16-ldst.ll
@@ -6304,8 +6304,8 @@ define dso_local void @st_not_disjoint32_uint16_t_float(i64 %ptr, i16 zeroext %s
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwz f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: stfs f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -6340,8 +6340,8 @@ define dso_local void @st_disjoint_align32_uint16_t_float(i64 %ptr, i16 zeroext
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwz f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
@@ -6692,8 +6692,8 @@ define dso_local void @st_not_disjoint32_uint16_t_double(i64 %ptr, i16 zeroext %
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwz f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: stfd f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -6728,8 +6728,8 @@ define dso_local void @st_disjoint_align32_uint16_t_double(i64 %ptr, i16 zeroext
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwz f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
@@ -7652,8 +7652,8 @@ define dso_local void @st_not_disjoint32_int16_t_float(i64 %ptr, i16 signext %st
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwa f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: stfs f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -7688,8 +7688,8 @@ define dso_local void @st_disjoint_align32_int16_t_float(i64 %ptr, i16 signext %
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwa f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
@@ -8041,8 +8041,8 @@ define dso_local void @st_not_disjoint32_int16_t_double(i64 %ptr, i16 signext %s
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwa f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: stfd f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -8077,8 +8077,8 @@ define dso_local void @st_disjoint_align32_int16_t_double(i64 %ptr, i16 signext
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwa f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
diff --git a/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll
index bbab8a76627f..a13152e8c7ea 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-i32-ldst.ll
@@ -6368,8 +6368,8 @@ define dso_local void @st_not_disjoint32_uint32_t_float(i64 %ptr, i32 zeroext %s
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwz f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: stfs f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -6404,8 +6404,8 @@ define dso_local void @st_disjoint_align32_uint32_t_float(i64 %ptr, i32 zeroext
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwz f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
@@ -6756,8 +6756,8 @@ define dso_local void @st_not_disjoint32_uint32_t_double(i64 %ptr, i32 zeroext %
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwz f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: stfd f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -6792,8 +6792,8 @@ define dso_local void @st_disjoint_align32_uint32_t_double(i64 %ptr, i32 zeroext
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwz f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
@@ -7430,8 +7430,8 @@ define dso_local void @st_not_disjoint32_int32_t_float(i64 %ptr, i32 signext %st
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwa f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: stfs f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -7466,8 +7466,8 @@ define dso_local void @st_disjoint_align32_int32_t_float(i64 %ptr, i32 signext %
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwa f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
@@ -7818,8 +7818,8 @@ define dso_local void @st_not_disjoint32_int32_t_double(i64 %ptr, i32 signext %s
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwa f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: stfd f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -7854,8 +7854,8 @@ define dso_local void @st_disjoint_align32_int32_t_double(i64 %ptr, i32 signext
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwa f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
diff --git a/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll
index 3577d714eef0..a9f0bc31ab18 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-i64-ldst.ll
@@ -7006,8 +7006,8 @@ define dso_local void @st_not_disjoint32_uint64_t_float(i64 %ptr, i64 %str) {
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprd f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: stfs f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -7042,8 +7042,8 @@ define dso_local void @st_disjoint_align32_uint64_t_float(i64 %ptr, i64 %str) {
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprd f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
@@ -7413,8 +7413,8 @@ define dso_local void @st_not_disjoint32_uint64_t_double(i64 %ptr, i64 %str) {
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprd f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: stfd f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -7449,8 +7449,8 @@ define dso_local void @st_disjoint_align32_uint64_t_double(i64 %ptr, i64 %str) {
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprd f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
@@ -7820,8 +7820,8 @@ define dso_local void @st_not_disjoint32_int64_t_float(i64 %ptr, i64 %str) {
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprd f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: stfs f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -7856,8 +7856,8 @@ define dso_local void @st_disjoint_align32_int64_t_float(i64 %ptr, i64 %str) {
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprd f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
@@ -8227,8 +8227,8 @@ define dso_local void @st_not_disjoint32_int64_t_double(i64 %ptr, i64 %str) {
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprd f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: stfd f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -8263,8 +8263,8 @@ define dso_local void @st_disjoint_align32_int64_t_double(i64 %ptr, i64 %str) {
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprd f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
diff --git a/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll b/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll
index 9bbf7f79d4ff..333718e402bf 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-i8-ldst.ll
@@ -7484,8 +7484,8 @@ define dso_local void @st_not_disjoint32_uint8_t_float(i64 %ptr, i8 zeroext %str
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwz f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: stfs f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -7520,8 +7520,8 @@ define dso_local void @st_disjoint_align32_uint8_t_float(i64 %ptr, i8 zeroext %s
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwz f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvuxdsp f0, f0
; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
@@ -7872,8 +7872,8 @@ define dso_local void @st_not_disjoint32_uint8_t_double(i64 %ptr, i8 zeroext %st
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwz f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: stfd f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -7908,8 +7908,8 @@ define dso_local void @st_disjoint_align32_uint8_t_double(i64 %ptr, i8 zeroext %
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwz f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvuxddp f0, f0
; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
@@ -9118,8 +9118,8 @@ define dso_local void @st_not_disjoint32_int8_t_float(i64 %ptr, i8 signext %str)
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwa f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: stfs f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -9154,8 +9154,8 @@ define dso_local void @st_disjoint_align32_int8_t_float(i64 %ptr, i8 signext %st
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwa f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvsxdsp f0, f0
; CHECK-P10-NEXT: pstfs f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
@@ -9506,8 +9506,8 @@ define dso_local void @st_not_disjoint32_int8_t_double(i64 %ptr, i8 signext %str
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwa f0, r4
; CHECK-P10-NEXT: ori r3, r3, 34463
-; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: oris r3, r3, 1
+; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: stfd f0, 0(r3)
; CHECK-P10-NEXT: blr
;
@@ -9542,8 +9542,8 @@ define dso_local void @st_disjoint_align32_int8_t_double(i64 %ptr, i8 signext %s
; CHECK-P10: # %bb.0: # %entry
; CHECK-P10-NEXT: mtfprwa f0, r4
; CHECK-P10-NEXT: lis r5, -15264
-; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: and r3, r3, r5
+; CHECK-P10-NEXT: xscvsxddp f0, f0
; CHECK-P10-NEXT: pstfd f0, 999990000(r3), 0
; CHECK-P10-NEXT: blr
;
diff --git a/llvm/test/CodeGen/PowerPC/spill-vec-pair.ll b/llvm/test/CodeGen/PowerPC/spill-vec-pair.ll
index dcede94054cb..562f2dde467d 100644
--- a/llvm/test/CodeGen/PowerPC/spill-vec-pair.ll
+++ b/llvm/test/CodeGen/PowerPC/spill-vec-pair.ll
@@ -11,34 +11,34 @@ define dso_local void @test(<256 x i1>* %vpp, <256 x i1>* %vp2) local_unnamed_ad
; CHECK-NEXT: stdu r1, -400(r1)
; CHECK-NEXT: stfd f14, 256(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f15, 264(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv v20, 64(r1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv v21, 80(r1) # 16-byte Folded Spill
; CHECK-NEXT: stfd f16, 272(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f17, 280(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv v22, 96(r1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv v23, 112(r1) # 16-byte Folded Spill
; CHECK-NEXT: stfd f18, 288(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f19, 296(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv v24, 128(r1) # 16-byte Folded Spill
; CHECK-NEXT: stfd f20, 304(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f21, 312(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv v25, 144(r1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv v26, 160(r1) # 16-byte Folded Spill
; CHECK-NEXT: stfd f22, 320(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f23, 328(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv v27, 176(r1) # 16-byte Folded Spill
; CHECK-NEXT: stfd f24, 336(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f25, 344(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv v28, 192(r1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv v29, 208(r1) # 16-byte Folded Spill
; CHECK-NEXT: stfd f26, 352(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f27, 360(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv v30, 224(r1) # 16-byte Folded Spill
; CHECK-NEXT: stfd f28, 368(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f29, 376(r1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv v31, 240(r1) # 16-byte Folded Spill
; CHECK-NEXT: stfd f30, 384(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f31, 392(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv v20, 64(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v21, 80(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v22, 96(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v23, 112(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v24, 128(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v25, 144(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v26, 160(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v27, 176(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v28, 192(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v29, 208(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v30, 224(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v31, 240(r1) # 16-byte Folded Spill
; CHECK-NEXT: lxvp vsp34, 0(r3)
; CHECK-NEXT: stxvp vsp34, 32(r1) # 32-byte Folded Spill
; CHECK-NEXT: #APP
@@ -84,34 +84,34 @@ define dso_local void @test(<256 x i1>* %vpp, <256 x i1>* %vp2) local_unnamed_ad
; CHECK-BE-NEXT: stdu r1, -416(r1)
; CHECK-BE-NEXT: stfd f14, 272(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: stfd f15, 280(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: stxv v20, 80(r1) # 16-byte Folded Spill
-; CHECK-BE-NEXT: stxv v21, 96(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: stfd f16, 288(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: stfd f17, 296(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: stxv v22, 112(r1) # 16-byte Folded Spill
-; CHECK-BE-NEXT: stxv v23, 128(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: stfd f18, 304(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: stfd f19, 312(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: stxv v24, 144(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: stfd f20, 320(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: stfd f21, 328(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: stxv v25, 160(r1) # 16-byte Folded Spill
-; CHECK-BE-NEXT: stxv v26, 176(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: stfd f22, 336(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: stfd f23, 344(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: stxv v27, 192(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: stfd f24, 352(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: stfd f25, 360(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: stxv v28, 208(r1) # 16-byte Folded Spill
-; CHECK-BE-NEXT: stxv v29, 224(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: stfd f26, 368(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: stfd f27, 376(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: stxv v30, 240(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: stfd f28, 384(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: stfd f29, 392(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: stxv v31, 256(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: stfd f30, 400(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: stfd f31, 408(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: stxv v20, 80(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v21, 96(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v22, 112(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v23, 128(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v24, 144(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v25, 160(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v26, 176(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v27, 192(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v28, 208(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v29, 224(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v30, 240(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v31, 256(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: lxvp vsp34, 0(r3)
; CHECK-BE-NEXT: stxvp vsp34, 48(r1) # 32-byte Folded Spill
; CHECK-BE-NEXT: #APP
diff --git a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
index 4ecaf27c8886..b9777df02c61 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
@@ -70,12 +70,11 @@ define dso_local i32 @testCompare1(%struct.tree_common* nocapture readonly %arg1
; LE-NEXT: blr
;
; CHECK-P10-LE-LABEL: testCompare1:
-; CHECK-P10-LE: .localentry testCompare1, 1
-; CHECK-P10-LE-NEXT: # %bb.0: # %entry
+; CHECK-P10-LE: # %bb.0: # %entry
; CHECK-P10-LE-NEXT: plbz r4, testCompare1@PCREL(0), 1
; CHECK-P10-LE-NEXT: lbz r3, 0(r3)
-; CHECK-P10-LE-NEXT: clrlwi r3, r3, 31
; CHECK-P10-LE-NEXT: clrlwi r4, r4, 31
+; CHECK-P10-LE-NEXT: clrlwi r3, r3, 31
; CHECK-P10-LE-NEXT: cmplw r4, r3
; CHECK-P10-LE-NEXT: setbc r3, gt
; CHECK-P10-LE-NEXT: b fn2@notoc
@@ -122,14 +121,13 @@ define dso_local i32 @testCompare1(%struct.tree_common* nocapture readonly %arg1
; CHECK-P10-CMP-LE-NEXT: blr
;
; CHECK-P10-CMP-BE-LABEL: testCompare1:
-; CHECK-P10-CMP-BE: .localentry testCompare1, 1
-; CHECK-P10-CMP-BE-NEXT: # %bb.0: # %entry
+; CHECK-P10-CMP-BE: # %bb.0: # %entry
; CHECK-P10-CMP-BE-NEXT: plbz r4, testCompare1@PCREL(0), 1
; CHECK-P10-CMP-BE-NEXT: lbz r3, 0(r3)
-; CHECK-P10-CMP-BE-NEXT: clrlwi r3, r3, 31
; CHECK-P10-CMP-BE-NEXT: clrlwi r4, r4, 31
-; CHECK-P10-CMP-BE-NEXT: clrldi r3, r3, 32
+; CHECK-P10-CMP-BE-NEXT: clrlwi r3, r3, 31
; CHECK-P10-CMP-BE-NEXT: clrldi r4, r4, 32
+; CHECK-P10-CMP-BE-NEXT: clrldi r3, r3, 32
; CHECK-P10-CMP-BE-NEXT: sub r3, r3, r4
; CHECK-P10-CMP-BE-NEXT: rldicl r3, r3, 1, 63
; CHECK-P10-CMP-BE-NEXT: b fn2@notoc
diff --git a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
index 0ecd1a12c9aa..e609a71962e1 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
@@ -72,12 +72,11 @@ define dso_local i32 @testCompare1(%struct.tree_common* nocapture readonly %arg1
; LE-NEXT: blr
;
; CHECK-P10-LE-LABEL: testCompare1:
-; CHECK-P10-LE: .localentry testCompare1, 1
-; CHECK-P10-LE-NEXT: # %bb.0: # %entry
+; CHECK-P10-LE: # %bb.0: # %entry
; CHECK-P10-LE-NEXT: plbz r4, testCompare1@PCREL(0), 1
; CHECK-P10-LE-NEXT: lbz r3, 0(r3)
-; CHECK-P10-LE-NEXT: clrlwi r3, r3, 31
; CHECK-P10-LE-NEXT: clrlwi r4, r4, 31
+; CHECK-P10-LE-NEXT: clrlwi r3, r3, 31
; CHECK-P10-LE-NEXT: cmplw r4, r3
; CHECK-P10-LE-NEXT: setbc r3, lt
; CHECK-P10-LE-NEXT: b fn2@notoc
@@ -124,14 +123,13 @@ define dso_local i32 @testCompare1(%struct.tree_common* nocapture readonly %arg1
; CHECK-P10-CMP-LE-NEXT: blr
;
; CHECK-P10-CMP-BE-LABEL: testCompare1:
-; CHECK-P10-CMP-BE: .localentry testCompare1, 1
-; CHECK-P10-CMP-BE-NEXT: # %bb.0: # %entry
+; CHECK-P10-CMP-BE: # %bb.0: # %entry
; CHECK-P10-CMP-BE-NEXT: plbz r4, testCompare1@PCREL(0), 1
; CHECK-P10-CMP-BE-NEXT: lbz r3, 0(r3)
-; CHECK-P10-CMP-BE-NEXT: clrlwi r3, r3, 31
; CHECK-P10-CMP-BE-NEXT: clrlwi r4, r4, 31
-; CHECK-P10-CMP-BE-NEXT: clrldi r3, r3, 32
+; CHECK-P10-CMP-BE-NEXT: clrlwi r3, r3, 31
; CHECK-P10-CMP-BE-NEXT: clrldi r4, r4, 32
+; CHECK-P10-CMP-BE-NEXT: clrldi r3, r3, 32
; CHECK-P10-CMP-BE-NEXT: sub r3, r4, r3
; CHECK-P10-CMP-BE-NEXT: rldicl r3, r3, 1, 63
; CHECK-P10-CMP-BE-NEXT: b fn2@notoc
More information about the llvm-commits
mailing list