[llvm] a2b5117 - [PowerPC] Update InputOps of Power10 SchedModel

Qiu Chaofan via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 7 07:46:31 PDT 2023


Author: Qiu Chaofan
Date: 2023-07-07T22:46:22+08:00
New Revision: a2b5117df75d1be8a65b2a86d5f75a59a8565fe6

URL: https://github.com/llvm/llvm-project/commit/a2b5117df75d1be8a65b2a86d5f75a59a8565fe6
DIFF: https://github.com/llvm/llvm-project/commit/a2b5117df75d1be8a65b2a86d5f75a59a8565fe6.diff

LOG: [PowerPC] Update InputOps of Power10 SchedModel

The count of input operands affects pipeline forwarding in the scheduling
model. The previous Power10 model definition arranged some instructions
into incorrect groups by counting the wrong number of input operands.

This patch updates the model, setting the input operand counts correctly
by excluding irrelevant immediate operands and counting the memory
operands of load instructions correctly.

Reviewed By: shchenz

Differential Revision: https://reviews.llvm.org/D153842

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/P10InstrResources.td
    llvm/lib/Target/PowerPC/PPCScheduleP10.td
    llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
    llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
    llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
    llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
    llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
    llvm/test/CodeGen/PowerPC/vector-reduce-add.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index 6fe23699e93e23..0827e528a80f46 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // Automatically generated file, do not edit!
 //
-// This file defines the itinerary class data for the POWER10 processor.
+// This file defines instruction data for SchedModel of the POWER10 processor.
 //
 //===----------------------------------------------------------------------===//
 // 22 Cycles Binary Floating Point operations, 2 input operands
@@ -307,41 +307,32 @@ def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
     FSELD_rec, FSELS_rec
 )>;
 
-// 2 Cycles Branch operations, 0 input operands
-def : InstRW<[P10W_BR_2C, P10W_DISP_ANY],
-      (instrs
-    BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR,
-    BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL,
-    BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, BL_RM, BL_TLS
-)>;
-
 // 2 Cycles Branch operations, 1 input operands
 def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read],
       (instrs
     B, BCC, BCCA, BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL, BCCLA, BCCLR, BCCLRL, CTRL_DEP, TAILB, TAILB8,
     BA, TAILBA, TAILBA8,
-    BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL8_LDinto_toc_RM, BCTRL8_RM, BCTRL_LWZinto_toc, BCTRL_LWZinto_toc_RM, BCTRL_RM, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat,
-    BCL, BCLalways, BCLn, BDNZL, BDNZLm, BDNZLp, BDZL, BDZLm, BDZLp, gBCL, gBCLat,
-    BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM
-)>;
-
-// 2 Cycles Branch operations, 3 input operands
-def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read],
-      (instrs
     BCCTR, BCCTR8, BCCTR8n, BCCTRn, gBCCTR,
-    BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, gBCCTRL
+    BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, gBCCTRL,
+    BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR,
+    BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL,
+    BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, BL_RM, BL_TLS,
+    BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM
 )>;
 
-// 2 Cycles Branch operations, 4 input operands
-def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read, P10BR_Read],
+// 2 Cycles Branch operations, 2 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read],
       (instrs
+    BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL8_LDinto_toc_RM, BCTRL8_RM, BCTRL_LWZinto_toc, BCTRL_LWZinto_toc_RM, BCTRL_RM, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat,
     BDNZA, BDNZAm, BDNZAp, BDZA, BDZAm, BDZAp, gBCA, gBCAat,
+    BCL, BCLalways, BCLn, BDNZL, BDNZLm, BDNZLp, BDZL, BDZLm, BDZLp, gBCL, gBCLat,
     BDNZLA, BDNZLAm, BDNZLAp, BDZLA, BDZLAm, BDZLAp, gBCLA, gBCLAat
 )>;
 
 // 7 Cycles Crypto operations, 1 input operands
 def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read],
       (instrs
+    VGNB,
     VSBOX
 )>;
 
@@ -358,7 +349,6 @@ def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read, P10CY_Read],
     VCIPHERLAST,
     VCLZDM,
     VCTZDM,
-    VGNB,
     VNCIPHER,
     VNCIPHERLAST,
     VPDEPD,
@@ -384,29 +374,24 @@ def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read],
     XSCVSDQP,
     XSCVSQQP,
     XSCVUDQP,
-    XSCVUQQP
+    XSCVUQQP,
+    XSRQPI,
+    XSRQPIX,
+    XSRQPXP
 )>;
 
 // 13 Cycles Decimal Floating Point operations, 2 input operands
 def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
       (instrs
+    BCDSR_rec,
     XSADDQP,
     XSADDQPO,
     XSSUBQP,
     XSSUBQPO
 )>;
 
-// 13 Cycles Decimal Floating Point operations, 3 input operands
-def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
-      (instrs
-    BCDSR_rec,
-    XSRQPI,
-    XSRQPIX,
-    XSRQPXP
-)>;
-
 // 2-way crack instructions
-// 13 Cycles Decimal Floating Point operations, and 3 Cycles Store operations, 2 input operands
+// 13 Cycles Decimal Floating Point operations, and 3 Cycles Store operations, 1 input operands
 def : InstRW<[P10W_DF_13C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY],
       (instrs
     HASHST, HASHST8,
@@ -439,8 +424,8 @@ def : InstRW<[P10W_DF_25C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
     XSNMSUBQPO
 )>;
 
-// 38 Cycles Decimal Floating Point operations, 2 input operands
-def : InstRW<[P10W_DF_38C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+// 38 Cycles Decimal Floating Point operations, 1 input operands
+def : InstRW<[P10W_DF_38C, P10W_DISP_ANY, P10DF_Read],
       (instrs
     BCDCFSQ_rec
 )>;
@@ -594,20 +579,26 @@ def : InstRW<[P10W_DV_83C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
 // 5 Cycles Fixed-Point and BCD operations, 1 input operands
 def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read],
       (instrs
+    BCDCFN_rec,
+    BCDCFZ_rec,
     BCDCTN_rec,
+    BCDCTZ_rec,
+    BCDSETSGN_rec,
     VMUL10CUQ,
     VMUL10UQ,
-    XSXSIGQP
+    XSTSTDCQP,
+    XSXSIGQP,
+    XXGENPCVBM
 )>;
 
 // 5 Cycles Fixed-Point and BCD operations, 2 input operands
 def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
       (instrs
-    BCDCFN_rec,
-    BCDCFZ_rec,
+    BCDADD_rec,
     BCDCPSGN_rec,
-    BCDCTZ_rec,
-    BCDSETSGN_rec,
+    BCDS_rec,
+    BCDSUB_rec,
+    BCDTRUNC_rec,
     BCDUS_rec,
     BCDUTRUNC_rec,
     VADDCUQ,
@@ -623,18 +614,12 @@ def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
     XSCMPOQP,
     XSCMPUQP,
     XSMAXCQP,
-    XSMINCQP,
-    XSTSTDCQP,
-    XXGENPCVBM
+    XSMINCQP
 )>;
 
 // 5 Cycles Fixed-Point and BCD operations, 3 input operands
 def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read],
       (instrs
-    BCDADD_rec,
-    BCDS_rec,
-    BCDSUB_rec,
-    BCDTRUNC_rec,
     VADDECUQ,
     VADDEUQM,
     VSUBECUQ,
@@ -644,7 +629,7 @@ def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read],
 // 4 Cycles ALU2 operations, 0 input operands
 def : InstRW<[P10W_F2_4C, P10W_DISP_ANY],
       (instrs
-    TRAP, TW
+    MTVSRBMI
 )>;
 
 // 4 Cycles ALU2 operations, 1 input operands
@@ -660,9 +645,9 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
     CNTTZD_rec,
     CNTTZW, CNTTZW8,
     CNTTZW8_rec, CNTTZW_rec,
+    EXTSWSLI_32_64_rec, EXTSWSLI_rec,
     FTSQRT,
     MTVSRBM,
-    MTVSRBMI,
     MTVSRDM,
     MTVSRHM,
     MTVSRQM,
@@ -670,10 +655,18 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
     POPCNTB, POPCNTB8,
     POPCNTD,
     POPCNTW,
+    RLDIC_rec,
+    RLDICL_32_rec, RLDICL_rec,
+    RLDICR_rec,
+    RLWINM8_rec, RLWINM_rec,
     VCLZB,
     VCLZD,
     VCLZH,
     VCLZW,
+    VCNTMBB,
+    VCNTMBD,
+    VCNTMBH,
+    VCNTMBW,
     VCTZB,
     VCTZD,
     VCTZH,
@@ -694,27 +687,40 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
     VPOPCNTW,
     VPRTYBD,
     VPRTYBW,
+    VSHASIGMAD,
+    VSHASIGMAW,
     XSCVHPDP,
     XSCVSPDPN,
     XSTSQRTDP,
+    XSTSTDCDP,
+    XSTSTDCSP,
     XVCVHPSP,
     XVTLSBB,
     XVTSQRTDP,
-    XVTSQRTSP
+    XVTSQRTSP,
+    XVTSTDCDP,
+    XVTSTDCSP
 )>;
 
 // 4 Cycles ALU2 operations, 2 input operands
 def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
       (instrs
     CMPEQB,
-    EXTSWSLI_32_64_rec, EXTSWSLI_rec,
+    CMPRB, CMPRB8,
     FCMPOD, FCMPOS,
     FCMPUD, FCMPUS,
     FTDIV,
+    RLDCL_rec,
+    RLDCR_rec,
+    RLDIMI_rec,
+    RLWIMI8_rec, RLWIMI_rec,
+    RLWNM8_rec, RLWNM_rec,
     SLD_rec,
     SLW8_rec, SLW_rec,
     SRD_rec,
     SRW8_rec, SRW_rec,
+    TDI,
+    TWI,
     VABSDUB,
     VABSDUH,
     VABSDUW,
@@ -765,10 +771,6 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
     VCMPNEZW_rec,
     VCMPSQ,
     VCMPUQ,
-    VCNTMBB,
-    VCNTMBD,
-    VCNTMBH,
-    VCNTMBW,
     VMAXFP,
     VMINFP,
     VSUBCUW,
@@ -791,8 +793,6 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
     XSMINDP,
     XSMINJDP,
     XSTDIVDP,
-    XSTSTDCDP,
-    XSTSTDCSP,
     XVCMPEQDP,
     XVCMPEQDP_rec,
     XVCMPEQSP,
@@ -810,39 +810,24 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
     XVMINDP,
     XVMINSP,
     XVTDIVDP,
-    XVTDIVSP,
-    XVTSTDCDP,
-    XVTSTDCSP
+    XVTDIVSP
 )>;
 
 // 4 Cycles ALU2 operations, 3 input operands
 def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
       (instrs
-    CMPRB, CMPRB8,
-    RLDCL_rec,
-    RLDCR_rec,
-    RLDIC_rec,
-    RLDICL_32_rec, RLDICL_rec,
-    RLDICR_rec,
     TD,
-    TDI,
-    TWI,
-    VSHASIGMAD,
-    VSHASIGMAW
-)>;
-
-// 4 Cycles ALU2 operations, 4 input operands
-def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
-      (instrs
-    RLDIMI_rec,
-    RLWINM8_rec, RLWINM_rec,
-    RLWNM8_rec, RLWNM_rec
+    TRAP, TW
 )>;
 
-// 4 Cycles ALU2 operations, 5 input operands
-def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
+// Single crack instructions
+// 4 Cycles ALU2 operations, 1 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
       (instrs
-    RLWIMI8_rec, RLWIMI_rec
+    SRADI_rec,
+    SRAWI_rec,
+    TABORTDCI,
+    TABORTWCI
 )>;
 
 // Single crack instructions
@@ -850,19 +835,9 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10
 def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
       (instrs
     SRAD_rec,
-    SRADI_rec,
     SRAW_rec,
-    SRAWI_rec
-)>;
-
-// Single crack instructions
-// 4 Cycles ALU2 operations, 3 input operands
-def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
-      (instrs
     TABORTDC,
-    TABORTDCI,
-    TABORTWC,
-    TABORTWCI
+    TABORTWC
 )>;
 
 // 2-way crack instructions
@@ -900,32 +875,34 @@ def : InstRW<[P10W_FX_2C, P10W_DISP_ANY, P10FX_Read],
 // 3 Cycles ALU operations, 0 input operands
 def : InstRW<[P10W_FX_3C, P10W_DISP_ANY],
       (instrs
-    CR6SET, CREQV, CRSET,
     DSS, DSSALL,
     MCRXRX,
     MFCTR, MFCTR8,
     MFLR, MFLR8,
-    NOP, NOP_GT_PWR6, NOP_GT_PWR7, ORI, ORI8,
-    VXOR, V_SET0, V_SET0B, V_SET0H,
-    XXLEQV, XXLEQVOnes,
-    XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz
+    WAIT
 )>;
 
 // 3 Cycles ALU operations, 1 input operands
 def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
       (instrs
-    ADDI, ADDI8, ADDIdtprelL32,  ADDItlsldLADDR32,  ADDItocL, LI, LI8,
-    ADDIS, ADDIS8,  ADDISdtprelHA32, ADDIStocHA,  ADDIStocHA8, LIS, LIS8,
+    ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8,
+    ADDIC, ADDIC8,
+    ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8,
     ADDME, ADDME8,
     ADDME8O, ADDMEO,
     ADDZE, ADDZE8,
     ADDZE8O, ADDZEO,
+    ANDI8_rec, ANDI_rec,
+    ANDIS8_rec, ANDIS_rec,
+    CMPDI, CMPWI,
+    CMPLDI, CMPLWI,
     EXTSB, EXTSB8, EXTSB8_32_64,
     EXTSB8_rec, EXTSB_rec,
     EXTSH, EXTSH8, EXTSH8_32_64,
     EXTSH8_rec, EXTSH_rec,
     EXTSW, EXTSW_32, EXTSW_32_64,
     EXTSW_32_64_rec, EXTSW_rec,
+    EXTSWSLI, EXTSWSLI_32_64,
     FABSD, FABSS,
     FMR,
     FNABSD, FNABSS,
@@ -941,11 +918,20 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
     NEG, NEG8,
     NEG8_rec, NEG_rec,
     NEG8O, NEGO,
+    NOP, NOP_GT_PWR6, NOP_GT_PWR7, ORI, ORI8,
+    ORIS, ORIS8,
+    RLDIC,
+    RLDICL, RLDICL_32, RLDICL_32_64,
+    RLDICR, RLDICR_32,
+    RLWINM, RLWINM8,
     SETB, SETB8,
     SETBC, SETBC8,
     SETBCR, SETBCR8,
     SETNBC, SETNBC8,
     SETNBCR, SETNBCR8,
+    SRADI, SRADI_32,
+    SRAWI,
+    SUBFIC, SUBFIC8,
     SUBFME, SUBFME8,
     SUBFME8O, SUBFMEO,
     SUBFZE, SUBFZE8,
@@ -958,7 +944,8 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
     VEXTSW2D, VEXTSW2Ds,
     VNEGD,
     VNEGW,
-    WAIT,
+    XORI, XORI8,
+    XORIS, XORIS8,
     XSABSDP,
     XSABSQP,
     XSNABSDP, XSNABSDPs,
@@ -987,29 +974,27 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
     ADD4_rec, ADD8_rec,
     ADDE, ADDE8,
     ADDE8O, ADDEO,
-    ADDIC, ADDIC8,
+    ADDEX, ADDEX8,
     ADD4O, ADD8O,
     AND, AND8,
     AND8_rec, AND_rec,
     ANDC, ANDC8,
     ANDC8_rec, ANDC_rec,
-    ANDI8_rec, ANDI_rec,
-    ANDIS8_rec, ANDIS_rec,
     CMPD, CMPW,
     CMPB, CMPB8,
-    CMPDI, CMPWI,
     CMPLD, CMPLW,
-    CMPLDI, CMPLWI,
     CRAND,
     CRANDC,
+    CR6SET, CREQV, CRSET,
     CRNAND,
     CRNOR,
     CROR,
     CRORC,
     CR6UNSET, CRUNSET, CRXOR,
+    DST, DST64, DSTT, DSTT64,
+    DSTST, DSTST64, DSTSTT, DSTSTT64,
     EQV, EQV8,
     EQV8_rec, EQV_rec,
-    EXTSWSLI, EXTSWSLI_32_64,
     FCPSGND, FCPSGNS,
     NAND, NAND8,
     NAND8_rec, NAND_rec,
@@ -1019,20 +1004,21 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
     OR8_rec, OR_rec,
     ORC, ORC8,
     ORC8_rec, ORC_rec,
-    ORIS, ORIS8,
+    RLDCL,
+    RLDCR,
+    RLDIMI,
+    RLWIMI, RLWIMI8,
+    RLWNM, RLWNM8,
     SLD,
     SLW, SLW8,
     SRAD,
-    SRADI, SRADI_32,
     SRAW,
-    SRAWI,
     SRD,
     SRW, SRW8,
     SUBF, SUBF8,
     SUBF8_rec, SUBF_rec,
     SUBFE, SUBFE8,
     SUBFE8O, SUBFEO,
-    SUBFIC, SUBFIC8,
     SUBF8O, SUBFO,
     VADDUBM,
     VADDUDM,
@@ -1103,10 +1089,9 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
     VSUBUDM,
     VSUBUHM,
     VSUBUWM,
+    VXOR, V_SET0, V_SET0B, V_SET0H,
     XOR, XOR8,
     XOR8_rec, XOR_rec,
-    XORI, XORI8,
-    XORIS, XORIS8,
     XSCPSGNDP,
     XSCPSGNQP,
     XSIEXPDP,
@@ -1117,69 +1102,52 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
     XVIEXPSP,
     XXLAND,
     XXLANDC,
+    XXLEQV, XXLEQVOnes,
     XXLNAND,
     XXLNOR,
     XXLOR, XXLORf,
-    XXLORC
+    XXLORC,
+    XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz
 )>;
 
 // 3 Cycles ALU operations, 3 input operands
 def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
       (instrs
-    ADDEX, ADDEX8,
-    DST, DST64, DSTT, DSTT64,
-    DSTST, DSTST64, DSTSTT, DSTSTT64,
     ISEL, ISEL8,
-    RLDCL,
-    RLDCR,
-    RLDIC,
-    RLDICL, RLDICL_32, RLDICL_32_64,
-    RLDICR, RLDICR_32,
     VRLDMI,
     VRLWMI,
     VSEL,
     XXSEL
 )>;
 
-// 3 Cycles ALU operations, 4 input operands
-def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
-      (instrs
-    RLDIMI,
-    RLWINM, RLWINM8,
-    RLWNM, RLWNM8
-)>;
-
-// 3 Cycles ALU operations, 5 input operands
-def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
-      (instrs
-    RLWIMI, RLWIMI8
-)>;
-
 // Single crack instructions
 // 3 Cycles ALU operations, 0 input operands
 def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
       (instrs
     MFFS,
     MFFS_rec,
+    MFFSCDRNI,
+    MFFSCRNI,
     MFFSL,
     MFVSCR,
-    TRECHKPT
+    MTFSB0,
+    TBEGIN,
+    TRECHKPT,
+    TSR
 )>;
 
 // Single crack instructions
 // 3 Cycles ALU operations, 1 input operands
 def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read],
       (instrs
+    ADDIC_rec,
     ADDME8_rec, ADDME_rec,
     ADDME8O_rec, ADDMEO_rec,
     ADDZE8_rec, ADDZE_rec,
     ADDZE8O_rec, ADDZEO_rec,
     MCRFS,
     MFFSCDRN,
-    MFFSCDRNI,
     MFFSCRN,
-    MFFSCRNI,
-    MTFSB0,
     MTVSCR,
     NEG8O_rec, NEGO_rec,
     SUBFME8_rec, SUBFME_rec,
@@ -1187,9 +1155,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read],
     SUBFZE8_rec, SUBFZE_rec,
     SUBFZE8O_rec, SUBFZEO_rec,
     TABORT,
-    TBEGIN,
-    TRECLAIM,
-    TSR
+    TRECLAIM
 )>;
 
 // Single crack instructions
@@ -1198,7 +1164,6 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read, P10FX_Read]
       (instrs
     ADDE8_rec, ADDE_rec,
     ADDE8O_rec, ADDEO_rec,
-    ADDIC_rec,
     ADD4O_rec, ADD8O_rec,
     SUBFE8_rec, SUBFE_rec,
     SUBFE8O_rec, SUBFEO_rec,
@@ -1218,7 +1183,12 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
       (instrs
     HRFID,
     MFFSCE,
+    MTFSB1,
+    MTFSFI, MTFSFIb,
+    MTFSFI_rec,
+    RFEBB,
     RFID,
+    SC,
     STOP
 )>;
 
@@ -1230,9 +1200,8 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read]
     FMR_rec,
     FNABSD_rec, FNABSS_rec,
     FNEGD_rec, FNEGS_rec,
-    MTFSB1,
-    RFEBB,
-    SC
+    MTFSF, MTFSFb,
+    MTFSF_rec
 )>;
 
 // 2-way crack instructions
@@ -1243,27 +1212,11 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read,
     ADDC8_rec, ADDC_rec,
     ADDC8O, ADDCO,
     FCPSGND_rec, FCPSGNS_rec,
-    MTFSF, MTFSFb,
-    MTFSFI, MTFSFIb,
     SUBFC, SUBFC8,
     SUBFC8_rec, SUBFC_rec,
     SUBFC8O, SUBFCO
 )>;
 
-// 2-way crack instructions
-// 3 Cycles ALU operations, and 3 Cycles ALU operations, 3 input operands
-def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
-      (instrs
-    MTFSFI_rec
-)>;
-
-// 2-way crack instructions
-// 3 Cycles ALU operations, and 3 Cycles ALU operations, 4 input operands
-def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
-      (instrs
-    MTFSF_rec
-)>;
-
 // 4-way crack instructions
 // 3 Cycles ALU operations, 3 Cycles ALU operations, 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
 def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
@@ -1283,40 +1236,61 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
 )>;
 
 // 2-way crack instructions
-// 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
-def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, P10FX_Read, P10FX_Read],
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, P10FX_Read],
       (instrs
     MTCRF, MTCRF8
 )>;
 
+// 6 Cycles Load operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY],
+      (instrs
+    LBZ, LBZ8,
+    LD, LDtoc, LDtocBA, LDtocCPT, LDtocJTI,  LDtocL, SPILLTOVSR_LD,
+    DFLOADf32, DFLOADf64, LFD,
+    LHA, LHA8,
+    LHZ, LHZ8,
+    LWA, LWA_32,
+    LWZ, LWZ8, LWZtoc, LWZtocL,
+    LXSD,
+    LXV
+)>;
+
 // 6 Cycles Load operations, 1 input operands
 def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
       (instrs
-    LBZ, LBZ8,
-    LD,  LDtoc,  LDtocBA,  LDtocCPT,  LDtocJTI,  LDtocL, SPILLTOVSR_LD,
+    LXVL,
+    LXVLL
+)>;
+
+// 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+      (instrs
+    DCBT,
+    DCBTST,
+    ICBT,
+    LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32,
     LDBRX,
-     DFLOADf32, DFLOADf64, LFD,
+    LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX,
+    LFDX, LFDXTLS, LFDXTLS_, XFLOADf32, XFLOADf64,
     LFIWAX, LIWAX,
     LFIWZX, LIWZX,
-    LHA, LHA8,
+    LHAX, LHAX8, LHAXTLS, LHAXTLS_, LHAXTLS_32,
     LHBRX, LHBRX8,
-    LHZ, LHZ8,
+    LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32,
     LVEBX,
     LVEHX,
     LVEWX,
     LVX,
     LVXL,
-    LWA, LWA_32,
     LWAX, LWAXTLS, LWAXTLS_, LWAXTLS_32, LWAX_32,
     LWBRX, LWBRX8,
-    LWZ, LWZ8,  LWZtoc, LWZtocL,
-    LXSD,
+    LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32,
     LXSDX,
     LXSIBZX,
     LXSIHZX,
     LXSIWAX,
     LXSIWZX,
-    LXV,
     LXVB16X,
     LXVD2X,
     LXVDSX,
@@ -1330,24 +1304,8 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
     LXVX
 )>;
 
-// 6 Cycles Load operations, 2 input operands
-def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
-      (instrs
-    DCBT,
-    DCBTST,
-    ICBT,
-    LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32,
-    LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX,
-    LFDX, LFDXTLS, LFDXTLS_,  XFLOADf32, XFLOADf64,
-    LHAX, LHAX8, LHAXTLS, LHAXTLS_, LHAXTLS_32,
-    LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32,
-    LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32,
-    LXVL,
-    LXVLL
-)>;
-
 // 2-way crack instructions
-// 6 Cycles Load operations, and 13 Cycles Decimal Floating Point operations, 2 input operands
+// 6 Cycles Load operations, and 13 Cycles Decimal Floating Point operations, 1 input operands
 def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DF_13C, P10W_DISP_ANY],
       (instrs
     HASHCHK, HASHCHK8,
@@ -1358,6 +1316,7 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DF_13C, P10W_DISP_ANY],
 // 6 Cycles Load operations, 0 input operands
 def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY],
       (instrs
+    DARN,
     SLBIA
 )>;
 
@@ -1365,11 +1324,7 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY],
 // 6 Cycles Load operations, 1 input operands
 def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read],
       (instrs
-    DARN,
-    LBARX, LBARXL,
-    LDARX, LDARXL,
-    LHARX, LHARXL,
-    LWARX, LWARXL,
+    MTSPR, MTSPR8, MTSR, MTUDSCR, MTVRSAVE, MTVRSAVEv,
     SLBFEE_rec,
     SLBIE,
     SLBMFEE,
@@ -1380,48 +1335,57 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read],
 // 6 Cycles Load operations, 2 input operands
 def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
       (instrs
+    LBARX, LBARXL,
     LBZCIX,
+    LDARX, LDARXL,
     LDCIX,
+    LHARX, LHARXL,
     LHZCIX,
-    LWZCIX,
-    MTSPR, MTSPR8, MTSR, MTUDSCR, MTVRSAVE, MTVRSAVEv
+    LWARX, LWARXL,
+    LWZCIX
 )>;
 
 // Expand instructions
-// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operands
-def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
+// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY],
       (instrs
     LMW
 )>;
 
 // Expand instructions
-// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 2 input operands
-def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
       (instrs
     LSWI
 )>;
 
 // 2-way crack instructions
-// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 0 input operands
 def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_SX_3C, P10W_DISP_ANY],
       (instrs
     LBZU, LBZU8,
-    LBZUX, LBZUX8,
     LDU,
-    LDUX,
     LFDU,
-    LFDUX,
     LHAU, LHAU8,
-    LHAUX, LHAUX8,
     LHZU, LHZU8,
+    LWZU, LWZU8
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_SX_3C, P10W_DISP_ANY],
+      (instrs
+    LBZUX, LBZUX8,
+    LDUX,
+    LFDUX,
+    LHAUX, LHAUX8,
     LHZUX, LHZUX8,
     LWAUX,
-    LWZU, LWZU8,
     LWZUX, LWZUX8
 )>;
 
-// 6 Cycles Load operations, 1 input operands
-def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
+// 6 Cycles Load operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR],
       (instrs
     PLBZ, PLBZ8, PLBZ8pc, PLBZpc,
     PLD, PLDpc,
@@ -1438,26 +1402,32 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
 )>;
 
 // 2-way crack instructions
-// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 1 input operands
+// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 0 input operands
 def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
       (instrs
     LFS,
-    LXSSP,
-    LXSSPX
+    LXSSP
 )>;
 
 // 2-way crack instructions
 // 6 Cycles Load operations, and 4 Cycles ALU2 operations, 2 input operands
 def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
       (instrs
-    LFSX, LFSXTLS, LFSXTLS_
+    LFSX, LFSXTLS, LFSXTLS_,
+    LXSSPX
 )>;
 
 // 4-way crack instructions
-// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands
+// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY],
+      (instrs
+    LFSU
+)>;
+
+// 4-way crack instructions
+// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 2 input operands
 def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY],
       (instrs
-    LFSU,
     LFSUX
 )>;
 
@@ -1476,10 +1446,16 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read
 )>;
 
 // 2-way crack instructions
-// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_SX_3C],
+      (instrs
+    LXVP
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands
 def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_SX_3C],
       (instrs
-    LXVP,
     LXVPX
 )>;
 
@@ -1535,34 +1511,21 @@ def : InstRW<[P10W_MM_10C, P10W_DISP_ANY, P10MM_Read, P10MM_Read, P10MM_Read],
     XVI8GER4SPP
 )>;
 
-// 10 Cycles SIMD Matrix Multiply Engine operations, 4 input operands
-def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
-      (instrs
-    PMXVF32GER,
-    PMXVF64GER
-)>;
-
-// 10 Cycles SIMD Matrix Multiply Engine operations, 5 input operands
-def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+// 10 Cycles SIMD Matrix Multiply Engine operations, 2 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read],
       (instrs
     PMXVBF16GER2,
     PMXVF16GER2,
-    PMXVF32GERNN,
-    PMXVF32GERNP,
-    PMXVF32GERPN,
-    PMXVF32GERPP,
-    PMXVF64GERNN,
-    PMXVF64GERNP,
-    PMXVF64GERPN,
-    PMXVF64GERPP,
+    PMXVF32GER,
+    PMXVF64GER,
     PMXVI16GER2,
     PMXVI16GER2S,
     PMXVI4GER8,
     PMXVI8GER4
 )>;
 
-// 10 Cycles SIMD Matrix Multiply Engine operations, 6 input operands
-def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+// 10 Cycles SIMD Matrix Multiply Engine operations, 3 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read],
       (instrs
     PMXVBF16GER2NN,
     PMXVBF16GER2NP,
@@ -1572,6 +1535,14 @@ def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P
     PMXVF16GER2NP,
     PMXVF16GER2PN,
     PMXVF16GER2PP,
+    PMXVF32GERNN,
+    PMXVF32GERNP,
+    PMXVF32GERPN,
+    PMXVF32GERPP,
+    PMXVF64GERNN,
+    PMXVF64GERNP,
+    PMXVF64GERPN,
+    PMXVF64GERPP,
     PMXVI16GER2PP,
     PMXVI16GER2SPP,
     PMXVI4GER8PP,
@@ -1593,6 +1564,12 @@ def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C, P10W_MM_10C, P10W_DISP_PA
     XXMFACC
 )>;
 
+// 5 Cycles GPR Multiply operations, 1 input operands
+def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read],
+      (instrs
+    MULLI, MULLI8
+)>;
+
 // 5 Cycles GPR Multiply operations, 2 input operands
 def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read],
       (instrs
@@ -1602,7 +1579,6 @@ def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read],
     MULHWU,
     MULLD,
     MULLDO,
-    MULLI, MULLI8,
     MULLW,
     MULLWO,
     VMULHSD,
@@ -1635,7 +1611,11 @@ def : InstRW<[P10W_MU_5C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
 // 4 Cycles Permute operations, 0 input operands
 def : InstRW<[P10W_PM_4C, P10W_DISP_ANY],
       (instrs
-    VSPLTISW, V_SETALLONES, V_SETALLONESB, V_SETALLONESH
+    LXVKQ,
+    VSPLTISB,
+    VSPLTISH,
+    VSPLTISW, V_SETALLONES, V_SETALLONESB, V_SETALLONESH,
+    XXSPLTIB
 )>;
 
 // 4 Cycles Permute operations, 1 input operands
@@ -1644,17 +1624,21 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read],
     BRD,
     BRH, BRH8,
     BRW, BRW8,
-    LVSL,
-    LVSR,
-    LXVKQ,
     MFVSRLD,
     MTVSRWS,
     VCLZLSBB,
     VCTZLSBB,
+    VEXTRACTD,
+    VEXTRACTUB,
+    VEXTRACTUH,
+    VEXTRACTUW,
     VGBBD,
+    VINSERTD,
+    VINSERTW,
     VPRTYBQ,
-    VSPLTISB,
-    VSPLTISH,
+    VSPLTB, VSPLTBs,
+    VSPLTH, VSPLTHs,
+    VSPLTW,
     VSTRIBL,
     VSTRIBR,
     VSTRIHL,
@@ -1672,30 +1656,34 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read],
     XXBRH,
     XXBRQ,
     XXBRW,
-    XXSPLTIB
+    XXEXTRACTUW,
+    XXGENPCVDM,
+    XXGENPCVHM,
+    XXGENPCVWM,
+    XXSPLTW, XXSPLTWs
 )>;
 
 // 4 Cycles Permute operations, 2 input operands
 def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read],
       (instrs
     BPERMD,
+    LVSL,
+    LVSR,
     MTVSRDD,
     VBPERMD,
     VBPERMQ,
     VCLRLB,
     VCLRRB,
-    VEXTRACTD,
-    VEXTRACTUB,
-    VEXTRACTUH,
-    VEXTRACTUW,
     VEXTUBLX,
     VEXTUBRX,
     VEXTUHLX,
     VEXTUHRX,
     VEXTUWLX,
     VEXTUWRX,
-    VINSERTD,
-    VINSERTW,
+    VINSD,
+    VINSERTB,
+    VINSERTH,
+    VINSW,
     VMRGHB,
     VMRGHH,
     VMRGHW,
@@ -1716,23 +1704,19 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read],
     VPKUWUM,
     VPKUWUS,
     VSL,
+    VSLDBI,
+    VSLDOI,
     VSLO,
     VSLV,
-    VSPLTB, VSPLTBs,
-    VSPLTH, VSPLTHs,
-    VSPLTW,
     VSR,
+    VSRDBI,
     VSRO,
     VSRV,
-    XXEXTRACTUW,
-    XXGENPCVDM,
-    XXGENPCVHM,
-    XXGENPCVWM,
+    XXINSERTW,
     XXMRGHW,
     XXMRGLW,
     XXPERMDI, XXPERMDIs,
-    XXSLDWI, XXSLDWIs,
-    XXSPLTW, XXSPLTWs
+    XXSLDWI, XXSLDWIs
 )>;
 
 // 4 Cycles Permute operations, 3 input operands
@@ -1750,16 +1734,12 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read, P10PM_Read],
     VINSBRX,
     VINSBVLX,
     VINSBVRX,
-    VINSD,
     VINSDLX,
     VINSDRX,
-    VINSERTB,
-    VINSERTH,
     VINSHLX,
     VINSHRX,
     VINSHVLX,
     VINSHVRX,
-    VINSW,
     VINSWLX,
     VINSWRX,
     VINSWVLX,
@@ -1767,10 +1747,6 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read, P10PM_Read],
     VPERM,
     VPERMR,
     VPERMXOR,
-    VSLDBI,
-    VSLDOI,
-    VSRDBI,
-    XXINSERTW,
     XXPERM,
     XXPERMR
 )>;
@@ -1782,13 +1758,19 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_EVEN, P10W_vMU_7C, P10W_DISP_ANY],
     VSUMSWS
 )>;
 
-// 4 Cycles Permute operations, 1 input operands
-def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read],
+// 4 Cycles Permute operations, 0 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR],
       (instrs
     XXSPLTIDP,
     XXSPLTIW
 )>;
 
+// 4 Cycles Permute operations, 1 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read],
+      (instrs
+    XXSPLTI32DX
+)>;
+
 // 4 Cycles Permute operations, 3 input operands
 def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read],
       (instrs
@@ -1796,12 +1778,6 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read],
     XXBLENDVD,
     XXBLENDVH,
     XXBLENDVW,
-    XXSPLTI32DX
-)>;
-
-// 4 Cycles Permute operations, 4 input operands
-def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read, P10PM_Read],
-      (instrs
     XXEVAL,
     XXPERMX
 )>;
@@ -1809,51 +1785,65 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read, P1
 // 3 Cycles Store operations, 1 input operands
 def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read],
       (instrs
-    DCBST,
-    DCBZ,
-    ICBI
+    PSTXVP, PSTXVPpc,
+    STB, STB8,
+    STBU, STBU8,
+    SPILLTOVSR_ST, STD,
+    STDU,
+    DFSTOREf32, DFSTOREf64, STFD,
+    STFDU,
+    STFS,
+    STFSU,
+    STH, STH8,
+    STHU, STHU8,
+    STW, STW8,
+    STWU, STWU8,
+    STXSD,
+    STXSSP,
+    STXV
 )>;
 
 // 3 Cycles Store operations, 2 input operands
 def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
       (instrs
+    CP_COPY, CP_COPY8,
     DCBF,
-    PSTXVP, PSTXVPpc,
-    STB, STB8,
-    STBU, STBU8,
+    DCBST,
+    DCBZ,
+    ICBI,
+    STXVL,
+    STXVLL
+)>;
+
+// 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+      (instrs
     STBUX, STBUX8,
-    SPILLTOVSR_ST, STD,
+    STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32,
     STDBRX,
-    STDU,
     STDUX,
-     DFSTOREf32, DFSTOREf64, STFD,
-    STFDU,
+    SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_,
     STFDUX,
+    STFDX, STFDXTLS, STFDXTLS_,
     STFIWX, STIWX,
-    STFS,
-    STFSU,
     STFSUX,
-    STH, STH8,
+    STFSX, STFSXTLS, STFSXTLS_,
     STHBRX,
-    STHU, STHU8,
     STHUX, STHUX8,
+    STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32,
     STVEBX,
     STVEHX,
     STVEWX,
     STVX,
     STVXL,
-    STW, STW8,
     STWBRX,
-    STWU, STWU8,
     STWUX, STWUX8,
-    STXSD,
+    STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32,
     STXSDX,
     STXSIBX, STXSIBXv,
     STXSIHX, STXSIHXv,
     STXSIWX,
-    STXSSP,
     STXSSPX,
-    STXV,
     STXVB16X,
     STXVD2X,
     STXVH8X,
@@ -1865,20 +1855,6 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
     STXVX
 )>;
 
-// 3 Cycles Store operations, 3 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
-      (instrs
-    CP_COPY, CP_COPY8,
-    STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32,
-    SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_,
-    STFDX, STFDXTLS, STFDXTLS_,
-    STFSX, STFSXTLS, STFSXTLS_,
-    STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32,
-    STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32,
-    STXVL,
-    STXVLL
-)>;
-
 // Single crack instructions
 // 3 Cycles Store operations, 0 input operands
 def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
@@ -1887,25 +1863,16 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
     MSGSYNC,
     SLBSYNC,
     TCHECK,
+    TEND,
     TLBSYNC
 )>;
 
-// Single crack instructions
-// 3 Cycles Store operations, 1 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read],
-      (instrs
-    TEND
-)>;
-
 // Single crack instructions
 // 3 Cycles Store operations, 2 input operands
 def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
       (instrs
+    CP_PASTE8_rec, CP_PASTE_rec,
     SLBIEG,
-    STBCX,
-    STDCX,
-    STHCX,
-    STWCX,
     TLBIE
 )>;
 
@@ -1913,29 +1880,26 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read]
 // 3 Cycles Store operations, 3 input operands
 def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
       (instrs
-    CP_PASTE8_rec, CP_PASTE_rec,
     STBCIX,
+    STBCX,
     STDCIX,
+    STDCX,
     STHCIX,
-    STWCIX
+    STHCX,
+    STWCIX,
+    STWCX
 )>;
 
 // 2-way crack instructions
 // 3 Cycles Store operations, and 3 Cycles ALU operations, 0 input operands
 def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
       (instrs
-    ISYNC
-)>;
-
-// 2-way crack instructions
-// 3 Cycles Store operations, and 3 Cycles ALU operations, 1 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
-      (instrs
+    ISYNC,
     SYNC
 )>;
 
 // Expand instructions
-// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 6 Cycles Load operations, and 3 Cycles Store operations, 2 input operands
+// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 6 Cycles Load operations, and 3 Cycles Store operations, 1 input operands
 def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY],
       (instrs
     LDAT,
@@ -1943,7 +1907,7 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C,
 )>;
 
 // 4-way crack instructions
-// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
+// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
 def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY],
       (instrs
     STDAT,
@@ -1951,21 +1915,21 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C,
 )>;
 
 // Expand instructions
-// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
+// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read],
       (instrs
     STMW
 )>;
 
 // Expand instructions
-// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
       (instrs
     STSWI
 )>;
 
-// 3 Cycles Store operations, 2 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read, P10ST_Read],
+// 3 Cycles Store operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read],
       (instrs
     PSTB, PSTB8, PSTB8pc, PSTBpc,
     PSTD, PSTDpc,
@@ -1979,10 +1943,16 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read, P10ST_Read],
 )>;
 
 // 2-way crack instructions
-// 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read, P10ST_Read],
+// 3 Cycles Store operations, and 3 Cycles Store operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read],
+      (instrs
+    STXVP
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read, P10ST_Read, P10ST_Read],
       (instrs
-    STXVP,
     STXVPX
 )>;
 
@@ -2024,27 +1994,21 @@ def : InstRW<[P10W_SX, P10W_DISP_ANY],
 def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
       (instrs
     CLRBHRB,
-    MFMSR
+    MFBHRBE,
+    MFMSR,
+    MFTB
 )>;
 
 // Single crack instructions
 // 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
 def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read],
       (instrs
-    MFTB
-)>;
-
-// Single crack instructions
-// 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands
-def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read, P10SX_Read],
-      (instrs
-    MFBHRBE,
     MTMSR,
     MTMSRD
 )>;
 
 // 2-way crack instructions
-// 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands
+// 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 0 input operands
 def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
       (instrs
     ADDPCIS

diff  --git a/llvm/lib/Target/PowerPC/PPCScheduleP10.td b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
index f89ef735a36711..25be37718af241 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP10.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
@@ -25,14 +25,8 @@ def P10vMU_Read : SchedRead;
 
 def P10Model : SchedMachineModel {
   let IssueWidth = 8;
-
-  // TODO - Need to be updated according to P10 UM.
   let MicroOpBufferSize = 44;
-
-  // TODO - tune this on real HW once it arrives. For now, we will use the same
-  // value as we do on P9.
   let LoopMicroOpBufferSize = 60;
-
   let CompleteModel = 1;
 
   // Do not support SPE (Signal Procesing Engine) on Power 10.

diff  --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
index 0ddf0330e90648..0ef03630095434 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -120,8 +120,8 @@ define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
 ;
 ; CHECK-LINUXBE-LABEL: vec_xst_trunc_ss:
 ; CHECK-LINUXBE:       # %bb.0: # %entry
-; CHECK-LINUXBE-NEXT:    sldi r3, r5, 1
 ; CHECK-LINUXBE-NEXT:    vsldoi v2, v2, v2, 10
+; CHECK-LINUXBE-NEXT:    sldi r3, r5, 1
 ; CHECK-LINUXBE-NEXT:    stxsihx v2, r6, r3
 ; CHECK-LINUXBE-NEXT:    blr
 ;
@@ -137,8 +137,8 @@ define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
 ;
 ; CHECK-AIXBE-LABEL: vec_xst_trunc_ss:
 ; CHECK-AIXBE:       # %bb.0: # %entry
-; CHECK-AIXBE-NEXT:    sldi r3, r3, 1
 ; CHECK-AIXBE-NEXT:    vsldoi v2, v2, v2, 10
+; CHECK-AIXBE-NEXT:    sldi r3, r3, 1
 ; CHECK-AIXBE-NEXT:    stxsihx v2, r4, r3
 ; CHECK-AIXBE-NEXT:    blr
 entry:
@@ -158,8 +158,8 @@ define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
 ;
 ; CHECK-LINUXBE-LABEL: vec_xst_trunc_us:
 ; CHECK-LINUXBE:       # %bb.0: # %entry
-; CHECK-LINUXBE-NEXT:    sldi r3, r5, 1
 ; CHECK-LINUXBE-NEXT:    vsldoi v2, v2, v2, 10
+; CHECK-LINUXBE-NEXT:    sldi r3, r5, 1
 ; CHECK-LINUXBE-NEXT:    stxsihx v2, r6, r3
 ; CHECK-LINUXBE-NEXT:    blr
 ;
@@ -175,8 +175,8 @@ define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
 ;
 ; CHECK-AIXBE-LABEL: vec_xst_trunc_us:
 ; CHECK-AIXBE:       # %bb.0: # %entry
-; CHECK-AIXBE-NEXT:    sldi r3, r3, 1
 ; CHECK-AIXBE-NEXT:    vsldoi v2, v2, v2, 10
+; CHECK-AIXBE-NEXT:    sldi r3, r3, 1
 ; CHECK-AIXBE-NEXT:    stxsihx v2, r4, r3
 ; CHECK-AIXBE-NEXT:    blr
 entry:
@@ -196,8 +196,8 @@ define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
 ;
 ; CHECK-LINUXBE-LABEL: vec_xst_trunc_si:
 ; CHECK-LINUXBE:       # %bb.0: # %entry
-; CHECK-LINUXBE-NEXT:    sldi r3, r5, 2
 ; CHECK-LINUXBE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-LINUXBE-NEXT:    sldi r3, r5, 2
 ; CHECK-LINUXBE-NEXT:    stfiwx f0, r6, r3
 ; CHECK-LINUXBE-NEXT:    blr
 ;
@@ -213,8 +213,8 @@ define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
 ;
 ; CHECK-AIXBE-LABEL: vec_xst_trunc_si:
 ; CHECK-AIXBE:       # %bb.0: # %entry
-; CHECK-AIXBE-NEXT:    sldi r3, r3, 2
 ; CHECK-AIXBE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-AIXBE-NEXT:    sldi r3, r3, 2
 ; CHECK-AIXBE-NEXT:    stfiwx f0, r4, r3
 ; CHECK-AIXBE-NEXT:    blr
 entry:
@@ -234,8 +234,8 @@ define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
 ;
 ; CHECK-LINUXBE-LABEL: vec_xst_trunc_ui:
 ; CHECK-LINUXBE:       # %bb.0: # %entry
-; CHECK-LINUXBE-NEXT:    sldi r3, r5, 2
 ; CHECK-LINUXBE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-LINUXBE-NEXT:    sldi r3, r5, 2
 ; CHECK-LINUXBE-NEXT:    stfiwx f0, r6, r3
 ; CHECK-LINUXBE-NEXT:    blr
 ;
@@ -251,8 +251,8 @@ define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
 ;
 ; CHECK-AIXBE-LABEL: vec_xst_trunc_ui:
 ; CHECK-AIXBE:       # %bb.0: # %entry
-; CHECK-AIXBE-NEXT:    sldi r3, r3, 2
 ; CHECK-AIXBE-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-AIXBE-NEXT:    sldi r3, r3, 2
 ; CHECK-AIXBE-NEXT:    stfiwx f0, r4, r3
 ; CHECK-AIXBE-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
index f2b6c649343127..f21e1d4f296fa0 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
@@ -454,8 +454,8 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) {
 ; LE-PAIRED-NEXT:    add r6, r5, r3
 ; LE-PAIRED-NEXT:    lxvx v3, r5, r3
 ; LE-PAIRED-NEXT:    sldi r3, r4, 5
-; LE-PAIRED-NEXT:    lxv v2, 16(r6)
 ; LE-PAIRED-NEXT:    add r4, r5, r3
+; LE-PAIRED-NEXT:    lxv v2, 16(r6)
 ; LE-PAIRED-NEXT:    stxvx v3, r5, r3
 ; LE-PAIRED-NEXT:    stxv v2, 16(r4)
 ; LE-PAIRED-NEXT:    blr
@@ -468,8 +468,8 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) {
 ; BE-PAIRED-NEXT:    add r6, r5, r3
 ; BE-PAIRED-NEXT:    lxvx v2, r5, r3
 ; BE-PAIRED-NEXT:    sldi r3, r4, 5
-; BE-PAIRED-NEXT:    lxv v3, 16(r6)
 ; BE-PAIRED-NEXT:    add r4, r5, r3
+; BE-PAIRED-NEXT:    lxv v3, 16(r6)
 ; BE-PAIRED-NEXT:    stxvx v2, r5, r3
 ; BE-PAIRED-NEXT:    stxv v3, 16(r4)
 ; BE-PAIRED-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll b/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
index f1a05c105099cb..3a742588d23b59 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
@@ -20,8 +20,8 @@
 define dso_local signext i32 @jumptable(i32 signext %param) {
 ; CHECK-R-LABEL: jumptable:
 ; CHECK-R:       # %bb.1: # %entry
-; CHECK-R-NEXT:    paddi r5, 0, .LJTI0_0@PCREL, 1
 ; CHECK-R-NEXT:    rldic r4, r4
+; CHECK-R-NEXT:    paddi r5, 0, .LJTI0_0@PCREL, 1
 ; CHECK-R-NEXT:    lwax r4, r4, r5
 ; CHECK-R-NEXT:    add r4, r4, r5
 ; CHECK-R-NEXT:    mtctr r4
@@ -35,8 +35,8 @@ define dso_local signext i32 @jumptable(i32 signext %param) {
 ; CHECK-A-LE-NEXT:    bctr
 ; CHECK-A-BE-LABEL: jumptable:
 ; CHECK-A-BE:       # %bb.1: # %entry
-; CHECK-A-BE-NEXT:    paddi r5, 0, .LJTI0_0@PCREL, 1
 ; CHECK-A-BE-NEXT:    rldic r4, r4
+; CHECK-A-BE-NEXT:    paddi r5, 0, .LJTI0_0@PCREL, 1
 ; CHECK-A-BE-NEXT:    lwax r4, r4, r5
 ; CHECK-A-BE-NEXT:    mtctr r4
 ; CHECK-A-BE-NEXT:    bctr

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
index 99ad7c8c93bb26..14c5eaa5532398 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
@@ -108,8 +108,8 @@ define dso_local i32 @testCompare1(ptr nocapture readonly %arg1) nounwind {
 ; CHECK-P10-CMP-LE-NEXT:    lbz r3, 0(r3)
 ; CHECK-P10-CMP-LE-NEXT:    lbz r4, testCompare1@toc@l(r4)
 ; CHECK-P10-CMP-LE-NEXT:    clrlwi r3, r3, 31
-; CHECK-P10-CMP-LE-NEXT:    clrlwi r4, r4, 31
 ; CHECK-P10-CMP-LE-NEXT:    clrldi r3, r3, 32
+; CHECK-P10-CMP-LE-NEXT:    clrlwi r4, r4, 31
 ; CHECK-P10-CMP-LE-NEXT:    clrldi r4, r4, 32
 ; CHECK-P10-CMP-LE-NEXT:    sub r3, r3, r4
 ; CHECK-P10-CMP-LE-NEXT:    rldicl r3, r3, 1, 63

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
index 030dfde8f3b712..ea0997674630da 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
@@ -110,8 +110,8 @@ define dso_local i32 @testCompare1(ptr nocapture readonly %arg1) nounwind {
 ; CHECK-P10-CMP-LE-NEXT:    lbz r3, 0(r3)
 ; CHECK-P10-CMP-LE-NEXT:    lbz r4, testCompare1@toc@l(r4)
 ; CHECK-P10-CMP-LE-NEXT:    clrlwi r3, r3, 31
-; CHECK-P10-CMP-LE-NEXT:    clrlwi r4, r4, 31
 ; CHECK-P10-CMP-LE-NEXT:    clrldi r3, r3, 32
+; CHECK-P10-CMP-LE-NEXT:    clrlwi r4, r4, 31
 ; CHECK-P10-CMP-LE-NEXT:    clrldi r4, r4, 32
 ; CHECK-P10-CMP-LE-NEXT:    sub r3, r4, r3
 ; CHECK-P10-CMP-LE-NEXT:    rldicl r3, r3, 1, 63

diff  --git a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
index 2ba113f59da3c4..0892210fc74432 100644
--- a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
@@ -1083,16 +1083,16 @@ define dso_local signext i32 @v16i8tov16i32_sign(<16 x i8> %a) local_unnamed_add
 ; PWR10BE-NEXT:    addi r3, r3, .LCPI17_1@toc@l
 ; PWR10BE-NEXT:    lxv v4, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI17_2@toc@ha
-; PWR10BE-NEXT:    addi r3, r3, .LCPI17_2@toc@l
 ; PWR10BE-NEXT:    vperm v3, v2, v2, v3
+; PWR10BE-NEXT:    addi r3, r3, .LCPI17_2@toc@l
+; PWR10BE-NEXT:    vextsb2w v3, v3
 ; PWR10BE-NEXT:    lxv v5, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI17_3@toc@ha
-; PWR10BE-NEXT:    vextsb2w v3, v3
-; PWR10BE-NEXT:    addi r3, r3, .LCPI17_3@toc@l
 ; PWR10BE-NEXT:    vperm v4, v2, v2, v4
+; PWR10BE-NEXT:    addi r3, r3, .LCPI17_3@toc@l
+; PWR10BE-NEXT:    vextsb2w v4, v4
 ; PWR10BE-NEXT:    lxv v0, 0(r3)
 ; PWR10BE-NEXT:    li r3, 0
-; PWR10BE-NEXT:    vextsb2w v4, v4
 ; PWR10BE-NEXT:    vperm v5, v2, v2, v5
 ; PWR10BE-NEXT:    vadduwm v3, v4, v3
 ; PWR10BE-NEXT:    vextsb2w v5, v5
@@ -1206,15 +1206,15 @@ define dso_local zeroext i32 @v16i8tov16i32_zero(<16 x i8> %a) local_unnamed_add
 ; PWR10BE-NEXT:    addi r3, r3, .LCPI18_1@toc@l
 ; PWR10BE-NEXT:    lxv v5, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI18_2@toc@ha
-; PWR10BE-NEXT:    addi r3, r3, .LCPI18_2@toc@l
 ; PWR10BE-NEXT:    vperm v3, v4, v2, v3
+; PWR10BE-NEXT:    addi r3, r3, .LCPI18_2@toc@l
 ; PWR10BE-NEXT:    lxv v0, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI18_3@toc@ha
-; PWR10BE-NEXT:    addi r3, r3, .LCPI18_3@toc@l
 ; PWR10BE-NEXT:    vperm v5, v4, v2, v5
+; PWR10BE-NEXT:    addi r3, r3, .LCPI18_3@toc@l
+; PWR10BE-NEXT:    vadduwm v3, v5, v3
 ; PWR10BE-NEXT:    lxv v1, 0(r3)
 ; PWR10BE-NEXT:    li r3, 0
-; PWR10BE-NEXT:    vadduwm v3, v5, v3
 ; PWR10BE-NEXT:    vperm v0, v4, v2, v0
 ; PWR10BE-NEXT:    vperm v2, v4, v2, v1
 ; PWR10BE-NEXT:    vadduwm v2, v2, v0
@@ -1572,38 +1572,38 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    addi r3, r3, .LCPI23_1@toc@l
 ; PWR10BE-NEXT:    lxv v4, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI23_2@toc@ha
-; PWR10BE-NEXT:    addi r3, r3, .LCPI23_2@toc@l
 ; PWR10BE-NEXT:    vperm v3, v2, v2, v3
+; PWR10BE-NEXT:    addi r3, r3, .LCPI23_2@toc@l
+; PWR10BE-NEXT:    vextsb2d v3, v3
 ; PWR10BE-NEXT:    lxv v5, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI23_3@toc@ha
-; PWR10BE-NEXT:    vextsb2d v3, v3
-; PWR10BE-NEXT:    addi r3, r3, .LCPI23_3@toc@l
 ; PWR10BE-NEXT:    vperm v4, v2, v2, v4
+; PWR10BE-NEXT:    addi r3, r3, .LCPI23_3@toc@l
+; PWR10BE-NEXT:    vextsb2d v4, v4
 ; PWR10BE-NEXT:    lxv v0, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI23_4@toc@ha
-; PWR10BE-NEXT:    vextsb2d v4, v4
-; PWR10BE-NEXT:    addi r3, r3, .LCPI23_4@toc@l
 ; PWR10BE-NEXT:    vperm v5, v2, v2, v5
+; PWR10BE-NEXT:    addi r3, r3, .LCPI23_4@toc@l
+; PWR10BE-NEXT:    vextsb2d v5, v5
 ; PWR10BE-NEXT:    lxv v1, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI23_5@toc@ha
-; PWR10BE-NEXT:    vextsb2d v5, v5
-; PWR10BE-NEXT:    addi r3, r3, .LCPI23_5@toc@l
 ; PWR10BE-NEXT:    vperm v0, v2, v2, v0
+; PWR10BE-NEXT:    addi r3, r3, .LCPI23_5@toc@l
+; PWR10BE-NEXT:    vextsb2d v0, v0
 ; PWR10BE-NEXT:    lxv v6, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI23_6@toc@ha
-; PWR10BE-NEXT:    vextsb2d v0, v0
-; PWR10BE-NEXT:    addi r3, r3, .LCPI23_6@toc@l
 ; PWR10BE-NEXT:    vperm v1, v2, v2, v1
 ; PWR10BE-NEXT:    vaddudm v5, v0, v5
 ; PWR10BE-NEXT:    vaddudm v3, v4, v3
 ; PWR10BE-NEXT:    vaddudm v3, v3, v5
+; PWR10BE-NEXT:    addi r3, r3, .LCPI23_6@toc@l
+; PWR10BE-NEXT:    vextsb2d v1, v1
 ; PWR10BE-NEXT:    lxv v7, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI23_7@toc@ha
-; PWR10BE-NEXT:    vextsb2d v1, v1
-; PWR10BE-NEXT:    addi r3, r3, .LCPI23_7@toc@l
 ; PWR10BE-NEXT:    vperm v6, v2, v2, v6
-; PWR10BE-NEXT:    lxv v8, 0(r3)
+; PWR10BE-NEXT:    addi r3, r3, .LCPI23_7@toc@l
 ; PWR10BE-NEXT:    vextsb2d v6, v6
+; PWR10BE-NEXT:    lxv v8, 0(r3)
 ; PWR10BE-NEXT:    vperm v7, v2, v2, v7
 ; PWR10BE-NEXT:    vextsb2d v7, v7
 ; PWR10BE-NEXT:    vperm v2, v2, v2, v8
@@ -1758,28 +1758,28 @@ define dso_local i64 @v16i8tov16i64_zero(<16 x i8> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    addi r3, r3, .LCPI24_1@toc@l
 ; PWR10BE-NEXT:    lxv v5, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI24_2@toc@ha
-; PWR10BE-NEXT:    addi r3, r3, .LCPI24_2@toc@l
 ; PWR10BE-NEXT:    vperm v3, v4, v2, v3
+; PWR10BE-NEXT:    addi r3, r3, .LCPI24_2@toc@l
 ; PWR10BE-NEXT:    lxv v0, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI24_3@toc@ha
-; PWR10BE-NEXT:    addi r3, r3, .LCPI24_3@toc@l
 ; PWR10BE-NEXT:    vperm v5, v4, v2, v5
+; PWR10BE-NEXT:    addi r3, r3, .LCPI24_3@toc@l
 ; PWR10BE-NEXT:    lxv v1, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI24_4@toc@ha
-; PWR10BE-NEXT:    addi r3, r3, .LCPI24_4@toc@l
 ; PWR10BE-NEXT:    vperm v0, v4, v2, v0
+; PWR10BE-NEXT:    addi r3, r3, .LCPI24_4@toc@l
 ; PWR10BE-NEXT:    lxv v6, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI24_5@toc@ha
-; PWR10BE-NEXT:    addi r3, r3, .LCPI24_5@toc@l
 ; PWR10BE-NEXT:    vperm v1, v4, v2, v1
+; PWR10BE-NEXT:    addi r3, r3, .LCPI24_5@toc@l
 ; PWR10BE-NEXT:    lxv v7, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI24_6@toc@ha
-; PWR10BE-NEXT:    addi r3, r3, .LCPI24_6@toc@l
 ; PWR10BE-NEXT:    vperm v6, v4, v2, v6
+; PWR10BE-NEXT:    addi r3, r3, .LCPI24_6@toc@l
 ; PWR10BE-NEXT:    lxv v8, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI24_7@toc@ha
-; PWR10BE-NEXT:    addi r3, r3, .LCPI24_7@toc@l
 ; PWR10BE-NEXT:    vperm v7, v4, v2, v7
+; PWR10BE-NEXT:    addi r3, r3, .LCPI24_7@toc@l
 ; PWR10BE-NEXT:    lxv v9, 0(r3)
 ; PWR10BE-NEXT:    vperm v8, v4, v2, v8
 ; PWR10BE-NEXT:    vperm v2, v4, v2, v9


        


More information about the llvm-commits mailing list