[llvm] a2b5117 - [PowerPC] Update InputOps of Power10 SchedModel
Qiu Chaofan via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 7 07:46:31 PDT 2023
Author: Qiu Chaofan
Date: 2023-07-07T22:46:22+08:00
New Revision: a2b5117df75d1be8a65b2a86d5f75a59a8565fe6
URL: https://github.com/llvm/llvm-project/commit/a2b5117df75d1be8a65b2a86d5f75a59a8565fe6
DIFF: https://github.com/llvm/llvm-project/commit/a2b5117df75d1be8a65b2a86d5f75a59a8565fe6.diff
LOG: [PowerPC] Update InputOps of Power10 SchedModel
The count of input operands affects pipeline forwarding in the scheduling model.
The previous Power10 model definition arranged some instructions into
incorrect groups by counting the wrong number of input operands.
This patch updates the model, setting the input operand counts correctly
by excluding irrelevant immediate operands and counting the memory operands of
load instructions correctly.
Reviewed By: shchenz
Differential Revision: https://reviews.llvm.org/D153842
Added:
Modified:
llvm/lib/Target/PowerPC/P10InstrResources.td
llvm/lib/Target/PowerPC/PPCScheduleP10.td
llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index 6fe23699e93e23..0827e528a80f46 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
// Automatically generated file, do not edit!
//
-// This file defines the itinerary class data for the POWER10 processor.
+// This file defines instruction data for SchedModel of the POWER10 processor.
//
//===----------------------------------------------------------------------===//
// 22 Cycles Binary Floating Point operations, 2 input operands
@@ -307,41 +307,32 @@ def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
FSELD_rec, FSELS_rec
)>;
-// 2 Cycles Branch operations, 0 input operands
-def : InstRW<[P10W_BR_2C, P10W_DISP_ANY],
- (instrs
- BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR,
- BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL,
- BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, BL_RM, BL_TLS
-)>;
-
// 2 Cycles Branch operations, 1 input operands
def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read],
(instrs
B, BCC, BCCA, BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL, BCCLA, BCCLR, BCCLRL, CTRL_DEP, TAILB, TAILB8,
BA, TAILBA, TAILBA8,
- BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL8_LDinto_toc_RM, BCTRL8_RM, BCTRL_LWZinto_toc, BCTRL_LWZinto_toc_RM, BCTRL_RM, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat,
- BCL, BCLalways, BCLn, BDNZL, BDNZLm, BDNZLp, BDZL, BDZLm, BDZLp, gBCL, gBCLat,
- BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM
-)>;
-
-// 2 Cycles Branch operations, 3 input operands
-def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read],
- (instrs
BCCTR, BCCTR8, BCCTR8n, BCCTRn, gBCCTR,
- BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, gBCCTRL
+ BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, gBCCTRL,
+ BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR,
+ BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL,
+ BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, BL_RM, BL_TLS,
+ BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM
)>;
-// 2 Cycles Branch operations, 4 input operands
-def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read, P10BR_Read],
+// 2 Cycles Branch operations, 2 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read],
(instrs
+ BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL8_LDinto_toc_RM, BCTRL8_RM, BCTRL_LWZinto_toc, BCTRL_LWZinto_toc_RM, BCTRL_RM, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat,
BDNZA, BDNZAm, BDNZAp, BDZA, BDZAm, BDZAp, gBCA, gBCAat,
+ BCL, BCLalways, BCLn, BDNZL, BDNZLm, BDNZLp, BDZL, BDZLm, BDZLp, gBCL, gBCLat,
BDNZLA, BDNZLAm, BDNZLAp, BDZLA, BDZLAm, BDZLAp, gBCLA, gBCLAat
)>;
// 7 Cycles Crypto operations, 1 input operands
def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read],
(instrs
+ VGNB,
VSBOX
)>;
@@ -358,7 +349,6 @@ def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read, P10CY_Read],
VCIPHERLAST,
VCLZDM,
VCTZDM,
- VGNB,
VNCIPHER,
VNCIPHERLAST,
VPDEPD,
@@ -384,29 +374,24 @@ def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read],
XSCVSDQP,
XSCVSQQP,
XSCVUDQP,
- XSCVUQQP
+ XSCVUQQP,
+ XSRQPI,
+ XSRQPIX,
+ XSRQPXP
)>;
// 13 Cycles Decimal Floating Point operations, 2 input operands
def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
(instrs
+ BCDSR_rec,
XSADDQP,
XSADDQPO,
XSSUBQP,
XSSUBQPO
)>;
-// 13 Cycles Decimal Floating Point operations, 3 input operands
-def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
- (instrs
- BCDSR_rec,
- XSRQPI,
- XSRQPIX,
- XSRQPXP
-)>;
-
// 2-way crack instructions
-// 13 Cycles Decimal Floating Point operations, and 3 Cycles Store operations, 2 input operands
+// 13 Cycles Decimal Floating Point operations, and 3 Cycles Store operations, 1 input operands
def : InstRW<[P10W_DF_13C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY],
(instrs
HASHST, HASHST8,
@@ -439,8 +424,8 @@ def : InstRW<[P10W_DF_25C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
XSNMSUBQPO
)>;
-// 38 Cycles Decimal Floating Point operations, 2 input operands
-def : InstRW<[P10W_DF_38C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+// 38 Cycles Decimal Floating Point operations, 1 input operands
+def : InstRW<[P10W_DF_38C, P10W_DISP_ANY, P10DF_Read],
(instrs
BCDCFSQ_rec
)>;
@@ -594,20 +579,26 @@ def : InstRW<[P10W_DV_83C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
// 5 Cycles Fixed-Point and BCD operations, 1 input operands
def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read],
(instrs
+ BCDCFN_rec,
+ BCDCFZ_rec,
BCDCTN_rec,
+ BCDCTZ_rec,
+ BCDSETSGN_rec,
VMUL10CUQ,
VMUL10UQ,
- XSXSIGQP
+ XSTSTDCQP,
+ XSXSIGQP,
+ XXGENPCVBM
)>;
// 5 Cycles Fixed-Point and BCD operations, 2 input operands
def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
(instrs
- BCDCFN_rec,
- BCDCFZ_rec,
+ BCDADD_rec,
BCDCPSGN_rec,
- BCDCTZ_rec,
- BCDSETSGN_rec,
+ BCDS_rec,
+ BCDSUB_rec,
+ BCDTRUNC_rec,
BCDUS_rec,
BCDUTRUNC_rec,
VADDCUQ,
@@ -623,18 +614,12 @@ def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
XSCMPOQP,
XSCMPUQP,
XSMAXCQP,
- XSMINCQP,
- XSTSTDCQP,
- XXGENPCVBM
+ XSMINCQP
)>;
// 5 Cycles Fixed-Point and BCD operations, 3 input operands
def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read],
(instrs
- BCDADD_rec,
- BCDS_rec,
- BCDSUB_rec,
- BCDTRUNC_rec,
VADDECUQ,
VADDEUQM,
VSUBECUQ,
@@ -644,7 +629,7 @@ def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read],
// 4 Cycles ALU2 operations, 0 input operands
def : InstRW<[P10W_F2_4C, P10W_DISP_ANY],
(instrs
- TRAP, TW
+ MTVSRBMI
)>;
// 4 Cycles ALU2 operations, 1 input operands
@@ -660,9 +645,9 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
CNTTZD_rec,
CNTTZW, CNTTZW8,
CNTTZW8_rec, CNTTZW_rec,
+ EXTSWSLI_32_64_rec, EXTSWSLI_rec,
FTSQRT,
MTVSRBM,
- MTVSRBMI,
MTVSRDM,
MTVSRHM,
MTVSRQM,
@@ -670,10 +655,18 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
POPCNTB, POPCNTB8,
POPCNTD,
POPCNTW,
+ RLDIC_rec,
+ RLDICL_32_rec, RLDICL_rec,
+ RLDICR_rec,
+ RLWINM8_rec, RLWINM_rec,
VCLZB,
VCLZD,
VCLZH,
VCLZW,
+ VCNTMBB,
+ VCNTMBD,
+ VCNTMBH,
+ VCNTMBW,
VCTZB,
VCTZD,
VCTZH,
@@ -694,27 +687,40 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
VPOPCNTW,
VPRTYBD,
VPRTYBW,
+ VSHASIGMAD,
+ VSHASIGMAW,
XSCVHPDP,
XSCVSPDPN,
XSTSQRTDP,
+ XSTSTDCDP,
+ XSTSTDCSP,
XVCVHPSP,
XVTLSBB,
XVTSQRTDP,
- XVTSQRTSP
+ XVTSQRTSP,
+ XVTSTDCDP,
+ XVTSTDCSP
)>;
// 4 Cycles ALU2 operations, 2 input operands
def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
(instrs
CMPEQB,
- EXTSWSLI_32_64_rec, EXTSWSLI_rec,
+ CMPRB, CMPRB8,
FCMPOD, FCMPOS,
FCMPUD, FCMPUS,
FTDIV,
+ RLDCL_rec,
+ RLDCR_rec,
+ RLDIMI_rec,
+ RLWIMI8_rec, RLWIMI_rec,
+ RLWNM8_rec, RLWNM_rec,
SLD_rec,
SLW8_rec, SLW_rec,
SRD_rec,
SRW8_rec, SRW_rec,
+ TDI,
+ TWI,
VABSDUB,
VABSDUH,
VABSDUW,
@@ -765,10 +771,6 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
VCMPNEZW_rec,
VCMPSQ,
VCMPUQ,
- VCNTMBB,
- VCNTMBD,
- VCNTMBH,
- VCNTMBW,
VMAXFP,
VMINFP,
VSUBCUW,
@@ -791,8 +793,6 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
XSMINDP,
XSMINJDP,
XSTDIVDP,
- XSTSTDCDP,
- XSTSTDCSP,
XVCMPEQDP,
XVCMPEQDP_rec,
XVCMPEQSP,
@@ -810,39 +810,24 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
XVMINDP,
XVMINSP,
XVTDIVDP,
- XVTDIVSP,
- XVTSTDCDP,
- XVTSTDCSP
+ XVTDIVSP
)>;
// 4 Cycles ALU2 operations, 3 input operands
def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
(instrs
- CMPRB, CMPRB8,
- RLDCL_rec,
- RLDCR_rec,
- RLDIC_rec,
- RLDICL_32_rec, RLDICL_rec,
- RLDICR_rec,
TD,
- TDI,
- TWI,
- VSHASIGMAD,
- VSHASIGMAW
-)>;
-
-// 4 Cycles ALU2 operations, 4 input operands
-def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
- (instrs
- RLDIMI_rec,
- RLWINM8_rec, RLWINM_rec,
- RLWNM8_rec, RLWNM_rec
+ TRAP, TW
)>;
-// 4 Cycles ALU2 operations, 5 input operands
-def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
+// Single crack instructions
+// 4 Cycles ALU2 operations, 1 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
(instrs
- RLWIMI8_rec, RLWIMI_rec
+ SRADI_rec,
+ SRAWI_rec,
+ TABORTDCI,
+ TABORTWCI
)>;
// Single crack instructions
@@ -850,19 +835,9 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
(instrs
SRAD_rec,
- SRADI_rec,
SRAW_rec,
- SRAWI_rec
-)>;
-
-// Single crack instructions
-// 4 Cycles ALU2 operations, 3 input operands
-def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
- (instrs
TABORTDC,
- TABORTDCI,
- TABORTWC,
- TABORTWCI
+ TABORTWC
)>;
// 2-way crack instructions
@@ -900,32 +875,34 @@ def : InstRW<[P10W_FX_2C, P10W_DISP_ANY, P10FX_Read],
// 3 Cycles ALU operations, 0 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_ANY],
(instrs
- CR6SET, CREQV, CRSET,
DSS, DSSALL,
MCRXRX,
MFCTR, MFCTR8,
MFLR, MFLR8,
- NOP, NOP_GT_PWR6, NOP_GT_PWR7, ORI, ORI8,
- VXOR, V_SET0, V_SET0B, V_SET0H,
- XXLEQV, XXLEQVOnes,
- XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz
+ WAIT
)>;
// 3 Cycles ALU operations, 1 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
(instrs
- ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8,
- ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8,
+ ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8,
+ ADDIC, ADDIC8,
+ ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8,
ADDME, ADDME8,
ADDME8O, ADDMEO,
ADDZE, ADDZE8,
ADDZE8O, ADDZEO,
+ ANDI8_rec, ANDI_rec,
+ ANDIS8_rec, ANDIS_rec,
+ CMPDI, CMPWI,
+ CMPLDI, CMPLWI,
EXTSB, EXTSB8, EXTSB8_32_64,
EXTSB8_rec, EXTSB_rec,
EXTSH, EXTSH8, EXTSH8_32_64,
EXTSH8_rec, EXTSH_rec,
EXTSW, EXTSW_32, EXTSW_32_64,
EXTSW_32_64_rec, EXTSW_rec,
+ EXTSWSLI, EXTSWSLI_32_64,
FABSD, FABSS,
FMR,
FNABSD, FNABSS,
@@ -941,11 +918,20 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
NEG, NEG8,
NEG8_rec, NEG_rec,
NEG8O, NEGO,
+ NOP, NOP_GT_PWR6, NOP_GT_PWR7, ORI, ORI8,
+ ORIS, ORIS8,
+ RLDIC,
+ RLDICL, RLDICL_32, RLDICL_32_64,
+ RLDICR, RLDICR_32,
+ RLWINM, RLWINM8,
SETB, SETB8,
SETBC, SETBC8,
SETBCR, SETBCR8,
SETNBC, SETNBC8,
SETNBCR, SETNBCR8,
+ SRADI, SRADI_32,
+ SRAWI,
+ SUBFIC, SUBFIC8,
SUBFME, SUBFME8,
SUBFME8O, SUBFMEO,
SUBFZE, SUBFZE8,
@@ -958,7 +944,8 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
VEXTSW2D, VEXTSW2Ds,
VNEGD,
VNEGW,
- WAIT,
+ XORI, XORI8,
+ XORIS, XORIS8,
XSABSDP,
XSABSQP,
XSNABSDP, XSNABSDPs,
@@ -987,29 +974,27 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
ADD4_rec, ADD8_rec,
ADDE, ADDE8,
ADDE8O, ADDEO,
- ADDIC, ADDIC8,
+ ADDEX, ADDEX8,
ADD4O, ADD8O,
AND, AND8,
AND8_rec, AND_rec,
ANDC, ANDC8,
ANDC8_rec, ANDC_rec,
- ANDI8_rec, ANDI_rec,
- ANDIS8_rec, ANDIS_rec,
CMPD, CMPW,
CMPB, CMPB8,
- CMPDI, CMPWI,
CMPLD, CMPLW,
- CMPLDI, CMPLWI,
CRAND,
CRANDC,
+ CR6SET, CREQV, CRSET,
CRNAND,
CRNOR,
CROR,
CRORC,
CR6UNSET, CRUNSET, CRXOR,
+ DST, DST64, DSTT, DSTT64,
+ DSTST, DSTST64, DSTSTT, DSTSTT64,
EQV, EQV8,
EQV8_rec, EQV_rec,
- EXTSWSLI, EXTSWSLI_32_64,
FCPSGND, FCPSGNS,
NAND, NAND8,
NAND8_rec, NAND_rec,
@@ -1019,20 +1004,21 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
OR8_rec, OR_rec,
ORC, ORC8,
ORC8_rec, ORC_rec,
- ORIS, ORIS8,
+ RLDCL,
+ RLDCR,
+ RLDIMI,
+ RLWIMI, RLWIMI8,
+ RLWNM, RLWNM8,
SLD,
SLW, SLW8,
SRAD,
- SRADI, SRADI_32,
SRAW,
- SRAWI,
SRD,
SRW, SRW8,
SUBF, SUBF8,
SUBF8_rec, SUBF_rec,
SUBFE, SUBFE8,
SUBFE8O, SUBFEO,
- SUBFIC, SUBFIC8,
SUBF8O, SUBFO,
VADDUBM,
VADDUDM,
@@ -1103,10 +1089,9 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
VSUBUDM,
VSUBUHM,
VSUBUWM,
+ VXOR, V_SET0, V_SET0B, V_SET0H,
XOR, XOR8,
XOR8_rec, XOR_rec,
- XORI, XORI8,
- XORIS, XORIS8,
XSCPSGNDP,
XSCPSGNQP,
XSIEXPDP,
@@ -1117,69 +1102,52 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
XVIEXPSP,
XXLAND,
XXLANDC,
+ XXLEQV, XXLEQVOnes,
XXLNAND,
XXLNOR,
XXLOR, XXLORf,
- XXLORC
+ XXLORC,
+ XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz
)>;
// 3 Cycles ALU operations, 3 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
(instrs
- ADDEX, ADDEX8,
- DST, DST64, DSTT, DSTT64,
- DSTST, DSTST64, DSTSTT, DSTSTT64,
ISEL, ISEL8,
- RLDCL,
- RLDCR,
- RLDIC,
- RLDICL, RLDICL_32, RLDICL_32_64,
- RLDICR, RLDICR_32,
VRLDMI,
VRLWMI,
VSEL,
XXSEL
)>;
-// 3 Cycles ALU operations, 4 input operands
-def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
- (instrs
- RLDIMI,
- RLWINM, RLWINM8,
- RLWNM, RLWNM8
-)>;
-
-// 3 Cycles ALU operations, 5 input operands
-def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
- (instrs
- RLWIMI, RLWIMI8
-)>;
-
// Single crack instructions
// 3 Cycles ALU operations, 0 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
(instrs
MFFS,
MFFS_rec,
+ MFFSCDRNI,
+ MFFSCRNI,
MFFSL,
MFVSCR,
- TRECHKPT
+ MTFSB0,
+ TBEGIN,
+ TRECHKPT,
+ TSR
)>;
// Single crack instructions
// 3 Cycles ALU operations, 1 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read],
(instrs
+ ADDIC_rec,
ADDME8_rec, ADDME_rec,
ADDME8O_rec, ADDMEO_rec,
ADDZE8_rec, ADDZE_rec,
ADDZE8O_rec, ADDZEO_rec,
MCRFS,
MFFSCDRN,
- MFFSCDRNI,
MFFSCRN,
- MFFSCRNI,
- MTFSB0,
MTVSCR,
NEG8O_rec, NEGO_rec,
SUBFME8_rec, SUBFME_rec,
@@ -1187,9 +1155,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read],
SUBFZE8_rec, SUBFZE_rec,
SUBFZE8O_rec, SUBFZEO_rec,
TABORT,
- TBEGIN,
- TRECLAIM,
- TSR
+ TRECLAIM
)>;
// Single crack instructions
@@ -1198,7 +1164,6 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read, P10FX_Read]
(instrs
ADDE8_rec, ADDE_rec,
ADDE8O_rec, ADDEO_rec,
- ADDIC_rec,
ADD4O_rec, ADD8O_rec,
SUBFE8_rec, SUBFE_rec,
SUBFE8O_rec, SUBFEO_rec,
@@ -1218,7 +1183,12 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
(instrs
HRFID,
MFFSCE,
+ MTFSB1,
+ MTFSFI, MTFSFIb,
+ MTFSFI_rec,
+ RFEBB,
RFID,
+ SC,
STOP
)>;
@@ -1230,9 +1200,8 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read]
FMR_rec,
FNABSD_rec, FNABSS_rec,
FNEGD_rec, FNEGS_rec,
- MTFSB1,
- RFEBB,
- SC
+ MTFSF, MTFSFb,
+ MTFSF_rec
)>;
// 2-way crack instructions
@@ -1243,27 +1212,11 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read,
ADDC8_rec, ADDC_rec,
ADDC8O, ADDCO,
FCPSGND_rec, FCPSGNS_rec,
- MTFSF, MTFSFb,
- MTFSFI, MTFSFIb,
SUBFC, SUBFC8,
SUBFC8_rec, SUBFC_rec,
SUBFC8O, SUBFCO
)>;
-// 2-way crack instructions
-// 3 Cycles ALU operations, and 3 Cycles ALU operations, 3 input operands
-def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
- (instrs
- MTFSFI_rec
-)>;
-
-// 2-way crack instructions
-// 3 Cycles ALU operations, and 3 Cycles ALU operations, 4 input operands
-def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
- (instrs
- MTFSF_rec
-)>;
-
// 4-way crack instructions
// 3 Cycles ALU operations, 3 Cycles ALU operations, 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
@@ -1283,40 +1236,61 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
)>;
// 2-way crack instructions
-// 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
-def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, P10FX_Read, P10FX_Read],
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, P10FX_Read],
(instrs
MTCRF, MTCRF8
)>;
+// 6 Cycles Load operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY],
+ (instrs
+ LBZ, LBZ8,
+ LD, LDtoc, LDtocBA, LDtocCPT, LDtocJTI, LDtocL, SPILLTOVSR_LD,
+ DFLOADf32, DFLOADf64, LFD,
+ LHA, LHA8,
+ LHZ, LHZ8,
+ LWA, LWA_32,
+ LWZ, LWZ8, LWZtoc, LWZtocL,
+ LXSD,
+ LXV
+)>;
+
// 6 Cycles Load operations, 1 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
(instrs
- LBZ, LBZ8,
- LD, LDtoc, LDtocBA, LDtocCPT, LDtocJTI, LDtocL, SPILLTOVSR_LD,
+ LXVL,
+ LXVLL
+)>;
+
+// 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+ (instrs
+ DCBT,
+ DCBTST,
+ ICBT,
+ LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32,
LDBRX,
- DFLOADf32, DFLOADf64, LFD,
+ LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX,
+ LFDX, LFDXTLS, LFDXTLS_, XFLOADf32, XFLOADf64,
LFIWAX, LIWAX,
LFIWZX, LIWZX,
- LHA, LHA8,
+ LHAX, LHAX8, LHAXTLS, LHAXTLS_, LHAXTLS_32,
LHBRX, LHBRX8,
- LHZ, LHZ8,
+ LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32,
LVEBX,
LVEHX,
LVEWX,
LVX,
LVXL,
- LWA, LWA_32,
LWAX, LWAXTLS, LWAXTLS_, LWAXTLS_32, LWAX_32,
LWBRX, LWBRX8,
- LWZ, LWZ8, LWZtoc, LWZtocL,
- LXSD,
+ LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32,
LXSDX,
LXSIBZX,
LXSIHZX,
LXSIWAX,
LXSIWZX,
- LXV,
LXVB16X,
LXVD2X,
LXVDSX,
@@ -1330,24 +1304,8 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
LXVX
)>;
-// 6 Cycles Load operations, 2 input operands
-def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
- (instrs
- DCBT,
- DCBTST,
- ICBT,
- LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32,
- LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX,
- LFDX, LFDXTLS, LFDXTLS_, XFLOADf32, XFLOADf64,
- LHAX, LHAX8, LHAXTLS, LHAXTLS_, LHAXTLS_32,
- LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32,
- LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32,
- LXVL,
- LXVLL
-)>;
-
// 2-way crack instructions
-// 6 Cycles Load operations, and 13 Cycles Decimal Floating Point operations, 2 input operands
+// 6 Cycles Load operations, and 13 Cycles Decimal Floating Point operations, 1 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DF_13C, P10W_DISP_ANY],
(instrs
HASHCHK, HASHCHK8,
@@ -1358,6 +1316,7 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DF_13C, P10W_DISP_ANY],
// 6 Cycles Load operations, 0 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY],
(instrs
+ DARN,
SLBIA
)>;
@@ -1365,11 +1324,7 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY],
// 6 Cycles Load operations, 1 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read],
(instrs
- DARN,
- LBARX, LBARXL,
- LDARX, LDARXL,
- LHARX, LHARXL,
- LWARX, LWARXL,
+ MTSPR, MTSPR8, MTSR, MTUDSCR, MTVRSAVE, MTVRSAVEv,
SLBFEE_rec,
SLBIE,
SLBMFEE,
@@ -1380,48 +1335,57 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read],
// 6 Cycles Load operations, 2 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
(instrs
+ LBARX, LBARXL,
LBZCIX,
+ LDARX, LDARXL,
LDCIX,
+ LHARX, LHARXL,
LHZCIX,
- LWZCIX,
- MTSPR, MTSPR8, MTSR, MTUDSCR, MTVRSAVE, MTVRSAVEv
+ LWARX, LWARXL,
+ LWZCIX
)>;
// Expand instructions
-// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operands
-def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
+// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY],
(instrs
LMW
)>;
// Expand instructions
-// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 2 input operands
-def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
(instrs
LSWI
)>;
// 2-way crack instructions
-// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 0 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_SX_3C, P10W_DISP_ANY],
(instrs
LBZU, LBZU8,
- LBZUX, LBZUX8,
LDU,
- LDUX,
LFDU,
- LFDUX,
LHAU, LHAU8,
- LHAUX, LHAUX8,
LHZU, LHZU8,
+ LWZU, LWZU8
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_SX_3C, P10W_DISP_ANY],
+ (instrs
+ LBZUX, LBZUX8,
+ LDUX,
+ LFDUX,
+ LHAUX, LHAUX8,
LHZUX, LHZUX8,
LWAUX,
- LWZU, LWZU8,
LWZUX, LWZUX8
)>;
-// 6 Cycles Load operations, 1 input operands
-def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
+// 6 Cycles Load operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR],
(instrs
PLBZ, PLBZ8, PLBZ8pc, PLBZpc,
PLD, PLDpc,
@@ -1438,26 +1402,32 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
)>;
// 2-way crack instructions
-// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 1 input operands
+// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 0 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
(instrs
LFS,
- LXSSP,
- LXSSPX
+ LXSSP
)>;
// 2-way crack instructions
// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 2 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
(instrs
- LFSX, LFSXTLS, LFSXTLS_
+ LFSX, LFSXTLS, LFSXTLS_,
+ LXSSPX
)>;
// 4-way crack instructions
-// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands
+// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ LFSU
+)>;
+
+// 4-way crack instructions
+// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 2 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY],
(instrs
- LFSU,
LFSUX
)>;
@@ -1476,10 +1446,16 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read
)>;
// 2-way crack instructions
-// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_SX_3C],
+ (instrs
+ LXVP
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_SX_3C],
(instrs
- LXVP,
LXVPX
)>;
@@ -1535,34 +1511,21 @@ def : InstRW<[P10W_MM_10C, P10W_DISP_ANY, P10MM_Read, P10MM_Read, P10MM_Read],
XVI8GER4SPP
)>;
-// 10 Cycles SIMD Matrix Multiply Engine operations, 4 input operands
-def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
- (instrs
- PMXVF32GER,
- PMXVF64GER
-)>;
-
-// 10 Cycles SIMD Matrix Multiply Engine operations, 5 input operands
-def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+// 10 Cycles SIMD Matrix Multiply Engine operations, 2 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read],
(instrs
PMXVBF16GER2,
PMXVF16GER2,
- PMXVF32GERNN,
- PMXVF32GERNP,
- PMXVF32GERPN,
- PMXVF32GERPP,
- PMXVF64GERNN,
- PMXVF64GERNP,
- PMXVF64GERPN,
- PMXVF64GERPP,
+ PMXVF32GER,
+ PMXVF64GER,
PMXVI16GER2,
PMXVI16GER2S,
PMXVI4GER8,
PMXVI8GER4
)>;
-// 10 Cycles SIMD Matrix Multiply Engine operations, 6 input operands
-def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+// 10 Cycles SIMD Matrix Multiply Engine operations, 3 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read],
(instrs
PMXVBF16GER2NN,
PMXVBF16GER2NP,
@@ -1572,6 +1535,14 @@ def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P
PMXVF16GER2NP,
PMXVF16GER2PN,
PMXVF16GER2PP,
+ PMXVF32GERNN,
+ PMXVF32GERNP,
+ PMXVF32GERPN,
+ PMXVF32GERPP,
+ PMXVF64GERNN,
+ PMXVF64GERNP,
+ PMXVF64GERPN,
+ PMXVF64GERPP,
PMXVI16GER2PP,
PMXVI16GER2SPP,
PMXVI4GER8PP,
@@ -1593,6 +1564,12 @@ def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C, P10W_MM_10C, P10W_DISP_PA
XXMFACC
)>;
+// 5 Cycles GPR Multiply operations, 1 input operands
+def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read],
+ (instrs
+ MULLI, MULLI8
+)>;
+
// 5 Cycles GPR Multiply operations, 2 input operands
def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read],
(instrs
@@ -1602,7 +1579,6 @@ def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read],
MULHWU,
MULLD,
MULLDO,
- MULLI, MULLI8,
MULLW,
MULLWO,
VMULHSD,
@@ -1635,7 +1611,11 @@ def : InstRW<[P10W_MU_5C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
// 4 Cycles Permute operations, 0 input operands
def : InstRW<[P10W_PM_4C, P10W_DISP_ANY],
(instrs
- VSPLTISW, V_SETALLONES, V_SETALLONESB, V_SETALLONESH
+ LXVKQ,
+ VSPLTISB,
+ VSPLTISH,
+ VSPLTISW, V_SETALLONES, V_SETALLONESB, V_SETALLONESH,
+ XXSPLTIB
)>;
// 4 Cycles Permute operations, 1 input operands
@@ -1644,17 +1624,21 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read],
BRD,
BRH, BRH8,
BRW, BRW8,
- LVSL,
- LVSR,
- LXVKQ,
MFVSRLD,
MTVSRWS,
VCLZLSBB,
VCTZLSBB,
+ VEXTRACTD,
+ VEXTRACTUB,
+ VEXTRACTUH,
+ VEXTRACTUW,
VGBBD,
+ VINSERTD,
+ VINSERTW,
VPRTYBQ,
- VSPLTISB,
- VSPLTISH,
+ VSPLTB, VSPLTBs,
+ VSPLTH, VSPLTHs,
+ VSPLTW,
VSTRIBL,
VSTRIBR,
VSTRIHL,
@@ -1672,30 +1656,34 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read],
XXBRH,
XXBRQ,
XXBRW,
- XXSPLTIB
+ XXEXTRACTUW,
+ XXGENPCVDM,
+ XXGENPCVHM,
+ XXGENPCVWM,
+ XXSPLTW, XXSPLTWs
)>;
// 4 Cycles Permute operations, 2 input operands
def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read],
(instrs
BPERMD,
+ LVSL,
+ LVSR,
MTVSRDD,
VBPERMD,
VBPERMQ,
VCLRLB,
VCLRRB,
- VEXTRACTD,
- VEXTRACTUB,
- VEXTRACTUH,
- VEXTRACTUW,
VEXTUBLX,
VEXTUBRX,
VEXTUHLX,
VEXTUHRX,
VEXTUWLX,
VEXTUWRX,
- VINSERTD,
- VINSERTW,
+ VINSD,
+ VINSERTB,
+ VINSERTH,
+ VINSW,
VMRGHB,
VMRGHH,
VMRGHW,
@@ -1716,23 +1704,19 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read],
VPKUWUM,
VPKUWUS,
VSL,
+ VSLDBI,
+ VSLDOI,
VSLO,
VSLV,
- VSPLTB, VSPLTBs,
- VSPLTH, VSPLTHs,
- VSPLTW,
VSR,
+ VSRDBI,
VSRO,
VSRV,
- XXEXTRACTUW,
- XXGENPCVDM,
- XXGENPCVHM,
- XXGENPCVWM,
+ XXINSERTW,
XXMRGHW,
XXMRGLW,
XXPERMDI, XXPERMDIs,
- XXSLDWI, XXSLDWIs,
- XXSPLTW, XXSPLTWs
+ XXSLDWI, XXSLDWIs
)>;
// 4 Cycles Permute operations, 3 input operands
@@ -1750,16 +1734,12 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read, P10PM_Read],
VINSBRX,
VINSBVLX,
VINSBVRX,
- VINSD,
VINSDLX,
VINSDRX,
- VINSERTB,
- VINSERTH,
VINSHLX,
VINSHRX,
VINSHVLX,
VINSHVRX,
- VINSW,
VINSWLX,
VINSWRX,
VINSWVLX,
@@ -1767,10 +1747,6 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read, P10PM_Read],
VPERM,
VPERMR,
VPERMXOR,
- VSLDBI,
- VSLDOI,
- VSRDBI,
- XXINSERTW,
XXPERM,
XXPERMR
)>;
@@ -1782,13 +1758,19 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_EVEN, P10W_vMU_7C, P10W_DISP_ANY],
VSUMSWS
)>;
-// 4 Cycles Permute operations, 1 input operands
-def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read],
+// 4 Cycles Permute operations, 0 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR],
(instrs
XXSPLTIDP,
XXSPLTIW
)>;
+// 4 Cycles Permute operations, 1 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read],
+ (instrs
+ XXSPLTI32DX
+)>;
+
// 4 Cycles Permute operations, 3 input operands
def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read],
(instrs
@@ -1796,12 +1778,6 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read],
XXBLENDVD,
XXBLENDVH,
XXBLENDVW,
- XXSPLTI32DX
-)>;
-
-// 4 Cycles Permute operations, 4 input operands
-def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read, P10PM_Read],
- (instrs
XXEVAL,
XXPERMX
)>;
@@ -1809,51 +1785,65 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read, P1
// 3 Cycles Store operations, 1 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read],
(instrs
- DCBST,
- DCBZ,
- ICBI
+ PSTXVP, PSTXVPpc,
+ STB, STB8,
+ STBU, STBU8,
+ SPILLTOVSR_ST, STD,
+ STDU,
+ DFSTOREf32, DFSTOREf64, STFD,
+ STFDU,
+ STFS,
+ STFSU,
+ STH, STH8,
+ STHU, STHU8,
+ STW, STW8,
+ STWU, STWU8,
+ STXSD,
+ STXSSP,
+ STXV
)>;
// 3 Cycles Store operations, 2 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
(instrs
+ CP_COPY, CP_COPY8,
DCBF,
- PSTXVP, PSTXVPpc,
- STB, STB8,
- STBU, STBU8,
+ DCBST,
+ DCBZ,
+ ICBI,
+ STXVL,
+ STXVLL
+)>;
+
+// 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+ (instrs
STBUX, STBUX8,
- SPILLTOVSR_ST, STD,
+ STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32,
STDBRX,
- STDU,
STDUX,
- DFSTOREf32, DFSTOREf64, STFD,
- STFDU,
+ SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_,
STFDUX,
+ STFDX, STFDXTLS, STFDXTLS_,
STFIWX, STIWX,
- STFS,
- STFSU,
STFSUX,
- STH, STH8,
+ STFSX, STFSXTLS, STFSXTLS_,
STHBRX,
- STHU, STHU8,
STHUX, STHUX8,
+ STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32,
STVEBX,
STVEHX,
STVEWX,
STVX,
STVXL,
- STW, STW8,
STWBRX,
- STWU, STWU8,
STWUX, STWUX8,
- STXSD,
+ STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32,
STXSDX,
STXSIBX, STXSIBXv,
STXSIHX, STXSIHXv,
STXSIWX,
- STXSSP,
STXSSPX,
- STXV,
STXVB16X,
STXVD2X,
STXVH8X,
@@ -1865,20 +1855,6 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
STXVX
)>;
-// 3 Cycles Store operations, 3 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
- (instrs
- CP_COPY, CP_COPY8,
- STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32,
- SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_,
- STFDX, STFDXTLS, STFDXTLS_,
- STFSX, STFSXTLS, STFSXTLS_,
- STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32,
- STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32,
- STXVL,
- STXVLL
-)>;
-
// Single crack instructions
// 3 Cycles Store operations, 0 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
@@ -1887,25 +1863,16 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
MSGSYNC,
SLBSYNC,
TCHECK,
+ TEND,
TLBSYNC
)>;
-// Single crack instructions
-// 3 Cycles Store operations, 1 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read],
- (instrs
- TEND
-)>;
-
// Single crack instructions
// 3 Cycles Store operations, 2 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
(instrs
+ CP_PASTE8_rec, CP_PASTE_rec,
SLBIEG,
- STBCX,
- STDCX,
- STHCX,
- STWCX,
TLBIE
)>;
@@ -1913,29 +1880,26 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read]
// 3 Cycles Store operations, 3 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
(instrs
- CP_PASTE8_rec, CP_PASTE_rec,
STBCIX,
+ STBCX,
STDCIX,
+ STDCX,
STHCIX,
- STWCIX
+ STHCX,
+ STWCIX,
+ STWCX
)>;
// 2-way crack instructions
// 3 Cycles Store operations, and 3 Cycles ALU operations, 0 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
(instrs
- ISYNC
-)>;
-
-// 2-way crack instructions
-// 3 Cycles Store operations, and 3 Cycles ALU operations, 1 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
- (instrs
+ ISYNC,
SYNC
)>;
// Expand instructions
-// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 6 Cycles Load operations, and 3 Cycles Store operations, 2 input operands
+// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 6 Cycles Load operations, and 3 Cycles Store operations, 1 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY],
(instrs
LDAT,
@@ -1943,7 +1907,7 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C,
)>;
// 4-way crack instructions
-// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
+// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY],
(instrs
STDAT,
@@ -1951,21 +1915,21 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C,
)>;
// Expand instructions
-// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
+// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read],
(instrs
STMW
)>;
// Expand instructions
-// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
(instrs
STSWI
)>;
-// 3 Cycles Store operations, 2 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read, P10ST_Read],
+// 3 Cycles Store operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read],
(instrs
PSTB, PSTB8, PSTB8pc, PSTBpc,
PSTD, PSTDpc,
@@ -1979,10 +1943,16 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read, P10ST_Read],
)>;
// 2-way crack instructions
-// 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read, P10ST_Read],
+// 3 Cycles Store operations, and 3 Cycles Store operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read],
+ (instrs
+ STXVP
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read, P10ST_Read, P10ST_Read],
(instrs
- STXVP,
STXVPX
)>;
@@ -2024,27 +1994,21 @@ def : InstRW<[P10W_SX, P10W_DISP_ANY],
def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
(instrs
CLRBHRB,
- MFMSR
+ MFBHRBE,
+ MFMSR,
+ MFTB
)>;
// Single crack instructions
// 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read],
(instrs
- MFTB
-)>;
-
-// Single crack instructions
-// 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands
-def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read, P10SX_Read],
- (instrs
- MFBHRBE,
MTMSR,
MTMSRD
)>;
// 2-way crack instructions
-// 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands
+// 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 0 input operands
def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
(instrs
ADDPCIS
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP10.td b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
index f89ef735a36711..25be37718af241 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP10.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
@@ -25,14 +25,8 @@ def P10vMU_Read : SchedRead;
def P10Model : SchedMachineModel {
let IssueWidth = 8;
-
- // TODO - Need to be updated according to P10 UM.
let MicroOpBufferSize = 44;
-
- // TODO - tune this on real HW once it arrives. For now, we will use the same
- // value as we do on P9.
let LoopMicroOpBufferSize = 60;
-
let CompleteModel = 1;
// Do not support SPE (Signal Procesing Engine) on Power 10.
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
index 0ddf0330e90648..0ef03630095434 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -120,8 +120,8 @@ define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
;
; CHECK-LINUXBE-LABEL: vec_xst_trunc_ss:
; CHECK-LINUXBE: # %bb.0: # %entry
-; CHECK-LINUXBE-NEXT: sldi r3, r5, 1
; CHECK-LINUXBE-NEXT: vsldoi v2, v2, v2, 10
+; CHECK-LINUXBE-NEXT: sldi r3, r5, 1
; CHECK-LINUXBE-NEXT: stxsihx v2, r6, r3
; CHECK-LINUXBE-NEXT: blr
;
@@ -137,8 +137,8 @@ define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
;
; CHECK-AIXBE-LABEL: vec_xst_trunc_ss:
; CHECK-AIXBE: # %bb.0: # %entry
-; CHECK-AIXBE-NEXT: sldi r3, r3, 1
; CHECK-AIXBE-NEXT: vsldoi v2, v2, v2, 10
+; CHECK-AIXBE-NEXT: sldi r3, r3, 1
; CHECK-AIXBE-NEXT: stxsihx v2, r4, r3
; CHECK-AIXBE-NEXT: blr
entry:
@@ -158,8 +158,8 @@ define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
;
; CHECK-LINUXBE-LABEL: vec_xst_trunc_us:
; CHECK-LINUXBE: # %bb.0: # %entry
-; CHECK-LINUXBE-NEXT: sldi r3, r5, 1
; CHECK-LINUXBE-NEXT: vsldoi v2, v2, v2, 10
+; CHECK-LINUXBE-NEXT: sldi r3, r5, 1
; CHECK-LINUXBE-NEXT: stxsihx v2, r6, r3
; CHECK-LINUXBE-NEXT: blr
;
@@ -175,8 +175,8 @@ define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
;
; CHECK-AIXBE-LABEL: vec_xst_trunc_us:
; CHECK-AIXBE: # %bb.0: # %entry
-; CHECK-AIXBE-NEXT: sldi r3, r3, 1
; CHECK-AIXBE-NEXT: vsldoi v2, v2, v2, 10
+; CHECK-AIXBE-NEXT: sldi r3, r3, 1
; CHECK-AIXBE-NEXT: stxsihx v2, r4, r3
; CHECK-AIXBE-NEXT: blr
entry:
@@ -196,8 +196,8 @@ define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
;
; CHECK-LINUXBE-LABEL: vec_xst_trunc_si:
; CHECK-LINUXBE: # %bb.0: # %entry
-; CHECK-LINUXBE-NEXT: sldi r3, r5, 2
; CHECK-LINUXBE-NEXT: xxsldwi vs0, v2, v2, 3
+; CHECK-LINUXBE-NEXT: sldi r3, r5, 2
; CHECK-LINUXBE-NEXT: stfiwx f0, r6, r3
; CHECK-LINUXBE-NEXT: blr
;
@@ -213,8 +213,8 @@ define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
;
; CHECK-AIXBE-LABEL: vec_xst_trunc_si:
; CHECK-AIXBE: # %bb.0: # %entry
-; CHECK-AIXBE-NEXT: sldi r3, r3, 2
; CHECK-AIXBE-NEXT: xxsldwi vs0, v2, v2, 3
+; CHECK-AIXBE-NEXT: sldi r3, r3, 2
; CHECK-AIXBE-NEXT: stfiwx f0, r4, r3
; CHECK-AIXBE-NEXT: blr
entry:
@@ -234,8 +234,8 @@ define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
;
; CHECK-LINUXBE-LABEL: vec_xst_trunc_ui:
; CHECK-LINUXBE: # %bb.0: # %entry
-; CHECK-LINUXBE-NEXT: sldi r3, r5, 2
; CHECK-LINUXBE-NEXT: xxsldwi vs0, v2, v2, 3
+; CHECK-LINUXBE-NEXT: sldi r3, r5, 2
; CHECK-LINUXBE-NEXT: stfiwx f0, r6, r3
; CHECK-LINUXBE-NEXT: blr
;
@@ -251,8 +251,8 @@ define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, ptr nocapture %_
;
; CHECK-AIXBE-LABEL: vec_xst_trunc_ui:
; CHECK-AIXBE: # %bb.0: # %entry
-; CHECK-AIXBE-NEXT: sldi r3, r3, 2
; CHECK-AIXBE-NEXT: xxsldwi vs0, v2, v2, 3
+; CHECK-AIXBE-NEXT: sldi r3, r3, 2
; CHECK-AIXBE-NEXT: stfiwx f0, r4, r3
; CHECK-AIXBE-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
index f2b6c649343127..f21e1d4f296fa0 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
@@ -454,8 +454,8 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) {
; LE-PAIRED-NEXT: add r6, r5, r3
; LE-PAIRED-NEXT: lxvx v3, r5, r3
; LE-PAIRED-NEXT: sldi r3, r4, 5
-; LE-PAIRED-NEXT: lxv v2, 16(r6)
; LE-PAIRED-NEXT: add r4, r5, r3
+; LE-PAIRED-NEXT: lxv v2, 16(r6)
; LE-PAIRED-NEXT: stxvx v3, r5, r3
; LE-PAIRED-NEXT: stxv v2, 16(r4)
; LE-PAIRED-NEXT: blr
@@ -468,8 +468,8 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) {
; BE-PAIRED-NEXT: add r6, r5, r3
; BE-PAIRED-NEXT: lxvx v2, r5, r3
; BE-PAIRED-NEXT: sldi r3, r4, 5
-; BE-PAIRED-NEXT: lxv v3, 16(r6)
; BE-PAIRED-NEXT: add r4, r5, r3
+; BE-PAIRED-NEXT: lxv v3, 16(r6)
; BE-PAIRED-NEXT: stxvx v2, r5, r3
; BE-PAIRED-NEXT: stxv v3, 16(r4)
; BE-PAIRED-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll b/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
index f1a05c105099cb..3a742588d23b59 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-jump-table.ll
@@ -20,8 +20,8 @@
define dso_local signext i32 @jumptable(i32 signext %param) {
; CHECK-R-LABEL: jumptable:
; CHECK-R: # %bb.1: # %entry
-; CHECK-R-NEXT: paddi r5, 0, .LJTI0_0@PCREL, 1
; CHECK-R-NEXT: rldic r4, r4
+; CHECK-R-NEXT: paddi r5, 0, .LJTI0_0@PCREL, 1
; CHECK-R-NEXT: lwax r4, r4, r5
; CHECK-R-NEXT: add r4, r4, r5
; CHECK-R-NEXT: mtctr r4
@@ -35,8 +35,8 @@ define dso_local signext i32 @jumptable(i32 signext %param) {
; CHECK-A-LE-NEXT: bctr
; CHECK-A-BE-LABEL: jumptable:
; CHECK-A-BE: # %bb.1: # %entry
-; CHECK-A-BE-NEXT: paddi r5, 0, .LJTI0_0@PCREL, 1
; CHECK-A-BE-NEXT: rldic r4, r4
+; CHECK-A-BE-NEXT: paddi r5, 0, .LJTI0_0@PCREL, 1
; CHECK-A-BE-NEXT: lwax r4, r4, r5
; CHECK-A-BE-NEXT: mtctr r4
; CHECK-A-BE-NEXT: bctr
diff --git a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
index 99ad7c8c93bb26..14c5eaa5532398 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
@@ -108,8 +108,8 @@ define dso_local i32 @testCompare1(ptr nocapture readonly %arg1) nounwind {
; CHECK-P10-CMP-LE-NEXT: lbz r3, 0(r3)
; CHECK-P10-CMP-LE-NEXT: lbz r4, testCompare1@toc@l(r4)
; CHECK-P10-CMP-LE-NEXT: clrlwi r3, r3, 31
-; CHECK-P10-CMP-LE-NEXT: clrlwi r4, r4, 31
; CHECK-P10-CMP-LE-NEXT: clrldi r3, r3, 32
+; CHECK-P10-CMP-LE-NEXT: clrlwi r4, r4, 31
; CHECK-P10-CMP-LE-NEXT: clrldi r4, r4, 32
; CHECK-P10-CMP-LE-NEXT: sub r3, r3, r4
; CHECK-P10-CMP-LE-NEXT: rldicl r3, r3, 1, 63
diff --git a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
index 030dfde8f3b712..ea0997674630da 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
@@ -110,8 +110,8 @@ define dso_local i32 @testCompare1(ptr nocapture readonly %arg1) nounwind {
; CHECK-P10-CMP-LE-NEXT: lbz r3, 0(r3)
; CHECK-P10-CMP-LE-NEXT: lbz r4, testCompare1@toc@l(r4)
; CHECK-P10-CMP-LE-NEXT: clrlwi r3, r3, 31
-; CHECK-P10-CMP-LE-NEXT: clrlwi r4, r4, 31
; CHECK-P10-CMP-LE-NEXT: clrldi r3, r3, 32
+; CHECK-P10-CMP-LE-NEXT: clrlwi r4, r4, 31
; CHECK-P10-CMP-LE-NEXT: clrldi r4, r4, 32
; CHECK-P10-CMP-LE-NEXT: sub r3, r4, r3
; CHECK-P10-CMP-LE-NEXT: rldicl r3, r3, 1, 63
diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
index 2ba113f59da3c4..0892210fc74432 100644
--- a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
@@ -1083,16 +1083,16 @@ define dso_local signext i32 @v16i8tov16i32_sign(<16 x i8> %a) local_unnamed_add
; PWR10BE-NEXT: addi r3, r3, .LCPI17_1@toc@l
; PWR10BE-NEXT: lxv v4, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI17_2@toc@ha
-; PWR10BE-NEXT: addi r3, r3, .LCPI17_2@toc@l
; PWR10BE-NEXT: vperm v3, v2, v2, v3
+; PWR10BE-NEXT: addi r3, r3, .LCPI17_2@toc@l
+; PWR10BE-NEXT: vextsb2w v3, v3
; PWR10BE-NEXT: lxv v5, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI17_3@toc@ha
-; PWR10BE-NEXT: vextsb2w v3, v3
-; PWR10BE-NEXT: addi r3, r3, .LCPI17_3@toc@l
; PWR10BE-NEXT: vperm v4, v2, v2, v4
+; PWR10BE-NEXT: addi r3, r3, .LCPI17_3@toc@l
+; PWR10BE-NEXT: vextsb2w v4, v4
; PWR10BE-NEXT: lxv v0, 0(r3)
; PWR10BE-NEXT: li r3, 0
-; PWR10BE-NEXT: vextsb2w v4, v4
; PWR10BE-NEXT: vperm v5, v2, v2, v5
; PWR10BE-NEXT: vadduwm v3, v4, v3
; PWR10BE-NEXT: vextsb2w v5, v5
@@ -1206,15 +1206,15 @@ define dso_local zeroext i32 @v16i8tov16i32_zero(<16 x i8> %a) local_unnamed_add
; PWR10BE-NEXT: addi r3, r3, .LCPI18_1@toc@l
; PWR10BE-NEXT: lxv v5, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI18_2@toc@ha
-; PWR10BE-NEXT: addi r3, r3, .LCPI18_2@toc@l
; PWR10BE-NEXT: vperm v3, v4, v2, v3
+; PWR10BE-NEXT: addi r3, r3, .LCPI18_2@toc@l
; PWR10BE-NEXT: lxv v0, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI18_3@toc@ha
-; PWR10BE-NEXT: addi r3, r3, .LCPI18_3@toc@l
; PWR10BE-NEXT: vperm v5, v4, v2, v5
+; PWR10BE-NEXT: addi r3, r3, .LCPI18_3@toc@l
+; PWR10BE-NEXT: vadduwm v3, v5, v3
; PWR10BE-NEXT: lxv v1, 0(r3)
; PWR10BE-NEXT: li r3, 0
-; PWR10BE-NEXT: vadduwm v3, v5, v3
; PWR10BE-NEXT: vperm v0, v4, v2, v0
; PWR10BE-NEXT: vperm v2, v4, v2, v1
; PWR10BE-NEXT: vadduwm v2, v2, v0
@@ -1572,38 +1572,38 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 {
; PWR10BE-NEXT: addi r3, r3, .LCPI23_1@toc@l
; PWR10BE-NEXT: lxv v4, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_2@toc@ha
-; PWR10BE-NEXT: addi r3, r3, .LCPI23_2@toc@l
; PWR10BE-NEXT: vperm v3, v2, v2, v3
+; PWR10BE-NEXT: addi r3, r3, .LCPI23_2@toc@l
+; PWR10BE-NEXT: vextsb2d v3, v3
; PWR10BE-NEXT: lxv v5, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_3@toc@ha
-; PWR10BE-NEXT: vextsb2d v3, v3
-; PWR10BE-NEXT: addi r3, r3, .LCPI23_3@toc@l
; PWR10BE-NEXT: vperm v4, v2, v2, v4
+; PWR10BE-NEXT: addi r3, r3, .LCPI23_3@toc@l
+; PWR10BE-NEXT: vextsb2d v4, v4
; PWR10BE-NEXT: lxv v0, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_4@toc@ha
-; PWR10BE-NEXT: vextsb2d v4, v4
-; PWR10BE-NEXT: addi r3, r3, .LCPI23_4@toc@l
; PWR10BE-NEXT: vperm v5, v2, v2, v5
+; PWR10BE-NEXT: addi r3, r3, .LCPI23_4@toc@l
+; PWR10BE-NEXT: vextsb2d v5, v5
; PWR10BE-NEXT: lxv v1, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_5@toc@ha
-; PWR10BE-NEXT: vextsb2d v5, v5
-; PWR10BE-NEXT: addi r3, r3, .LCPI23_5@toc@l
; PWR10BE-NEXT: vperm v0, v2, v2, v0
+; PWR10BE-NEXT: addi r3, r3, .LCPI23_5@toc@l
+; PWR10BE-NEXT: vextsb2d v0, v0
; PWR10BE-NEXT: lxv v6, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_6@toc@ha
-; PWR10BE-NEXT: vextsb2d v0, v0
-; PWR10BE-NEXT: addi r3, r3, .LCPI23_6@toc@l
; PWR10BE-NEXT: vperm v1, v2, v2, v1
; PWR10BE-NEXT: vaddudm v5, v0, v5
; PWR10BE-NEXT: vaddudm v3, v4, v3
; PWR10BE-NEXT: vaddudm v3, v3, v5
+; PWR10BE-NEXT: addi r3, r3, .LCPI23_6@toc@l
+; PWR10BE-NEXT: vextsb2d v1, v1
; PWR10BE-NEXT: lxv v7, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_7@toc@ha
-; PWR10BE-NEXT: vextsb2d v1, v1
-; PWR10BE-NEXT: addi r3, r3, .LCPI23_7@toc@l
; PWR10BE-NEXT: vperm v6, v2, v2, v6
-; PWR10BE-NEXT: lxv v8, 0(r3)
+; PWR10BE-NEXT: addi r3, r3, .LCPI23_7@toc@l
; PWR10BE-NEXT: vextsb2d v6, v6
+; PWR10BE-NEXT: lxv v8, 0(r3)
; PWR10BE-NEXT: vperm v7, v2, v2, v7
; PWR10BE-NEXT: vextsb2d v7, v7
; PWR10BE-NEXT: vperm v2, v2, v2, v8
@@ -1758,28 +1758,28 @@ define dso_local i64 @v16i8tov16i64_zero(<16 x i8> %a) local_unnamed_addr #0 {
; PWR10BE-NEXT: addi r3, r3, .LCPI24_1@toc@l
; PWR10BE-NEXT: lxv v5, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI24_2@toc@ha
-; PWR10BE-NEXT: addi r3, r3, .LCPI24_2@toc@l
; PWR10BE-NEXT: vperm v3, v4, v2, v3
+; PWR10BE-NEXT: addi r3, r3, .LCPI24_2@toc@l
; PWR10BE-NEXT: lxv v0, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI24_3@toc@ha
-; PWR10BE-NEXT: addi r3, r3, .LCPI24_3@toc@l
; PWR10BE-NEXT: vperm v5, v4, v2, v5
+; PWR10BE-NEXT: addi r3, r3, .LCPI24_3@toc@l
; PWR10BE-NEXT: lxv v1, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI24_4@toc@ha
-; PWR10BE-NEXT: addi r3, r3, .LCPI24_4@toc@l
; PWR10BE-NEXT: vperm v0, v4, v2, v0
+; PWR10BE-NEXT: addi r3, r3, .LCPI24_4@toc@l
; PWR10BE-NEXT: lxv v6, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI24_5@toc@ha
-; PWR10BE-NEXT: addi r3, r3, .LCPI24_5@toc@l
; PWR10BE-NEXT: vperm v1, v4, v2, v1
+; PWR10BE-NEXT: addi r3, r3, .LCPI24_5@toc@l
; PWR10BE-NEXT: lxv v7, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI24_6@toc@ha
-; PWR10BE-NEXT: addi r3, r3, .LCPI24_6@toc@l
; PWR10BE-NEXT: vperm v6, v4, v2, v6
+; PWR10BE-NEXT: addi r3, r3, .LCPI24_6@toc@l
; PWR10BE-NEXT: lxv v8, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI24_7@toc@ha
-; PWR10BE-NEXT: addi r3, r3, .LCPI24_7@toc@l
; PWR10BE-NEXT: vperm v7, v4, v2, v7
+; PWR10BE-NEXT: addi r3, r3, .LCPI24_7@toc@l
; PWR10BE-NEXT: lxv v9, 0(r3)
; PWR10BE-NEXT: vperm v8, v4, v2, v8
; PWR10BE-NEXT: vperm v2, v4, v2, v9
More information about the llvm-commits
mailing list