[llvm] r326447 - [Power9] Add missing instructions to the Power 9 scheduler
Stefan Pintilie via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 1 08:16:08 PST 2018
Author: stefanp
Date: Thu Mar 1 08:16:08 2018
New Revision: 326447
URL: http://llvm.org/viewvc/llvm-project?rev=326447&view=rev
Log:
[Power9] Add missing instructions to the Power 9 scheduler
Adding more instructions using InstRW so that we can move away from ItinRW
and ultimately have a complete Power 9 scheduler.
Differential Revision: https://reviews.llvm.org/D43899
Modified:
llvm/trunk/lib/Target/PowerPC/P9InstrResources.td
llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td
Modified: llvm/trunk/lib/Target/PowerPC/P9InstrResources.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/P9InstrResources.td?rev=326447&r1=326446&r2=326447&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/P9InstrResources.td (original)
+++ llvm/trunk/lib/Target/PowerPC/P9InstrResources.td Thu Mar 1 08:16:08 2018
@@ -37,19 +37,7 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP
DISP_1C, DISP_1C, DISP_1C],
(instrs
(instregex "VADDU(B|H|W|D)M$"),
- VADDCUW,
- VAND,
- VANDC,
- VCMPEQUB,
- VCMPEQUD,
- VCMPEQUH,
- VCMPEQUW,
- VCMPNEB,
- VCMPNEH,
- VCMPNEW,
- VCMPNEZB,
- VCMPNEZH,
- VCMPNEZW,
+ (instregex "VAND(C)?$"),
VEQV,
VEXTSB2D,
VEXTSB2W,
@@ -175,14 +163,15 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DIS
(instregex "EXTSWSLI$"),
SRADI_32,
RLDIC,
- ADDIC,
- ADDICo,
+ RFEBB,
LA,
(instregex "CMP(WI|LWI|W|LW)(8)?$"),
(instregex "SUBF(I)?C(8)?$"),
(instregex "ANDI(S)?o(8)?$"),
- (instregex "ADD(I)?C(8)?(o)?$"),
- (instregex "ADD(E|ME|ZE)(8)?$"),
+ (instregex "ADDC(8)?$"),
+ (instregex "ADDIC(8)?(o)?$"),
+ (instregex "ADD(8|4)(o)?$"),
+ (instregex "ADD(E|ME|ZE)(8)?(o)?$"),
(instregex "SUBF(E|ME|ZE)?(8)?$"),
(instregex "NEG(8)?$"),
(instregex "POPCNTB$"),
@@ -191,7 +180,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DIS
(instregex "(X)?OR(I|IS)?(8)?$"),
NOP,
(instregex "NAND(8)?$"),
- (instregex "AND(C)?(8)?$"),
+ (instregex "AND(C)?(8)?(o)?$"),
(instregex "NOR(8)?$"),
(instregex "OR(C)?(8)?$"),
(instregex "EQV(8)?$"),
@@ -231,10 +220,19 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DIS
def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
DISP_1C, DISP_1C, DISP_1C],
(instrs
+ (instregex "VCMPNEZ(B|H|W)$"),
+ VCMPEQUB,
+ VCMPEQUD,
+ VCMPEQUH,
+ VCMPEQUW,
+ VCMPNEB,
+ VCMPNEH,
+ VCMPNEW,
VBPERMD,
VABSDUB,
VABSDUH,
VABSDUW,
+ VADDCUW,
VADDUBS,
VADDUHS,
VADDUWS,
@@ -518,9 +516,9 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP
XSNMSUBMSP
)>;
-// 7 cycle Restricted DP operation and one 2 cycle ALU operation.
+// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// The DP is restricted so we need a full 5 dispatches.
-def : InstRW<[P9_DPOpAndALUOp_9C, IP_EXEC_1C, IP_EXEC_1C,
+def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
(instrs
FMULo,
@@ -665,7 +663,17 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_
XSCMPOQP,
XSCMPUQP,
XSTSTDCQP,
- XSXSIGQP
+ XSXSIGQP,
+ BCDCFNo,
+ BCDCFZo,
+ BCDCPSGNo,
+ BCDCTNo,
+ BCDCTZo,
+ BCDSETSGNo,
+ BCDSo,
+ BCDTRUNCo,
+ BCDUSo,
+ BCDUTRUNCo
)>;
// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
@@ -673,6 +681,7 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_
// dispatches.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
(instrs
+ BCDSRo,
XSADDQP,
XSADDQPO,
XSCVDPQP,
@@ -690,6 +699,14 @@ def : InstRW<[P9_DFU_12C, IP_EXECE_1C, I
XSSUBQPO
)>;
+// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches.
+def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ BCDCTSQo
+)>;
+
// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
@@ -707,6 +724,14 @@ def : InstRW<[P9_DFU_24C, IP_EXECE_1C, I
XSNMSUBQPO
)>;
+// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches.
+def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ BCDCFSQo
+)>;
+
// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
@@ -730,6 +755,7 @@ def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP
(instrs
LXSDX,
LXVD2X,
+ LXVWSX,
LXSIWZX,
LXV,
LXVX,
@@ -761,9 +787,7 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
(instrs
- LFIWAX,
- LFSX,
- LFS
+ LFIWAX
)>;
// Cracked Load instruction.
@@ -773,12 +797,33 @@ def : InstRW<[P9_LoadAndALUOp_6C, IP_EXE
def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
DISP_1C, DISP_1C, DISP_1C, DISP_1C],
(instrs
- LXSSPX,
LXSIWAX,
+ LIWAX
+)>;
+
+// Cracked Load instruction.
+// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
+// cycles. The Load and ALU operations cannot be done at the same time and so
+// their latencies are added.
+// Full 6 dispatches are required as this is a restricted instruction.
+def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ LFSX,
+ LFS
+)>;
+
+// Cracked Load instruction.
+// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
+// operations cannot be done at the same time and so their latencies are added.
+// Full 4 dispatches are required as this is a cracked instruction.
+def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
LXSSP,
- DFLOADf32,
+ LXSSPX,
XFLOADf32,
- LIWAX
+ DFLOADf32
)>;
// Cracked Load that requires the PM resource.
@@ -791,7 +836,6 @@ def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN
DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
(instrs
LXVDSX,
- LXVWSX,
LXVW4X
)>;
@@ -828,7 +872,9 @@ def : InstRW<[P9_LS_1C, P9_LS_1C, IP_EXE
// dispatches.
def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
(instrs
- (instregex "M(T|F)VRSAVE(v)?$")
+ (instregex "M(T|F)VRSAVE(v)?$"),
+ (instregex "MF(SPR|CTR|LR)(8)?$"),
+ MFDCR
)>;
// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
@@ -905,6 +951,17 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_E
MTCRF8
)>;
+// Cracked ALU operations.
+// Here the two ALU ops can actually be done in parallel and therefore the
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 4 dispatches.
+// ALU ops are 2 cycles each.
+def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "ADDC(8)?o$")
+)>;
+
// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
@@ -931,7 +988,7 @@ def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, D
)>;
// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
-def : InstRW<[P9_DPOpAndALUOp_35C_8, IP_EXEC_1C, IP_EXEC_1C,
+def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
(instrs
FDIVo
@@ -950,7 +1007,7 @@ def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, D
)>;
// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
-def : InstRW<[P9_DPOpAndALUOp_24C_5, IP_EXEC_1C, IP_EXEC_1C,
+def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
(instrs
FDIVSo
@@ -988,7 +1045,7 @@ def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8
// Both the load and the ALU that depends on it are restricted and so they take
// a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
-def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C,
+def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
DISP_1C, DISP_1C, DISP_1C, DISP_1C,
DISP_1C, DISP_1C, DISP_1C, DISP_1C],
@@ -1023,19 +1080,64 @@ def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AG
// dispatches.
def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
(instrs
- VPMSUMB,
- VPMSUMD,
- VPMSUMH,
- VPMSUMW,
- VCIPHER,
- VCIPHERLAST,
- VNCIPHER,
- VNCIPHERLAST,
- VSBOX
+ (instregex "VPMSUM(B|H|W|D)$"),
+ (instregex "V(N)?CIPHER(LAST)?$"),
+ VSBOX
+)>;
+
+// Branch Instructions
+
+// Two Cycle Branch
+def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "BCCCTR(L)?(8)?$"),
+ (instregex "BCCL(A|R|RL)?$"),
+ (instregex "BCCTR(L)?(8)?(n)?$"),
+ (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
+ (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
+ (instregex "BL(_TLS)?$"),
+ (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
+ (instregex "BLA(8|8_NOP)?$"),
+ (instregex "BLR(8|L)?$"),
+ (instregex "TAILB(A)?(8)?$"),
+ (instregex "TAILBCTR(8)?$"),
+ (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
+ (instregex "BCLR(L)?(n)?$"),
+ (instregex "BCTR(L)?(8)?$"),
+ B,
+ BA,
+ BC,
+ BCC,
+ BCCA,
+ BCL,
+ BCLalways,
+ BCLn,
+ BCTRL8_LDinto_toc,
+ BCn,
+ CTRL_DEP
+)>;
+
+// Five Cycle Branch with a 2 Cycle ALU Op
+// Operations must be done consecutively and not in parallel.
+def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ ADDPCIS
)>;
+
// Instructions without scheduling support.
def : InstRW<[],
(instrs
- (instregex "(H)?RFI(D)?$")
+ (instregex "(H)?RFI(D)?$"),
+ ATTN,
+ BRINC,
+ CLRBHRB,
+ MFBHRBE,
+ NAP,
+ RFCI,
+ RFDI,
+ RFMCI,
+ SC,
+ WAIT
)> { let Unsupported = 1; }
Modified: llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td?rev=326447&r1=326446&r2=326447&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td Thu Mar 1 08:16:08 2018
@@ -264,11 +264,21 @@ let SchedModel = P9Model in {
let Latency = 12;
}
+ def P9_DFU_23C : SchedWriteRes<[DFU]> {
+ let Latency = 23;
+ let ResourceCycles = [11];
+ }
+
def P9_DFU_24C : SchedWriteRes<[DFU]> {
let Latency = 24;
let ResourceCycles = [12];
}
+ def P9_DFU_37C : SchedWriteRes<[DFU]> {
+ let Latency = 37;
+ let ResourceCycles = [25];
+ }
+
def P9_DFU_58C : SchedWriteRes<[DFU]> {
let Latency = 58;
let ResourceCycles = [44];
@@ -295,6 +305,8 @@ let SchedModel = P9Model in {
def P9_LoadAndALUOp_6C : WriteSequence<[P9_LS_4C, P9_ALU_2C]>;
def P9_LoadAndALUOp_7C : WriteSequence<[P9_LS_5C, P9_ALU_2C]>;
+ def P9_LoadAndALU2Op_7C : WriteSequence<[P9_LS_4C, P9_ALU_3C]>;
+ def P9_LoadAndALU2Op_8C : WriteSequence<[P9_LS_5C, P9_ALU_3C]>;
def P9_LoadAndPMOp_8C : WriteSequence<[P9_LS_5C, P9_PM_3C]>;
def P9_LoadAndLoadOp_8C : WriteSequence<[P9_LS_4C, P9_LS_4C]>;
def P9_IntDivAndALUOp_26C_8 : WriteSequence<[P9_DIV_24C_8, P9_ALU_2C]>;
@@ -302,8 +314,12 @@ let SchedModel = P9Model in {
def P9_StoreAndALUOp_4C : WriteSequence<[P9_LS_1C, P9_ALU_3C]>;
def P9_ALUOpAndALUOp_4C : WriteSequence<[P9_ALU_2C, P9_ALU_2C]>;
def P9_DPOpAndALUOp_9C : WriteSequence<[P9_DP_7C, P9_ALU_2C]>;
+ def P9_DPOpAndALU2Op_10C : WriteSequence<[P9_DP_7C, P9_ALU_3C]>;
def P9_DPOpAndALUOp_24C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_2C]>;
def P9_DPOpAndALUOp_35C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_2C]>;
+ def P9_DPOpAndALU2Op_25C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_3C]>;
+ def P9_DPOpAndALU2Op_36C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_3C]>;
+ def P9_BROpAndALUOp_7C : WriteSequence<[P9_BR_5C, P9_ALU_2C]>;
// ***************** Defining Itinerary Class Resources *****************
More information about the llvm-commits mailing list