[llvm] r326701 - [Power9] Add more missing instructions to the Power 9 scheduler

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 5 06:34:59 PST 2018


Author: stefanp
Date: Mon Mar  5 06:34:59 2018
New Revision: 326701

URL: http://llvm.org/viewvc/llvm-project?rev=326701&view=rev
Log:
[Power9] Add more missing instructions to the Power 9 scheduler

Adding more instructions using InstRW so that we can move away from ItinRW
and ultimately have a complete Power 9 scheduler.

Modified:
    llvm/trunk/lib/Target/PowerPC/P9InstrResources.td
    llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td

Modified: llvm/trunk/lib/Target/PowerPC/P9InstrResources.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/P9InstrResources.td?rev=326701&r1=326700&r2=326701&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/P9InstrResources.td (original)
+++ llvm/trunk/lib/Target/PowerPC/P9InstrResources.td Mon Mar  5 06:34:59 2018
@@ -120,6 +120,7 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DIS
     XSTSTDCSP,
     FTDIV,
     FTSQRT,
+    CMPEQB,
     (instregex "CMPRB(8)?$"),
     (instregex "TD(I)?$"),
     (instregex "TW(I)?$")
@@ -137,7 +138,7 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DIS
     XSCVSPDPN,
     SETB,
     BPERMD,
-    (instregex "CNT(L|T)Z(D|W)(8)?$"),
+    (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
     (instregex "POPCNT(D|W)$"),
     (instregex "CMPB(8)?$")
 )>;
@@ -162,6 +163,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DIS
     RFEBB,
     LA,
     (instregex "CMP(WI|LWI|W|LW)(8)?$"),
+    (instregex "CMP(L)?D(I)?$"),
     (instregex "SUBF(I)?C(8)?$"),
     (instregex "ANDI(S)?o(8)?$"),
     (instregex "ADDC(8)?$"),
@@ -179,7 +181,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DIS
     (instregex "AND(C)?(8)?(o)?$"),
     (instregex "NOR(8)?$"),
     (instregex "OR(C)?(8)?$"),
-    (instregex "EQV(8)?$"),
+    (instregex "EQV(8)?(o)?$"),
     (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
     (instregex "ADD(4|8)(TLS)?(_)?$"),
     (instregex "NEG(8)?$")
@@ -201,6 +203,10 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DIS
     (instregex "RLWIMI(8)?$"),
     XSIEXPDP,
     FMR,
+    CREQV,
+    CRXOR,
+    (instregex "CR(6)?(UN)?SET$"),
+    (instregex "CR(N)?(OR|AND)(C)?$"),
     (instregex "S(L|R)W(8)?$"),
     (instregex "RLW(INM|NM)(8)?$"),
     (instregex "F(N)?ABS(D|S)$"),
@@ -456,42 +462,25 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_E
 def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     FRSP,
-    FRIND,
-    FRINS,
-    FRIPD,
-    FRIPS,
-    FRIZD,
-    FRIZS,
-    FRIMD,
-    FRIMS,
-    FRE,
-    FRES,
+    (instregex "FRI(N|P|Z|M)(D|S)$"),
+    (instregex "FRE(S)?$"),
+    (instregex "FADD(S)?$"),
+    (instregex "FMSUB(S)?$"),
+    (instregex "FMADD(S)?$"),
     FRSQRTE,
     FRSQRTES,
-    FMADDS,
-    FMADD,
-    FMSUBS,
-    FMSUB,
     FNMADDS,
     FNMADD,
     FNMSUBS,
     FNMSUB,
     FSELD,
     FSELS,
-    FADDS,
     FMULS,
     FMUL,
-    FSUBS,
-    FCFID,
-    FCTID,
-    FCTIDZ,
-    FCFIDU,
-    FCFIDS,
-    FCFIDUS,
-    FCTIDUZ,
-    FCTIWUZ,
-    FCTIW,
-    FCTIWZ,
+    (instregex "FSUB(S)?$"),
+    (instregex "FCFID(U)?(S)?$"),
+    (instregex "FCTID(U)?(Z)?$"),
+    (instregex "FCTIW(U)?(Z)?$"),
     XSMADDADP,
     XSMADDASP,
     XSMADDMDP,
@@ -513,15 +502,32 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP
 )>;
 
 // 7 cycle Restricted DP operation and one 3 cycle ALU operation.
+// These operations can be done in parallel.
+//  The DP is restricted so we need a full 5 dispatches.
+def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    (instregex "FSEL(D|S)o$")
+)>;
+
+// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
+// These operations must be done sequentially.
 //  The DP is restricted so we need a full 5 dispatches.
 def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
-    FMULo,
-    FMADDo,
-    FMSUBo,
-    FNMADDo,
-    FNMSUBo
+    (instregex "FRI(N|P|Z|M)(D|S)o$"),
+    (instregex "FRE(S)?o$"),
+    (instregex "FADD(S)?o$"),
+    (instregex "FSUB(S)?o$"),
+    (instregex "F(N)?MSUB(S)?o$"),
+    (instregex "F(N)?MADD(S)?o$"),
+    (instregex "FCFID(U)?(S)?o$"),
+    (instregex "FCTID(U)?(Z)?o$"),
+    (instregex "FCTIW(U)?(Z)?o$"),
+    (instregex "FMUL(S)?o$"),
+    (instregex "FRSQRTE(S)?o$"),
+    FRSPo
 )>;
 
 // 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
@@ -764,7 +770,21 @@ def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP
 // 4 Cycle load uses a single slice.
 def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
       (instrs
-    COPY
+    COPY,
+    CP_ABORT,
+    DARN,
+    EnforceIEIO,
+    ISYNC,
+    (instregex "DCB(F|T|ST)(EP)?$"),
+    (instregex "DCBZ(L)?(EP)?$"),
+    (instregex "DCBTST(EP)?$"),
+    (instregex "CP_COPY(8)?$"),
+    (instregex "CP_PASTE(8)?$"),
+    (instregex "ICBI(EP)?$"),
+    (instregex "ICBT(LS)?$"),
+    (instregex "LBARX(L)?$"),
+    (instregex "LBZ(CIX|8|X|X8)?$"),
+    (instregex "LD(ARX|ARXL|BRX|CIX|X)?$")
 )>;
 
 // 4 Cycle Restricted load uses a single slice but the dispatch for the whole
@@ -776,6 +796,16 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP
     LFD
 )>;
 
+// Cracked Load instruction.
+// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
+//  operations cannot be done at the same time and so their latencies are added.
+def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    (instregex "LHA(8)?$"),
+    (instregex "CP_PASTE(8)?o$")
+)>;
+
 // Cracked Restricted Load instruction.
 // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
 //  operations cannot be done at the same time and so their latencies are added.
@@ -912,6 +942,15 @@ def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C,
 // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
 //  and one full superslice for the DIV operation since there is only one DIV
 //  per superslice. Latency of DIV plus ALU is 26.
+def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    (instregex "DIVW(U)?(O)?o$")
+)>;
+
+// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
+//  and one full superslice for the DIV operation since there is only one DIV
+//  per superslice. Latency of DIV plus ALU is 26.
 def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
@@ -958,10 +997,24 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_E
     (instregex "ADDC(8)?o$")
 )>;
 
-// Cracked, restricted, ALU operations.
+// Cracked ALU operations.
+// Two ALU ops can be done in parallel.
+// One is a three cycle ALU, the other is a two cycle ALU.
+// One of the ALU ops is restricted the other is not so we have a total of
+// 5 dispatches.
+def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    (instregex "F(N)?ABS(D|S)o$"),
+    (instregex "FCPSGN(D|S)o$"),
+    (instregex "FNEG(D|S)o$"),
+    FMRo
+)>;
+
+// Cracked ALU operations.
 // Here the two ALU ops can actually be done in parallel and therefore the
 //  latencies are not added together. Otherwise this is like having two
-//  instructions running together on two pipelines and 6 dispatches.
+//  instructions running together on two pipelines and 4 dispatches.
 // ALU ops are 3 cycles each.
 def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C],
@@ -994,13 +1047,39 @@ def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, D
     FDIV
 )>;
 
-// 33 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
+// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
 def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     FDIVo
 )>;
 
+// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
+def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    FSQRT
+)>;
+
+// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
+def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    FSQRTo
+)>;
+
+// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
+def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    FSQRTS
+)>;
+
+// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
+def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    FSQRTSo
+)>;
+
 // 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
 def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
@@ -1070,6 +1149,16 @@ def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AG
 )>;
 
 // Cracked instruction made up of a Load and an ALU. The ALU does not depend on
+// the load and so it can be run at the same time as the load.
+def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    (instregex "LBZU(X)?(8)?$"),
+    (instregex "LDU(X)?$")
+)>;
+
+
+// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
 //  the load and so it can be run at the same time as the load. The load is also
 //  restricted. 3 dispatches are from the restricted load while the other two
 //  are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
@@ -1132,6 +1221,18 @@ def : InstRW<[P9_BROpAndALUOp_7C, IP_EXE
     ADDPCIS
 )>;
 
+// Special Extracted Instructions
+
+// Atomic Load
+def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
+              IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
+              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
+              DISP_1C],
+      (instrs
+    LDAT
+)>;
+
 // Signal Processing Engine (SPE) Instructions
 // These instructions are not supported on Power 9
 def : InstRW<[],
@@ -1182,6 +1283,9 @@ def : InstRW<[],
 def : InstRW<[],
     (instrs
   (instregex "(H)?RFI(D)?$"),
+  (instregex "DSS(ALL)?$"),
+  (instregex "DST(ST)?(T)?(64)?$"),
+  (instregex "ICBL(C|Q)$"),
   ATTN,
   CLRBHRB,
   MFBHRBE,
@@ -1190,5 +1294,10 @@ def : InstRW<[],
   RFDI,
   RFMCI,
   SC,
-  WAIT
+  WAIT,
+  DCBA,
+  DCBI,
+  DCCCI,
+  ICCCI,
+  LBEPX
 )> { let Unsupported = 1; }

Modified: llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td?rev=326701&r1=326700&r2=326701&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td Mon Mar  5 06:34:59 2018
@@ -309,6 +309,7 @@ let SchedModel = P9Model in {
   def P9_LoadAndALU2Op_8C : WriteSequence<[P9_LS_5C, P9_ALU_3C]>;
   def P9_LoadAndPMOp_8C : WriteSequence<[P9_LS_5C, P9_PM_3C]>;
   def P9_LoadAndLoadOp_8C : WriteSequence<[P9_LS_4C, P9_LS_4C]>;
+  def P9_IntDivAndALUOp_18C_8 : WriteSequence<[P9_DIV_16C_8, P9_ALU_2C]>;
   def P9_IntDivAndALUOp_26C_8 : WriteSequence<[P9_DIV_24C_8, P9_ALU_2C]>;
   def P9_IntDivAndALUOp_42C_8 : WriteSequence<[P9_DIV_40C_8, P9_ALU_2C]>;
   def P9_StoreAndALUOp_4C : WriteSequence<[P9_LS_1C, P9_ALU_3C]>;
@@ -318,7 +319,9 @@ let SchedModel = P9Model in {
   def P9_DPOpAndALUOp_24C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_2C]>;
   def P9_DPOpAndALUOp_35C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_2C]>;
   def P9_DPOpAndALU2Op_25C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_3C]>;
+  def P9_DPOpAndALU2Op_29C_5 : WriteSequence<[P9_DP_26C_5, P9_ALU_3C]>;
   def P9_DPOpAndALU2Op_36C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_3C]>;
+  def P9_DPOpAndALU2Op_39C_10 : WriteSequence<[P9_DP_36C_10, P9_ALU_3C]>;
   def P9_BROpAndALUOp_7C : WriteSequence<[P9_BR_5C, P9_ALU_2C]>;
 
   // ***************** Defining Itinerary Class Resources *****************




More information about the llvm-commits mailing list