[llvm] r327174 - [Power9] Code Cleanup and adding Comments for Power 9 Scheduler

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 9 13:08:35 PST 2018


Author: stefanp
Date: Fri Mar  9 13:08:35 2018
New Revision: 327174

URL: http://llvm.org/viewvc/llvm-project?rev=327174&view=rev
Log:
[Power9] Code Cleanup and adding Comments for Power 9 Scheduler

Did some code cleanup, removing ItinRW definitions that are not needed and
resource types that are no longer used.

Also added more comments to the td files related to the Power 9 scheduler model.

Modified:
    llvm/trunk/lib/Target/PowerPC/P9InstrResources.td
    llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td

Modified: llvm/trunk/lib/Target/PowerPC/P9InstrResources.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/P9InstrResources.td?rev=327174&r1=327173&r2=327174&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/P9InstrResources.td (original)
+++ llvm/trunk/lib/Target/PowerPC/P9InstrResources.td Fri Mar  9 13:08:35 2018
@@ -7,10 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines resources required by some of P9 instruction. This is part
-// P9 processor model used for instruction scheduling. Not every instruction
-// is listed here. Instructions in this file belong to itinerary classes that
-// have instructions with different resource requirements.
+// This file defines the resources required by P9 instructions. This is part
+// of the P9 processor model used for instruction scheduling. This file should
+// contain all of the instructions that may be used on Power 9. This is not
+// just instructions that are new on Power 9 but also instructions that were
+// available on earlier architectures and are still used in Power 9.
 //
 // The makeup of the P9 CPU is modeled as follows:
 //   - Each CPU is made up of two superslices.
@@ -31,8 +32,8 @@
 //===----------------------------------------------------------------------===//
 
 // Two cycle ALU vector operation that uses an entire superslice.
-//  Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
-//  (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
+// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
+// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
 def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs
@@ -40,56 +41,28 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP
     (instregex "VAND(C)?$"),
     (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
     (instregex "V_SET0(B|H)?$"),
+    (instregex "VS(R|L)(B|H|W|D)$"),
+    (instregex "VSUBU(B|H|W|D)M$"),
+    (instregex "VPOPCNT(B|H)$"),
+    (instregex "VRL(B|H|W|D)$"),
+    (instregex "VSRA(B|H|W|D)$"),
+    (instregex "XV(N)?ABS(D|S)P$"),
+    (instregex "XVCPSGN(D|S)P$"),
+    (instregex "XV(I|X)EXP(D|S)P$"),
+    (instregex "VRL(D|W)(MI|NM)$"),
+    (instregex "VMRG(E|O)W$"),
     MTVSRDD,
     VEQV,
-    VRLB,
-    VRLD,
-    VRLDMI,
-    VRLDNM,
-    VRLH,
-    VRLW,
-    VRLWMI,
-    VRLWNM,
-    VSRAB,
-    VSRAD,
-    VSRAH,
-    VSRAW,
-    VSRB,
-    VSRD,
-    VSRH,
-    VSRW,
-    VSLB,
-    VSLD,
-    VSLH,
-    VSLW,
-    VMRGEW,
-    VMRGOW,
     VNAND,
     VNEGD,
     VNEGW,
     VNOR,
     VOR,
     VORC,
-    VPOPCNTB,
-    VPOPCNTH,
     VSEL,
-    VSUBUBM,
-    VSUBUDM,
-    VSUBUHM,
-    VSUBUWM,
     VXOR,
-    XVABSDP,
-    XVABSSP,
-    XVCPSGNDP,
-    XVCPSGNSP,
-    XVIEXPDP,
-    XVNABSDP,
-    XVNABSSP,
     XVNEGDP,
     XVNEGSP,
-    XVXEXPDP,
-    XVIEXPSP,
-    XVXEXPSP,
     XXLAND,
     XXLANDC,
     XXLEQV,
@@ -112,23 +85,21 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP
 )>;
 
 // Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
-//  slingle slice. However, since it is Restricted it requires all 3 dispatches
-//  (DISP) for that superslice.
+// single slice. However, since it is Restricted it requires all 3 dispatches
+// (DISP) for that superslice.
 def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
-    FCMPUS,
-    FCMPUD,
-    XSTSTDCDP,
-    XSTSTDCSP,
-    FTDIV,
-    FTSQRT,
-    CMPEQB,
     (instregex "TABORT(D|W)C(I)?$"),
     (instregex "MTFSB(0|1)$"),
     (instregex "MFFSC(D)?RN(I)?$"),
     (instregex "CMPRB(8)?$"),
     (instregex "TD(I)?$"),
-    (instregex "TW(I)?$")
+    (instregex "TW(I)?$"),
+    (instregex "FCMPU(S|D)$"),
+    (instregex "XSTSTDC(S|D)P$"),
+    FTDIV,
+    FTSQRT,
+    CMPEQB
 )>;
 
 // Standard Dispatch ALU operation for 3 cycles. Only one slice used.
@@ -137,42 +108,26 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DIS
     (instregex "XSMAX(C|J)?DP$"),
     (instregex "XSMIN(C|J)?DP$"),
     (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
+    (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
+    (instregex "POPCNT(D|W)$"),
+    (instregex "CMPB(8)?$"),
     XSTDIVDP,
     XSTSQRTDP,
     XSXSIGDP,
     XSCVSPDPN,
     SETB,
-    BPERMD,
-    (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
-    (instregex "POPCNT(D|W)$"),
-    (instregex "CMPB(8)?$")
+    BPERMD
 )>;
 
 // Standard Dispatch ALU operation for 2 cycles. Only one slice used.
 def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
-    ADDIStocHA,
-    ADDItocL,
-    MCRF,
-    MCRXRX,
-    XSNABSDP,
-    XSXEXPDP,
-    XSABSDP,
-    XSNEGDP,
-    XSCPSGNDP,
     (instregex "S(L|R)D$"),
     (instregex "SRAD(I)?$"),
     (instregex "EXTSWSLI$"),
     (instregex "MFV(S)?RD$"),
     (instregex "MTVSRD$"),
     (instregex "MTVSRW(A|Z)$"),
-    MFVSRWZ,
-    SRADI_32,
-    RLDIC,
-    RFEBB,
-    LA,
-    TBEGIN,
-    TRECHKPT,
     (instregex "CMP(WI|LWI|W|LW)(8)?$"),
     (instregex "CMP(L)?D(I)?$"),
     (instregex "SUBF(I)?C(8)?$"),
@@ -187,7 +142,6 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DIS
     (instregex "ADD(I|IS)?(8)?$"),
     (instregex "LI(S)?(8)?$"),
     (instregex "(X)?OR(I|IS)?(8)?(o)?$"),
-    NOP,
     (instregex "NAND(8)?(o)?$"),
     (instregex "AND(C)?(8)?(o)?$"),
     (instregex "NOR(8)?(o)?$"),
@@ -195,7 +149,24 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DIS
     (instregex "EQV(8)?(o)?$"),
     (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
     (instregex "ADD(4|8)(TLS)?(_)?$"),
-    (instregex "NEG(8)?$")
+    (instregex "NEG(8)?$"),
+    (instregex "ADDI(S)?toc(HA|L)$"),
+    MCRF,
+    MCRXRX,
+    XSNABSDP,
+    XSXEXPDP,
+    XSABSDP,
+    XSNEGDP,
+    XSCPSGNDP,
+    MFVSRWZ,
+    SRADI_32,
+    RLDIC,
+    RFEBB,
+    LA,
+    TBEGIN,
+    TRECHKPT,
+    NOP,
+    WAIT
 )>;
 
 // Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
@@ -203,23 +174,10 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DIS
 //  (DISP) for that superslice.
 def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
-    RLDCL,
-    RLDCR,
-    RLDIMI,
-    RLDICL,
-    RLDICR,
-    RLDICL_32_64,
-    RLDICL_32,
-    RLDICR_32,
+    (instregex "RLDC(L|R)$"),
     (instregex "RLWIMI(8)?$"),
-    XSIEXPDP,
-    FMR,
-    CREQV,
-    CRXOR,
-    TRECLAIM,
-    TSR,
-    TABORT,
-    (instregex "MFOCRF(8)?$"),
+    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
+    (instregex "M(F|T)OCRF(8)?$"),
     (instregex "CR(6)?(UN)?SET$"),
     (instregex "CR(N)?(OR|AND)(C)?$"),
     (instregex "S(L|R)W(8)?$"),
@@ -228,75 +186,38 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DIS
     (instregex "FNEG(D|S)$"),
     (instregex "FCPSGN(D|S)$"),
     (instregex "SRAW(I)?$"),
-    (instregex "ISEL(8)?$")
+    (instregex "ISEL(8)?$"),
+    RLDIMI,
+    XSIEXPDP,
+    FMR,
+    CREQV,
+    CRXOR,
+    TRECLAIM,
+    TSR,
+    TABORT
 )>;
 
 // Three cycle ALU vector operation that uses an entire superslice.
-//  Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
-//  (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
+// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
+// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
 def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs
     (instregex "M(T|F)VSCR$"),
     (instregex "VCMPNEZ(B|H|W)$"),
-    VCMPEQUB,
-    VCMPEQUD,
-    VCMPEQUH,
-    VCMPEQUW,
-    VCMPNEB,
-    VCMPNEH,
-    VCMPNEW,
+    (instregex "VCMPEQU(B|H|W|D)$"),
+    (instregex "VCMPNE(B|H|W)$"),
+    (instregex "VABSDU(B|H|W)$"),
+    (instregex "VADDU(B|H|W)S$"),
+    (instregex "VAVG(S|U)(B|H|W)$"),
+    (instregex "VCMP(EQ|GE|GT)FP(o)?$"),
+    (instregex "VCMPBFP(o)?$"),
+    (instregex "VC(L|T)Z(B|H|W|D)$"),
+    (instregex "VADDS(B|H|W)S$"),
+    (instregex "V(MIN|MAX)FP$"),
+    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
     VBPERMD,
-    VABSDUB,
-    VABSDUH,
-    VABSDUW,
     VADDCUW,
-    VADDUBS,
-    VADDUHS,
-    VADDUWS,
-    VAVGSB,
-    VAVGSH,
-    VAVGSW,
-    VAVGUB,
-    VAVGUH,
-    VAVGUW,
-    VCMPEQFP,
-    VCMPEQFPo,
-    VCMPGEFP,
-    VCMPGEFPo,
-    VCMPBFP,
-    VCMPBFPo,
-    VCMPGTFP,
-    VCMPGTFPo,
-    VCLZB,
-    VCLZD,
-    VCLZH,
-    VCLZW,
-    VCTZB,
-    VCTZD,
-    VCTZH,
-    VCTZW,
-    VADDSBS,
-    VADDSHS,
-    VADDSWS,
-    VMINFP,
-    VMINSB,
-    VMINSD,
-    VMINSH,
-    VMINSW,
-    VMINUB,
-    VMINUD,
-    VMINUH,
-    VMINUW,
-    VMAXFP,
-    VMAXSB,
-    VMAXSD,
-    VMAXSH,
-    VMAXSW,
-    VMAXUB,
-    VMAXUD,
-    VMAXUH,
-    VMAXUW,
     VPOPCNTW,
     VPOPCNTD,
     VPRTYBD,
@@ -492,8 +413,11 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP
     (instregex "FADD(S)?$"),
     (instregex "FMSUB(S)?$"),
     (instregex "FMADD(S)?$"),
-    FRSQRTE,
-    FRSQRTES,
+    (instregex "FSUB(S)?$"),
+    (instregex "FCFID(U)?(S)?$"),
+    (instregex "FCTID(U)?(Z)?$"),
+    (instregex "FCTIW(U)?(Z)?$"),
+    (instregex "FRSQRTE(S)?$"),
     FNMADDS,
     FNMADD,
     FNMSUBS,
@@ -502,10 +426,6 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP
     FSELS,
     FMULS,
     FMUL,
-    (instregex "FSUB(S)?$"),
-    (instregex "FCFID(U)?(S)?$"),
-    (instregex "FCTID(U)?(Z)?$"),
-    (instregex "FCTIW(U)?(Z)?$"),
     XSMADDADP,
     XSMADDASP,
     XSMADDMDP,
@@ -607,6 +527,7 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_
     (instregex "VSPLT(W|H|B)(s)?$"),
     (instregex "V_SETALLONES(B|H)?$"),
     (instregex "VEXTRACTU(B|H|W)$"),
+    (instregex "VINSERT(B|H|W|D)$"),
     MFVSRLD,
     MTVSRWS,
     VBPERMQ,
@@ -620,10 +541,6 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_
     VEXTUWLX,
     VEXTUWRX,
     VGBBD,
-    VINSERTB,
-    VINSERTD,
-    VINSERTH,
-    VINSERTW,
     VMRGHB,
     VMRGHH,
     VMRGHW,
@@ -782,13 +699,13 @@ def : InstRW<[P9_DFU_76C, IP_EXECE_1C, I
     XSSQRTQPO
 )>;
 
-// 6 Cycle load uses a single slice.
+// 6 Cycle Load uses a single slice.
 def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C, DISP_1C],
       (instrs
     (instregex "LXVL(L)?")
 )>;
 
-// 5 Cycle load uses a single slice.
+// 5 Cycle Load uses a single slice.
 def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
       (instrs
     (instregex "LVE(B|H|W)X$"),
@@ -807,17 +724,9 @@ def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP
     LIWZX
 )>;
 
-// 4 Cycle load uses a single slice.
+// 4 Cycle Load uses a single slice.
 def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
       (instrs
-    COPY,
-    CP_ABORT,
-    DARN,
-    EnforceIEIO,
-    ISYNC,
-    MSGSYNC,
-    TLBSYNC,
-    SYNC,
     (instregex "DCB(F|T|ST)(EP)?$"),
     (instregex "DCBZ(L)?(EP)?$"),
     (instregex "DCBTST(EP)?$"),
@@ -832,6 +741,14 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP
     (instregex "LWARX(L)?$"),
     (instregex "LWBRX(8)?$"),
     (instregex "LWZ(8|CIX|X|X8)?$"),
+    COPY,
+    CP_ABORT,
+    DARN,
+    EnforceIEIO,
+    ISYNC,
+    MSGSYNC,
+    TLBSYNC,
+    SYNC,
     LHZ,
     LHZ8,
     LHZCIX,
@@ -850,8 +767,8 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP
     LFD
 )>;
 
-// Cracked load instructions.
-// Load instruction that can be done in parallel.
+// Cracked Load Instructions.
+// Load instructions that can be done in parallel.
 def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
@@ -863,7 +780,7 @@ def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGE
     TLBIEL
 )>;
 
-// Cracked Load instruction.
+// Cracked Load Instruction.
 // Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
 // operations can be run in parallel.
 def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
@@ -873,12 +790,23 @@ def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EX
     TEND
 )>;
 
+// Cracked Store Instruction
+// Consecutive Store and ALU instructions. The store is restricted and requires
+// three dispatches.
 def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     (instregex "ST(B|H|W|D)CX$")
 )>;
 
+// Cracked Load Instruction.
+// Two consecutive load operations for a total of 8 cycles.
+def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+      (instrs
+    LDMX
+)>;
+
 // Cracked Load instruction.
 // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
 //  operations cannot be done at the same time and so their latencies are added.
@@ -967,21 +895,20 @@ def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_A
     (instregex "STF(S|D|IWX|SX|DX)$"),
     (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
     (instregex "STW(8)?$"),
-    DFSTOREf32,
-    DFSTOREf64,
-    XFSTOREf32,
-    XFSTOREf64,
+    (instregex "(D|X)FSTORE(f32|f64)$"),
+    (instregex "ST(W|H|D)BRX$"),
+    (instregex "ST(B|H|D)(8)?$"),
+    (instregex "ST(B|W|H|D)(CI)?X(8)?$"),
     STIWX,
     SLBIEG,
     STMW,
     STSWI,
-    TLBIE,
-    (instregex "ST(W|H|D)BRX$"),
-    (instregex "ST(B|H|D)(8)?$"),
-    (instregex "ST(B|W|H|D)(CI)?X(8)?$")
+    TLBIE
 )>;
 
-// Store operation that requires the whole superslice.
+// Vector Store Instruction
+// Requires the whole superslice and therefore requires all three dispatches
+// as well as both the Even and Odd exec pipelines.
 def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
               DISP_1C, DISP_1C, DISP_1C],
       (instrs
@@ -990,13 +917,6 @@ def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_
     (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
 )>;
 
-// Cracked instruction made up up two restriced stores.
-//def : InstRW<[P9_LS_1C, P9_LS_1C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
-//              IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
-//      (instrs
-//      STFDEPX
-//)>;
-
 // 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
 // dispatches.
@@ -1096,8 +1016,6 @@ def : InstRW<[P9_IntDivAndALUOp_42C_8, I
 def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
-    MTOCRF,
-    MTOCRF8,
     MTCRF,
     MTCRF8
 )>;
@@ -1175,6 +1093,9 @@ def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_
     (instregex "MFFS(L|CE|o)?$")
 )>;
 
+// Cracked ALU instruction composed of three consecutive 2 cycle ALU operations
+// for a total of 6 cycles. All of the ALU operations are also restricted so
+// each takes 3 dispatches for a total of 9.
 def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
               DISP_1C, DISP_1C],
@@ -1194,8 +1115,6 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EX
     RLDICo
 )>;
 
-// FP Div instructions in IIC_FPDivD and IIC_FPDivS.
-
 // 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
 def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
@@ -1210,6 +1129,7 @@ def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP
 )>;
 
 // 36 Cycle DP Instruction.
+// Instruction can be done on a single slice.
 def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
     XSSQRTDP
@@ -1304,8 +1224,6 @@ def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8
     XVDIVDP
 )>;
 
-// Load instructions in IIC_LdStLFDU and IIC_LdStLFDUX.
-
 // Instruction cracked into three pieces. One Load and two ALU operations.
 // The Load and one of the ALU ops cannot be run at the same time and so the
 //  latencies are added together for 6 cycles. The remainaing ALU is 2 cycles.
@@ -1403,7 +1321,7 @@ def : InstRW<[P9_BROpAndALUOp_7C, IP_EXE
     ADDPCIS
 )>;
 
-// Special Extracted Instructions
+// Special Extracted Instructions For Atomics
 
 // Atomic Load
 def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
@@ -1493,12 +1411,10 @@ def : InstRW<[],
   NAP,
   STOP,
   TRAP,
-  LDMX,
   RFCI,
   RFDI,
   RFMCI,
   SC,
-  WAIT,
   DCBA,
   DCBI,
   DCCCI,

Modified: llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td?rev=327174&r1=327173&r2=327174&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCScheduleP9.td Fri Mar  9 13:08:35 2018
@@ -13,17 +13,29 @@
 include "PPCInstrInfo.td"
 
 def P9Model : SchedMachineModel {
+  // The maximum number of instructions to be issued at the same time.
+  // A value of 8 is technically correct since 8 instructions can be fetched
+  // from the instruction cache. However, only 6 instructions may actually be
+  // dispatched at a time.
   let IssueWidth = 8;
 
+  // Load latency is 4 or 5 cycles depending on the load. This latency assumes
+  // that we have a cache hit. For a cache miss the load latency will be more.
+  // There are two instructions (lxvl, lxvll) that have a latency of 6 cycles.
+  // However it is not worth bumping this value up to 6 when the vast majority
+  // of instructions are 4 or 5 cycles.
   let LoadLatency = 5;
 
+  // A total of 16 cycles to recover from a branch mispredict.
   let MispredictPenalty = 16;
 
   // Try to make sure we have at least 10 dispatch groups in a loop.
+  // A dispatch group is 6 instructions.
   let LoopMicroOpBufferSize = 60;
 
   let CompleteModel = 1;
 
+  // Do not support QPX (Quad Processing eXtension) on Power 9.
   let UnsupportedFeatures = [HasQPX];
 
 }
@@ -36,6 +48,12 @@ let SchedModel = P9Model in {
   def DISPATCHER : ProcResource<12>;
 
   // Issue Ports
+  // An instruction can go down one of two issue queues.
+  // Address Generation (AGEN) mainly for loads and stores.
+  // Execution (EXEC) for most other instructions.
+  // Some instructions cannot be run on just any issue queue and may require an
+  // Even or an Odd queue. The EXECE represents the even queues and the EXECO
+  // represents the odd queues.
   def IP_AGEN : ProcResource<4>;
   def IP_EXEC : ProcResource<4>;
   def IP_EXECE : ProcResource<2> {
@@ -48,6 +66,7 @@ let SchedModel = P9Model in {
   }
 
   // Pipeline Groups
+  // Four ALU (Fixed Point Arithmetic) units in total. Two even, two Odd.
   def ALU : ProcResource<4>;
   def ALUE : ProcResource<2> {
     //Even ALU pipelines
@@ -57,7 +76,11 @@ let SchedModel = P9Model in {
     //Odd ALU pipelines
     let Super = ALU;
   }
+
+  // Two DIV (Fixed Point Divide) units.
   def DIV : ProcResource<2>;
+
+  // Four DP (Floating Point) units in total. Two even, two Odd.
   def DP : ProcResource<4>;
   def DPE : ProcResource<2> {
     //Even DP pipelines
@@ -67,15 +90,23 @@ let SchedModel = P9Model in {
     //Odd DP pipelines
     let Super = DP;
   }
+
+  // Four LS (Load or Store) units.
   def LS : ProcResource<4>;
+
+  // Two PM (Permute) units.
   def PM : ProcResource<2>;
+
+  // Only one DFU (Decimal Floating Point and Quad Precision) unit.
   def DFU : ProcResource<1>;
+
+  // Only one Branch unit.
   def BR : ProcResource<1> {
     let BufferSize = 16;
   }
-  def CY : ProcResource<1>;
 
-  def TestGroup : ProcResGroup<[ALU, DP]>;
+  // Only one CY (Crypto) unit.
+  def CY : ProcResource<1>;
 
   // ***************** SchedWriteRes Definitions *****************
 
@@ -107,6 +138,11 @@ let SchedModel = P9Model in {
   }
 
   //Pipeline Groups
+
+  // ALU Units
+  // An ALU may take either 2 or 3 cycles to complete the operation.
+  // However, the ALU unit is only ever busy for 1 cycle at a time and may
+  // receive new instructions each cycle.
   def P9_ALU_2C : SchedWriteRes<[ALU]> {
     let Latency = 2;
   }
@@ -131,26 +167,9 @@ let SchedModel = P9Model in {
     let Latency = 3;
   }
 
-  def P9_ALU_4C : SchedWriteRes<[ALU]> {
-    let Latency = 4;
-  }
-
-  def P9_ALUE_4C : SchedWriteRes<[ALUE]> {
-    let Latency = 4;
-  }
-
-  def P9_ALUO_4C : SchedWriteRes<[ALUO]> {
-    let Latency = 4;
-  }
-
-  def P9_ALU_5C : SchedWriteRes<[ALU]> {
-    let Latency = 5;
-  }
-
-  def P9_ALU_6C : SchedWriteRes<[ALU]> {
-    let Latency = 6;
-  }
-
+  // DIV Unit
+  // A DIV unit may take from 5 to 40 cycles to complete.
+  // Some DIV operations may keep the unit busy for up to 8 cycles.
   def P9_DIV_5C : SchedWriteRes<[DIV]> {
     let Latency = 5;
   }
@@ -174,6 +193,9 @@ let SchedModel = P9Model in {
     let Latency = 40;
   }
 
+  // DP Unit
+  // A DP unit may take from 2 to 36 cycles to complete.
+  // Some DP operations keep the unit busy for up to 10 cycles.
   def P9_DP_2C : SchedWriteRes<[DP]> {
     let Latency = 2;
   }
@@ -264,14 +286,18 @@ let SchedModel = P9Model in {
     let Latency = 36;
   }
 
+  // PM Unit
+  // Three cycle permute operations.
   def P9_PM_3C : SchedWriteRes<[PM]> {
     let Latency = 3;
   }
 
-  def P9_PM_7C : SchedWriteRes<[PM]> {
-    let Latency = 3;
-  }
-
+  // Load and Store Units
+  // Loads can have 4, 5 or 6 cycles of latency.
+  // Stores are listed as having a single cycle of latency. This is not
+  // completely accurate since it takes more than 1 cycle to actually store
+  // the value. However, since the store does not produce a result it can be
+  // considered complete after one cycle.
   def P9_LS_1C : SchedWriteRes<[LS]> {
     let Latency = 1;
   }
@@ -288,6 +314,10 @@ let SchedModel = P9Model in {
     let Latency = 6;
   }
 
+  // DFU Unit
+  // Some of the most expensive ops use the DFU.
+  // Can take from 12 cycles to 76 cycles to obtain a result.
+  // The unit may be busy for up to 62 cycles.
   def P9_DFU_12C : SchedWriteRes<[DFU]> {
     let Latency = 12;
   }
@@ -312,11 +342,12 @@ let SchedModel = P9Model in {
     let ResourceCycles = [44];
   }
 
-  def P9_DFU_76C : SchedWriteRes<[TestGroup, DFU]> {
+  def P9_DFU_76C : SchedWriteRes<[DFU]> {
     let Latency = 76;
     let ResourceCycles = [62];
   }
 
+  // 2 or 5 cycle latencies for the branch unit.
   def P9_BR_2C : SchedWriteRes<[BR]> {
     let Latency = 2;
   }
@@ -325,12 +356,16 @@ let SchedModel = P9Model in {
     let Latency = 5;
   }
 
+  // 6 cycle latency for the crypto unit
   def P9_CY_6C : SchedWriteRes<[CY]> {
     let Latency = 6;
   }
 
   // ***************** WriteSeq Definitions *****************
 
+  // These are combinations of the resources listed above.
+  // The idea is that some cracked instructions cannot be done in parallel and
+  // so the latencies for their resources must be added.
   def P9_LoadAndALUOp_6C : WriteSequence<[P9_LS_4C, P9_ALU_2C]>;
   def P9_LoadAndALUOp_7C : WriteSequence<[P9_LS_5C, P9_ALU_2C]>;
   def P9_LoadAndALU2Op_7C : WriteSequence<[P9_LS_4C, P9_ALU_3C]>;
@@ -357,120 +392,7 @@ let SchedModel = P9Model in {
   def P9_DPOpAndALU2Op_39C_10 : WriteSequence<[P9_DP_36C_10, P9_ALU_3C]>;
   def P9_BROpAndALUOp_7C : WriteSequence<[P9_BR_5C, P9_ALU_2C]>;
 
-  // ***************** Defining Itinerary Class Resources *****************
-
-  // The following itineraries are fully covered by the InstRW definitions in
-  // P9InstrResources.td so aren't listed here.
-  // IIC_FPDivD, IIC_FPDivS, IIC_FPFused, IIC_IntDivD, IIC_LdStLFDU,
-  // IIC_LdStLFDUX
-
-  def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
-               [IIC_IntSimple, IIC_IntGeneral, IIC_IntRFID,
-                IIC_IntRotateD, IIC_IntRotateDI, IIC_IntTrapD,
-                IIC_SprRFI]>;
-
-  def : ItinRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
-               [IIC_IntTrapW]>;
-
-  def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
-               [IIC_IntISEL, IIC_IntRotate, IIC_IntShift]>;
-
-  def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C], [IIC_IntCompare]>;
-
-  def : ItinRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
-                DISP_1C, DISP_1C], [IIC_VecGeneral, IIC_FPCompare]>;
-
-  def : ItinRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
-               [IIC_IntMulHW, IIC_IntMulHWU, IIC_IntMulLI, IIC_IntMulHD]>;
-
-  def : ItinRW<[P9_LS_5C, IP_EXEC_1C, DISP_1C, DISP_1C],
-               [IIC_LdStLoad, IIC_LdStLD, IIC_LdStLFD]>;
-
-  def : ItinRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
-                DISP_1C, DISP_1C, DISP_1C, DISP_1C],
-               [IIC_LdStLoadUpd, IIC_LdStLDU]>;
-
-  def : ItinRW<[P9_LS_4C, P9_ALU_2C, IP_EXECE_1C, IP_EXECO_1C,
-                DISP_1C, DISP_1C, DISP_1C, DISP_1C],
-               [IIC_LdStLoadUpdX, IIC_LdStLDUX]>;
-
-  def : ItinRW<[P9_LS_1C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
-                DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
-               [IIC_LdStSTFDU]>;
-
-  def : ItinRW<[P9_LoadAndALUOp_6C,
-                IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
-               [IIC_LdStLHA, IIC_LdStLWA]>;
-
-  def : ItinRW<[P9_LoadAndALUOp_6C, P9_ALU_2C,
-                IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
-                DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
-               [IIC_LdStLHAU, IIC_LdStLHAUX]>;
-
-  // IIC_LdStLMW contains two microcoded insns. This is not accurate, but
-  // those insns are not used that much, if at all.
-  def : ItinRW<[P9_LS_4C, IP_EXEC_1C, DISP_1C, DISP_1C],
-               [IIC_LdStLWARX, IIC_LdStLDARX, IIC_LdStLMW]>;
-
-  def : ItinRW<[P9_LS_4C, IP_EXEC_1C, DISP_1C, DISP_1C],
-               [IIC_LdStCOPY, IIC_SprABORT, IIC_LdStPASTE, IIC_LdStDCBF,
-                IIC_LdStICBI, IIC_LdStSync, IIC_SprISYNC, IIC_SprMSGSYNC,
-                IIC_SprSLBIA, IIC_SprSLBSYNC, IIC_SprTLBSYNC]>;
-
-  def : ItinRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
-               [IIC_LdStSTFD, IIC_LdStSTD, IIC_LdStStore]>;
-
-  def : ItinRW<[P9_LS_1C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
-                DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
-               [IIC_LdStSTDU, IIC_LdStSTDUX, IIC_LdStStoreUpd, IIC_SprSLBIEG,
-                IIC_SprTLBIA, IIC_SprTLBIE]>;
-
-  def : ItinRW<[P9_StoreAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
-                DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
-               [IIC_LdStSTDCX, IIC_LdStSTWCX]>;
-
-  def : ItinRW<[P9_ALU_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
-               [IIC_BrCR, IIC_IntMTFSB0]>;
-
-  def : ItinRW<[P9_ALUOpAndALUOp_4C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
-                IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-                DISP_1C, DISP_1C, DISP_1C, DISP_1C],
-               [IIC_SprMFCR, IIC_SprMFCRF, IIC_BrMCR, IIC_BrMCRX, IIC_IntMFFS]>;
-
-  def : ItinRW<[P9_BR_2C, DISP_1C], [IIC_BrB]>;
-  def : ItinRW<[P9_BR_5C, DISP_1C], [IIC_SprMFSPR]>;
-
-  // This class should be broken down to instruction level, once some missing
-  // info is obtained.
-  def : ItinRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
-                DISP_1C, DISP_1C, DISP_1C], [IIC_SprMTSPR]>;
-
-  def : ItinRW<[P9_LoadAndLoadOp_8C, IP_EXEC_1C, DISP_1C, DISP_1C],
-               [IIC_SprSLBIE, IIC_SprSLBMFEE, IIC_SprSLBMFEV, IIC_SprSLBMTE,
-                IIC_SprTLBIEL]>;
-
-  // IIC_VecFP is added here although many instructions with that itinerary
-  // use very different resources. It would appear that instructions were
-  // given that itinerary rather carelessly over time. Specific instructions
-  // that use different resources are listed in various InstrRW classes.
-  def : ItinRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
-               [IIC_FPGeneral, IIC_FPAddSub, IIC_VecFP]>;
-
-  def : ItinRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
-                DISP_1C, DISP_1C], [IIC_VecFPCompare]>;
-
-  def : ItinRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C],
-               [IIC_VecPerm]>;
-
-  def : ItinRW<[P9_DP_36C_10, IP_EXEC_1C], [IIC_FPSqrtD]>;
-  def : ItinRW<[P9_DP_26C_5, P9_DP_26C_5, IP_EXEC_1C, IP_EXEC_1C], [IIC_FPSqrtS]>;
-
-  def : ItinRW<[P9_DIV_12C, IP_EXECE_1C, DISP_1C, DISP_1C],
-               [IIC_SprMFMSR, IIC_SprMFPMR, IIC_SprMFSR, IIC_SprMFTB,
-                IIC_SprMTMSR, IIC_SprMTMSRD, IIC_SprMTPMR, IIC_SprMTSR]>;
-
-  def : ItinRW<[], [IIC_SprSTOP]>;
-
+  // Include the resource requirements of individual instructions.
   include "P9InstrResources.td"
 
 }




More information about the llvm-commits mailing list