[llvm] r327588 - [X86][Btver2] Add support for multiple pipeline stages for fpu schedules. NFCI.

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 14 16:12:09 PDT 2018


Author: rksimon
Date: Wed Mar 14 16:12:09 2018
New Revision: 327588

URL: http://llvm.org/viewvc/llvm-project?rev=327588&view=rev
Log:
[X86][Btver2] Add support for multiple pipeline stages for fpu schedules. NFCI.

This allows us to use JWriteResFpuPair for complex schedule classes as well as single-pipe instructions.

Modified:
    llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td

Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=327588&r1=327587&r2=327588&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Wed Mar 14 16:12:09 2018
@@ -94,20 +94,20 @@ multiclass JWriteResIntPair<X86FoldableS
 }
 
 multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
-                            ProcResourceKind ExePort,
-                            int Lat, int Res = 1, int UOps = 1> {
+                            list<ProcResourceKind> ExePorts,
+                            int Lat, list<int> Res = [1], int UOps = 1> {
   // Register variant is using a single cycle on ExePort.
-  def : WriteRes<SchedRW, [ExePort]> {
+  def : WriteRes<SchedRW, ExePorts> {
     let Latency = Lat;
-    let ResourceCycles = [Res];
+    let ResourceCycles = Res;
     let NumMicroOps = UOps;
   }
 
   // Memory variant also uses a cycle on JLAGU and adds 5 cycles to the
   // latency.
-  def : WriteRes<SchedRW.Folded, [JLAGU, ExePort]> {
+  def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
     let Latency = !add(Lat, 5);
-    let ResourceCycles = [1, Res];
+    let ResourceCycles = !listconcat([1], Res);
     let NumMicroOps = UOps;
   }
 }
@@ -255,123 +255,66 @@ def : WriteRes<WriteNop, []>;
 // FIXME: SS vs PS latencies
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : JWriteResFpuPair<WriteFAdd,        JFPU0,  3>;
-defm : JWriteResFpuPair<WriteFMul,        JFPU1,  2>;
-defm : JWriteResFpuPair<WriteFMA,         JFPU1,  2>; // NOTE: Doesn't exist on Jaguar.
-defm : JWriteResFpuPair<WriteFRcp,        JFPU1,  2>;
-defm : JWriteResFpuPair<WriteFRsqrt,      JFPU1,  2>;
-defm : JWriteResFpuPair<WriteFShuffle,   JFPU01,  1>;
-defm : JWriteResFpuPair<WriteFBlend,     JFPU01,  1>;
-defm : JWriteResFpuPair<WriteFVarBlend,  JFPU01,  2, 4, 3>;
-defm : JWriteResFpuPair<WriteFShuffle256, JFPU01, 1>;
-
-def : WriteRes<WriteFSqrt, [JFPU1, JFPM]> {
-  let Latency = 21;
-  let ResourceCycles = [1, 21];
-}
-def : WriteRes<WriteFSqrtLd, [JFPU1, JLAGU, JFPM]> {
-  let Latency = 26;
-  let ResourceCycles = [1, 1, 21];
-}
-
-def : WriteRes<WriteFDiv, [JFPU1, JFPM]> {
-  let Latency = 19;
-  let ResourceCycles = [1, 19];
-}
-def : WriteRes<WriteFDivLd, [JFPU1, JLAGU, JFPM]> {
-  let Latency = 24;
-  let ResourceCycles = [1, 1, 19];
-}
+defm : JWriteResFpuPair<WriteFAdd,         [JFPU0],  3>;
+defm : JWriteResFpuPair<WriteFMul,         [JFPU1],  2>;
+defm : JWriteResFpuPair<WriteFMA,          [JFPU1],  2>; // NOTE: Doesn't exist on Jaguar.
+defm : JWriteResFpuPair<WriteFRcp,         [JFPU1],  2>;
+defm : JWriteResFpuPair<WriteFRsqrt,       [JFPU1],  2>;
+defm : JWriteResFpuPair<WriteFDiv,   [JFPU1, JFPM], 19, [1, 19]>;
+defm : JWriteResFpuPair<WriteFSqrt,  [JFPU1, JFPM], 21, [1, 21]>;
+defm : JWriteResFpuPair<WriteFShuffle,    [JFPU01],  1>;
+defm : JWriteResFpuPair<WriteFBlend,      [JFPU01],  1>;
+defm : JWriteResFpuPair<WriteFVarBlend,   [JFPU01],  2, [4], 3>;
+defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01],  1>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Conversions.
 // FIXME: integer pipes
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : JWriteResFpuPair<WriteCvtF2I,    JFPU1,  3>; // Float -> Integer.
-defm : JWriteResFpuPair<WriteCvtI2F,    JFPU1,  3>; // Integer -> Float.
-defm : JWriteResFpuPair<WriteCvtF2F,    JFPU1,  3>; // Float -> Float size conversion.
+defm : JWriteResFpuPair<WriteCvtF2I,       [JFPU1], 3>; // Float -> Integer.
+defm : JWriteResFpuPair<WriteCvtI2F,       [JFPU1], 3>; // Integer -> Float.
+defm : JWriteResFpuPair<WriteCvtF2F,       [JFPU1], 3>; // Float -> Float size conversion.
 
 ////////////////////////////////////////////////////////////////////////////////
 // Vector integer operations.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : JWriteResFpuPair<WriteVecALU,      JFPU01, 1>;
-defm : JWriteResFpuPair<WriteVecShift,    JFPU01, 1>;
-defm : JWriteResFpuPair<WriteVecIMul,     JFPU0,  2>;
-defm : JWriteResFpuPair<WriteMPSAD,       JFPU0,  3, 2>;
-defm : JWriteResFpuPair<WriteShuffle,     JFPU01, 1>;
-defm : JWriteResFpuPair<WriteBlend,       JFPU01, 1>;
-defm : JWriteResFpuPair<WriteVarBlend,    JFPU01, 2, 4, 3>;
-defm : JWriteResFpuPair<WriteVecLogic,    JFPU01, 1>;
-defm : JWriteResFpuPair<WriteShuffle256,  JFPU01, 1>;
-defm : JWriteResFpuPair<WriteVarVecShift, JFPU01, 1>; // NOTE: Doesn't exist on Jaguar.
+defm : JWriteResFpuPair<WriteVecALU,      [JFPU01], 1>;
+defm : JWriteResFpuPair<WriteVecShift,    [JFPU01], 1>;
+defm : JWriteResFpuPair<WriteVecIMul,      [JFPU0], 2>;
+defm : JWriteResFpuPair<WriteMPSAD,        [JFPU0], 3, [2]>;
+defm : JWriteResFpuPair<WriteShuffle,     [JFPU01], 1>;
+defm : JWriteResFpuPair<WriteBlend,       [JFPU01], 1>;
+defm : JWriteResFpuPair<WriteVarBlend,    [JFPU01], 2, [4], 3>;
+defm : JWriteResFpuPair<WriteVecLogic,    [JFPU01], 1>;
+defm : JWriteResFpuPair<WriteShuffle256,  [JFPU01], 1>;
+defm : JWriteResFpuPair<WriteVarVecShift, [JFPU01], 1>; // NOTE: Doesn't exist on Jaguar.
 
 ////////////////////////////////////////////////////////////////////////////////
-// String instructions.
-// Packed Compare Implicit Length Strings, Return Mask
+// SSE42 String instructions.
 // FIXME: approximate latencies + pipe dependencies
 ////////////////////////////////////////////////////////////////////////////////
 
-def : WriteRes<WritePCmpIStrM, [JFPU1,JFPU0]> {
-  let Latency = 8;
-  let ResourceCycles = [2, 2];
-  let NumMicroOps = 3;
-}
-def : WriteRes<WritePCmpIStrMLd, [JLAGU, JFPU1, JFPU0]> {
-  let Latency = 13;
-  let ResourceCycles = [1, 2, 2];
-  let NumMicroOps = 3;
-}
-
-// Packed Compare Explicit Length Strings, Return Mask
-def : WriteRes<WritePCmpEStrM, [JFPU1, JLAGU, JFPU01,JFPU1, JFPU0]> {
-  let Latency = 14;
-  let ResourceCycles = [5, 5, 5, 5, 5];
-  let NumMicroOps = 9;
-}
-def : WriteRes<WritePCmpEStrMLd, [JLAGU, JFPU1, JLAGU, JFPU01,JFPU1, JFPU0]> {
-  let Latency = 19;
-  let ResourceCycles = [1, 5, 5, 5, 5, 5];
-  let NumMicroOps = 9;
-}
-
-// Packed Compare Implicit Length Strings, Return Index
-def : WriteRes<WritePCmpIStrI, [JFPU1, JFPU0]> {
-  let Latency = 7;
-  let ResourceCycles = [2, 2];
-}
-def : WriteRes<WritePCmpIStrILd, [JLAGU, JFPU1, JFPU0]> {
-  let Latency = 12;
-  let ResourceCycles = [1, 2, 2];
-}
-
-// Packed Compare Explicit Length Strings, Return Index
-def : WriteRes<WritePCmpEStrI, [JFPU1, JLAGU, JFPU01,JFPU1, JFPU0]> {
-  let Latency = 14;
-  let ResourceCycles = [5, 5, 5, 5, 5];
-  let NumMicroOps = 9;
-}
-def : WriteRes<WritePCmpEStrILd, [JLAGU, JFPU1, JLAGU, JFPU01,JFPU1, JFPU0]> {
-  let Latency = 19;
-  let ResourceCycles = [1, 5, 5, 5, 5, 5];
-  let NumMicroOps = 9;
-}
+defm : JWriteResFpuPair<WritePCmpIStrI, [JFPU1, JFPU0], 7, [2, 2], 3>;
+defm : JWriteResFpuPair<WritePCmpIStrM, [JFPU1, JFPU0], 8, [2, 2], 3>;
+defm : JWriteResFpuPair<WritePCmpEStrI, [JFPU1, JLAGU, JFPU01, JFPU1, JFPU0], 14, [5, 5, 5, 5, 5], 9>;
+defm : JWriteResFpuPair<WritePCmpEStrM, [JFPU1, JLAGU, JFPU01, JFPU1, JFPU0], 14, [5, 5, 5, 5, 5], 9>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // AES Instructions.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : JWriteResFpuPair<WriteAESIMC,    JVIMUL, 2>;
-defm : JWriteResFpuPair<WriteAESKeyGen, JVIMUL, 2>;
-defm : JWriteResFpuPair<WriteAESDecEnc, JVIMUL, 3>;
+defm : JWriteResFpuPair<WriteAESIMC,    [JVIMUL], 2>;
+defm : JWriteResFpuPair<WriteAESKeyGen, [JVIMUL], 2>;
+defm : JWriteResFpuPair<WriteAESDecEnc, [JVIMUL], 3>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Horizontal add/sub  instructions.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : JWriteResFpuPair<WriteFHAdd,   JFPU0,  3>;
-defm : JWriteResFpuPair<WritePHAdd,   JFPU01, 1>;
+defm : JWriteResFpuPair<WriteFHAdd,      [JFPU0], 3>;
+defm : JWriteResFpuPair<WritePHAdd,     [JFPU01], 1>;
 
 def JWriteFHAddY: SchedWriteRes<[JFPU0]> {
   let Latency = 3;
@@ -389,7 +332,7 @@ def : InstRW<[JWriteFHAddYLd], (instrs V
 // Carry-less multiplication instructions.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : JWriteResFpuPair<WriteCLMul,   JVIMUL, 2>;
+defm : JWriteResFpuPair<WriteCLMul,     [JVIMUL], 2>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // SSE4.1 instructions.




More information about the llvm-commits mailing list