[llvm] r326877 - [X86] Add IMUL scheduling info on sandybridge, fix it on >=haswell.

Clement Courbet via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 7 00:14:02 PST 2018


Author: courbet
Date: Wed Mar  7 00:14:02 2018
New Revision: 326877

URL: http://llvm.org/viewvc/llvm-project?rev=326877&view=rev
Log:
[X86] Add IMUL scheduling info on sandybridge, fix it on >=haswell.

Summary:
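Only the 16-bit immediate forms (IMUL16rri and IMUL16rri8) use an extra
P0156 uop (P015 on Sandy Bridge) in addition to P1. IMUL16rr, IMUL32* and
IMUL64* only use P1.
These port assignments were computed using https://github.com/google/EXEgesis/blob/master/exegesis/tools/compute_itineraries.cc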

This can easily be validated by running perf on the following code:

```
// Micro-benchmark for the 32-bit three-operand form `imull $imm, %src, %dst`.
// Each loop iteration executes the four-instruction group below 10000 times
// (`.rept 10000` ... `.endr`), so the per-port uop counters reported by perf
// are dominated by these IMULs.
//
// NOTE(review): LOOP_ITERATIONS is not defined in this snippet; presumably it
// is supplied at build time (e.g. -DLOOP_ITERATIONS=<n>) -- confirm.
int main(int argc, char**argv) {
  // All four values are initialised from argc, i.e. from a run-time input.
  int a = argc;
  int b = argc;
  int c = argc;
  int d = argc;

  for (int i = 0; i < LOOP_ITERATIONS; ++i) {
    // The template forms two independent dependency chains (AT&T operand
    // order, destination last):
    //   edx -> eax -> edx   (first and third instruction)
    //   ecx -> ebx -> ecx   (second and fourth instruction)
    asm volatile(
      R"(
        .rept 10000
        imull $0x2, %%edx, %%eax
        imull $0x2, %%ecx, %%ebx
        imull $0x2, %%eax, %%edx
        imull $0x2, %%ebx, %%ecx
        .endr
      )"
      // "+a"/"+b"/"+c"/"+d" bind a, b, c, d to eax, ebx, ecx, edx as
      // read-write operands; no extra inputs or clobbers are listed.
      : "+a"(a), "+b"(b), "+c"(c), "+d"(d)
      :
      :);
  }
  // The exit status is computed from all four asm outputs.
  return a+b+c+d;
}
```
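Save the code above as test.cc, build it into a binary named `test` (with LOOP_ITERATIONS defined at compile time), then run: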

perf stat -x, -e cycles --pfm-events=uops_executed_port:port_0:u,uops_executed_port:port_1:u,uops_executed_port:port_2:u,uops_executed_port:port_3:u,uops_executed_port:port_4:u,uops_executed_port:port_5:u,uops_executed_port:port_6:u,uops_executed_port:port_7:u test

Reviewers: craig.topper, RKSimon, gadi.haber

Subscribers: llvm-commits, gchatelet, chandlerc

Differential Revision: https://reviews.llvm.org/D43460

Modified:
    llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
    llvm/trunk/lib/Target/X86/X86SchedHaswell.td
    llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td

Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=326877&r1=326876&r2=326877&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Wed Mar  7 00:14:02 2018
@@ -1217,7 +1217,7 @@ def: InstRW<[BWWriteResGroup27], (instre
 def: InstRW<[BWWriteResGroup27], (instregex "CVTDQ2PSrr")>;
 def: InstRW<[BWWriteResGroup27], (instregex "CVTPS2DQrr")>;
 def: InstRW<[BWWriteResGroup27], (instregex "CVTTPS2DQrr")>;
-def: InstRW<[BWWriteResGroup27], (instrs IMUL32rr, IMUL32rri, IMUL32rri8, IMUL64rr, IMUL64rri32, IMUL64rri8)>;
+def: InstRW<[BWWriteResGroup27], (instrs IMUL16rr, IMUL32rr, IMUL32rri, IMUL32rri8, IMUL64rr, IMUL64rri32, IMUL64rri8)>;
 def: InstRW<[BWWriteResGroup27], (instrs IMUL8r)>;
 def: InstRW<[BWWriteResGroup27], (instregex "LZCNT(16|32|64)rr")>;
 def: InstRW<[BWWriteResGroup27], (instregex "MAX(C?)PDrr")>;
@@ -1298,7 +1298,7 @@ def BWWriteResGroup27_16 : SchedWriteRes
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[BWWriteResGroup27_16], (instrs IMUL16rr, IMUL16rri, IMUL16rri8)>;
+def: InstRW<[BWWriteResGroup27_16], (instrs IMUL16rri, IMUL16rri8)>;
 
 def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> {
   let Latency = 3;

Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=326877&r1=326876&r2=326877&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Wed Mar  7 00:14:02 2018
@@ -2498,7 +2498,7 @@ def: InstRW<[HWWriteResGroup50], (instre
 def: InstRW<[HWWriteResGroup50], (instregex "CVTDQ2PSrr")>;
 def: InstRW<[HWWriteResGroup50], (instregex "CVTPS2DQrr")>;
 def: InstRW<[HWWriteResGroup50], (instregex "CVTTPS2DQrr")>;
-def: InstRW<[HWWriteResGroup50], (instrs IMUL64rr, IMUL64rri32, IMUL64rri8)>;
+def: InstRW<[HWWriteResGroup50], (instrs IMUL16rr, IMUL32rr, IMUL32rri, IMUL32rri8, IMUL64rr, IMUL64rri32, IMUL64rri8)>;
 def: InstRW<[HWWriteResGroup50], (instrs IMUL8r)>;
 def: InstRW<[HWWriteResGroup50], (instregex "LZCNT(16|32|64)rr")>;
 def: InstRW<[HWWriteResGroup50], (instregex "MAX(C?)PDrr")>;
@@ -2574,17 +2574,12 @@ def: InstRW<[HWWriteResGroup50], (instre
 def: InstRW<[HWWriteResGroup50], (instregex "VUCOMISDrr")>;
 def: InstRW<[HWWriteResGroup50], (instregex "VUCOMISSrr")>;
 
-def HWWriteResGroup50_16 : SchedWriteRes<[HWPort1, HWPort0156]> {
+def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> {
   let Latency = 3;
-  let NumMicroOps = 4;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
 }
-def: InstRW<[HWWriteResGroup50_16], (instrs IMUL16rr, IMUL16rri, IMUL16rri8)>;
-
-def HWWriteResGroup50_32 : SchedWriteRes<[HWPort1, HWPort0156]> {
-  let Latency = 3;
-  let NumMicroOps = 3;
-}
-def: InstRW<[HWWriteResGroup50_32], (instrs IMUL32rr, IMUL32rri, IMUL32rri8)>;
+def: InstRW<[HWWriteResGroup50_16i], (instrs IMUL16rri, IMUL16rri8)>;
 
 def HWWriteResGroup51 : SchedWriteRes<[HWPort5]> {
   let Latency = 3;

Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=326877&r1=326876&r2=326877&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Wed Mar  7 00:14:02 2018
@@ -925,6 +925,7 @@ def: InstRW<[SBWriteResGroup21], (instre
 def: InstRW<[SBWriteResGroup21], (instregex "CVTDQ2PSrr")>;
 def: InstRW<[SBWriteResGroup21], (instregex "CVTPS2DQrr")>;
 def: InstRW<[SBWriteResGroup21], (instregex "CVTTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instrs IMUL16rr, IMUL32rr, IMUL32rri, IMUL32rri8, IMUL64rr, IMUL64rri32, IMUL64rri8)>;
 def: InstRW<[SBWriteResGroup21], (instregex "MAX(C?)PDrr")>;
 def: InstRW<[SBWriteResGroup21], (instregex "MAX(C?)PSrr")>;
 def: InstRW<[SBWriteResGroup21], (instregex "MAX(C?)SDrr")>;
@@ -1000,6 +1001,13 @@ def: InstRW<[SBWriteResGroup21], (instre
 def: InstRW<[SBWriteResGroup21], (instregex "VSUBSDrr")>;
 def: InstRW<[SBWriteResGroup21], (instregex "VSUBSSrr")>;
 
+def SBWriteResGroup21_16i : SchedWriteRes<[SBPort1, SBPort015]> {
+  let Latency = 3;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup21_16i], (instrs IMUL16rri, IMUL16rri8)>;
+
 def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
   let Latency = 3;
   let NumMicroOps = 2;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=326877&r1=326876&r2=326877&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Wed Mar  7 00:14:02 2018
@@ -1214,7 +1214,7 @@ def SKLWriteResGroup29 : SchedWriteRes<[
 }
 def: InstRW<[SKLWriteResGroup29], (instregex "BSF(16|32|64)rr")>;
 def: InstRW<[SKLWriteResGroup29], (instregex "BSR(16|32|64)rr")>;
-def: InstRW<[SKLWriteResGroup29], (instrs IMUL64rr, IMUL64rri32, IMUL64rri8)>;
+def: InstRW<[SKLWriteResGroup29], (instrs IMUL16rr, IMUL32rr, IMUL32rri, IMUL32rri8, IMUL64rr, IMUL64rri32, IMUL64rri8)>;
 def: InstRW<[SKLWriteResGroup29], (instrs IMUL8r)>;
 def: InstRW<[SKLWriteResGroup29], (instregex "LZCNT(16|32|64)rr")>;
 def: InstRW<[SKLWriteResGroup29], (instrs MUL8r)>;
@@ -1225,18 +1225,12 @@ def: InstRW<[SKLWriteResGroup29], (instr
 def: InstRW<[SKLWriteResGroup29], (instregex "SHRD(16|32|64)rri8")>;
 def: InstRW<[SKLWriteResGroup29], (instregex "TZCNT(16|32|64)rr")>;
 
-def SKLWriteResGroup29_16 : SchedWriteRes<[SKLPort1, SKLPort0156]> {
+def SKLWriteResGroup29_16i : SchedWriteRes<[SKLPort1, SKLPort0156]> {
   let Latency = 3;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup29_16], (instrs IMUL16rr, IMUL16rri, IMUL16rri8)>;
-
-def SKLWriteResGroup29_32 : SchedWriteRes<[SKLPort1]> {
-  let Latency = 3;
-  let NumMicroOps = 1;
-}
-def: InstRW<[SKLWriteResGroup29_32], (instrs IMUL32rr, IMUL32rri, IMUL32rri8)>;
+def: InstRW<[SKLWriteResGroup29_16i], (instrs IMUL16rri, IMUL16rri8)>;
 
 def SKLWriteResGroup30 : SchedWriteRes<[SKLPort5]> {
   let Latency = 3;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=326877&r1=326876&r2=326877&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Wed Mar  7 00:14:02 2018
@@ -1758,7 +1758,7 @@ def SKXWriteResGroup31 : SchedWriteRes<[
 }
 def: InstRW<[SKXWriteResGroup31], (instregex "BSF(16|32|64)rr")>;
 def: InstRW<[SKXWriteResGroup31], (instregex "BSR(16|32|64)rr")>;
-def: InstRW<[SKXWriteResGroup31], (instrs IMUL64rr, IMUL64rri32, IMUL64rri8)>;
+def: InstRW<[SKXWriteResGroup31], (instrs IMUL16rr, IMUL32rr, IMUL32rri, IMUL32rri8, IMUL64rr, IMUL64rri32, IMUL64rri8)>;
 def: InstRW<[SKXWriteResGroup31], (instrs IMUL8r)>;
 def: InstRW<[SKXWriteResGroup31], (instregex "LZCNT(16|32|64)rr")>;
 def: InstRW<[SKXWriteResGroup31], (instrs MUL8r)>;
@@ -1769,18 +1769,13 @@ def: InstRW<[SKXWriteResGroup31], (instr
 def: InstRW<[SKXWriteResGroup31], (instregex "SHRD(16|32|64)rri8")>;
 def: InstRW<[SKXWriteResGroup31], (instregex "TZCNT(16|32|64)rr")>;
 
-def SKXWriteResGroup31_16 : SchedWriteRes<[SKXPort1, SKXPort0156]> {
+def SKXWriteResGroup31_16i : SchedWriteRes<[SKXPort1, SKXPort0156]> {
   let Latency = 3;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKXWriteResGroup31_16], (instrs IMUL16rr, IMUL16rri, IMUL16rri8)>;
+def: InstRW<[SKXWriteResGroup31_16i], (instrs IMUL16rri, IMUL16rri8)>;
 
-def SKXWriteResGroup31_32 : SchedWriteRes<[SKXPort1]> {
-  let Latency = 3;
-  let NumMicroOps = 1;
-}
-def: InstRW<[SKXWriteResGroup31_32], (instrs IMUL32rr, IMUL32rri, IMUL32rri8)>;
 
 def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
   let Latency = 3;




More information about the llvm-commits mailing list