[llvm] r328061 - [X86] Change PMULLD to 10 cycles on Skylake per Agner's tables and llvm-exegesis.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 20 16:39:48 PDT 2018


Author: ctopper
Date: Tue Mar 20 16:39:48 2018
New Revision: 328061

URL: http://llvm.org/viewvc/llvm-project?rev=328061&view=rev
Log:
[X86] Change PMULLD to 10 cycles on Skylake per Agner's tables and llvm-exegesis.

Also restrict PMULLD to ports 0 and 1 for SkylakeClient. It looks like the scheduler models don't account for the client core lacking the full vector ALU on port 5 that the server core has.

Fixes PR36808.

Modified:
    llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse41-schedule.ll

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=328061&r1=328060&r2=328061&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Tue Mar 20 16:39:48 2018
@@ -2416,13 +2416,10 @@ def SKLWriteResGroup105 : SchedWriteRes<
   let NumMicroOps = 2;
   let ResourceCycles = [2];
 }
-def: InstRW<[SKLWriteResGroup105], (instregex "PMULLDrr")>;
 def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDPDr")>;
 def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDPSr")>;
 def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDSDr")>;
 def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDSSr")>;
-def: InstRW<[SKLWriteResGroup105], (instregex "VPMULLDYrr")>;
-def: InstRW<[SKLWriteResGroup105], (instregex "VPMULLDrr")>;
 def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDPDr")>;
 def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDPSr")>;
 def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDSDr")>;
@@ -2430,6 +2427,15 @@ def: InstRW<[SKLWriteResGroup105], (inst
 def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDYPDr")>;
 def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDYPSr")>;
 
+def SKLWriteResGroup105_2 : SchedWriteRes<[SKLPort01]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[SKLWriteResGroup105_2], (instregex "PMULLDrr")>;
+def: InstRW<[SKLWriteResGroup105_2], (instregex "VPMULLDYrr")>;
+def: InstRW<[SKLWriteResGroup105_2], (instregex "VPMULLDrr")>;
+
 def SKLWriteResGroup106 : SchedWriteRes<[SKLPort0,SKLPort23]> {
   let Latency = 8;
   let NumMicroOps = 2;
@@ -3278,17 +3284,23 @@ def SKLWriteResGroup168 : SchedWriteRes<
   let NumMicroOps = 3;
   let ResourceCycles = [1,2];
 }
-def: InstRW<[SKLWriteResGroup168], (instregex "PMULLDrm")>;
 def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDPDm")>;
 def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDPSm")>;
 def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDSDm")>;
 def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDSSm")>;
-def: InstRW<[SKLWriteResGroup168], (instregex "VPMULLDrm")>;
 def: InstRW<[SKLWriteResGroup168], (instregex "VROUNDPDm")>;
 def: InstRW<[SKLWriteResGroup168], (instregex "VROUNDPSm")>;
 def: InstRW<[SKLWriteResGroup168], (instregex "VROUNDSDm")>;
 def: InstRW<[SKLWriteResGroup168], (instregex "VROUNDSSm")>;
 
+def SKLWriteResGroup168_2 : SchedWriteRes<[SKLPort23,SKLPort01]> {
+  let Latency = 16;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SKLWriteResGroup168_2], (instregex "PMULLDrm")>;
+def: InstRW<[SKLWriteResGroup168_2], (instregex "VPMULLDrm")>;
+
 def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
   let Latency = 14;
   let NumMicroOps = 3;
@@ -3318,10 +3330,16 @@ def SKLWriteResGroup172 : SchedWriteRes<
   let NumMicroOps = 3;
   let ResourceCycles = [1,2];
 }
-def: InstRW<[SKLWriteResGroup172], (instregex "VPMULLDYrm")>;
 def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDYPDm")>;
 def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDYPSm")>;
 
+def SKLWriteResGroup172_2 : SchedWriteRes<[SKLPort23,SKLPort01]> {
+  let Latency = 17;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SKLWriteResGroup172_2], (instregex "VPMULLDYrm")>;
+
 def SKLWriteResGroup173 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
   let Latency = 15;
   let NumMicroOps = 4;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=328061&r1=328060&r2=328061&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Tue Mar 20 16:39:48 2018
@@ -3869,16 +3869,10 @@ def SKXWriteResGroup116 : SchedWriteRes<
   let NumMicroOps = 2;
   let ResourceCycles = [2];
 }
-def: InstRW<[SKXWriteResGroup116], (instregex "PMULLDrr")>;
 def: InstRW<[SKXWriteResGroup116], (instregex "ROUNDPDr")>;
 def: InstRW<[SKXWriteResGroup116], (instregex "ROUNDPSr")>;
 def: InstRW<[SKXWriteResGroup116], (instregex "ROUNDSDr")>;
 def: InstRW<[SKXWriteResGroup116], (instregex "ROUNDSSr")>;
-def: InstRW<[SKXWriteResGroup116], (instregex "VPMULLDYrr")>;
-def: InstRW<[SKXWriteResGroup116], (instregex "VPMULLDZ128rr(b?)(k?)(z?)")>;
-def: InstRW<[SKXWriteResGroup116], (instregex "VPMULLDZ256rr(b?)(k?)(z?)")>;
-def: InstRW<[SKXWriteResGroup116], (instregex "VPMULLDZrr(b?)(k?)(z?)")>;
-def: InstRW<[SKXWriteResGroup116], (instregex "VPMULLDrr")>;
 def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALEPDZ128rri(b?)(k?)(z?)")>;
 def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALEPDZ256rri(b?)(k?)(z?)")>;
 def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALEPDZrri(b?)(k?)(z?)")>;
@@ -3894,6 +3888,18 @@ def: InstRW<[SKXWriteResGroup116], (inst
 def: InstRW<[SKXWriteResGroup116], (instregex "VROUNDYPDr")>;
 def: InstRW<[SKXWriteResGroup116], (instregex "VROUNDYPSr")>;
 
+def SKXWriteResGroup116_2 : SchedWriteRes<[SKXPort015]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[SKXWriteResGroup116_2], (instregex "PMULLDrr")>;
+def: InstRW<[SKXWriteResGroup116_2], (instregex "VPMULLDYrr")>;
+def: InstRW<[SKXWriteResGroup116_2], (instregex "VPMULLDZ128rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup116_2], (instregex "VPMULLDZ256rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup116_2], (instregex "VPMULLDZrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup116_2], (instregex "VPMULLDrr")>;
+
 def SKXWriteResGroup117 : SchedWriteRes<[SKXPort0,SKXPort23]> {
   let Latency = 8;
   let NumMicroOps = 2;
@@ -5541,13 +5547,10 @@ def SKXWriteResGroup186 : SchedWriteRes<
   let NumMicroOps = 3;
   let ResourceCycles = [1,2];
 }
-def: InstRW<[SKXWriteResGroup186], (instregex "PMULLDrm")>;
 def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDPDm")>;
 def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDPSm")>;
 def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDSDm")>;
 def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDSSm")>;
-def: InstRW<[SKXWriteResGroup186], (instregex "VPMULLDZ128rm(b?)(k?)(z?)")>;
-def: InstRW<[SKXWriteResGroup186], (instregex "VPMULLDrm")>;
 def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALEPDZ128rm(b?)i(k?)(z?)")>;
 def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALEPSZ128rm(b?)i(k?)(z?)")>;
 def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALESDm(b?)(k?)(z?)")>;
@@ -5557,6 +5560,15 @@ def: InstRW<[SKXWriteResGroup186], (inst
 def: InstRW<[SKXWriteResGroup186], (instregex "VROUNDSDm")>;
 def: InstRW<[SKXWriteResGroup186], (instregex "VROUNDSSm")>;
 
+def SKXWriteResGroup186_2 : SchedWriteRes<[SKXPort23,SKXPort015]> {
+  let Latency = 16;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SKXWriteResGroup186_2], (instregex "PMULLDrm")>;
+def: InstRW<[SKXWriteResGroup186_2], (instregex "VPMULLDZ128rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup186_2], (instregex "VPMULLDrm")>;
+
 def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
   let Latency = 14;
   let NumMicroOps = 3;
@@ -5609,9 +5621,6 @@ def SKXWriteResGroup192 : SchedWriteRes<
   let NumMicroOps = 3;
   let ResourceCycles = [1,2];
 }
-def: InstRW<[SKXWriteResGroup192], (instregex "VPMULLDYrm")>;
-def: InstRW<[SKXWriteResGroup192], (instregex "VPMULLDZ256rm(b?)(k?)(z?)")>;
-def: InstRW<[SKXWriteResGroup192], (instregex "VPMULLDZrm(b?)(k?)(z?)")>;
 def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZ256rm(b?)i(k?)(z?)")>;
 def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZrm(b?)i(k?)(z?)")>;
 def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPSZ256rm(b?)i(k?)(z?)")>;
@@ -5619,6 +5628,15 @@ def: InstRW<[SKXWriteResGroup192], (inst
 def: InstRW<[SKXWriteResGroup192], (instregex "VROUNDYPDm")>;
 def: InstRW<[SKXWriteResGroup192], (instregex "VROUNDYPSm")>;
 
+def SKXWriteResGroup192_2 : SchedWriteRes<[SKXPort23,SKXPort015]> {
+  let Latency = 17;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SKXWriteResGroup192_2], (instregex "VPMULLDYrm")>;
+def: InstRW<[SKXWriteResGroup192_2], (instregex "VPMULLDZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup192_2], (instregex "VPMULLDZrm(b?)(k?)(z?)")>;
+
 def SKXWriteResGroup193 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
   let Latency = 15;
   let NumMicroOps = 4;

Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=328061&r1=328060&r2=328061&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Tue Mar 20 16:39:48 2018
@@ -4924,14 +4924,14 @@ define <8 x i32> @test_pmulld(<8 x i32>
 ;
 ; SKYLAKE-LABEL: test_pmulld:
 ; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67]
-; SKYLAKE-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67]
+; SKYLAKE-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
+; SKYLAKE-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pmulld:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67]
-; SKX-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67]
+; SKX-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:0.67]
+; SKX-NEXT:    vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; ZNVER1-LABEL: test_pmulld:

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=328061&r1=328060&r2=328061&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Tue Mar 20 16:39:48 2018
@@ -543,7 +543,7 @@ define <16 x i32> @vpmulld_test(<16 x i3
 ;
 ; SKX-LABEL: vpmulld_test:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpmulld %zmm1, %zmm0, %zmm0 # sched: [8:0.67]
+; SKX-NEXT:    vpmulld %zmm1, %zmm0, %zmm0 # sched: [10:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %x = mul <16 x i32> %i, %j
   ret <16 x i32> %x

Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=328061&r1=328060&r2=328061&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Tue Mar 20 16:39:48 2018
@@ -2853,14 +2853,14 @@ define <4 x i32> @test_pmulld(<4 x i32>
 ;
 ; SKYLAKE-LABEL: test_pmulld:
 ; SKYLAKE:       # %bb.0:
-; SKYLAKE-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [8:0.67]
-; SKYLAKE-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
+; SKYLAKE-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00]
+; SKYLAKE-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pmulld:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [8:0.67]
-; SKX-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
+; SKX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:0.67]
+; SKX-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:0.67]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pmulld:




More information about the llvm-commits mailing list