[llvm] 02f03a6 - [X86] Match vpmullq latency to uops.info. Correct port usage for 512-bit memory form

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 3 12:25:10 PST 2020


Author: Craig Topper
Date: 2020-03-03T12:16:03-08:00
New Revision: 02f03a6fd4cd64730e6229e0202404d90079b8d1

URL: https://github.com/llvm/llvm-project/commit/02f03a6fd4cd64730e6229e0202404d90079b8d1
DIFF: https://github.com/llvm/llvm-project/commit/02f03a6fd4cd64730e6229e0202404d90079b8d1.diff

LOG: [X86] Match vpmullq latency to uops.info. Correct port usage for 512-bit memory form

uops.info says these should be 15-cycle instructions. uops.info also shows that the 512-bit form uses ports 0 and 5 for both the register and memory forms. We had the memory forms modeled on ports 0, 1, and 5 (SKXPort015).

Differential Revision: https://reviews.llvm.org/D75549

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dq.s
    llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dqvl.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index a58a67c8efc6..1b36a3b7e127 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -1963,14 +1963,14 @@ def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
 def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>;
 
 def SKXWriteResGroup174 : SchedWriteRes<[SKXPort01]> {
-  let Latency = 12;
+  let Latency = 15;
   let NumMicroOps = 3;
   let ResourceCycles = [3];
 }
 def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>;
 
 def SKXWriteResGroup174z : SchedWriteRes<[SKXPort05]> {
-  let Latency = 12;
+  let Latency = 15;
   let NumMicroOps = 3;
   let ResourceCycles = [3];
 }
@@ -2131,8 +2131,8 @@ def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKX
 }
 def: InstRW<[SKXWriteResGroup202], (instrs XCH_F)>;
 
-def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort015]> {
-  let Latency = 18;
+def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort01]> {
+  let Latency = 21;
   let NumMicroOps = 4;
   let ResourceCycles = [1,3];
 }
@@ -2159,13 +2159,19 @@ def SKXWriteResGroup209 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
 }
 def : SchedAlias<WriteFDiv64Ld,  SKXWriteResGroup209>; // TODO - convert to ZnWriteResFpuPair
 
-def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> {
-  let Latency = 19;
+def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort01]> {
+  let Latency = 22;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,3];
+}
+def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>;
+
+def SKXWriteResGroup211_1 : SchedWriteRes<[SKXPort23,SKXPort05]> {
+  let Latency = 22;
   let NumMicroOps = 4;
   let ResourceCycles = [1,3];
 }
-def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)",
-                                              "VPMULLQZrm(b?)")>;
+def: InstRW<[SKXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>;
 
 def SKXWriteResGroup215 : SchedWriteRes<[SKXPort0]> {
   let Latency = 20;

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dq.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dq.s
index 95f49ffe4496..de2e5847a8e6 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dq.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dq.s
@@ -252,12 +252,12 @@ vxorps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  2      9     1.00    *                   vfpclassss	$171, (%rax), %k1
 # CHECK-NEXT:  1      4     1.00                        vfpclassss	$171, %xmm16, %k1 {%k2}
 # CHECK-NEXT:  2      9     1.00    *                   vfpclassss	$171, (%rax), %k1 {%k2}
-# CHECK-NEXT:  3      12    1.50                        vpmullq	%zmm16, %zmm17, %zmm19
-# CHECK-NEXT:  4      19    1.00    *                   vpmullq	(%rax), %zmm17, %zmm19
-# CHECK-NEXT:  3      12    1.50                        vpmullq	%zmm16, %zmm17, %zmm19 {%k1}
-# CHECK-NEXT:  4      19    1.00    *                   vpmullq	(%rax), %zmm17, %zmm19 {%k1}
-# CHECK-NEXT:  3      12    1.50                        vpmullq	%zmm16, %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT:  4      19    1.00    *                   vpmullq	(%rax), %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  3      15    1.50                        vpmullq	%zmm16, %zmm17, %zmm19
+# CHECK-NEXT:  4      22    1.50    *                   vpmullq	(%rax), %zmm17, %zmm19
+# CHECK-NEXT:  3      15    1.50                        vpmullq	%zmm16, %zmm17, %zmm19 {%k1}
+# CHECK-NEXT:  4      22    1.50    *                   vpmullq	(%rax), %zmm17, %zmm19 {%k1}
+# CHECK-NEXT:  3      15    1.50                        vpmullq	%zmm16, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  4      22    1.50    *                   vpmullq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      1     0.50                        vxorpd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      8     0.50    *                   vxorpd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      8     0.50    *                   vxorpd	(%rax){1to8}, %zmm17, %zmm19
@@ -291,7 +291,7 @@ vxorps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     55.50  7.00   37.50  37.50   -     81.50   -      -
+# CHECK-NEXT:  -      -     57.00  4.00   37.50  37.50   -     83.00   -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -397,11 +397,11 @@ vxorps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vfpclassss	$171, %xmm16, %k1 {%k2}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vfpclassss	$171, (%rax), %k1 {%k2}
 # CHECK-NEXT:  -      -     1.50    -      -      -      -     1.50    -      -     vpmullq	%zmm16, %zmm17, %zmm19
-# CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     1.00    -      -     vpmullq	(%rax), %zmm17, %zmm19
+# CHECK-NEXT:  -      -     1.50    -     0.50   0.50    -     1.50    -      -     vpmullq	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     1.50    -      -      -      -     1.50    -      -     vpmullq	%zmm16, %zmm17, %zmm19 {%k1}
-# CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     1.00    -      -     vpmullq	(%rax), %zmm17, %zmm19 {%k1}
+# CHECK-NEXT:  -      -     1.50    -     0.50   0.50    -     1.50    -      -     vpmullq	(%rax), %zmm17, %zmm19 {%k1}
 # CHECK-NEXT:  -      -     1.50    -      -      -      -     1.50    -      -     vpmullq	%zmm16, %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     1.00    -      -     vpmullq	(%rax), %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50    -     0.50   0.50    -     1.50    -      -     vpmullq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -     vxorpd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -     vxorpd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -     vxorpd	(%rax){1to8}, %zmm17, %zmm19

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dqvl.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dqvl.s
index cff188701926..4e383e595e89 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dqvl.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dqvl.s
@@ -344,18 +344,18 @@ vxorps            (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      4     1.00                        vfpclassps	$171, %ymm16, %k1 {%k2}
 # CHECK-NEXT:  2      11    1.00    *                   vfpclasspsy	$171, (%rax), %k1 {%k2}
 # CHECK-NEXT:  2      11    1.00    *                   vfpclassps	$171, (%rax){1to8}, %k1 {%k2}
-# CHECK-NEXT:  3      12    1.50                        vpmullq	%xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  4      18    1.00    *                   vpmullq	(%rax), %xmm17, %xmm19
-# CHECK-NEXT:  3      12    1.50                        vpmullq	%xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  4      18    1.00    *                   vpmullq	(%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  3      12    1.50                        vpmullq	%xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  4      18    1.00    *                   vpmullq	(%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  3      12    1.50                        vpmullq	%ymm16, %ymm17, %ymm19
-# CHECK-NEXT:  4      19    1.00    *                   vpmullq	(%rax), %ymm17, %ymm19
-# CHECK-NEXT:  3      12    1.50                        vpmullq	%ymm16, %ymm17, %ymm19 {%k1}
-# CHECK-NEXT:  4      19    1.00    *                   vpmullq	(%rax), %ymm17, %ymm19 {%k1}
-# CHECK-NEXT:  3      12    1.50                        vpmullq	%ymm16, %ymm17, %ymm19 {%k1} {z}
-# CHECK-NEXT:  4      19    1.00    *                   vpmullq	(%rax), %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  3      15    1.50                        vpmullq	%xmm16, %xmm17, %xmm19
+# CHECK-NEXT:  4      21    1.50    *                   vpmullq	(%rax), %xmm17, %xmm19
+# CHECK-NEXT:  3      15    1.50                        vpmullq	%xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT:  4      21    1.50    *                   vpmullq	(%rax), %xmm17, %xmm19 {%k1}
+# CHECK-NEXT:  3      15    1.50                        vpmullq	%xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT:  4      21    1.50    *                   vpmullq	(%rax), %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT:  3      15    1.50                        vpmullq	%ymm16, %ymm17, %ymm19
+# CHECK-NEXT:  4      22    1.50    *                   vpmullq	(%rax), %ymm17, %ymm19
+# CHECK-NEXT:  3      15    1.50                        vpmullq	%ymm16, %ymm17, %ymm19 {%k1}
+# CHECK-NEXT:  4      22    1.50    *                   vpmullq	(%rax), %ymm17, %ymm19 {%k1}
+# CHECK-NEXT:  3      15    1.50                        vpmullq	%ymm16, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  4      22    1.50    *                   vpmullq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      1     0.33                        vxorpd	%xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  2      7     0.50    *                   vxorpd	(%rax), %xmm17, %xmm19
 # CHECK-NEXT:  2      7     0.50    *                   vxorpd	(%rax){1to2}, %xmm17, %xmm19
@@ -407,7 +407,7 @@ vxorps            (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     64.00  64.00  59.00  59.00   -     76.00   -      -
+# CHECK-NEXT:  -      -     67.00  67.00  59.00  59.00   -     70.00   -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -544,17 +544,17 @@ vxorps            (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vfpclasspsy	$171, (%rax), %k1 {%k2}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vfpclassps	$171, (%rax){1to8}, %k1 {%k2}
 # CHECK-NEXT:  -      -     1.50   1.50    -      -      -      -      -      -     vpmullq	%xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     1.00    -      -     vpmullq	(%rax), %xmm17, %xmm19
+# CHECK-NEXT:  -      -     1.50   1.50   0.50   0.50    -      -      -      -     vpmullq	(%rax), %xmm17, %xmm19
 # CHECK-NEXT:  -      -     1.50   1.50    -      -      -      -      -      -     vpmullq	%xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     1.00    -      -     vpmullq	(%rax), %xmm17, %xmm19 {%k1}
+# CHECK-NEXT:  -      -     1.50   1.50   0.50   0.50    -      -      -      -     vpmullq	(%rax), %xmm17, %xmm19 {%k1}
 # CHECK-NEXT:  -      -     1.50   1.50    -      -      -      -      -      -     vpmullq	%xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     1.00    -      -     vpmullq	(%rax), %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   1.50   0.50   0.50    -      -      -      -     vpmullq	(%rax), %xmm17, %xmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.50   1.50    -      -      -      -      -      -     vpmullq	%ymm16, %ymm17, %ymm19
-# CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     1.00    -      -     vpmullq	(%rax), %ymm17, %ymm19
+# CHECK-NEXT:  -      -     1.50   1.50   0.50   0.50    -      -      -      -     vpmullq	(%rax), %ymm17, %ymm19
 # CHECK-NEXT:  -      -     1.50   1.50    -      -      -      -      -      -     vpmullq	%ymm16, %ymm17, %ymm19 {%k1}
-# CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     1.00    -      -     vpmullq	(%rax), %ymm17, %ymm19 {%k1}
+# CHECK-NEXT:  -      -     1.50   1.50   0.50   0.50    -      -      -      -     vpmullq	(%rax), %ymm17, %ymm19 {%k1}
 # CHECK-NEXT:  -      -     1.50   1.50    -      -      -      -      -      -     vpmullq	%ymm16, %ymm17, %ymm19 {%k1} {z}
-# CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     1.00    -      -     vpmullq	(%rax), %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   1.50   0.50   0.50    -      -      -      -     vpmullq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33    -      -      -     0.33    -      -     vxorpd	%xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vxorpd	(%rax), %xmm17, %xmm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vxorpd	(%rax){1to2}, %xmm17, %xmm19


        


More information about the llvm-commits mailing list