[llvm] 65ad09d - [X86][SLM] Fix DIVPD/DIVPS/RCPPS/RSQRTPS/SQRTPD/SQRTPS/DPPD/DPPS uops, latency and throughput

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 13 00:52:24 PDT 2021


Author: Simon Pilgrim
Date: 2021-09-13T08:36:43+01:00
New Revision: 65ad09da0ea7b947ce4bfa96dee00a53230f4cf9

URL: https://github.com/llvm/llvm-project/commit/65ad09da0ea7b947ce4bfa96dee00a53230f4cf9
DIFF: https://github.com/llvm/llvm-project/commit/65ad09da0ea7b947ce4bfa96dee00a53230f4cf9.diff

LOG: [X86][SLM] Fix DIVPD/DIVPS/RCPPS/RSQRTPS/SQRTPD/SQRTPS/DPPD/DPPS uops, latency and throughput

The packed variants of these instructions were previously modelled identically to the scalar variants.

The discrepancy was reported during a run of llvm-exegesis on a cheap SLM box, and the updated values also match what Agner / InstLatX64 report.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ScheduleSLM.td
    llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s
    llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
    llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 36d0cadf62962..b0a2c75801c74 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -233,33 +233,33 @@ defm : X86WriteResPairUnsupported<WriteFMAX>;
 defm : X86WriteResPairUnsupported<WriteFMAY>;
 defm : X86WriteResPairUnsupported<WriteFMAZ>;
 defm : SLMWriteResPair<WriteFDiv,     [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
-defm : SLMWriteResPair<WriteFDivX,    [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
-defm : SLMWriteResPair<WriteFDivY,    [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
+defm : SLMWriteResPair<WriteFDivX,    [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39], 6, 1>;
+defm : X86WriteResPairUnsupported<WriteFDivY>;
 defm : X86WriteResPairUnsupported<WriteFDivZ>;
 defm : SLMWriteResPair<WriteFDiv64,   [SLM_FPC_RSV0, SLMFPDivider], 34, [1,32]>;
-defm : SLMWriteResPair<WriteFDiv64X,  [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
-defm : SLMWriteResPair<WriteFDiv64Y,  [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
+defm : SLMWriteResPair<WriteFDiv64X,  [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69], 6, 1>;
+defm : X86WriteResPairUnsupported<WriteFDiv64Y>;
 defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
-defm : SLMWriteResPair<WriteFRcp,     [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRcpX,    [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRcpY,    [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFRcp,     [SLM_FPC_RSV0], 4>;
+defm : SLMWriteResPair<WriteFRcpX,    [SLM_FPC_RSV0], 9, [8], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFRcpY>;
 defm : X86WriteResPairUnsupported<WriteFRcpZ>;
-defm : SLMWriteResPair<WriteFRsqrt,   [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRsqrtX,  [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRsqrtY,  [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFRsqrt,   [SLM_FPC_RSV0], 4>;
+defm : SLMWriteResPair<WriteFRsqrtX,  [SLM_FPC_RSV0], 9, [8], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFRsqrtY>;
 defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
-defm : SLMWriteResPair<WriteFSqrt,    [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20]>;
-defm : SLMWriteResPair<WriteFSqrtX,   [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40]>;
-defm : SLMWriteResPair<WriteFSqrtY,   [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40]>;
+defm : SLMWriteResPair<WriteFSqrt,    [SLM_FPC_RSV0, SLMFPDivider], 20, [1,20]>;
+defm : SLMWriteResPair<WriteFSqrtX,   [SLM_FPC_RSV0, SLMFPDivider], 41, [1,40], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFSqrtY>;
 defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
-defm : SLMWriteResPair<WriteFSqrt64,  [SLM_FPC_RSV0,SLMFPDivider], 35, [1,35]>;
-defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70]>;
-defm : SLMWriteResPair<WriteFSqrt64Y, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70]>;
+defm : SLMWriteResPair<WriteFSqrt64,  [SLM_FPC_RSV0, SLMFPDivider], 35, [1,35]>;
+defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0, SLMFPDivider], 71, [1,70], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFSqrt64Y>;
 defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
 defm : SLMWriteResPair<WriteFSqrt80,  [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
-defm : SLMWriteResPair<WriteDPPD,   [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteDPPS,   [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteDPPSY,  [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteDPPD,   [SLM_FPC_RSV1], 12,  [8], 5, 1>;
+defm : SLMWriteResPair<WriteDPPS,   [SLM_FPC_RSV1], 15, [12], 9, 1>;
+defm : X86WriteResPairUnsupported<WriteDPPSY>;
 defm : X86WriteResPairUnsupported<WriteDPPSZ>;
 defm : SLMWriteResPair<WriteFSign,  [SLM_FPC_RSV01], 1>;
 defm : SLMWriteResPair<WriteFRnd,   [SLM_FPC_RSV1], 3>;

diff  --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s
index c9a32714d7541..8c0023ef8d0fb 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse1.s
@@ -226,8 +226,8 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  1      5     1.00                        cvttss2si	%xmm0, %rcx
 # CHECK-NEXT:  1      8     1.00    *                   cvttss2si	(%rax), %ecx
 # CHECK-NEXT:  1      8     1.00    *                   cvttss2si	(%rax), %rcx
-# CHECK-NEXT:  1      39    39.00                       divps	%xmm0, %xmm2
-# CHECK-NEXT:  1      42    39.00   *                   divps	(%rax), %xmm2
+# CHECK-NEXT:  6      39    39.00                       divps	%xmm0, %xmm2
+# CHECK-NEXT:  7      42    39.00   *                   divps	(%rax), %xmm2
 # CHECK-NEXT:  1      19    17.00                       divss	%xmm0, %xmm2
 # CHECK-NEXT:  1      22    17.00   *                   divss	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00    *      *      U     ldmxcsr	(%rax)
@@ -290,19 +290,19 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  1      7     1.00    *                   psadbw	(%rax), %mm2
 # CHECK-NEXT:  1      1     1.00                        pshufw	$1, %mm0, %mm2
 # CHECK-NEXT:  1      4     1.00    *                   pshufw	$1, (%rax), %mm2
-# CHECK-NEXT:  1      5     1.00                        rcpps	%xmm0, %xmm2
-# CHECK-NEXT:  1      8     1.00    *                   rcpps	(%rax), %xmm2
-# CHECK-NEXT:  1      5     1.00                        rcpss	%xmm0, %xmm2
-# CHECK-NEXT:  1      8     1.00    *                   rcpss	(%rax), %xmm2
-# CHECK-NEXT:  1      5     1.00                        rsqrtps	%xmm0, %xmm2
-# CHECK-NEXT:  1      8     1.00    *                   rsqrtps	(%rax), %xmm2
-# CHECK-NEXT:  1      5     1.00                        rsqrtss	%xmm0, %xmm2
-# CHECK-NEXT:  1      8     1.00    *                   rsqrtss	(%rax), %xmm2
+# CHECK-NEXT:  5      9     8.00                        rcpps	%xmm0, %xmm2
+# CHECK-NEXT:  6      12    8.00    *                   rcpps	(%rax), %xmm2
+# CHECK-NEXT:  1      4     1.00                        rcpss	%xmm0, %xmm2
+# CHECK-NEXT:  1      7     1.00    *                   rcpss	(%rax), %xmm2
+# CHECK-NEXT:  5      9     8.00                        rsqrtps	%xmm0, %xmm2
+# CHECK-NEXT:  6      12    8.00    *                   rsqrtps	(%rax), %xmm2
+# CHECK-NEXT:  1      4     1.00                        rsqrtss	%xmm0, %xmm2
+# CHECK-NEXT:  1      7     1.00    *                   rsqrtss	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00    *      *      U     sfence
 # CHECK-NEXT:  1      1     1.00                        shufps	$1, %xmm0, %xmm2
 # CHECK-NEXT:  1      4     1.00    *                   shufps	$1, (%rax), %xmm2
-# CHECK-NEXT:  1      41    40.00                       sqrtps	%xmm0, %xmm2
-# CHECK-NEXT:  1      44    40.00   *                   sqrtps	(%rax), %xmm2
+# CHECK-NEXT:  5      41    40.00                       sqrtps	%xmm0, %xmm2
+# CHECK-NEXT:  6      44    40.00   *                   sqrtps	(%rax), %xmm2
 # CHECK-NEXT:  1      20    20.00                       sqrtss	%xmm0, %xmm2
 # CHECK-NEXT:  1      23    20.00   *                   sqrtss	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00           *      U     stmxcsr	(%rax)
@@ -331,7 +331,7 @@ xorps       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
-# CHECK-NEXT:  -     232.00 8.00   80.00  37.00  0.50   0.50   67.00
+# CHECK-NEXT:  -     232.00 8.00   108.00 37.00  0.50   0.50   67.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
@@ -431,12 +431,12 @@ xorps       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   psadbw	(%rax), %mm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     pshufw	$1, %mm0, %mm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   pshufw	$1, (%rax), %mm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     rcpps	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   rcpps	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     8.00    -      -      -      -     rcpps	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     8.00    -      -      -     1.00   rcpps	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     rcpss	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   rcpss	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     rsqrtps	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   rsqrtps	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     8.00    -      -      -      -     rsqrtps	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     8.00    -      -      -     1.00   rsqrtps	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     rsqrtss	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   rsqrtss	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00   sfence

diff  --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
index e00613e7f82b0..6c88d2bdc977f 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
@@ -460,8 +460,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      5     1.00                        cvttsd2si	%xmm0, %rcx
 # CHECK-NEXT:  1      8     1.00    *                   cvttsd2si	(%rax), %ecx
 # CHECK-NEXT:  1      8     1.00    *                   cvttsd2si	(%rax), %rcx
-# CHECK-NEXT:  1      69    69.00                       divpd	%xmm0, %xmm2
-# CHECK-NEXT:  1      72    69.00   *                   divpd	(%rax), %xmm2
+# CHECK-NEXT:  6      69    69.00                       divpd	%xmm0, %xmm2
+# CHECK-NEXT:  7      72    69.00   *                   divpd	(%rax), %xmm2
 # CHECK-NEXT:  1      34    32.00                       divsd	%xmm0, %xmm2
 # CHECK-NEXT:  1      37    32.00   *                   divsd	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00    *      *      U     lfence
@@ -658,8 +658,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00    *                   pxor	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        shufpd	$1, %xmm0, %xmm2
 # CHECK-NEXT:  1      4     1.00    *                   shufpd	$1, (%rax), %xmm2
-# CHECK-NEXT:  1      71    70.00                       sqrtpd	%xmm0, %xmm2
-# CHECK-NEXT:  1      74    70.00   *                   sqrtpd	(%rax), %xmm2
+# CHECK-NEXT:  5      71    70.00                       sqrtpd	%xmm0, %xmm2
+# CHECK-NEXT:  6      74    70.00   *                   sqrtpd	(%rax), %xmm2
 # CHECK-NEXT:  1      35    35.00                       sqrtsd	%xmm0, %xmm2
 # CHECK-NEXT:  1      38    35.00   *                   sqrtsd	(%rax), %xmm2
 # CHECK-NEXT:  1      4     2.00                        subpd	%xmm0, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
index 6fb8d19d1fa97..3779e5a7895bc 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
@@ -163,10 +163,10 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  3      7     4.00    *                   blendvpd	%xmm0, (%rax), %xmm2
 # CHECK-NEXT:  2      4     4.00                        blendvps	%xmm0, %xmm0, %xmm2
 # CHECK-NEXT:  3      7     4.00    *                   blendvps	%xmm0, (%rax), %xmm2
-# CHECK-NEXT:  1      3     1.00                        dppd	$22, %xmm0, %xmm2
-# CHECK-NEXT:  1      6     1.00    *                   dppd	$22, (%rax), %xmm2
-# CHECK-NEXT:  1      3     1.00                        dpps	$22, %xmm0, %xmm2
-# CHECK-NEXT:  1      6     1.00    *                   dpps	$22, (%rax), %xmm2
+# CHECK-NEXT:  5      12    8.00                        dppd	$22, %xmm0, %xmm2
+# CHECK-NEXT:  6      15    8.00    *                   dppd	$22, (%rax), %xmm2
+# CHECK-NEXT:  9      15    12.00                       dpps	$22, %xmm0, %xmm2
+# CHECK-NEXT:  10     18    12.00   *                   dpps	$22, (%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        extractps	$1, %xmm0, %ecx
 # CHECK-NEXT:  2      4     2.00           *            extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  1      1     1.00                        insertps	$1, %xmm0, %xmm2
@@ -264,7 +264,7 @@ roundss     $1, (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
-# CHECK-NEXT:  -      -      -     104.00 25.00   -      -     54.00
+# CHECK-NEXT:  -      -      -     104.00 61.00   -      -     54.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
@@ -276,10 +276,10 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     4.00    -      -      -     1.00   blendvpd	%xmm0, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     4.00    -      -      -      -     blendvps	%xmm0, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     4.00    -      -      -     1.00   blendvps	%xmm0, (%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     dppd	$22, %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   dppd	$22, (%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     dpps	$22, %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   dpps	$22, (%rax), %xmm2
+# CHECK-NEXT:  -      -      -      -     8.00    -      -      -     dppd	$22, %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -      -     8.00    -      -     1.00   dppd	$22, (%rax), %xmm2
+# CHECK-NEXT:  -      -      -      -     12.00   -      -      -     dpps	$22, %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -      -     12.00   -      -     1.00   dpps	$22, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     extractps	$1, %xmm0, %ecx
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     2.00   extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     insertps	$1, %xmm0, %xmm2


        


More information about the llvm-commits mailing list