[llvm] ad70d5f - [X86] Fix SLM v2f64 ADD/MUL + FP BLEND/HADD instruction schedules

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 6 11:08:37 PST 2019


Author: Simon Pilgrim
Date: 2019-11-06T19:08:15Z
New Revision: ad70d5f39ae99d9f5be582ad8979830f588e6802

URL: https://github.com/llvm/llvm-project/commit/ad70d5f39ae99d9f5be582ad8979830f588e6802
DIFF: https://github.com/llvm/llvm-project/commit/ad70d5f39ae99d9f5be582ad8979830f588e6802.diff

LOG: [X86] Fix SLM v2f64 ADD/MUL + FP BLEND/HADD instruction schedules

Noticed while fixing the reduction costs for D59710: the SLM scheduling model doesn't account for the poor throughput of v2f64/v2i64 ops.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ScheduleSLM.td
    llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
    llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s
    llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 8e3ce721f1a1..84aac01ab381 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -202,8 +202,8 @@ defm : SLMWriteResPair<WriteFAddX,    [SLM_FPC_RSV1], 3>;
 defm : SLMWriteResPair<WriteFAddY,    [SLM_FPC_RSV1], 3>;
 defm : X86WriteResPairUnsupported<WriteFAddZ>;
 defm : SLMWriteResPair<WriteFAdd64,   [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteFAdd64X,  [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteFAdd64Y,  [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFAdd64X,  [SLM_FPC_RSV1], 4, [2]>;
+defm : SLMWriteResPair<WriteFAdd64Y,  [SLM_FPC_RSV1], 4, [2]>;
 defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
 defm : SLMWriteResPair<WriteFCmp,     [SLM_FPC_RSV1], 3>;
 defm : SLMWriteResPair<WriteFCmpX,    [SLM_FPC_RSV1], 3>;
@@ -219,8 +219,8 @@ defm : SLMWriteResPair<WriteFMulX,    [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>
 defm : SLMWriteResPair<WriteFMulY,    [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
 defm : X86WriteResPairUnsupported<WriteFMulZ>;
 defm : SLMWriteResPair<WriteFMul64,   [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
-defm : SLMWriteResPair<WriteFMul64X,  [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
-defm : SLMWriteResPair<WriteFMul64Y,  [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
+defm : SLMWriteResPair<WriteFMul64X,  [SLM_FPC_RSV0, SLMFPMultiplier], 7, [1,4]>;
+defm : SLMWriteResPair<WriteFMul64Y,  [SLM_FPC_RSV0, SLMFPMultiplier], 7, [1,4]>;
 defm : X86WriteResPairUnsupported<WriteFMul64Z>;
 defm : SLMWriteResPair<WriteFDiv,     [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
 defm : SLMWriteResPair<WriteFDivX,    [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
@@ -380,8 +380,8 @@ def  : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
 // Horizontal add/sub  instructions.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : SLMWriteResPair<WriteFHAdd,   [SLM_FPC_RSV01], 3, [2]>;
-defm : SLMWriteResPair<WriteFHAddY,  [SLM_FPC_RSV01], 3, [2]>;
+defm : SLMWriteResPair<WriteFHAdd,   [SLM_FPC_RSV01], 6, [6], 4>;
+defm : SLMWriteResPair<WriteFHAddY,  [SLM_FPC_RSV01], 6, [6], 4>;
 defm : X86WriteResPairUnsupported<WriteFHAddZ>;
 defm : SLMWriteResPair<WritePHAdd,   [SLM_FPC_RSV01], 1>;
 defm : SLMWriteResPair<WritePHAddX,  [SLM_FPC_RSV01], 1>;
@@ -486,7 +486,7 @@ defm : X86WriteResPairUnsupported<WriteFBlendZ>;
 defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
 defm : X86WriteResPairUnsupported<WriteVarBlendY>;
 defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
-defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 4, [4], 3>;
 defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
 defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
 defm : X86WriteResPairUnsupported<WriteFShuffle256>;

diff  --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
index 477a9ce97ece..92f33a208393 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
@@ -407,8 +407,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      3     1.00                        addpd	%xmm0, %xmm2
-# CHECK-NEXT:  1      6     1.00    *                   addpd	(%rax), %xmm2
+# CHECK-NEXT:  1      4     2.00                        addpd	%xmm0, %xmm2
+# CHECK-NEXT:  1      7     2.00    *                   addpd	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        addsd	%xmm0, %xmm2
 # CHECK-NEXT:  1      6     1.00    *                   addsd	(%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        andnpd	%xmm0, %xmm2
@@ -510,8 +510,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        movupd	%xmm0, %xmm2
 # CHECK-NEXT:  1      1     1.00           *            movupd	%xmm0, (%rax)
 # CHECK-NEXT:  1      3     1.00    *                   movupd	(%rax), %xmm2
-# CHECK-NEXT:  1      5     2.00                        mulpd	%xmm0, %xmm2
-# CHECK-NEXT:  1      8     2.00    *                   mulpd	(%rax), %xmm2
+# CHECK-NEXT:  1      7     4.00                        mulpd	%xmm0, %xmm2
+# CHECK-NEXT:  1      10    4.00    *                   mulpd	(%rax), %xmm2
 # CHECK-NEXT:  1      5     2.00                        mulsd	%xmm0, %xmm2
 # CHECK-NEXT:  1      8     2.00    *                   mulsd	(%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        orpd	%xmm0, %xmm2
@@ -662,8 +662,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      74    70.00   *                   sqrtpd	(%rax), %xmm2
 # CHECK-NEXT:  1      35    35.00                       sqrtsd	%xmm0, %xmm2
 # CHECK-NEXT:  1      38    35.00   *                   sqrtsd	(%rax), %xmm2
-# CHECK-NEXT:  1      3     1.00                        subpd	%xmm0, %xmm2
-# CHECK-NEXT:  1      6     1.00    *                   subpd	(%rax), %xmm2
+# CHECK-NEXT:  1      4     2.00                        subpd	%xmm0, %xmm2
+# CHECK-NEXT:  1      7     2.00    *                   subpd	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        subsd	%xmm0, %xmm2
 # CHECK-NEXT:  1      6     1.00    *                   subsd	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        ucomisd	%xmm0, %xmm1
@@ -687,12 +687,12 @@ xorpd       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
-# CHECK-NEXT:  -     412.00 8.00   152.50 86.50  3.00   3.00   134.00
+# CHECK-NEXT:  -     412.00 12.00  152.50 90.50  3.00   3.00   134.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     addpd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   addpd	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -      -     2.00    -      -      -     addpd	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     1.00   addpd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     addsd	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   addsd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     andnpd	%xmm0, %xmm2
@@ -794,8 +794,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     movupd	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00   movupd	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00   movupd	(%rax), %xmm2
-# CHECK-NEXT:  -      -     2.00   1.00    -      -      -      -     mulpd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -     2.00   1.00    -      -      -     1.00   mulpd	(%rax), %xmm2
+# CHECK-NEXT:  -      -     4.00   1.00    -      -      -      -     mulpd	%xmm0, %xmm2
+# CHECK-NEXT:  -      -     4.00   1.00    -      -      -     1.00   mulpd	(%rax), %xmm2
 # CHECK-NEXT:  -      -     2.00   1.00    -      -      -      -     mulsd	%xmm0, %xmm2
 # CHECK-NEXT:  -      -     2.00   1.00    -      -      -     1.00   mulsd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     orpd	%xmm0, %xmm2
@@ -946,8 +946,8 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  -     70.00   -     1.00    -      -      -     1.00   sqrtpd	(%rax), %xmm2
 # CHECK-NEXT:  -     35.00   -     1.00    -      -      -      -     sqrtsd	%xmm0, %xmm2
 # CHECK-NEXT:  -     35.00   -     1.00    -      -      -     1.00   sqrtsd	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     subpd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   subpd	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -      -     2.00    -      -      -     subpd	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     1.00   subpd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     subsd	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   subsd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     ucomisd	%xmm0, %xmm1

diff  --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s
index 484c353b07bf..23949737b3ca 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s
@@ -43,18 +43,18 @@ mwait
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      3     1.00                        addsubpd	%xmm0, %xmm2
-# CHECK-NEXT:  1      6     1.00    *                   addsubpd	(%rax), %xmm2
+# CHECK-NEXT:  1      4     2.00                        addsubpd	%xmm0, %xmm2
+# CHECK-NEXT:  1      7     2.00    *                   addsubpd	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        addsubps	%xmm0, %xmm2
 # CHECK-NEXT:  1      6     1.00    *                   addsubps	(%rax), %xmm2
-# CHECK-NEXT:  1      3     1.00                        haddpd	%xmm0, %xmm2
-# CHECK-NEXT:  1      6     1.00    *                   haddpd	(%rax), %xmm2
-# CHECK-NEXT:  1      3     1.00                        haddps	%xmm0, %xmm2
-# CHECK-NEXT:  1      6     1.00    *                   haddps	(%rax), %xmm2
-# CHECK-NEXT:  1      3     1.00                        hsubpd	%xmm0, %xmm2
-# CHECK-NEXT:  1      6     1.00    *                   hsubpd	(%rax), %xmm2
-# CHECK-NEXT:  1      3     1.00                        hsubps	%xmm0, %xmm2
-# CHECK-NEXT:  1      6     1.00    *                   hsubps	(%rax), %xmm2
+# CHECK-NEXT:  4      6     3.00                        haddpd	%xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   haddpd	(%rax), %xmm2
+# CHECK-NEXT:  4      6     3.00                        haddps	%xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   haddps	(%rax), %xmm2
+# CHECK-NEXT:  4      6     3.00                        hsubpd	%xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   hsubpd	(%rax), %xmm2
+# CHECK-NEXT:  4      6     3.00                        hsubps	%xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   hsubps	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00    *                   lddqu	(%rax), %xmm2
 # CHECK-NEXT:  1      100   1.00                  U     monitor
 # CHECK-NEXT:  1      1     1.00                        movddup	%xmm0, %xmm2
@@ -77,22 +77,22 @@ mwait
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
-# CHECK-NEXT:  -      -      -     16.00  12.00   -      -     10.00
+# CHECK-NEXT:  -      -      -     32.00  30.00   -      -     10.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     addsubpd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   addsubpd	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -      -     2.00    -      -      -     addsubpd	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -      -     2.00    -      -     1.00   addsubpd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     addsubps	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   addsubps	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     1.00   1.00    -      -      -     haddpd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   1.00    -      -     1.00   haddpd	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     1.00   1.00    -      -      -     haddps	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   1.00    -      -     1.00   haddps	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     1.00   1.00    -      -      -     hsubpd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   1.00    -      -     1.00   hsubpd	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -     1.00   1.00    -      -      -     hsubps	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   1.00    -      -     1.00   hsubps	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     haddpd	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   haddpd	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     haddps	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   haddps	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     hsubpd	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   hsubpd	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     hsubps	%xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   hsubps	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00   lddqu	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     monitor
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     movddup	%xmm0, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
index ce74a7625e7a..0dc83a0fb627 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
@@ -159,10 +159,10 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00    *                   blendpd	$11, (%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        blendps	$11, %xmm0, %xmm2
 # CHECK-NEXT:  1      4     1.00    *                   blendps	$11, (%rax), %xmm2
-# CHECK-NEXT:  1      1     1.00                        blendvpd	%xmm0, %xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00    *                   blendvpd	%xmm0, (%rax), %xmm2
-# CHECK-NEXT:  1      1     1.00                        blendvps	%xmm0, %xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00    *                   blendvps	%xmm0, (%rax), %xmm2
+# CHECK-NEXT:  3      4     4.00                        blendvpd	%xmm0, %xmm0, %xmm2
+# CHECK-NEXT:  3      7     4.00    *                   blendvpd	%xmm0, (%rax), %xmm2
+# CHECK-NEXT:  3      4     4.00                        blendvps	%xmm0, %xmm0, %xmm2
+# CHECK-NEXT:  3      7     4.00    *                   blendvps	%xmm0, (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        dppd	$22, %xmm0, %xmm2
 # CHECK-NEXT:  1      6     1.00    *                   dppd	$22, (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        dpps	$22, %xmm0, %xmm2
@@ -264,7 +264,7 @@ roundss     $1, (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
-# CHECK-NEXT:  -      -      -     73.00  22.00   -      -     54.00
+# CHECK-NEXT:  -      -      -     85.00  22.00   -      -     54.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
@@ -272,10 +272,10 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   blendpd	$11, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     blendps	$11, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   blendps	$11, (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     blendvpd	%xmm0, %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   blendvpd	%xmm0, (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     blendvps	%xmm0, %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   blendvps	%xmm0, (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     4.00    -      -      -      -     blendvpd	%xmm0, %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     4.00    -      -      -     1.00   blendvpd	%xmm0, (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     4.00    -      -      -      -     blendvps	%xmm0, %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     4.00    -      -      -     1.00   blendvps	%xmm0, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     dppd	$22, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   dppd	$22, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     dpps	$22, %xmm0, %xmm2


        


More information about the llvm-commits mailing list