[llvm] 11b0568 - [X86] AMD Zen 3: same-reg SBB is a dependency-breaking instruction

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Sun May 9 14:04:10 PDT 2021


Author: Roman Lebedev
Date: 2021-05-10T00:03:20+03:00
New Revision: 11b0568dce5a72d45780d07398650693537bfa67

URL: https://github.com/llvm/llvm-project/commit/11b0568dce5a72d45780d07398650693537bfa67
DIFF: https://github.com/llvm/llvm-project/commit/11b0568dce5a72d45780d07398650693537bfa67.diff

LOG: [X86] AMD Zen 3: same-reg SBB is a dependency-breaking instruction

As confirmed by llvm-exegesis measurements and the reference documentation.
Note that the instruction does still execute; it only breaks the dependency
on its source register.

While there, bump the latency of MULX32rr from 3 to 4 cycles, which seems to match measurements.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ScheduleZnver3.td
    llvm/test/tools/llvm-mca/X86/Znver3/dependency-breaking-gpr.s
    llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td
index f4a0328a542f..38d359e0cf8b 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver3.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td
@@ -611,7 +611,7 @@ defm : Zn3WriteResIntPair<WriteIMul16Reg, [Zn3Multiplier], 3, [1], 1>; // Intege
 defm : Zn3WriteResIntPair<WriteIMul32, [Zn3Multiplier], 3, [3], 2>;    // Integer 32-bit multiplication.
 
 def Zn3MULX32rr : SchedWriteRes<[Zn3Multiplier]> {
-  let Latency = 3;
+  let Latency = 4;
   let ResourceCycles = [1];
   let NumMicroOps = 2;
 }
@@ -1529,4 +1529,10 @@ def : IsZeroIdiomFunction<[
                      SUB64rr, SUB64rr_REV ], ZeroIdiomPredicate>,
 ]>;
 
+def : IsDepBreakingFunction<[
+  // GPR
+  DepBreakingClass<[ SBB32rr, SBB32rr_REV,
+                     SBB64rr, SBB64rr_REV ], ZeroIdiomPredicate>,
+]>;
+
 } // SchedModel

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver3/dependency-breaking-gpr.s b/llvm/test/tools/llvm-mca/X86/Znver3/dependency-breaking-gpr.s
index bcf2435ef19c..d74a52d6fa88 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver3/dependency-breaking-gpr.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver3/dependency-breaking-gpr.s
@@ -15,12 +15,12 @@ mulxq %rax, %rax, %rax
 
 # CHECK:      Iterations:        1000
 # CHECK-NEXT: Instructions:      2000
-# CHECK-NEXT: Total Cycles:      4003
+# CHECK-NEXT: Total Cycles:      1259
 # CHECK-NEXT: Total uOps:        3000
 
 # CHECK:      Dispatch Width:    6
-# CHECK-NEXT: uOps Per Cycle:    0.75
-# CHECK-NEXT: IPC:               0.50
+# CHECK-NEXT: uOps Per Cycle:    2.38
+# CHECK-NEXT: IPC:               1.59
 # CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK:      Instruction Info:
@@ -33,7 +33,7 @@ mulxq %rax, %rax, %rax
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      1     1.00                        sbbl	%eax, %eax
-# CHECK-NEXT:  2      3     1.00                        mulxl	%eax, %eax, %eax
+# CHECK-NEXT:  2      4     1.00                        mulxl	%eax, %eax, %eax
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - Zn3AGU0
@@ -62,21 +62,20 @@ mulxq %rax, %rax, %rax
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT:  -      -      -     1.33   1.00   1.33   1.34    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+# CHECK-NEXT:  -      -      -     1.25   1.25   1.25   1.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT:  -      -      -     1.33    -     1.33   1.34    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sbbl	%eax, %eax
+# CHECK-NEXT:  -      -      -     1.25   0.25   1.25   1.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sbbl	%eax, %eax
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mulxl	%eax, %eax, %eax
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0
-# CHECK-NEXT: Index     0123456789
+# CHECK-NEXT: Index     012345678
 
-# CHECK:      [0,0]     DeER .    .   sbbl	%eax, %eax
-# CHECK-NEXT: [0,1]     D=eeeER   .   mulxl	%eax, %eax, %eax
-# CHECK-NEXT: [1,0]     D====eER  .   sbbl	%eax, %eax
-# CHECK-NEXT: [1,1]     D=====eeeER   mulxl	%eax, %eax, %eax
+# CHECK:      [0,0]     DeER .  .   sbbl	%eax, %eax
+# CHECK-NEXT: [0,1]     D=eeeeER.   mulxl	%eax, %eax, %eax
+# CHECK-NEXT: [1,0]     D=eE---R.   sbbl	%eax, %eax
+# CHECK-NEXT: [1,1]     D==eeeeER   mulxl	%eax, %eax, %eax
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -85,20 +84,20 @@ mulxq %rax, %rax, %rax
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     2     3.0    0.5    0.0       sbbl	%eax, %eax
-# CHECK-NEXT: 1.     2     4.0    0.0    0.0       mulxl	%eax, %eax, %eax
-# CHECK-NEXT:        2     3.5    0.3    0.0       <total>
+# CHECK-NEXT: 0.     2     1.5    0.5    1.5       sbbl	%eax, %eax
+# CHECK-NEXT: 1.     2     2.5    0.0    0.0       mulxl	%eax, %eax, %eax
+# CHECK-NEXT:        2     2.0    0.3    0.8       <total>
 
 # CHECK:      [1] Code Region
 
 # CHECK:      Iterations:        1000
 # CHECK-NEXT: Instructions:      2000
-# CHECK-NEXT: Total Cycles:      5003
+# CHECK-NEXT: Total Cycles:      1259
 # CHECK-NEXT: Total uOps:        3000
 
 # CHECK:      Dispatch Width:    6
-# CHECK-NEXT: uOps Per Cycle:    0.60
-# CHECK-NEXT: IPC:               0.40
+# CHECK-NEXT: uOps Per Cycle:    2.38
+# CHECK-NEXT: IPC:               1.59
 # CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK:      Instruction Info:
@@ -140,21 +139,20 @@ mulxq %rax, %rax, %rax
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT:  -      -      -     1.33   1.00   1.33   1.34    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+# CHECK-NEXT:  -      -      -     1.25   1.25   1.25   1.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT:  -      -      -     1.33    -     1.33   1.34    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sbbq	%rax, %rax
+# CHECK-NEXT:  -      -      -     1.25   0.25   1.25   1.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sbbq	%rax, %rax
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mulxq	%rax, %rax, %rax
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     012
-# CHECK-NEXT: Index     0123456789
+# CHECK-NEXT: Index     012345678
 
-# CHECK:      [0,0]     DeER .    . .   sbbq	%rax, %rax
-# CHECK-NEXT: [0,1]     D=eeeeER  . .   mulxq	%rax, %rax, %rax
-# CHECK-NEXT: [1,0]     D=====eER . .   sbbq	%rax, %rax
-# CHECK-NEXT: [1,1]     D======eeeeER   mulxq	%rax, %rax, %rax
+# CHECK:      [0,0]     DeER .  .   sbbq	%rax, %rax
+# CHECK-NEXT: [0,1]     D=eeeeER.   mulxq	%rax, %rax, %rax
+# CHECK-NEXT: [1,0]     D=eE---R.   sbbq	%rax, %rax
+# CHECK-NEXT: [1,1]     D==eeeeER   mulxq	%rax, %rax, %rax
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -163,6 +161,6 @@ mulxq %rax, %rax, %rax
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     2     3.5    0.5    0.0       sbbq	%rax, %rax
-# CHECK-NEXT: 1.     2     4.5    0.0    0.0       mulxq	%rax, %rax, %rax
-# CHECK-NEXT:        2     4.0    0.3    0.0       <total>
+# CHECK-NEXT: 0.     2     1.5    0.5    1.5       sbbq	%rax, %rax
+# CHECK-NEXT: 1.     2     2.5    0.0    0.0       mulxq	%rax, %rax, %rax
+# CHECK-NEXT:        2     2.0    0.3    0.8       <total>

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s
index 5a92920952b3..1c2ccfc7cf83 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s
@@ -62,8 +62,8 @@ shrx        %rax, (%rbx), %rcx
 # CHECK-NEXT:  2      5     0.50    *                   bzhil	%eax, (%rbx), %ecx
 # CHECK-NEXT:  1      1     0.50                        bzhiq	%rax, %rbx, %rcx
 # CHECK-NEXT:  2      5     0.50    *                   bzhiq	%rax, (%rbx), %rcx
-# CHECK-NEXT:  2      3     1.00                        mulxl	%eax, %ebx, %ecx
-# CHECK-NEXT:  2      7     2.00    *                   mulxl	(%rax), %ebx, %ecx
+# CHECK-NEXT:  2      4     1.00                        mulxl	%eax, %ebx, %ecx
+# CHECK-NEXT:  2      8     2.00    *                   mulxl	(%rax), %ebx, %ecx
 # CHECK-NEXT:  2      4     1.00                        mulxq	%rax, %rbx, %rcx
 # CHECK-NEXT:  2      8     2.00    *                   mulxq	(%rax), %rbx, %rcx
 # CHECK-NEXT:  1      3     1.00                        pdepl	%eax, %ebx, %ecx


        


More information about the llvm-commits mailing list