[llvm] eed8552 - [X86] AMD Zen 3: same-register XOR/SUB are GPR dependency breaking zero-idioms

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Sun May 9 14:04:06 PDT 2021


Author: Roman Lebedev
Date: 2021-05-10T00:03:20+03:00
New Revision: eed8552787d8e2e7c4fd257a8b5ddd78682a55fa

URL: https://github.com/llvm/llvm-project/commit/eed8552787d8e2e7c4fd257a8b5ddd78682a55fa
DIFF: https://github.com/llvm/llvm-project/commit/eed8552787d8e2e7c4fd257a8b5ddd78682a55fa.diff

LOG: [X86] AMD Zen 3: same-register XOR/SUB are GPR dependency breaking zero-idioms

As measured with llvm-exegesis and confirmed by the reference documentation.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ScheduleZnver3.td
    llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td
index e3413c1d0912..f4a0328a542f 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver3.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td
@@ -1443,12 +1443,12 @@ defm : Zn3WriteResInt<WriteNop, [Zn3ALU0123], 0, [1], 1>; // FIXME: latency not
 // Zero Cycle Move
 ///////////////////////////////////////////////////////////////////////////////
 
-def Zn3WriteMoveRenameable : SchedWriteRes<[]> {
+def Zn3WriteZeroLatency : SchedWriteRes<[]> {
   let Latency = 0;
   let ResourceCycles = [];
   let NumMicroOps = 1;
 }
-def : InstRW<[Zn3WriteMoveRenameable], (instrs MOV32rr, MOV32rr_REV,
+def : InstRW<[Zn3WriteZeroLatency], (instrs MOV32rr, MOV32rr_REV,
                                                MOV64rr, MOV64rr_REV,
                                                MOVSX32rr32)>;
 
@@ -1508,4 +1508,25 @@ def : IsOptimizableRegisterMove<[
   ], TruePred >
 ]>;
 
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def Zn3WriteZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteALU]>
+]>;
+def : InstRW<[Zn3WriteZeroIdiom], (instrs XOR32rr, XOR32rr_REV,
+                                          XOR64rr, XOR64rr_REV,
+                                          SUB32rr, SUB32rr_REV,
+                                          SUB64rr, SUB64rr_REV)>;
+
+def : IsZeroIdiomFunction<[
+  // GPR Zero-idioms.
+  DepBreakingClass<[ XOR32rr, XOR32rr_REV,
+                     XOR64rr, XOR64rr_REV,
+                     SUB32rr, SUB32rr_REV,
+                     SUB64rr, SUB64rr_REV ], ZeroIdiomPredicate>,
+]>;
+
 } // SchedModel

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s
index 875a267e8a1b..018adc261b08 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
-# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -timeline -register-file-stats -iterations=500 < %s | FileCheck %s
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -timeline -register-file-stats -iterations=1000 < %s | FileCheck %s
 
 # LLVM-MCA-BEGIN
 xorl %eax, %eax
@@ -23,15 +23,15 @@ addq %rax, %rax
 
 # CHECK:      [0] Code Region
 
-# CHECK:      Iterations:        500
-# CHECK-NEXT: Instructions:      1000
-# CHECK-NEXT: Total Cycles:      1003
-# CHECK-NEXT: Total uOps:        1000
+# CHECK:      Iterations:        1000
+# CHECK-NEXT: Instructions:      2000
+# CHECK-NEXT: Total Cycles:      337
+# CHECK-NEXT: Total uOps:        2000
 
 # CHECK:      Dispatch Width:    6
-# CHECK-NEXT: uOps Per Cycle:    1.00
-# CHECK-NEXT: IPC:               1.00
-# CHECK-NEXT: Block RThroughput: 0.5
+# CHECK-NEXT: uOps Per Cycle:    5.93
+# CHECK-NEXT: IPC:               5.93
+# CHECK-NEXT: Block RThroughput: 0.3
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps
@@ -42,12 +42,12 @@ addq %rax, %rax
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      1     0.25                        xorl	%eax, %eax
+# CHECK-NEXT:  1      0     0.17                        xorl	%eax, %eax
 # CHECK-NEXT:  1      1     0.25                        addl	%eax, %eax
 
 # CHECK:      Register File statistics:
 # CHECK-NEXT: Total number of mappings created:    2000
-# CHECK-NEXT: Max number of mappings used:         192
+# CHECK-NEXT: Max number of mappings used:         18
 
 # CHECK:      *  Register File #1 -- Zn3FpPRF:
 # CHECK-NEXT:    Number of physical registers:     160
@@ -57,7 +57,7 @@ addq %rax, %rax
 # CHECK:      *  Register File #2 -- Zn3IntegerPRF:
 # CHECK-NEXT:    Number of physical registers:     192
 # CHECK-NEXT:    Total number of mappings created: 2000
-# CHECK-NEXT:    Max number of mappings used:      192
+# CHECK-NEXT:    Max number of mappings used:      18
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - Zn3AGU0
@@ -86,37 +86,36 @@ addq %rax, %rax
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+# CHECK-NEXT:  -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT:  -      -      -      -     0.50    -     0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     xorl	%eax, %eax
-# CHECK-NEXT:  -      -      -     0.50    -     0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     addl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     xorl	%eax, %eax
+# CHECK-NEXT:  -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     addl	%eax, %eax
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456789
-# CHECK-NEXT: Index     0123456789          012
-
-# CHECK:      [0,0]     DeER .    .    .    . .   xorl	%eax, %eax
-# CHECK-NEXT: [0,1]     D=eER.    .    .    . .   addl	%eax, %eax
-# CHECK-NEXT: [1,0]     D==eER    .    .    . .   xorl	%eax, %eax
-# CHECK-NEXT: [1,1]     D===eER   .    .    . .   addl	%eax, %eax
-# CHECK-NEXT: [2,0]     D====eER  .    .    . .   xorl	%eax, %eax
-# CHECK-NEXT: [2,1]     D=====eER .    .    . .   addl	%eax, %eax
-# CHECK-NEXT: [3,0]     .D=====eER.    .    . .   xorl	%eax, %eax
-# CHECK-NEXT: [3,1]     .D======eER    .    . .   addl	%eax, %eax
-# CHECK-NEXT: [4,0]     .D=======eER   .    . .   xorl	%eax, %eax
-# CHECK-NEXT: [4,1]     .D========eER  .    . .   addl	%eax, %eax
-# CHECK-NEXT: [5,0]     .D=========eER .    . .   xorl	%eax, %eax
-# CHECK-NEXT: [5,1]     .D==========eER.    . .   addl	%eax, %eax
-# CHECK-NEXT: [6,0]     . D==========eER    . .   xorl	%eax, %eax
-# CHECK-NEXT: [6,1]     . D===========eER   . .   addl	%eax, %eax
-# CHECK-NEXT: [7,0]     . D============eER  . .   xorl	%eax, %eax
-# CHECK-NEXT: [7,1]     . D=============eER . .   addl	%eax, %eax
-# CHECK-NEXT: [8,0]     . D==============eER. .   xorl	%eax, %eax
-# CHECK-NEXT: [8,1]     . D===============eER .   addl	%eax, %eax
-# CHECK-NEXT: [9,0]     .  D===============eER.   xorl	%eax, %eax
-# CHECK-NEXT: [9,1]     .  D================eER   addl	%eax, %eax
+# CHECK-NEXT: Index     0123456
+
+# CHECK:      [0,0]     DR   ..   xorl	%eax, %eax
+# CHECK-NEXT: [0,1]     DeER ..   addl	%eax, %eax
+# CHECK-NEXT: [1,0]     D--R ..   xorl	%eax, %eax
+# CHECK-NEXT: [1,1]     DeER ..   addl	%eax, %eax
+# CHECK-NEXT: [2,0]     D--R ..   xorl	%eax, %eax
+# CHECK-NEXT: [2,1]     DeER ..   addl	%eax, %eax
+# CHECK-NEXT: [3,0]     .D-R ..   xorl	%eax, %eax
+# CHECK-NEXT: [3,1]     .DeER..   addl	%eax, %eax
+# CHECK-NEXT: [4,0]     .D--R..   xorl	%eax, %eax
+# CHECK-NEXT: [4,1]     .DeER..   addl	%eax, %eax
+# CHECK-NEXT: [5,0]     .D--R..   xorl	%eax, %eax
+# CHECK-NEXT: [5,1]     .DeER..   addl	%eax, %eax
+# CHECK-NEXT: [6,0]     . D-R..   xorl	%eax, %eax
+# CHECK-NEXT: [6,1]     . DeER.   addl	%eax, %eax
+# CHECK-NEXT: [7,0]     . D--R.   xorl	%eax, %eax
+# CHECK-NEXT: [7,1]     . DeER.   addl	%eax, %eax
+# CHECK-NEXT: [8,0]     . D--R.   xorl	%eax, %eax
+# CHECK-NEXT: [8,1]     . DeER.   addl	%eax, %eax
+# CHECK-NEXT: [9,0]     .  D-R.   xorl	%eax, %eax
+# CHECK-NEXT: [9,1]     .  DeER   addl	%eax, %eax
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -125,21 +124,21 @@ addq %rax, %rax
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     10    8.8    0.1    0.0       xorl	%eax, %eax
-# CHECK-NEXT: 1.     10    9.8    0.0    0.0       addl	%eax, %eax
-# CHECK-NEXT:        10    9.3    0.1    0.0       <total>
+# CHECK-NEXT: 0.     10    0.0    0.0    1.5       xorl	%eax, %eax
+# CHECK-NEXT: 1.     10    1.0    1.0    0.0       addl	%eax, %eax
+# CHECK-NEXT:        10    0.5    0.5    0.8       <total>
 
 # CHECK:      [1] Code Region
 
-# CHECK:      Iterations:        500
-# CHECK-NEXT: Instructions:      1000
-# CHECK-NEXT: Total Cycles:      1003
-# CHECK-NEXT: Total uOps:        1000
+# CHECK:      Iterations:        1000
+# CHECK-NEXT: Instructions:      2000
+# CHECK-NEXT: Total Cycles:      337
+# CHECK-NEXT: Total uOps:        2000
 
 # CHECK:      Dispatch Width:    6
-# CHECK-NEXT: uOps Per Cycle:    1.00
-# CHECK-NEXT: IPC:               1.00
-# CHECK-NEXT: Block RThroughput: 0.5
+# CHECK-NEXT: uOps Per Cycle:    5.93
+# CHECK-NEXT: IPC:               5.93
+# CHECK-NEXT: Block RThroughput: 0.3
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps
@@ -150,12 +149,12 @@ addq %rax, %rax
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      1     0.25                        xorq	%rax, %rax
+# CHECK-NEXT:  1      0     0.17                        xorq	%rax, %rax
 # CHECK-NEXT:  1      1     0.25                        addq	%rax, %rax
 
 # CHECK:      Register File statistics:
 # CHECK-NEXT: Total number of mappings created:    2000
-# CHECK-NEXT: Max number of mappings used:         192
+# CHECK-NEXT: Max number of mappings used:         18
 
 # CHECK:      *  Register File #1 -- Zn3FpPRF:
 # CHECK-NEXT:    Number of physical registers:     160
@@ -165,7 +164,7 @@ addq %rax, %rax
 # CHECK:      *  Register File #2 -- Zn3IntegerPRF:
 # CHECK-NEXT:    Number of physical registers:     192
 # CHECK-NEXT:    Total number of mappings created: 2000
-# CHECK-NEXT:    Max number of mappings used:      192
+# CHECK-NEXT:    Max number of mappings used:      18
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - Zn3AGU0
@@ -194,37 +193,36 @@ addq %rax, %rax
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+# CHECK-NEXT:  -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT:  -      -      -      -     0.50    -     0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     xorq	%rax, %rax
-# CHECK-NEXT:  -      -      -     0.50    -     0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     addq	%rax, %rax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     xorq	%rax, %rax
+# CHECK-NEXT:  -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     addq	%rax, %rax
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456789
-# CHECK-NEXT: Index     0123456789          012
-
-# CHECK:      [0,0]     DeER .    .    .    . .   xorq	%rax, %rax
-# CHECK-NEXT: [0,1]     D=eER.    .    .    . .   addq	%rax, %rax
-# CHECK-NEXT: [1,0]     D==eER    .    .    . .   xorq	%rax, %rax
-# CHECK-NEXT: [1,1]     D===eER   .    .    . .   addq	%rax, %rax
-# CHECK-NEXT: [2,0]     D====eER  .    .    . .   xorq	%rax, %rax
-# CHECK-NEXT: [2,1]     D=====eER .    .    . .   addq	%rax, %rax
-# CHECK-NEXT: [3,0]     .D=====eER.    .    . .   xorq	%rax, %rax
-# CHECK-NEXT: [3,1]     .D======eER    .    . .   addq	%rax, %rax
-# CHECK-NEXT: [4,0]     .D=======eER   .    . .   xorq	%rax, %rax
-# CHECK-NEXT: [4,1]     .D========eER  .    . .   addq	%rax, %rax
-# CHECK-NEXT: [5,0]     .D=========eER .    . .   xorq	%rax, %rax
-# CHECK-NEXT: [5,1]     .D==========eER.    . .   addq	%rax, %rax
-# CHECK-NEXT: [6,0]     . D==========eER    . .   xorq	%rax, %rax
-# CHECK-NEXT: [6,1]     . D===========eER   . .   addq	%rax, %rax
-# CHECK-NEXT: [7,0]     . D============eER  . .   xorq	%rax, %rax
-# CHECK-NEXT: [7,1]     . D=============eER . .   addq	%rax, %rax
-# CHECK-NEXT: [8,0]     . D==============eER. .   xorq	%rax, %rax
-# CHECK-NEXT: [8,1]     . D===============eER .   addq	%rax, %rax
-# CHECK-NEXT: [9,0]     .  D===============eER.   xorq	%rax, %rax
-# CHECK-NEXT: [9,1]     .  D================eER   addq	%rax, %rax
+# CHECK-NEXT: Index     0123456
+
+# CHECK:      [0,0]     DR   ..   xorq	%rax, %rax
+# CHECK-NEXT: [0,1]     DeER ..   addq	%rax, %rax
+# CHECK-NEXT: [1,0]     D--R ..   xorq	%rax, %rax
+# CHECK-NEXT: [1,1]     DeER ..   addq	%rax, %rax
+# CHECK-NEXT: [2,0]     D--R ..   xorq	%rax, %rax
+# CHECK-NEXT: [2,1]     DeER ..   addq	%rax, %rax
+# CHECK-NEXT: [3,0]     .D-R ..   xorq	%rax, %rax
+# CHECK-NEXT: [3,1]     .DeER..   addq	%rax, %rax
+# CHECK-NEXT: [4,0]     .D--R..   xorq	%rax, %rax
+# CHECK-NEXT: [4,1]     .DeER..   addq	%rax, %rax
+# CHECK-NEXT: [5,0]     .D--R..   xorq	%rax, %rax
+# CHECK-NEXT: [5,1]     .DeER..   addq	%rax, %rax
+# CHECK-NEXT: [6,0]     . D-R..   xorq	%rax, %rax
+# CHECK-NEXT: [6,1]     . DeER.   addq	%rax, %rax
+# CHECK-NEXT: [7,0]     . D--R.   xorq	%rax, %rax
+# CHECK-NEXT: [7,1]     . DeER.   addq	%rax, %rax
+# CHECK-NEXT: [8,0]     . D--R.   xorq	%rax, %rax
+# CHECK-NEXT: [8,1]     . DeER.   addq	%rax, %rax
+# CHECK-NEXT: [9,0]     .  D-R.   xorq	%rax, %rax
+# CHECK-NEXT: [9,1]     .  DeER   addq	%rax, %rax
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -233,21 +231,21 @@ addq %rax, %rax
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     10    8.8    0.1    0.0       xorq	%rax, %rax
-# CHECK-NEXT: 1.     10    9.8    0.0    0.0       addq	%rax, %rax
-# CHECK-NEXT:        10    9.3    0.1    0.0       <total>
+# CHECK-NEXT: 0.     10    0.0    0.0    1.5       xorq	%rax, %rax
+# CHECK-NEXT: 1.     10    1.0    1.0    0.0       addq	%rax, %rax
+# CHECK-NEXT:        10    0.5    0.5    0.8       <total>
 
 # CHECK:      [2] Code Region
 
-# CHECK:      Iterations:        500
-# CHECK-NEXT: Instructions:      1000
-# CHECK-NEXT: Total Cycles:      1003
-# CHECK-NEXT: Total uOps:        1000
+# CHECK:      Iterations:        1000
+# CHECK-NEXT: Instructions:      2000
+# CHECK-NEXT: Total Cycles:      337
+# CHECK-NEXT: Total uOps:        2000
 
 # CHECK:      Dispatch Width:    6
-# CHECK-NEXT: uOps Per Cycle:    1.00
-# CHECK-NEXT: IPC:               1.00
-# CHECK-NEXT: Block RThroughput: 0.5
+# CHECK-NEXT: uOps Per Cycle:    5.93
+# CHECK-NEXT: IPC:               5.93
+# CHECK-NEXT: Block RThroughput: 0.3
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps
@@ -258,12 +256,12 @@ addq %rax, %rax
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      1     0.25                        subl	%eax, %eax
+# CHECK-NEXT:  1      0     0.17                        subl	%eax, %eax
 # CHECK-NEXT:  1      1     0.25                        addl	%eax, %eax
 
 # CHECK:      Register File statistics:
 # CHECK-NEXT: Total number of mappings created:    2000
-# CHECK-NEXT: Max number of mappings used:         192
+# CHECK-NEXT: Max number of mappings used:         18
 
 # CHECK:      *  Register File #1 -- Zn3FpPRF:
 # CHECK-NEXT:    Number of physical registers:     160
@@ -273,7 +271,7 @@ addq %rax, %rax
 # CHECK:      *  Register File #2 -- Zn3IntegerPRF:
 # CHECK-NEXT:    Number of physical registers:     192
 # CHECK-NEXT:    Total number of mappings created: 2000
-# CHECK-NEXT:    Max number of mappings used:      192
+# CHECK-NEXT:    Max number of mappings used:      18
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - Zn3AGU0
@@ -302,37 +300,36 @@ addq %rax, %rax
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+# CHECK-NEXT:  -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT:  -      -      -      -     0.50    -     0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     subl	%eax, %eax
-# CHECK-NEXT:  -      -      -     0.50    -     0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     addl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     subl	%eax, %eax
+# CHECK-NEXT:  -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     addl	%eax, %eax
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456789
-# CHECK-NEXT: Index     0123456789          012
-
-# CHECK:      [0,0]     DeER .    .    .    . .   subl	%eax, %eax
-# CHECK-NEXT: [0,1]     D=eER.    .    .    . .   addl	%eax, %eax
-# CHECK-NEXT: [1,0]     D==eER    .    .    . .   subl	%eax, %eax
-# CHECK-NEXT: [1,1]     D===eER   .    .    . .   addl	%eax, %eax
-# CHECK-NEXT: [2,0]     D====eER  .    .    . .   subl	%eax, %eax
-# CHECK-NEXT: [2,1]     D=====eER .    .    . .   addl	%eax, %eax
-# CHECK-NEXT: [3,0]     .D=====eER.    .    . .   subl	%eax, %eax
-# CHECK-NEXT: [3,1]     .D======eER    .    . .   addl	%eax, %eax
-# CHECK-NEXT: [4,0]     .D=======eER   .    . .   subl	%eax, %eax
-# CHECK-NEXT: [4,1]     .D========eER  .    . .   addl	%eax, %eax
-# CHECK-NEXT: [5,0]     .D=========eER .    . .   subl	%eax, %eax
-# CHECK-NEXT: [5,1]     .D==========eER.    . .   addl	%eax, %eax
-# CHECK-NEXT: [6,0]     . D==========eER    . .   subl	%eax, %eax
-# CHECK-NEXT: [6,1]     . D===========eER   . .   addl	%eax, %eax
-# CHECK-NEXT: [7,0]     . D============eER  . .   subl	%eax, %eax
-# CHECK-NEXT: [7,1]     . D=============eER . .   addl	%eax, %eax
-# CHECK-NEXT: [8,0]     . D==============eER. .   subl	%eax, %eax
-# CHECK-NEXT: [8,1]     . D===============eER .   addl	%eax, %eax
-# CHECK-NEXT: [9,0]     .  D===============eER.   subl	%eax, %eax
-# CHECK-NEXT: [9,1]     .  D================eER   addl	%eax, %eax
+# CHECK-NEXT: Index     0123456
+
+# CHECK:      [0,0]     DR   ..   subl	%eax, %eax
+# CHECK-NEXT: [0,1]     DeER ..   addl	%eax, %eax
+# CHECK-NEXT: [1,0]     D--R ..   subl	%eax, %eax
+# CHECK-NEXT: [1,1]     DeER ..   addl	%eax, %eax
+# CHECK-NEXT: [2,0]     D--R ..   subl	%eax, %eax
+# CHECK-NEXT: [2,1]     DeER ..   addl	%eax, %eax
+# CHECK-NEXT: [3,0]     .D-R ..   subl	%eax, %eax
+# CHECK-NEXT: [3,1]     .DeER..   addl	%eax, %eax
+# CHECK-NEXT: [4,0]     .D--R..   subl	%eax, %eax
+# CHECK-NEXT: [4,1]     .DeER..   addl	%eax, %eax
+# CHECK-NEXT: [5,0]     .D--R..   subl	%eax, %eax
+# CHECK-NEXT: [5,1]     .DeER..   addl	%eax, %eax
+# CHECK-NEXT: [6,0]     . D-R..   subl	%eax, %eax
+# CHECK-NEXT: [6,1]     . DeER.   addl	%eax, %eax
+# CHECK-NEXT: [7,0]     . D--R.   subl	%eax, %eax
+# CHECK-NEXT: [7,1]     . DeER.   addl	%eax, %eax
+# CHECK-NEXT: [8,0]     . D--R.   subl	%eax, %eax
+# CHECK-NEXT: [8,1]     . DeER.   addl	%eax, %eax
+# CHECK-NEXT: [9,0]     .  D-R.   subl	%eax, %eax
+# CHECK-NEXT: [9,1]     .  DeER   addl	%eax, %eax
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -341,21 +338,21 @@ addq %rax, %rax
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     10    8.8    0.1    0.0       subl	%eax, %eax
-# CHECK-NEXT: 1.     10    9.8    0.0    0.0       addl	%eax, %eax
-# CHECK-NEXT:        10    9.3    0.1    0.0       <total>
+# CHECK-NEXT: 0.     10    0.0    0.0    1.5       subl	%eax, %eax
+# CHECK-NEXT: 1.     10    1.0    1.0    0.0       addl	%eax, %eax
+# CHECK-NEXT:        10    0.5    0.5    0.8       <total>
 
 # CHECK:      [3] Code Region
 
-# CHECK:      Iterations:        500
-# CHECK-NEXT: Instructions:      1000
-# CHECK-NEXT: Total Cycles:      1003
-# CHECK-NEXT: Total uOps:        1000
+# CHECK:      Iterations:        1000
+# CHECK-NEXT: Instructions:      2000
+# CHECK-NEXT: Total Cycles:      337
+# CHECK-NEXT: Total uOps:        2000
 
 # CHECK:      Dispatch Width:    6
-# CHECK-NEXT: uOps Per Cycle:    1.00
-# CHECK-NEXT: IPC:               1.00
-# CHECK-NEXT: Block RThroughput: 0.5
+# CHECK-NEXT: uOps Per Cycle:    5.93
+# CHECK-NEXT: IPC:               5.93
+# CHECK-NEXT: Block RThroughput: 0.3
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps
@@ -366,12 +363,12 @@ addq %rax, %rax
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      1     0.25                        subq	%rax, %rax
+# CHECK-NEXT:  1      0     0.17                        subq	%rax, %rax
 # CHECK-NEXT:  1      1     0.25                        addq	%rax, %rax
 
 # CHECK:      Register File statistics:
 # CHECK-NEXT: Total number of mappings created:    2000
-# CHECK-NEXT: Max number of mappings used:         192
+# CHECK-NEXT: Max number of mappings used:         18
 
 # CHECK:      *  Register File #1 -- Zn3FpPRF:
 # CHECK-NEXT:    Number of physical registers:     160
@@ -381,7 +378,7 @@ addq %rax, %rax
 # CHECK:      *  Register File #2 -- Zn3IntegerPRF:
 # CHECK-NEXT:    Number of physical registers:     192
 # CHECK-NEXT:    Total number of mappings created: 2000
-# CHECK-NEXT:    Max number of mappings used:      192
+# CHECK-NEXT:    Max number of mappings used:      18
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - Zn3AGU0
@@ -410,37 +407,36 @@ addq %rax, %rax
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT:  -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+# CHECK-NEXT:  -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT:  -      -      -      -     0.50    -     0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     subq	%rax, %rax
-# CHECK-NEXT:  -      -      -     0.50    -     0.50    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     addq	%rax, %rax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     subq	%rax, %rax
+# CHECK-NEXT:  -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     addq	%rax, %rax
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456789
-# CHECK-NEXT: Index     0123456789          012
-
-# CHECK:      [0,0]     DeER .    .    .    . .   subq	%rax, %rax
-# CHECK-NEXT: [0,1]     D=eER.    .    .    . .   addq	%rax, %rax
-# CHECK-NEXT: [1,0]     D==eER    .    .    . .   subq	%rax, %rax
-# CHECK-NEXT: [1,1]     D===eER   .    .    . .   addq	%rax, %rax
-# CHECK-NEXT: [2,0]     D====eER  .    .    . .   subq	%rax, %rax
-# CHECK-NEXT: [2,1]     D=====eER .    .    . .   addq	%rax, %rax
-# CHECK-NEXT: [3,0]     .D=====eER.    .    . .   subq	%rax, %rax
-# CHECK-NEXT: [3,1]     .D======eER    .    . .   addq	%rax, %rax
-# CHECK-NEXT: [4,0]     .D=======eER   .    . .   subq	%rax, %rax
-# CHECK-NEXT: [4,1]     .D========eER  .    . .   addq	%rax, %rax
-# CHECK-NEXT: [5,0]     .D=========eER .    . .   subq	%rax, %rax
-# CHECK-NEXT: [5,1]     .D==========eER.    . .   addq	%rax, %rax
-# CHECK-NEXT: [6,0]     . D==========eER    . .   subq	%rax, %rax
-# CHECK-NEXT: [6,1]     . D===========eER   . .   addq	%rax, %rax
-# CHECK-NEXT: [7,0]     . D============eER  . .   subq	%rax, %rax
-# CHECK-NEXT: [7,1]     . D=============eER . .   addq	%rax, %rax
-# CHECK-NEXT: [8,0]     . D==============eER. .   subq	%rax, %rax
-# CHECK-NEXT: [8,1]     . D===============eER .   addq	%rax, %rax
-# CHECK-NEXT: [9,0]     .  D===============eER.   subq	%rax, %rax
-# CHECK-NEXT: [9,1]     .  D================eER   addq	%rax, %rax
+# CHECK-NEXT: Index     0123456
+
+# CHECK:      [0,0]     DR   ..   subq	%rax, %rax
+# CHECK-NEXT: [0,1]     DeER ..   addq	%rax, %rax
+# CHECK-NEXT: [1,0]     D--R ..   subq	%rax, %rax
+# CHECK-NEXT: [1,1]     DeER ..   addq	%rax, %rax
+# CHECK-NEXT: [2,0]     D--R ..   subq	%rax, %rax
+# CHECK-NEXT: [2,1]     DeER ..   addq	%rax, %rax
+# CHECK-NEXT: [3,0]     .D-R ..   subq	%rax, %rax
+# CHECK-NEXT: [3,1]     .DeER..   addq	%rax, %rax
+# CHECK-NEXT: [4,0]     .D--R..   subq	%rax, %rax
+# CHECK-NEXT: [4,1]     .DeER..   addq	%rax, %rax
+# CHECK-NEXT: [5,0]     .D--R..   subq	%rax, %rax
+# CHECK-NEXT: [5,1]     .DeER..   addq	%rax, %rax
+# CHECK-NEXT: [6,0]     . D-R..   subq	%rax, %rax
+# CHECK-NEXT: [6,1]     . DeER.   addq	%rax, %rax
+# CHECK-NEXT: [7,0]     . D--R.   subq	%rax, %rax
+# CHECK-NEXT: [7,1]     . DeER.   addq	%rax, %rax
+# CHECK-NEXT: [8,0]     . D--R.   subq	%rax, %rax
+# CHECK-NEXT: [8,1]     . DeER.   addq	%rax, %rax
+# CHECK-NEXT: [9,0]     .  D-R.   subq	%rax, %rax
+# CHECK-NEXT: [9,1]     .  DeER   addq	%rax, %rax
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -449,6 +445,6 @@ addq %rax, %rax
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     10    8.8    0.1    0.0       subq	%rax, %rax
-# CHECK-NEXT: 1.     10    9.8    0.0    0.0       addq	%rax, %rax
-# CHECK-NEXT:        10    9.3    0.1    0.0       <total>
+# CHECK-NEXT: 0.     10    0.0    0.0    1.5       subq	%rax, %rax
+# CHECK-NEXT: 1.     10    1.0    1.0    0.0       addq	%rax, %rax
+# CHECK-NEXT:        10    0.5    0.5    0.8       <total>


        


More information about the llvm-commits mailing list