[llvm] eed8552 - [X86] AMD Zen 3: same-register XOR/SUB are GPR dependency breaking zero-idioms
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Sun May 9 14:04:06 PDT 2021
Author: Roman Lebedev
Date: 2021-05-10T00:03:20+03:00
New Revision: eed8552787d8e2e7c4fd257a8b5ddd78682a55fa
URL: https://github.com/llvm/llvm-project/commit/eed8552787d8e2e7c4fd257a8b5ddd78682a55fa
DIFF: https://github.com/llvm/llvm-project/commit/eed8552787d8e2e7c4fd257a8b5ddd78682a55fa.diff
LOG: [X86] AMD Zen 3: same-register XOR/SUB are GPR dependency breaking zero-idioms
As measured by llvm-exegesis and confirmed in the reference documentation.
Added:
Modified:
llvm/lib/Target/X86/X86ScheduleZnver3.td
llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td
index e3413c1d0912..f4a0328a542f 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver3.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td
@@ -1443,12 +1443,12 @@ defm : Zn3WriteResInt<WriteNop, [Zn3ALU0123], 0, [1], 1>; // FIXME: latency not
// Zero Cycle Move
///////////////////////////////////////////////////////////////////////////////
-def Zn3WriteMoveRenameable : SchedWriteRes<[]> {
+def Zn3WriteZeroLatency : SchedWriteRes<[]> {
let Latency = 0;
let ResourceCycles = [];
let NumMicroOps = 1;
}
-def : InstRW<[Zn3WriteMoveRenameable], (instrs MOV32rr, MOV32rr_REV,
+def : InstRW<[Zn3WriteZeroLatency], (instrs MOV32rr, MOV32rr_REV,
MOV64rr, MOV64rr_REV,
MOVSX32rr32)>;
@@ -1508,4 +1508,25 @@ def : IsOptimizableRegisterMove<[
], TruePred >
]>;
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def Zn3WriteZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn3WriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+]>;
+def : InstRW<[Zn3WriteZeroIdiom], (instrs XOR32rr, XOR32rr_REV,
+ XOR64rr, XOR64rr_REV,
+ SUB32rr, SUB32rr_REV,
+ SUB64rr, SUB64rr_REV)>;
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ XOR32rr, XOR32rr_REV,
+ XOR64rr, XOR64rr_REV,
+ SUB32rr, SUB32rr_REV,
+ SUB64rr, SUB64rr_REV ], ZeroIdiomPredicate>,
+]>;
+
} // SchedModel
diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s
index 875a267e8a1b..018adc261b08 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
-# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -timeline -register-file-stats -iterations=500 < %s | FileCheck %s
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -timeline -register-file-stats -iterations=1000 < %s | FileCheck %s
# LLVM-MCA-BEGIN
xorl %eax, %eax
@@ -23,15 +23,15 @@ addq %rax, %rax
# CHECK: [0] Code Region
-# CHECK: Iterations: 500
-# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 1003
-# CHECK-NEXT: Total uOps: 1000
+# CHECK: Iterations: 1000
+# CHECK-NEXT: Instructions: 2000
+# CHECK-NEXT: Total Cycles: 337
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 6
-# CHECK-NEXT: uOps Per Cycle: 1.00
-# CHECK-NEXT: IPC: 1.00
-# CHECK-NEXT: Block RThroughput: 0.5
+# CHECK-NEXT: uOps Per Cycle: 5.93
+# CHECK-NEXT: IPC: 5.93
+# CHECK-NEXT: Block RThroughput: 0.3
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -42,12 +42,12 @@ addq %rax, %rax
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 1 0.25 xorl %eax, %eax
+# CHECK-NEXT: 1 0 0.17 xorl %eax, %eax
# CHECK-NEXT: 1 1 0.25 addl %eax, %eax
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 2000
-# CHECK-NEXT: Max number of mappings used: 192
+# CHECK-NEXT: Max number of mappings used: 18
# CHECK: * Register File #1 -- Zn3FpPRF:
# CHECK-NEXT: Number of physical registers: 160
@@ -57,7 +57,7 @@ addq %rax, %rax
# CHECK: * Register File #2 -- Zn3IntegerPRF:
# CHECK-NEXT: Number of physical registers: 192
# CHECK-NEXT: Total number of mappings created: 2000
-# CHECK-NEXT: Max number of mappings used: 192
+# CHECK-NEXT: Max number of mappings used: 18
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn3AGU0
@@ -86,37 +86,36 @@ addq %rax, %rax
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - - - - - -
+# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT: - - - - 0.50 - 0.50 - - - - - - - - - - - - - - - - xorl %eax, %eax
-# CHECK-NEXT: - - - 0.50 - 0.50 - - - - - - - - - - - - - - - - - addl %eax, %eax
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - xorl %eax, %eax
+# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addl %eax, %eax
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 012
-
-# CHECK: [0,0] DeER . . . . . xorl %eax, %eax
-# CHECK-NEXT: [0,1] D=eER. . . . . addl %eax, %eax
-# CHECK-NEXT: [1,0] D==eER . . . . xorl %eax, %eax
-# CHECK-NEXT: [1,1] D===eER . . . . addl %eax, %eax
-# CHECK-NEXT: [2,0] D====eER . . . . xorl %eax, %eax
-# CHECK-NEXT: [2,1] D=====eER . . . . addl %eax, %eax
-# CHECK-NEXT: [3,0] .D=====eER. . . . xorl %eax, %eax
-# CHECK-NEXT: [3,1] .D======eER . . . addl %eax, %eax
-# CHECK-NEXT: [4,0] .D=======eER . . . xorl %eax, %eax
-# CHECK-NEXT: [4,1] .D========eER . . . addl %eax, %eax
-# CHECK-NEXT: [5,0] .D=========eER . . . xorl %eax, %eax
-# CHECK-NEXT: [5,1] .D==========eER. . . addl %eax, %eax
-# CHECK-NEXT: [6,0] . D==========eER . . xorl %eax, %eax
-# CHECK-NEXT: [6,1] . D===========eER . . addl %eax, %eax
-# CHECK-NEXT: [7,0] . D============eER . . xorl %eax, %eax
-# CHECK-NEXT: [7,1] . D=============eER . . addl %eax, %eax
-# CHECK-NEXT: [8,0] . D==============eER. . xorl %eax, %eax
-# CHECK-NEXT: [8,1] . D===============eER . addl %eax, %eax
-# CHECK-NEXT: [9,0] . D===============eER. xorl %eax, %eax
-# CHECK-NEXT: [9,1] . D================eER addl %eax, %eax
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DR .. xorl %eax, %eax
+# CHECK-NEXT: [0,1] DeER .. addl %eax, %eax
+# CHECK-NEXT: [1,0] D--R .. xorl %eax, %eax
+# CHECK-NEXT: [1,1] DeER .. addl %eax, %eax
+# CHECK-NEXT: [2,0] D--R .. xorl %eax, %eax
+# CHECK-NEXT: [2,1] DeER .. addl %eax, %eax
+# CHECK-NEXT: [3,0] .D-R .. xorl %eax, %eax
+# CHECK-NEXT: [3,1] .DeER.. addl %eax, %eax
+# CHECK-NEXT: [4,0] .D--R.. xorl %eax, %eax
+# CHECK-NEXT: [4,1] .DeER.. addl %eax, %eax
+# CHECK-NEXT: [5,0] .D--R.. xorl %eax, %eax
+# CHECK-NEXT: [5,1] .DeER.. addl %eax, %eax
+# CHECK-NEXT: [6,0] . D-R.. xorl %eax, %eax
+# CHECK-NEXT: [6,1] . DeER. addl %eax, %eax
+# CHECK-NEXT: [7,0] . D--R. xorl %eax, %eax
+# CHECK-NEXT: [7,1] . DeER. addl %eax, %eax
+# CHECK-NEXT: [8,0] . D--R. xorl %eax, %eax
+# CHECK-NEXT: [8,1] . DeER. addl %eax, %eax
+# CHECK-NEXT: [9,0] . D-R. xorl %eax, %eax
+# CHECK-NEXT: [9,1] . DeER addl %eax, %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -125,21 +124,21 @@ addq %rax, %rax
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 10 8.8 0.1 0.0 xorl %eax, %eax
-# CHECK-NEXT: 1. 10 9.8 0.0 0.0 addl %eax, %eax
-# CHECK-NEXT: 10 9.3 0.1 0.0 <total>
+# CHECK-NEXT: 0. 10 0.0 0.0 1.5 xorl %eax, %eax
+# CHECK-NEXT: 1. 10 1.0 1.0 0.0 addl %eax, %eax
+# CHECK-NEXT: 10 0.5 0.5 0.8 <total>
# CHECK: [1] Code Region
-# CHECK: Iterations: 500
-# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 1003
-# CHECK-NEXT: Total uOps: 1000
+# CHECK: Iterations: 1000
+# CHECK-NEXT: Instructions: 2000
+# CHECK-NEXT: Total Cycles: 337
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 6
-# CHECK-NEXT: uOps Per Cycle: 1.00
-# CHECK-NEXT: IPC: 1.00
-# CHECK-NEXT: Block RThroughput: 0.5
+# CHECK-NEXT: uOps Per Cycle: 5.93
+# CHECK-NEXT: IPC: 5.93
+# CHECK-NEXT: Block RThroughput: 0.3
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -150,12 +149,12 @@ addq %rax, %rax
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 1 0.25 xorq %rax, %rax
+# CHECK-NEXT: 1 0 0.17 xorq %rax, %rax
# CHECK-NEXT: 1 1 0.25 addq %rax, %rax
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 2000
-# CHECK-NEXT: Max number of mappings used: 192
+# CHECK-NEXT: Max number of mappings used: 18
# CHECK: * Register File #1 -- Zn3FpPRF:
# CHECK-NEXT: Number of physical registers: 160
@@ -165,7 +164,7 @@ addq %rax, %rax
# CHECK: * Register File #2 -- Zn3IntegerPRF:
# CHECK-NEXT: Number of physical registers: 192
# CHECK-NEXT: Total number of mappings created: 2000
-# CHECK-NEXT: Max number of mappings used: 192
+# CHECK-NEXT: Max number of mappings used: 18
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn3AGU0
@@ -194,37 +193,36 @@ addq %rax, %rax
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - - - - - -
+# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT: - - - - 0.50 - 0.50 - - - - - - - - - - - - - - - - xorq %rax, %rax
-# CHECK-NEXT: - - - 0.50 - 0.50 - - - - - - - - - - - - - - - - - addq %rax, %rax
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - xorq %rax, %rax
+# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addq %rax, %rax
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 012
-
-# CHECK: [0,0] DeER . . . . . xorq %rax, %rax
-# CHECK-NEXT: [0,1] D=eER. . . . . addq %rax, %rax
-# CHECK-NEXT: [1,0] D==eER . . . . xorq %rax, %rax
-# CHECK-NEXT: [1,1] D===eER . . . . addq %rax, %rax
-# CHECK-NEXT: [2,0] D====eER . . . . xorq %rax, %rax
-# CHECK-NEXT: [2,1] D=====eER . . . . addq %rax, %rax
-# CHECK-NEXT: [3,0] .D=====eER. . . . xorq %rax, %rax
-# CHECK-NEXT: [3,1] .D======eER . . . addq %rax, %rax
-# CHECK-NEXT: [4,0] .D=======eER . . . xorq %rax, %rax
-# CHECK-NEXT: [4,1] .D========eER . . . addq %rax, %rax
-# CHECK-NEXT: [5,0] .D=========eER . . . xorq %rax, %rax
-# CHECK-NEXT: [5,1] .D==========eER. . . addq %rax, %rax
-# CHECK-NEXT: [6,0] . D==========eER . . xorq %rax, %rax
-# CHECK-NEXT: [6,1] . D===========eER . . addq %rax, %rax
-# CHECK-NEXT: [7,0] . D============eER . . xorq %rax, %rax
-# CHECK-NEXT: [7,1] . D=============eER . . addq %rax, %rax
-# CHECK-NEXT: [8,0] . D==============eER. . xorq %rax, %rax
-# CHECK-NEXT: [8,1] . D===============eER . addq %rax, %rax
-# CHECK-NEXT: [9,0] . D===============eER. xorq %rax, %rax
-# CHECK-NEXT: [9,1] . D================eER addq %rax, %rax
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DR .. xorq %rax, %rax
+# CHECK-NEXT: [0,1] DeER .. addq %rax, %rax
+# CHECK-NEXT: [1,0] D--R .. xorq %rax, %rax
+# CHECK-NEXT: [1,1] DeER .. addq %rax, %rax
+# CHECK-NEXT: [2,0] D--R .. xorq %rax, %rax
+# CHECK-NEXT: [2,1] DeER .. addq %rax, %rax
+# CHECK-NEXT: [3,0] .D-R .. xorq %rax, %rax
+# CHECK-NEXT: [3,1] .DeER.. addq %rax, %rax
+# CHECK-NEXT: [4,0] .D--R.. xorq %rax, %rax
+# CHECK-NEXT: [4,1] .DeER.. addq %rax, %rax
+# CHECK-NEXT: [5,0] .D--R.. xorq %rax, %rax
+# CHECK-NEXT: [5,1] .DeER.. addq %rax, %rax
+# CHECK-NEXT: [6,0] . D-R.. xorq %rax, %rax
+# CHECK-NEXT: [6,1] . DeER. addq %rax, %rax
+# CHECK-NEXT: [7,0] . D--R. xorq %rax, %rax
+# CHECK-NEXT: [7,1] . DeER. addq %rax, %rax
+# CHECK-NEXT: [8,0] . D--R. xorq %rax, %rax
+# CHECK-NEXT: [8,1] . DeER. addq %rax, %rax
+# CHECK-NEXT: [9,0] . D-R. xorq %rax, %rax
+# CHECK-NEXT: [9,1] . DeER addq %rax, %rax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -233,21 +231,21 @@ addq %rax, %rax
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 10 8.8 0.1 0.0 xorq %rax, %rax
-# CHECK-NEXT: 1. 10 9.8 0.0 0.0 addq %rax, %rax
-# CHECK-NEXT: 10 9.3 0.1 0.0 <total>
+# CHECK-NEXT: 0. 10 0.0 0.0 1.5 xorq %rax, %rax
+# CHECK-NEXT: 1. 10 1.0 1.0 0.0 addq %rax, %rax
+# CHECK-NEXT: 10 0.5 0.5 0.8 <total>
# CHECK: [2] Code Region
-# CHECK: Iterations: 500
-# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 1003
-# CHECK-NEXT: Total uOps: 1000
+# CHECK: Iterations: 1000
+# CHECK-NEXT: Instructions: 2000
+# CHECK-NEXT: Total Cycles: 337
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 6
-# CHECK-NEXT: uOps Per Cycle: 1.00
-# CHECK-NEXT: IPC: 1.00
-# CHECK-NEXT: Block RThroughput: 0.5
+# CHECK-NEXT: uOps Per Cycle: 5.93
+# CHECK-NEXT: IPC: 5.93
+# CHECK-NEXT: Block RThroughput: 0.3
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -258,12 +256,12 @@ addq %rax, %rax
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 1 0.25 subl %eax, %eax
+# CHECK-NEXT: 1 0 0.17 subl %eax, %eax
# CHECK-NEXT: 1 1 0.25 addl %eax, %eax
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 2000
-# CHECK-NEXT: Max number of mappings used: 192
+# CHECK-NEXT: Max number of mappings used: 18
# CHECK: * Register File #1 -- Zn3FpPRF:
# CHECK-NEXT: Number of physical registers: 160
@@ -273,7 +271,7 @@ addq %rax, %rax
# CHECK: * Register File #2 -- Zn3IntegerPRF:
# CHECK-NEXT: Number of physical registers: 192
# CHECK-NEXT: Total number of mappings created: 2000
-# CHECK-NEXT: Max number of mappings used: 192
+# CHECK-NEXT: Max number of mappings used: 18
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn3AGU0
@@ -302,37 +300,36 @@ addq %rax, %rax
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - - - - - -
+# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT: - - - - 0.50 - 0.50 - - - - - - - - - - - - - - - - subl %eax, %eax
-# CHECK-NEXT: - - - 0.50 - 0.50 - - - - - - - - - - - - - - - - - addl %eax, %eax
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - subl %eax, %eax
+# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addl %eax, %eax
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 012
-
-# CHECK: [0,0] DeER . . . . . subl %eax, %eax
-# CHECK-NEXT: [0,1] D=eER. . . . . addl %eax, %eax
-# CHECK-NEXT: [1,0] D==eER . . . . subl %eax, %eax
-# CHECK-NEXT: [1,1] D===eER . . . . addl %eax, %eax
-# CHECK-NEXT: [2,0] D====eER . . . . subl %eax, %eax
-# CHECK-NEXT: [2,1] D=====eER . . . . addl %eax, %eax
-# CHECK-NEXT: [3,0] .D=====eER. . . . subl %eax, %eax
-# CHECK-NEXT: [3,1] .D======eER . . . addl %eax, %eax
-# CHECK-NEXT: [4,0] .D=======eER . . . subl %eax, %eax
-# CHECK-NEXT: [4,1] .D========eER . . . addl %eax, %eax
-# CHECK-NEXT: [5,0] .D=========eER . . . subl %eax, %eax
-# CHECK-NEXT: [5,1] .D==========eER. . . addl %eax, %eax
-# CHECK-NEXT: [6,0] . D==========eER . . subl %eax, %eax
-# CHECK-NEXT: [6,1] . D===========eER . . addl %eax, %eax
-# CHECK-NEXT: [7,0] . D============eER . . subl %eax, %eax
-# CHECK-NEXT: [7,1] . D=============eER . . addl %eax, %eax
-# CHECK-NEXT: [8,0] . D==============eER. . subl %eax, %eax
-# CHECK-NEXT: [8,1] . D===============eER . addl %eax, %eax
-# CHECK-NEXT: [9,0] . D===============eER. subl %eax, %eax
-# CHECK-NEXT: [9,1] . D================eER addl %eax, %eax
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DR .. subl %eax, %eax
+# CHECK-NEXT: [0,1] DeER .. addl %eax, %eax
+# CHECK-NEXT: [1,0] D--R .. subl %eax, %eax
+# CHECK-NEXT: [1,1] DeER .. addl %eax, %eax
+# CHECK-NEXT: [2,0] D--R .. subl %eax, %eax
+# CHECK-NEXT: [2,1] DeER .. addl %eax, %eax
+# CHECK-NEXT: [3,0] .D-R .. subl %eax, %eax
+# CHECK-NEXT: [3,1] .DeER.. addl %eax, %eax
+# CHECK-NEXT: [4,0] .D--R.. subl %eax, %eax
+# CHECK-NEXT: [4,1] .DeER.. addl %eax, %eax
+# CHECK-NEXT: [5,0] .D--R.. subl %eax, %eax
+# CHECK-NEXT: [5,1] .DeER.. addl %eax, %eax
+# CHECK-NEXT: [6,0] . D-R.. subl %eax, %eax
+# CHECK-NEXT: [6,1] . DeER. addl %eax, %eax
+# CHECK-NEXT: [7,0] . D--R. subl %eax, %eax
+# CHECK-NEXT: [7,1] . DeER. addl %eax, %eax
+# CHECK-NEXT: [8,0] . D--R. subl %eax, %eax
+# CHECK-NEXT: [8,1] . DeER. addl %eax, %eax
+# CHECK-NEXT: [9,0] . D-R. subl %eax, %eax
+# CHECK-NEXT: [9,1] . DeER addl %eax, %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -341,21 +338,21 @@ addq %rax, %rax
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 10 8.8 0.1 0.0 subl %eax, %eax
-# CHECK-NEXT: 1. 10 9.8 0.0 0.0 addl %eax, %eax
-# CHECK-NEXT: 10 9.3 0.1 0.0 <total>
+# CHECK-NEXT: 0. 10 0.0 0.0 1.5 subl %eax, %eax
+# CHECK-NEXT: 1. 10 1.0 1.0 0.0 addl %eax, %eax
+# CHECK-NEXT: 10 0.5 0.5 0.8 <total>
# CHECK: [3] Code Region
-# CHECK: Iterations: 500
-# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 1003
-# CHECK-NEXT: Total uOps: 1000
+# CHECK: Iterations: 1000
+# CHECK-NEXT: Instructions: 2000
+# CHECK-NEXT: Total Cycles: 337
+# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 6
-# CHECK-NEXT: uOps Per Cycle: 1.00
-# CHECK-NEXT: IPC: 1.00
-# CHECK-NEXT: Block RThroughput: 0.5
+# CHECK-NEXT: uOps Per Cycle: 5.93
+# CHECK-NEXT: IPC: 5.93
+# CHECK-NEXT: Block RThroughput: 0.3
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -366,12 +363,12 @@ addq %rax, %rax
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 1 0.25 subq %rax, %rax
+# CHECK-NEXT: 1 0 0.17 subq %rax, %rax
# CHECK-NEXT: 1 1 0.25 addq %rax, %rax
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 2000
-# CHECK-NEXT: Max number of mappings used: 192
+# CHECK-NEXT: Max number of mappings used: 18
# CHECK: * Register File #1 -- Zn3FpPRF:
# CHECK-NEXT: Number of physical registers: 160
@@ -381,7 +378,7 @@ addq %rax, %rax
# CHECK: * Register File #2 -- Zn3IntegerPRF:
# CHECK-NEXT: Number of physical registers: 192
# CHECK-NEXT: Total number of mappings created: 2000
-# CHECK-NEXT: Max number of mappings used: 192
+# CHECK-NEXT: Max number of mappings used: 18
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn3AGU0
@@ -410,37 +407,36 @@ addq %rax, %rax
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - - - - - -
+# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT: - - - - 0.50 - 0.50 - - - - - - - - - - - - - - - - subq %rax, %rax
-# CHECK-NEXT: - - - 0.50 - 0.50 - - - - - - - - - - - - - - - - - addq %rax, %rax
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - subq %rax, %rax
+# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addq %rax, %rax
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 012
-
-# CHECK: [0,0] DeER . . . . . subq %rax, %rax
-# CHECK-NEXT: [0,1] D=eER. . . . . addq %rax, %rax
-# CHECK-NEXT: [1,0] D==eER . . . . subq %rax, %rax
-# CHECK-NEXT: [1,1] D===eER . . . . addq %rax, %rax
-# CHECK-NEXT: [2,0] D====eER . . . . subq %rax, %rax
-# CHECK-NEXT: [2,1] D=====eER . . . . addq %rax, %rax
-# CHECK-NEXT: [3,0] .D=====eER. . . . subq %rax, %rax
-# CHECK-NEXT: [3,1] .D======eER . . . addq %rax, %rax
-# CHECK-NEXT: [4,0] .D=======eER . . . subq %rax, %rax
-# CHECK-NEXT: [4,1] .D========eER . . . addq %rax, %rax
-# CHECK-NEXT: [5,0] .D=========eER . . . subq %rax, %rax
-# CHECK-NEXT: [5,1] .D==========eER. . . addq %rax, %rax
-# CHECK-NEXT: [6,0] . D==========eER . . subq %rax, %rax
-# CHECK-NEXT: [6,1] . D===========eER . . addq %rax, %rax
-# CHECK-NEXT: [7,0] . D============eER . . subq %rax, %rax
-# CHECK-NEXT: [7,1] . D=============eER . . addq %rax, %rax
-# CHECK-NEXT: [8,0] . D==============eER. . subq %rax, %rax
-# CHECK-NEXT: [8,1] . D===============eER . addq %rax, %rax
-# CHECK-NEXT: [9,0] . D===============eER. subq %rax, %rax
-# CHECK-NEXT: [9,1] . D================eER addq %rax, %rax
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DR .. subq %rax, %rax
+# CHECK-NEXT: [0,1] DeER .. addq %rax, %rax
+# CHECK-NEXT: [1,0] D--R .. subq %rax, %rax
+# CHECK-NEXT: [1,1] DeER .. addq %rax, %rax
+# CHECK-NEXT: [2,0] D--R .. subq %rax, %rax
+# CHECK-NEXT: [2,1] DeER .. addq %rax, %rax
+# CHECK-NEXT: [3,0] .D-R .. subq %rax, %rax
+# CHECK-NEXT: [3,1] .DeER.. addq %rax, %rax
+# CHECK-NEXT: [4,0] .D--R.. subq %rax, %rax
+# CHECK-NEXT: [4,1] .DeER.. addq %rax, %rax
+# CHECK-NEXT: [5,0] .D--R.. subq %rax, %rax
+# CHECK-NEXT: [5,1] .DeER.. addq %rax, %rax
+# CHECK-NEXT: [6,0] . D-R.. subq %rax, %rax
+# CHECK-NEXT: [6,1] . DeER. addq %rax, %rax
+# CHECK-NEXT: [7,0] . D--R. subq %rax, %rax
+# CHECK-NEXT: [7,1] . DeER. addq %rax, %rax
+# CHECK-NEXT: [8,0] . D--R. subq %rax, %rax
+# CHECK-NEXT: [8,1] . DeER. addq %rax, %rax
+# CHECK-NEXT: [9,0] . D-R. subq %rax, %rax
+# CHECK-NEXT: [9,1] . DeER addq %rax, %rax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -449,6 +445,6 @@ addq %rax, %rax
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 10 8.8 0.1 0.0 subq %rax, %rax
-# CHECK-NEXT: 1. 10 9.8 0.0 0.0 addq %rax, %rax
-# CHECK-NEXT: 10 9.3 0.1 0.0 <total>
+# CHECK-NEXT: 0. 10 0.0 0.0 1.5 subq %rax, %rax
+# CHECK-NEXT: 1. 10 1.0 1.0 0.0 addq %rax, %rax
+# CHECK-NEXT: 10 0.5 0.5 0.8 <total>
More information about the llvm-commits
mailing list