[llvm] 6181427 - [X86][MCA] Add more tests for MULX (PR51495).
Andrea Di Biagio via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 25 13:29:54 PDT 2021
Author: Andrea Di Biagio
Date: 2021-08-25T21:28:21+01:00
New Revision: 6181427bb97fccde5e71323aef4524bf9b1376f4
URL: https://github.com/llvm/llvm-project/commit/6181427bb97fccde5e71323aef4524bf9b1376f4
DIFF: https://github.com/llvm/llvm-project/commit/6181427bb97fccde5e71323aef4524bf9b1376f4.diff
LOG: [X86][MCA] Add more tests for MULX (PR51495).
llvm-mca still reports a wrong latency for the case where
the two destination registers of MULX are the same.
Added:
llvm/test/tools/llvm-mca/X86/Haswell/mulx-lo-reg-use.s
llvm/test/tools/llvm-mca/X86/Haswell/mulx-same-regs.s
llvm/test/tools/llvm-mca/X86/SkylakeClient/mulx-lo-reg-use.s
llvm/test/tools/llvm-mca/X86/SkylakeClient/mulx-same-regs.s
Modified:
Removed:
################################################################################
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/mulx-lo-reg-use.s b/llvm/test/tools/llvm-mca/X86/Haswell/mulx-lo-reg-use.s
new file mode 100644
index 000000000000..180b97d688db
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/mulx-lo-reg-use.s
@@ -0,0 +1,126 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -timeline -iterations=2 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN
+mulxl %eax, %eax, %ecx
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+mulxq %rax, %rax, %rcx
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region
+
+# CHECK: Iterations: 2
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total uOps: 8
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.80
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 4 4 1.00 mulxl %eax, %eax, %ecx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - HWDivider
+# CHECK-NEXT: [1] - HWFPDivider
+# CHECK-NEXT: [2] - HWPort0
+# CHECK-NEXT: [3] - HWPort1
+# CHECK-NEXT: [4] - HWPort2
+# CHECK-NEXT: [5] - HWPort3
+# CHECK-NEXT: [6] - HWPort4
+# CHECK-NEXT: [7] - HWPort5
+# CHECK-NEXT: [8] - HWPort6
+# CHECK-NEXT: [9] - HWPort7
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
+# CHECK-NEXT: - - 0.50 1.00 - - - 0.50 1.00 -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+# CHECK-NEXT: - - 0.50 1.00 - - - 0.50 1.00 - mulxl %eax, %eax, %ecx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . mulxl %eax, %eax, %ecx
+# CHECK-NEXT: [1,0] .D==eeeeER mulxl %eax, %eax, %ecx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 2.0 0.5 0.0 mulxl %eax, %eax, %ecx
+
+# CHECK: [1] Code Region
+
+# CHECK: Iterations: 2
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total uOps: 6
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 3 4 1.00 mulxq %rax, %rax, %rcx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - HWDivider
+# CHECK-NEXT: [1] - HWFPDivider
+# CHECK-NEXT: [2] - HWPort0
+# CHECK-NEXT: [3] - HWPort1
+# CHECK-NEXT: [4] - HWPort2
+# CHECK-NEXT: [5] - HWPort3
+# CHECK-NEXT: [6] - HWPort4
+# CHECK-NEXT: [7] - HWPort5
+# CHECK-NEXT: [8] - HWPort6
+# CHECK-NEXT: [9] - HWPort7
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
+# CHECK-NEXT: - - - 1.00 - - - - 1.00 -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - 1.00 - mulxq %rax, %rax, %rcx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . mulxq %rax, %rax, %rcx
+# CHECK-NEXT: [1,0] .D==eeeeER mulxq %rax, %rax, %rcx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 2.0 0.5 0.0 mulxq %rax, %rax, %rcx
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/mulx-same-regs.s b/llvm/test/tools/llvm-mca/X86/Haswell/mulx-same-regs.s
new file mode 100644
index 000000000000..b83ddded6957
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/mulx-same-regs.s
@@ -0,0 +1,129 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -timeline -iterations=2 < %s | FileCheck %s
+
+# PR51495: If the two destination registers are the same, the destination will
+# contain teh high half of the multiplication result.
+
+# LLVM-MCA-BEGIN
+mulxl %eax, %eax, %eax
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+mulxq %rax, %rax, %rax
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region
+
+# CHECK: Iterations: 2
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total uOps: 8
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.80
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 4 4 1.00 mulxl %eax, %eax, %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - HWDivider
+# CHECK-NEXT: [1] - HWFPDivider
+# CHECK-NEXT: [2] - HWPort0
+# CHECK-NEXT: [3] - HWPort1
+# CHECK-NEXT: [4] - HWPort2
+# CHECK-NEXT: [5] - HWPort3
+# CHECK-NEXT: [6] - HWPort4
+# CHECK-NEXT: [7] - HWPort5
+# CHECK-NEXT: [8] - HWPort6
+# CHECK-NEXT: [9] - HWPort7
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
+# CHECK-NEXT: - - 0.50 1.00 - - - 0.50 1.00 -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+# CHECK-NEXT: - - 0.50 1.00 - - - 0.50 1.00 - mulxl %eax, %eax, %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . mulxl %eax, %eax, %eax
+# CHECK-NEXT: [1,0] .D==eeeeER mulxl %eax, %eax, %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 2.0 0.5 0.0 mulxl %eax, %eax, %eax
+
+# CHECK: [1] Code Region
+
+# CHECK: Iterations: 2
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total uOps: 6
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 3 4 1.00 mulxq %rax, %rax, %rax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - HWDivider
+# CHECK-NEXT: [1] - HWFPDivider
+# CHECK-NEXT: [2] - HWPort0
+# CHECK-NEXT: [3] - HWPort1
+# CHECK-NEXT: [4] - HWPort2
+# CHECK-NEXT: [5] - HWPort3
+# CHECK-NEXT: [6] - HWPort4
+# CHECK-NEXT: [7] - HWPort5
+# CHECK-NEXT: [8] - HWPort6
+# CHECK-NEXT: [9] - HWPort7
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
+# CHECK-NEXT: - - - 1.00 - - - - 1.00 -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - 1.00 - mulxq %rax, %rax, %rax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . mulxq %rax, %rax, %rax
+# CHECK-NEXT: [1,0] .D==eeeeER mulxq %rax, %rax, %rax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 2.0 0.5 0.0 mulxq %rax, %rax, %rax
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/mulx-lo-reg-use.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/mulx-lo-reg-use.s
new file mode 100644
index 000000000000..cf200b39eeca
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/mulx-lo-reg-use.s
@@ -0,0 +1,126 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -timeline -iterations=2 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN
+mulxl %eax, %eax, %ecx
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+mulxq %rax, %rax, %rcx
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region
+
+# CHECK: Iterations: 2
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total uOps: 8
+
+# CHECK: Dispatch Width: 6
+# CHECK-NEXT: uOps Per Cycle: 0.80
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 4 4 1.00 mulxl %eax, %eax, %ecx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SKLDivider
+# CHECK-NEXT: [1] - SKLFPDivider
+# CHECK-NEXT: [2] - SKLPort0
+# CHECK-NEXT: [3] - SKLPort1
+# CHECK-NEXT: [4] - SKLPort2
+# CHECK-NEXT: [5] - SKLPort3
+# CHECK-NEXT: [6] - SKLPort4
+# CHECK-NEXT: [7] - SKLPort5
+# CHECK-NEXT: [8] - SKLPort6
+# CHECK-NEXT: [9] - SKLPort7
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
+# CHECK-NEXT: - - 0.50 1.00 - - - 0.50 1.00 -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+# CHECK-NEXT: - - 0.50 1.00 - - - 0.50 1.00 - mulxl %eax, %eax, %ecx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . mulxl %eax, %eax, %ecx
+# CHECK-NEXT: [1,0] .D==eeeeER mulxl %eax, %eax, %ecx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 2.0 0.5 0.0 mulxl %eax, %eax, %ecx
+
+# CHECK: [1] Code Region
+
+# CHECK: Iterations: 2
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total uOps: 6
+
+# CHECK: Dispatch Width: 6
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 3 4 1.00 mulxq %rax, %rax, %rcx
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SKLDivider
+# CHECK-NEXT: [1] - SKLFPDivider
+# CHECK-NEXT: [2] - SKLPort0
+# CHECK-NEXT: [3] - SKLPort1
+# CHECK-NEXT: [4] - SKLPort2
+# CHECK-NEXT: [5] - SKLPort3
+# CHECK-NEXT: [6] - SKLPort4
+# CHECK-NEXT: [7] - SKLPort5
+# CHECK-NEXT: [8] - SKLPort6
+# CHECK-NEXT: [9] - SKLPort7
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
+# CHECK-NEXT: - - - 1.00 - - - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - 1.00 - - mulxq %rax, %rax, %rcx
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . mulxq %rax, %rax, %rcx
+# CHECK-NEXT: [1,0] D===eeeeER mulxq %rax, %rax, %rcx
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 2.5 0.5 0.0 mulxq %rax, %rax, %rcx
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/mulx-same-regs.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/mulx-same-regs.s
new file mode 100644
index 000000000000..34b1ef351d8e
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/mulx-same-regs.s
@@ -0,0 +1,129 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -timeline -iterations=2 < %s | FileCheck %s
+
+# PR51495: If the two destination registers are the same, the destination will
+# contain teh high half of the multiplication result.
+
+# LLVM-MCA-BEGIN
+mulxl %eax, %eax, %eax
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+mulxq %rax, %rax, %rax
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region
+
+# CHECK: Iterations: 2
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total uOps: 8
+
+# CHECK: Dispatch Width: 6
+# CHECK-NEXT: uOps Per Cycle: 0.80
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 4 4 1.00 mulxl %eax, %eax, %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SKLDivider
+# CHECK-NEXT: [1] - SKLFPDivider
+# CHECK-NEXT: [2] - SKLPort0
+# CHECK-NEXT: [3] - SKLPort1
+# CHECK-NEXT: [4] - SKLPort2
+# CHECK-NEXT: [5] - SKLPort3
+# CHECK-NEXT: [6] - SKLPort4
+# CHECK-NEXT: [7] - SKLPort5
+# CHECK-NEXT: [8] - SKLPort6
+# CHECK-NEXT: [9] - SKLPort7
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
+# CHECK-NEXT: - - 0.50 1.00 - - - 0.50 1.00 -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+# CHECK-NEXT: - - 0.50 1.00 - - - 0.50 1.00 - mulxl %eax, %eax, %eax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . mulxl %eax, %eax, %eax
+# CHECK-NEXT: [1,0] .D==eeeeER mulxl %eax, %eax, %eax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 2.0 0.5 0.0 mulxl %eax, %eax, %eax
+
+# CHECK: [1] Code Region
+
+# CHECK: Iterations: 2
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total uOps: 6
+
+# CHECK: Dispatch Width: 6
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 3 4 1.00 mulxq %rax, %rax, %rax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SKLDivider
+# CHECK-NEXT: [1] - SKLFPDivider
+# CHECK-NEXT: [2] - SKLPort0
+# CHECK-NEXT: [3] - SKLPort1
+# CHECK-NEXT: [4] - SKLPort2
+# CHECK-NEXT: [5] - SKLPort3
+# CHECK-NEXT: [6] - SKLPort4
+# CHECK-NEXT: [7] - SKLPort5
+# CHECK-NEXT: [8] - SKLPort6
+# CHECK-NEXT: [9] - SKLPort7
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
+# CHECK-NEXT: - - - 1.00 - - - 1.00 - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - 1.00 - - mulxq %rax, %rax, %rax
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . mulxq %rax, %rax, %rax
+# CHECK-NEXT: [1,0] D===eeeeER mulxq %rax, %rax, %rax
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 2.5 0.5 0.0 mulxq %rax, %rax, %rax
More information about the llvm-commits
mailing list