[llvm] r333095 - [llvm-mca] Print the "Block RThroughput" in the SummaryView.
Andrea Di Biagio via llvm-commits
llvm-commits at lists.llvm.org
Wed May 23 08:59:28 PDT 2018
Author: adibiagio
Date: Wed May 23 08:59:27 2018
New Revision: 333095
URL: http://llvm.org/viewvc/llvm-project?rev=333095&view=rev
Log:
[llvm-mca] Print the "Block RThroughput" in the SummaryView.
This patch implements the "block reciprocal throughput" computation in the
SummaryView.
The block reciprocal throughput is computed as the MAX of:
- NumMicroOps / DispatchWidth
- Resource Cycles / #Units (for every resource consumed).
The block throughput is bounded from above by the hardware dispatch throughput.
That is because the DispatchWidth is an upper bound on how many opcodes can be part
of a single dispatch group.
The block throughput is also limited by the amount of hardware parallelism. The
number of available resource units affects how the resource pressure is
distributed, and also how many blocks can be delivered every cycle.
Modified:
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/dot-product.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-1.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-2.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-3.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-4.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-5.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/simple-test.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s
llvm/trunk/tools/llvm-mca/SummaryView.cpp
llvm/trunk/tools/llvm-mca/SummaryView.h
llvm/trunk/tools/llvm-mca/llvm-mca.cpp
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/dot-product.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/dot-product.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/dot-product.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/dot-product.s Wed May 23 08:59:27 2018
@@ -5,11 +5,12 @@ vmulps %xmm0, %xmm1, %xmm2
vhaddps %xmm2, %xmm2, %xmm3
vhaddps %xmm3, %xmm3, %xmm4
-# CHECK: Iterations: 300
-# CHECK-NEXT: Instructions: 900
-# CHECK-NEXT: Total Cycles: 610
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 1.48
+# CHECK: Iterations: 300
+# CHECK-NEXT: Instructions: 900
+# CHECK-NEXT: Total Cycles: 610
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 1.48
+# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s Wed May 23 08:59:27 2018
@@ -4,11 +4,12 @@
vshufps $0, %xmm0, %xmm1, %xmm1
vhaddps (%rdi), %xmm1, %xmm2
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 11
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.18
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 11
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.18
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s Wed May 23 08:59:27 2018
@@ -4,11 +4,12 @@
vshufps $0, %xmm0, %xmm1, %xmm1
vhaddps (%rdi), %ymm1, %ymm2
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 12
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.17
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 12
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s Wed May 23 08:59:27 2018
@@ -10,11 +10,12 @@ vhaddps %xmm3, %xmm3, %xmm4
# DISABLED-NOT: Instruction Info:
-# ENABLED: Iterations: 100
-# ENABLED-NEXT: Instructions: 300
-# ENABLED-NEXT: Total Cycles: 209
-# ENABLED-NEXT: Dispatch Width: 2
-# ENABLED-NEXT: IPC: 1.44
+# ENABLED: Iterations: 100
+# ENABLED-NEXT: Instructions: 300
+# ENABLED-NEXT: Total Cycles: 209
+# ENABLED-NEXT: Dispatch Width: 2
+# ENABLED-NEXT: IPC: 1.44
+# ENABLED-NEXT: Block RThroughput: 2.0
# ENABLED: Instruction Info:
# ENABLED-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s Wed May 23 08:59:27 2018
@@ -10,11 +10,12 @@ vmovaps %xmm0, 32(%rdi)
vmovaps 48(%rsi), %xmm0
vmovaps %xmm0, 48(%rdi)
-# CHECK: Iterations: 100
-# CHECK-NEXT: Instructions: 800
-# CHECK-NEXT: Total Cycles: 2403
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.33
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 2403
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s Wed May 23 08:59:27 2018
@@ -10,11 +10,12 @@ vmovaps %xmm0, 32(%rdi)
vmovaps 48(%rsi), %xmm0
vmovaps %xmm0, 48(%rdi)
-# CHECK: Iterations: 100
-# CHECK-NEXT: Instructions: 800
-# CHECK-NEXT: Total Cycles: 408
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 1.96
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 408
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 1.96
+# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s Wed May 23 08:59:27 2018
@@ -17,11 +17,12 @@ vsqrtps %xmm0, %xmm2
vaddps %ymm0, %ymm1, %ymm2
vsqrtps %ymm0, %ymm2
-# CHECK: Iterations: 100
-# CHECK-NEXT: Instructions: 800
-# CHECK-NEXT: Total Cycles: 6306
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.13
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 800
+# CHECK-NEXT: Total Cycles: 6306
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.13
+# CHECK-NEXT: Block RThroughput: 63.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s Wed May 23 08:59:27 2018
@@ -18,11 +18,12 @@
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 16
-# CHECK-NEXT: Total Cycles: 31
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.52
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 16
+# CHECK-NEXT: Total Cycles: 31
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.52
+# CHECK-NEXT: Block RThroughput: 21.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s Wed May 23 08:59:27 2018
@@ -8,11 +8,12 @@
vaddps %xmm0, %xmm0, %xmm1
vmulps (%rdi), %xmm1, %xmm2
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.20
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s Wed May 23 08:59:27 2018
@@ -7,11 +7,12 @@
# The second integer multiply can start at cycle 2 because the implicit reads
# can start after the load operand is evaluated.
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.20
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s Wed May 23 08:59:27 2018
@@ -5,11 +5,12 @@
add (%rsp), %rsi
add %rdx, %r8
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 3
-# CHECK-NEXT: Total Cycles: 7
-# CHECK-NEXT: Dispatch Width: 3
-# CHECK-NEXT: IPC: 0.43
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 3
+# CHECK-NEXT: Total Cycles: 7
+# CHECK-NEXT: Dispatch Width: 3
+# CHECK-NEXT: IPC: 0.43
+# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-1.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-1.s Wed May 23 08:59:27 2018
@@ -4,11 +4,12 @@
vaddps %xmm0, %xmm0, %xmm0
vmulps %xmm0, %xmm0, %xmm0
-# CHECK: Iterations: 5
-# CHECK-NEXT: Instructions: 10
-# CHECK-NEXT: Total Cycles: 28
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.36
+# CHECK: Iterations: 5
+# CHECK-NEXT: Instructions: 10
+# CHECK-NEXT: Total Cycles: 28
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-2.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-2.s Wed May 23 08:59:27 2018
@@ -4,11 +4,12 @@
vaddps %xmm0, %xmm0, %xmm0
vmulps %xmm0, %xmm0, %xmm0
-# CHECK: Iterations: 5
-# CHECK-NEXT: Instructions: 10
-# CHECK-NEXT: Total Cycles: 28
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.36
+# CHECK: Iterations: 5
+# CHECK-NEXT: Instructions: 10
+# CHECK-NEXT: Total Cycles: 28
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 13
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-3.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-3.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-3.s Wed May 23 08:59:27 2018
@@ -3,11 +3,12 @@
idiv %eax
-# CHECK: Iterations: 2
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 55
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.04
+# CHECK: Iterations: 2
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 55
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.04
+# CHECK-NEXT: Block RThroughput: 25.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-4.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-4.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-4.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-4.s Wed May 23 08:59:27 2018
@@ -3,11 +3,12 @@
idiv %eax
-# CHECK: Iterations: 22
-# CHECK-NEXT: Instructions: 22
-# CHECK-NEXT: Total Cycles: 553
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.04
+# CHECK: Iterations: 22
+# CHECK-NEXT: Instructions: 22
+# CHECK-NEXT: Total Cycles: 553
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.04
+# CHECK-NEXT: Block RThroughput: 25.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-5.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-5.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-5.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-5.s Wed May 23 08:59:27 2018
@@ -35,11 +35,12 @@
vaddps %ymm3, %ymm0, %ymm5
vaddps %ymm3, %ymm0, %ymm6
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 33
-# CHECK-NEXT: Total Cycles: 70
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.47
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 33
+# CHECK-NEXT: Total Cycles: 70
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.47
+# CHECK-NEXT: Block RThroughput: 64.0
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s Wed May 23 08:59:27 2018
@@ -4,11 +4,12 @@
vmulps (%rsi), %xmm0, %xmm0
add %rsi, %rsi
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.20
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/simple-test.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/simple-test.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/simple-test.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/simple-test.s Wed May 23 08:59:27 2018
@@ -3,11 +3,12 @@
add %edi, %eax
-# CHECK: Iterations: 100
-# CHECK-NEXT: Instructions: 100
-# CHECK-NEXT: Total Cycles: 103
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.97
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 100
+# CHECK-NEXT: Total Cycles: 103
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.97
+# CHECK-NEXT: Block RThroughput: 0.5
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s Wed May 23 08:59:27 2018
@@ -4,11 +4,12 @@
vaddps %xmm0, %xmm0, %xmm1
vandps (%rdi), %xmm1, %xmm2
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 9
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.22
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 9
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s Wed May 23 08:59:27 2018
@@ -4,11 +4,12 @@
vaddps %ymm0, %ymm0, %ymm1
vandps (%rdi), %ymm1, %ymm2
-# CHECK: Iterations: 1
-# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 10
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC: 0.20
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 2
+# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Dispatch Width: 2
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
Modified: llvm/trunk/tools/llvm-mca/SummaryView.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/SummaryView.cpp?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/SummaryView.cpp (original)
+++ llvm/trunk/tools/llvm-mca/SummaryView.cpp Wed May 23 08:59:27 2018
@@ -14,6 +14,8 @@
//===----------------------------------------------------------------------===//
#include "SummaryView.h"
+#include "Support.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Format.h"
namespace mca {
@@ -22,19 +24,83 @@ namespace mca {
using namespace llvm;
+void SummaryView::onInstructionEvent(const HWInstructionEvent &Event) {
+ // We are only interested in the "instruction dispatched" events generated by
+ // the dispatch stage for instructions that are part of iteration #0.
+ if (Event.Type != HWInstructionEvent::Dispatched)
+ return;
+
+ if (Event.IR.getSourceIndex() >= Source.size())
+ return;
+
+ // Update the cumulative number of resource cycles based on the processor
+ // resource usage information available from the instruction descriptor. We need to
+ // compute the cumulative number of resource cycles for every processor
+ // resource which is consumed by an instruction of the block.
+ const Instruction &Inst = *Event.IR.getInstruction();
+ const InstrDesc &Desc = Inst.getDesc();
+ NumMicroOps += Desc.NumMicroOps;
+ for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) {
+ if (!RU.second.size())
+ continue;
+
+ assert(RU.second.NumUnits && "Expected more than one unit used!");
+ if (ProcResourceUsage.find(RU.first) == ProcResourceUsage.end()) {
+ ProcResourceUsage[RU.first] = RU.second.size();
+ continue;
+ }
+
+ ProcResourceUsage[RU.first] += RU.second.size();
+ }
+}
+
+double SummaryView::getBlockRThroughput() const {
+ assert(NumMicroOps && "Expected at least one micro opcode!");
+
+ SmallVector<uint64_t, 8> Masks(SM.getNumProcResourceKinds());
+ computeProcResourceMasks(SM, Masks);
+
+ // The block throughput is bounded from above by the hardware dispatch
+ // throughput. That is because the DispatchWidth is an upper bound on the
+ // number of opcodes that can be part of a single dispatch group.
+ double Max = static_cast<double>(NumMicroOps) / DispatchWidth;
+
+ // The block throughput is also limited by the amount of hardware parallelism.
+ // The number of available resource units affects how the resource pressure
+ // is distributed, as well as how many blocks can be executed every cycle.
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ uint64_t Mask = Masks[I];
+ const auto It = ProcResourceUsage.find_as(Mask);
+ if (It != ProcResourceUsage.end()) {
+ const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
+ unsigned NumUnits = MCDesc.NumUnits;
+ double Throughput = static_cast<double>(It->second) / NumUnits;
+ Max = std::max(Max, Throughput);
+ }
+ }
+
+ // The block reciprocal throughput is computed as the MAX of:
+ // - (#uOps / DispatchWidth)
+ // - (resource cycles / #units) for every consumed processor resource.
+ return Max;
+}
+
void SummaryView::printView(raw_ostream &OS) const {
unsigned Iterations = Source.getNumIterations();
unsigned Instructions = Source.size();
unsigned TotalInstructions = Instructions * Iterations;
double IPC = (double)TotalInstructions / TotalCycles;
+ double BlockRThroughput = getBlockRThroughput();
std::string Buffer;
raw_string_ostream TempStream(Buffer);
- TempStream << "Iterations: " << Iterations;
- TempStream << "\nInstructions: " << TotalInstructions;
- TempStream << "\nTotal Cycles: " << TotalCycles;
- TempStream << "\nDispatch Width: " << DispatchWidth;
- TempStream << "\nIPC: " << format("%.2f", IPC) << '\n';
+ TempStream << "Iterations: " << Iterations;
+ TempStream << "\nInstructions: " << TotalInstructions;
+ TempStream << "\nTotal Cycles: " << TotalCycles;
+ TempStream << "\nDispatch Width: " << DispatchWidth;
+ TempStream << "\nIPC: " << format("%.2f", IPC);
+ TempStream << "\nBlock RThroughput: " << format("%.1f", BlockRThroughput)
+ << '\n';
TempStream.flush();
OS << Buffer;
}
Modified: llvm/trunk/tools/llvm-mca/SummaryView.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/SummaryView.h?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/SummaryView.h (original)
+++ llvm/trunk/tools/llvm-mca/SummaryView.h Wed May 23 08:59:27 2018
@@ -14,12 +14,12 @@
/// performance throughput. Below is an example of summary view:
///
///
-/// Iterations: 300
-/// Instructions: 900
-/// Total Cycles: 610
-/// Dispatch Width: 2
-/// IPC: 1.48
-///
+/// Iterations: 300
+/// Instructions: 900
+/// Total Cycles: 610
+/// Dispatch Width: 2
+/// IPC: 1.48
+/// Block RThroughput: 2.0
///
/// The summary view collects a few performance numbers. The two main
/// performance indicators are 'Total Cycles' and IPC (Instructions Per Cycle).
@@ -31,22 +31,41 @@
#include "SourceMgr.h"
#include "View.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/raw_ostream.h"
namespace mca {
/// A view that collects and prints a few performance numbers.
class SummaryView : public View {
+ const llvm::MCSchedModel &SM;
const SourceMgr &Source;
const unsigned DispatchWidth;
unsigned TotalCycles;
+ // The total number of micro opcodes contributed by a block of instructions.
+ unsigned NumMicroOps;
+ // For each processor resource, this map stores the cumulative number of
+ // resource cycles consumed by a block of instructions. The resource mask ID
+ // is used as the key value to access elements of this map.
+ llvm::DenseMap<uint64_t, unsigned> ProcResourceUsage;
+
+ // Compute the reciprocal throughput for the analyzed code block.
+ // The reciprocal block throughput is computed as the MAX between:
+ // - NumMicroOps / DispatchWidth
+ // - Total Resource Cycles / #Units (for every resource consumed).
+ double getBlockRThroughput() const;
public:
- SummaryView(const SourceMgr &S, unsigned Width)
- : Source(S), DispatchWidth(Width), TotalCycles(0) {}
+ SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
+ unsigned Width)
+ : SM(Model), Source(S), DispatchWidth(Width), TotalCycles(0),
+ NumMicroOps(0) {}
void onCycleEnd() override { ++TotalCycles; }
+ void onInstructionEvent(const HWInstructionEvent &Event) override;
+
void printView(llvm::raw_ostream &OS) const override;
};
} // namespace mca
Modified: llvm/trunk/tools/llvm-mca/llvm-mca.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/llvm-mca.cpp?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/llvm-mca.cpp (original)
+++ llvm/trunk/tools/llvm-mca/llvm-mca.cpp Wed May 23 08:59:27 2018
@@ -495,7 +495,7 @@ int main(int argc, char **argv) {
LoadQueueSize, StoreQueueSize, AssumeNoAlias);
mca::BackendPrinter Printer(B);
- Printer.addView(llvm::make_unique<mca::SummaryView>(S, Width));
+ Printer.addView(llvm::make_unique<mca::SummaryView>(SM, S, Width));
if (PrintInstructionInfoView)
Printer.addView(
llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, S, *IP));
More information about the llvm-commits
mailing list