[llvm] r333095 - [llvm-mca] Print the "Block RThroughput" in the SummaryView.

Andrea Di Biagio via llvm-commits llvm-commits at lists.llvm.org
Wed May 23 08:59:28 PDT 2018


Author: adibiagio
Date: Wed May 23 08:59:27 2018
New Revision: 333095

URL: http://llvm.org/viewvc/llvm-project?rev=333095&view=rev
Log:
[llvm-mca] Print the "Block RThroughput" in the SummaryView.

This patch implements the "block reciprocal throughput" computation in the
SummaryView.

The block reciprocal throughput is computed as the MAX of:
  - NumMicroOps / DispatchWidth
  - Resource Cycles / #Units   (for every resource consumed).

The block throughput is bounded from above by the hardware dispatch throughput.
That is because the DispatchWidth is an upper bound on how many opcodes can be part
of a single dispatch group.

The block throughput is also limited by the amount of hardware parallelism. The
number of available resource units affects how the resource pressure is
distributed, and also how many blocks can be delivered every cycle.

Modified:
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/dot-product.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-1.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-2.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-3.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-4.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-5.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/simple-test.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s
    llvm/trunk/tools/llvm-mca/SummaryView.cpp
    llvm/trunk/tools/llvm-mca/SummaryView.h
    llvm/trunk/tools/llvm-mca/llvm-mca.cpp

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/dot-product.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/dot-product.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/dot-product.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/dot-product.s Wed May 23 08:59:27 2018
@@ -5,11 +5,12 @@ vmulps   %xmm0, %xmm1, %xmm2
 vhaddps  %xmm2, %xmm2, %xmm3
 vhaddps  %xmm3, %xmm3, %xmm4
 
-# CHECK:      Iterations:     300
-# CHECK-NEXT: Instructions:   900
-# CHECK-NEXT: Total Cycles:   610
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            1.48
+# CHECK:      Iterations:        300
+# CHECK-NEXT: Instructions:      900
+# CHECK-NEXT: Total Cycles:      610
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               1.48
+# CHECK-NEXT: Block RThroughput: 2.0
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s Wed May 23 08:59:27 2018
@@ -4,11 +4,12 @@
 vshufps $0, %xmm0, %xmm1, %xmm1
 vhaddps (%rdi), %xmm1, %xmm2
 
-# CHECK:      Iterations:     1
-# CHECK-NEXT: Instructions:   2
-# CHECK-NEXT: Total Cycles:   11
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.18
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      11
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.18
+# CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s Wed May 23 08:59:27 2018
@@ -4,11 +4,12 @@
 vshufps $0, %xmm0, %xmm1, %xmm1
 vhaddps (%rdi), %ymm1, %ymm2
 
-# CHECK:      Iterations:     1
-# CHECK-NEXT: Instructions:   2
-# CHECK-NEXT: Total Cycles:   12
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.17
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      12
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.17
+# CHECK-NEXT: Block RThroughput: 2.0
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/instruction-info-view.s Wed May 23 08:59:27 2018
@@ -10,11 +10,12 @@ vhaddps  %xmm3, %xmm3, %xmm4
 
 # DISABLED-NOT: Instruction Info:
 
-# ENABLED:      Iterations:     100
-# ENABLED-NEXT: Instructions:   300
-# ENABLED-NEXT: Total Cycles:   209
-# ENABLED-NEXT: Dispatch Width: 2
-# ENABLED-NEXT: IPC:            1.44
+# ENABLED:      Iterations:        100
+# ENABLED-NEXT: Instructions:      300
+# ENABLED-NEXT: Total Cycles:      209
+# ENABLED-NEXT: Dispatch Width:    2
+# ENABLED-NEXT: IPC:               1.44
+# ENABLED-NEXT: Block RThroughput: 2.0
 
 # ENABLED:      Instruction Info:
 # ENABLED-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s Wed May 23 08:59:27 2018
@@ -10,11 +10,12 @@ vmovaps %xmm0, 32(%rdi)
 vmovaps 48(%rsi), %xmm0
 vmovaps %xmm0, 48(%rdi)
 
-# CHECK:      Iterations:     100
-# CHECK-NEXT: Instructions:   800
-# CHECK-NEXT: Total Cycles:   2403
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.33
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      800
+# CHECK-NEXT: Total Cycles:      2403
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.33
+# CHECK-NEXT: Block RThroughput: 4.0
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s Wed May 23 08:59:27 2018
@@ -10,11 +10,12 @@ vmovaps %xmm0, 32(%rdi)
 vmovaps 48(%rsi), %xmm0
 vmovaps %xmm0, 48(%rdi)
 
-# CHECK:      Iterations:     100
-# CHECK-NEXT: Instructions:   800
-# CHECK-NEXT: Total Cycles:   408
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            1.96
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      800
+# CHECK-NEXT: Total Cycles:      408
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               1.96
+# CHECK-NEXT: Block RThroughput: 4.0
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s Wed May 23 08:59:27 2018
@@ -17,11 +17,12 @@ vsqrtps     %xmm0, %xmm2
 vaddps      %ymm0, %ymm1, %ymm2
 vsqrtps     %ymm0, %ymm2
 
-# CHECK:      Iterations:     100
-# CHECK-NEXT: Instructions:   800
-# CHECK-NEXT: Total Cycles:   6306
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.13
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      800
+# CHECK-NEXT: Total Cycles:      6306
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.13
+# CHECK-NEXT: Block RThroughput: 63.0
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/rcu-statistics.s Wed May 23 08:59:27 2018
@@ -18,11 +18,12 @@
   vaddps  %xmm0, %xmm1, %xmm2
   vaddps  %xmm0, %xmm1, %xmm2
 
-# CHECK:      Iterations:     1
-# CHECK-NEXT: Instructions:   16
-# CHECK-NEXT: Total Cycles:   31
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.52
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      16
+# CHECK-NEXT: Total Cycles:      31
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.52
+# CHECK-NEXT: Block RThroughput: 21.0
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s Wed May 23 08:59:27 2018
@@ -8,11 +8,12 @@
 vaddps  %xmm0, %xmm0, %xmm1
 vmulps  (%rdi), %xmm1, %xmm2
 
-# CHECK:      Iterations:     1
-# CHECK-NEXT: Instructions:   2
-# CHECK-NEXT: Total Cycles:   10
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.20
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      10
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.20
+# CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s Wed May 23 08:59:27 2018
@@ -7,11 +7,12 @@
 # The second integer multiply can start at cycle 2 because the implicit reads
 # can start after the load operand is evaluated.
 
-# CHECK:      Iterations:     1
-# CHECK-NEXT: Instructions:   2
-# CHECK-NEXT: Total Cycles:   10
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.20
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      10
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.20
+# CHECK-NEXT: Block RThroughput: 2.0
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s Wed May 23 08:59:27 2018
@@ -5,11 +5,12 @@
   add (%rsp), %rsi
   add %rdx, %r8
 
-# CHECK:      Iterations:     1
-# CHECK-NEXT: Instructions:   3
-# CHECK-NEXT: Total Cycles:   7
-# CHECK-NEXT: Dispatch Width: 3
-# CHECK-NEXT: IPC:            0.43
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      3
+# CHECK-NEXT: Total Cycles:      7
+# CHECK-NEXT: Dispatch Width:    3
+# CHECK-NEXT: IPC:               0.43
+# CHECK-NEXT: Block RThroughput: 1.5
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-1.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-1.s Wed May 23 08:59:27 2018
@@ -4,11 +4,12 @@
 vaddps %xmm0, %xmm0, %xmm0
 vmulps %xmm0, %xmm0, %xmm0
 
-# CHECK:      Iterations:     5
-# CHECK-NEXT: Instructions:   10
-# CHECK-NEXT: Total Cycles:   28
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.36
+# CHECK:      Iterations:        5
+# CHECK-NEXT: Instructions:      10
+# CHECK-NEXT: Total Cycles:      28
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.36
+# CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK:      Dynamic Dispatch Stall Cycles:
 # CHECK-NEXT: RAT     - Register unavailable:                      0

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-2.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-2.s Wed May 23 08:59:27 2018
@@ -4,11 +4,12 @@
 vaddps %xmm0, %xmm0, %xmm0
 vmulps %xmm0, %xmm0, %xmm0
 
-# CHECK:      Iterations:     5
-# CHECK-NEXT: Instructions:   10
-# CHECK-NEXT: Total Cycles:   28
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.36
+# CHECK:      Iterations:        5
+# CHECK-NEXT: Instructions:      10
+# CHECK-NEXT: Total Cycles:      28
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.36
+# CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK:      Dynamic Dispatch Stall Cycles:
 # CHECK-NEXT: RAT     - Register unavailable:                      13

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-3.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-3.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-3.s Wed May 23 08:59:27 2018
@@ -3,11 +3,12 @@
 
 idiv %eax
 
-# CHECK:      Iterations:     2
-# CHECK-NEXT: Instructions:   2
-# CHECK-NEXT: Total Cycles:   55
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.04
+# CHECK:      Iterations:        2
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      55
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.04
+# CHECK-NEXT: Block RThroughput: 25.0
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-4.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-4.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-4.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-4.s Wed May 23 08:59:27 2018
@@ -3,11 +3,12 @@
 
 idiv %eax
 
-# CHECK:      Iterations:     22
-# CHECK-NEXT: Instructions:   22
-# CHECK-NEXT: Total Cycles:   553
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.04
+# CHECK:      Iterations:        22
+# CHECK-NEXT: Instructions:      22
+# CHECK-NEXT: Total Cycles:      553
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.04
+# CHECK-NEXT: Block RThroughput: 25.0
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-5.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-5.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-5.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/register-files-5.s Wed May 23 08:59:27 2018
@@ -35,11 +35,12 @@
   vaddps %ymm3, %ymm0, %ymm5
   vaddps %ymm3, %ymm0, %ymm6
 
-# CHECK:      Iterations:     1
-# CHECK-NEXT: Instructions:   33
-# CHECK-NEXT: Total Cycles:   70
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.47
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      33
+# CHECK-NEXT: Total Cycles:      70
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.47
+# CHECK-NEXT: Block RThroughput: 64.0
 
 # CHECK:      Dynamic Dispatch Stall Cycles:
 # CHECK-NEXT: RAT     - Register unavailable:                      0

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s Wed May 23 08:59:27 2018
@@ -4,11 +4,12 @@
 vmulps (%rsi), %xmm0, %xmm0
 add  %rsi, %rsi
 
-# CHECK:      Iterations:     1
-# CHECK-NEXT: Instructions:   2
-# CHECK-NEXT: Total Cycles:   10
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.20
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      10
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.20
+# CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/simple-test.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/simple-test.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/simple-test.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/simple-test.s Wed May 23 08:59:27 2018
@@ -3,11 +3,12 @@
 
 add %edi, %eax
 
-# CHECK:      Iterations:     100
-# CHECK-NEXT: Instructions:   100
-# CHECK-NEXT: Total Cycles:   103
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.97
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      100
+# CHECK-NEXT: Total Cycles:      103
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.97
+# CHECK-NEXT: Block RThroughput: 0.5
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s Wed May 23 08:59:27 2018
@@ -4,11 +4,12 @@
 vaddps %xmm0, %xmm0, %xmm1
 vandps (%rdi), %xmm1, %xmm2
 
-# CHECK:      Iterations:     1
-# CHECK-NEXT: Instructions:   2
-# CHECK-NEXT: Total Cycles:   9
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.22
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      9
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.22
+# CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s Wed May 23 08:59:27 2018
@@ -4,11 +4,12 @@
 vaddps %ymm0, %ymm0, %ymm1
 vandps (%rdi), %ymm1, %ymm2
 
-# CHECK:      Iterations:     1
-# CHECK-NEXT: Instructions:   2
-# CHECK-NEXT: Total Cycles:   10
-# CHECK-NEXT: Dispatch Width: 2
-# CHECK-NEXT: IPC:            0.20
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      10
+# CHECK-NEXT: Dispatch Width:    2
+# CHECK-NEXT: IPC:               0.20
+# CHECK-NEXT: Block RThroughput: 2.0
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps

Modified: llvm/trunk/tools/llvm-mca/SummaryView.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/SummaryView.cpp?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/SummaryView.cpp (original)
+++ llvm/trunk/tools/llvm-mca/SummaryView.cpp Wed May 23 08:59:27 2018
@@ -14,6 +14,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "SummaryView.h"
+#include "Support.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Format.h"
 
 namespace mca {
@@ -22,19 +24,83 @@ namespace mca {
 
 using namespace llvm;
 
+void SummaryView::onInstructionEvent(const HWInstructionEvent &Event) {
+  // We are only interested in the "instruction dispatched" events generated by
+  // the dispatch stage for instructions that are part of iteration #0.
+  if (Event.Type != HWInstructionEvent::Dispatched)
+    return;
+
+  if (Event.IR.getSourceIndex() >= Source.size())
+    return;
+
+  // Update the cumulative number of resource cycles based on the processor
+  // resource usage information available from the instruction descriptor.
+  // The cumulative number of resource cycles is tracked separately for every
+  // processor resource which is consumed by an instruction of the block.
+  const Instruction &Inst = *Event.IR.getInstruction();
+  const InstrDesc &Desc = Inst.getDesc();
+  NumMicroOps += Desc.NumMicroOps;
+  for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) {
+    if (!RU.second.size())
+      continue;
+
+    assert(RU.second.NumUnits && "Expected more than one unit used!");
+    if (ProcResourceUsage.find(RU.first) == ProcResourceUsage.end()) {
+      ProcResourceUsage[RU.first] = RU.second.size();
+      continue;
+    }
+
+    ProcResourceUsage[RU.first] += RU.second.size();
+  }
+}
+
+double SummaryView::getBlockRThroughput() const {
+  assert(NumMicroOps && "Expected at least one micro opcode!");
+
+  SmallVector<uint64_t, 8> Masks(SM.getNumProcResourceKinds());
+  computeProcResourceMasks(SM, Masks);
+
+  // The block throughput is bounded from above by the hardware dispatch
+  // throughput. That is because the DispatchWidth is an upper bound on the
+  // number of opcodes that can be part of a single dispatch group.
+  double Max = static_cast<double>(NumMicroOps) / DispatchWidth;
+
+  // The block throughput is also limited by the amount of hardware parallelism.
+  // The number of available resource units affects how the resource pressure
+  // is distributed, as well as how many blocks can be executed every cycle.
+  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    uint64_t Mask = Masks[I];
+    const auto It = ProcResourceUsage.find_as(Mask);
+    if (It != ProcResourceUsage.end()) {
+      const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
+      unsigned NumUnits = MCDesc.NumUnits;
+      double Throughput = static_cast<double>(It->second) / NumUnits;
+      Max = std::max(Max, Throughput);
+    }
+  }
+
+  // The block reciprocal throughput is computed as the MAX of:
+  //  -  (#uOps / DispatchWidth)
+  //  -  (resource cycles / #units) for every consumed processor resource.
+  return Max;
+}
+
 void SummaryView::printView(raw_ostream &OS) const {
   unsigned Iterations = Source.getNumIterations();
   unsigned Instructions = Source.size();
   unsigned TotalInstructions = Instructions * Iterations;
   double IPC = (double)TotalInstructions / TotalCycles;
+  double BlockRThroughput = getBlockRThroughput();
 
   std::string Buffer;
   raw_string_ostream TempStream(Buffer);
-  TempStream << "Iterations:     " << Iterations;
-  TempStream << "\nInstructions:   " << TotalInstructions;
-  TempStream << "\nTotal Cycles:   " << TotalCycles;
-  TempStream << "\nDispatch Width: " << DispatchWidth;
-  TempStream << "\nIPC:            " << format("%.2f", IPC) << '\n';
+  TempStream << "Iterations:        " << Iterations;
+  TempStream << "\nInstructions:      " << TotalInstructions;
+  TempStream << "\nTotal Cycles:      " << TotalCycles;
+  TempStream << "\nDispatch Width:    " << DispatchWidth;
+  TempStream << "\nIPC:               " << format("%.2f", IPC);
+  TempStream << "\nBlock RThroughput: " << format("%.1f", BlockRThroughput)
+             << '\n';
   TempStream.flush();
   OS << Buffer;
 }

Modified: llvm/trunk/tools/llvm-mca/SummaryView.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/SummaryView.h?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/SummaryView.h (original)
+++ llvm/trunk/tools/llvm-mca/SummaryView.h Wed May 23 08:59:27 2018
@@ -14,12 +14,12 @@
 /// performance throughput. Below is an example of summary view:
 ///
 ///
-/// Iterations:     300
-/// Instructions:   900
-/// Total Cycles:   610
-/// Dispatch Width: 2
-/// IPC:            1.48
-///
+/// Iterations:        300
+/// Instructions:      900
+/// Total Cycles:      610
+/// Dispatch Width:    2
+/// IPC:               1.48
+/// Block RThroughput: 2.0
 ///
 /// The summary view collects a few performance numbers. The two main
 /// performance indicators are 'Total Cycles' and IPC (Instructions Per Cycle).
@@ -31,22 +31,41 @@
 
 #include "SourceMgr.h"
 #include "View.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCSchedule.h"
 #include "llvm/Support/raw_ostream.h"
 
 namespace mca {
 
 /// A view that collects and prints a few performance numbers.
 class SummaryView : public View {
+  const llvm::MCSchedModel &SM;
   const SourceMgr &Source;
   const unsigned DispatchWidth;
   unsigned TotalCycles;
+  // The total number of micro opcodes contributed by a block of instructions.
+  unsigned NumMicroOps;
+  // For each processor resource, this map stores the cumulative number of
+  // resource cycles consumed by a block of instructions. The resource mask ID
+  // is used as the key value to access elements of this map.
+  llvm::DenseMap<uint64_t, unsigned> ProcResourceUsage;
+
+  // Compute the reciprocal throughput for the analyzed code block.
+  // The reciprocal block throughput is computed as the MAX between:
+  //   - NumMicroOps / DispatchWidth
+  //   - Total Resource Cycles / #Units   (for every resource consumed).
+  double getBlockRThroughput() const;
 
 public:
-  SummaryView(const SourceMgr &S, unsigned Width)
-      : Source(S), DispatchWidth(Width), TotalCycles(0) {}
+  SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
+              unsigned Width)
+      : SM(Model), Source(S), DispatchWidth(Width), TotalCycles(0),
+        NumMicroOps(0) {}
 
   void onCycleEnd() override { ++TotalCycles; }
 
+  void onInstructionEvent(const HWInstructionEvent &Event) override;
+
   void printView(llvm::raw_ostream &OS) const override;
 };
 } // namespace mca

Modified: llvm/trunk/tools/llvm-mca/llvm-mca.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/llvm-mca.cpp?rev=333095&r1=333094&r2=333095&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/llvm-mca.cpp (original)
+++ llvm/trunk/tools/llvm-mca/llvm-mca.cpp Wed May 23 08:59:27 2018
@@ -495,7 +495,7 @@ int main(int argc, char **argv) {
                    LoadQueueSize, StoreQueueSize, AssumeNoAlias);
     mca::BackendPrinter Printer(B);
 
-    Printer.addView(llvm::make_unique<mca::SummaryView>(S, Width));
+    Printer.addView(llvm::make_unique<mca::SummaryView>(SM, S, Width));
     if (PrintInstructionInfoView)
       Printer.addView(
           llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, S, *IP));




More information about the llvm-commits mailing list