[llvm] r346404 - [NFC][BdVer2] Tests for load and store throughput (PR39465)

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 8 06:48:56 PST 2018


Author: lebedevri
Date: Thu Nov  8 06:48:56 2018
New Revision: 346404

URL: http://llvm.org/viewvc/llvm-project?rev=346404&view=rev
Log:
[NFC][BdVer2] Tests for load and store throughput (PR39465)

During review it was noted that while it appears that
the Piledriver can do two [consecutive] loads per cycle,
it can only do one store per cycle. It was suggested
that the sched model models this incorrectly,
but it was decided to fix that afterwards.

These tests show that two consecutive loads per cycle are
modelled correctly, and that one store per cycle is not
modelled incorrectly either. Unless I'm missing the point.

https://bugs.llvm.org/show_bug.cgi?id=39465

Added:
    llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-throughput.s
    llvm/trunk/test/tools/llvm-mca/X86/BdVer2/store-throughput.s

Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-throughput.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-throughput.s?rev=346404&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-throughput.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-throughput.s Thu Nov  8 06:48:56 2018
@@ -0,0 +1,604 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=100 -timeline -timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN
+movb (%rax), %spl
+movb (%rcx), %bpl
+movb (%rdx), %sil
+movb (%rbx), %dil
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movw (%rax), %sp
+movw (%rcx), %bp
+movw (%rdx), %si
+movw (%rbx), %di
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movl (%rax), %esp
+movl (%rcx), %ebp
+movl (%rdx), %esi
+movl (%rbx), %edi
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movq (%rax), %rsp
+movq (%rcx), %rbp
+movq (%rdx), %rsi
+movq (%rbx), %rdi
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movd (%rax), %mm0
+movd (%rcx), %mm1
+movd (%rdx), %mm2
+movd (%rbx), %mm3
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movaps (%rax), %xmm0
+movaps (%rcx), %xmm1
+movaps (%rdx), %xmm2
+movaps (%rbx), %xmm3
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+vmovaps (%rax), %ymm0
+vmovaps (%rcx), %ymm1
+vmovaps (%rdx), %ymm2
+vmovaps (%rbx), %ymm3
+# LLVM-MCA-END
+
+# CHECK:      [0] Code Region
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      400
+# CHECK-NEXT: Total Cycles:      207
+# CHECK-NEXT: Total uOps:        400
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    1.93
+# CHECK-NEXT: IPC:               1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      5     0.50    *                   movb	(%rax), %spl
+# CHECK-NEXT:  1      5     0.50    *                   movb	(%rcx), %bpl
+# CHECK-NEXT:  1      5     0.50    *                   movb	(%rdx), %sil
+# CHECK-NEXT:  1      5     0.50    *                   movb	(%rbx), %dil
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1]   - PdBranch
+# CHECK-NEXT: [2]   - PdCount
+# CHECK-NEXT: [3]   - PdDiv
+# CHECK-NEXT: [4]   - PdEX0
+# CHECK-NEXT: [5]   - PdEX1
+# CHECK-NEXT: [6]   - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9]   - PdFPMMA
+# CHECK-NEXT: [10]  - PdFPSTO
+# CHECK-NEXT: [11]  - PdFPU0
+# CHECK-NEXT: [12]  - PdFPU1
+# CHECK-NEXT: [13]  - PdFPU2
+# CHECK-NEXT: [14]  - PdFPU3
+# CHECK-NEXT: [15]  - PdFPXBR
+# CHECK-NEXT: [16]  - PdMul
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]
+# CHECK-NEXT: 2.00   2.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]   Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movb	(%rax), %spl
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movb	(%rcx), %bpl
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movb	(%rdx), %sil
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movb	(%rbx), %dil
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     012345678
+
+# CHECK:      [0,0]     DeeeeeER.   movb	(%rax), %spl
+# CHECK-NEXT: [0,1]     DeeeeeER.   movb	(%rcx), %bpl
+# CHECK-NEXT: [0,2]     D=eeeeeER   movb	(%rdx), %sil
+# CHECK-NEXT: [0,3]     D=eeeeeER   movb	(%rbx), %dil
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       movb	(%rax), %spl
+# CHECK-NEXT: 1.     1     1.0    1.0    0.0       movb	(%rcx), %bpl
+# CHECK-NEXT: 2.     1     2.0    2.0    0.0       movb	(%rdx), %sil
+# CHECK-NEXT: 3.     1     2.0    2.0    0.0       movb	(%rbx), %dil
+
+# CHECK:      [1] Code Region
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      400
+# CHECK-NEXT: Total Cycles:      207
+# CHECK-NEXT: Total uOps:        400
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    1.93
+# CHECK-NEXT: IPC:               1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      5     0.50    *                   movw	(%rax), %sp
+# CHECK-NEXT:  1      5     0.50    *                   movw	(%rcx), %bp
+# CHECK-NEXT:  1      5     0.50    *                   movw	(%rdx), %si
+# CHECK-NEXT:  1      5     0.50    *                   movw	(%rbx), %di
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1]   - PdBranch
+# CHECK-NEXT: [2]   - PdCount
+# CHECK-NEXT: [3]   - PdDiv
+# CHECK-NEXT: [4]   - PdEX0
+# CHECK-NEXT: [5]   - PdEX1
+# CHECK-NEXT: [6]   - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9]   - PdFPMMA
+# CHECK-NEXT: [10]  - PdFPSTO
+# CHECK-NEXT: [11]  - PdFPU0
+# CHECK-NEXT: [12]  - PdFPU1
+# CHECK-NEXT: [13]  - PdFPU2
+# CHECK-NEXT: [14]  - PdFPU3
+# CHECK-NEXT: [15]  - PdFPXBR
+# CHECK-NEXT: [16]  - PdMul
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]
+# CHECK-NEXT: 2.00   2.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]   Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movw	(%rax), %sp
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movw	(%rcx), %bp
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movw	(%rdx), %si
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movw	(%rbx), %di
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     012345678
+
+# CHECK:      [0,0]     DeeeeeER.   movw	(%rax), %sp
+# CHECK-NEXT: [0,1]     DeeeeeER.   movw	(%rcx), %bp
+# CHECK-NEXT: [0,2]     D=eeeeeER   movw	(%rdx), %si
+# CHECK-NEXT: [0,3]     D=eeeeeER   movw	(%rbx), %di
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       movw	(%rax), %sp
+# CHECK-NEXT: 1.     1     1.0    1.0    0.0       movw	(%rcx), %bp
+# CHECK-NEXT: 2.     1     2.0    2.0    0.0       movw	(%rdx), %si
+# CHECK-NEXT: 3.     1     2.0    2.0    0.0       movw	(%rbx), %di
+
+# CHECK:      [2] Code Region
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      400
+# CHECK-NEXT: Total Cycles:      207
+# CHECK-NEXT: Total uOps:        400
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    1.93
+# CHECK-NEXT: IPC:               1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      5     0.50    *                   movl	(%rax), %esp
+# CHECK-NEXT:  1      5     0.50    *                   movl	(%rcx), %ebp
+# CHECK-NEXT:  1      5     0.50    *                   movl	(%rdx), %esi
+# CHECK-NEXT:  1      5     0.50    *                   movl	(%rbx), %edi
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1]   - PdBranch
+# CHECK-NEXT: [2]   - PdCount
+# CHECK-NEXT: [3]   - PdDiv
+# CHECK-NEXT: [4]   - PdEX0
+# CHECK-NEXT: [5]   - PdEX1
+# CHECK-NEXT: [6]   - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9]   - PdFPMMA
+# CHECK-NEXT: [10]  - PdFPSTO
+# CHECK-NEXT: [11]  - PdFPU0
+# CHECK-NEXT: [12]  - PdFPU1
+# CHECK-NEXT: [13]  - PdFPU2
+# CHECK-NEXT: [14]  - PdFPU3
+# CHECK-NEXT: [15]  - PdFPXBR
+# CHECK-NEXT: [16]  - PdMul
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]
+# CHECK-NEXT: 2.00   2.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]   Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movl	(%rax), %esp
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movl	(%rcx), %ebp
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movl	(%rdx), %esi
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movl	(%rbx), %edi
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     012345678
+
+# CHECK:      [0,0]     DeeeeeER.   movl	(%rax), %esp
+# CHECK-NEXT: [0,1]     DeeeeeER.   movl	(%rcx), %ebp
+# CHECK-NEXT: [0,2]     D=eeeeeER   movl	(%rdx), %esi
+# CHECK-NEXT: [0,3]     D=eeeeeER   movl	(%rbx), %edi
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       movl	(%rax), %esp
+# CHECK-NEXT: 1.     1     1.0    1.0    0.0       movl	(%rcx), %ebp
+# CHECK-NEXT: 2.     1     2.0    2.0    0.0       movl	(%rdx), %esi
+# CHECK-NEXT: 3.     1     2.0    2.0    0.0       movl	(%rbx), %edi
+
+# CHECK:      [3] Code Region
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      400
+# CHECK-NEXT: Total Cycles:      207
+# CHECK-NEXT: Total uOps:        400
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    1.93
+# CHECK-NEXT: IPC:               1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      5     0.50    *                   movq	(%rax), %rsp
+# CHECK-NEXT:  1      5     0.50    *                   movq	(%rcx), %rbp
+# CHECK-NEXT:  1      5     0.50    *                   movq	(%rdx), %rsi
+# CHECK-NEXT:  1      5     0.50    *                   movq	(%rbx), %rdi
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1]   - PdBranch
+# CHECK-NEXT: [2]   - PdCount
+# CHECK-NEXT: [3]   - PdDiv
+# CHECK-NEXT: [4]   - PdEX0
+# CHECK-NEXT: [5]   - PdEX1
+# CHECK-NEXT: [6]   - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9]   - PdFPMMA
+# CHECK-NEXT: [10]  - PdFPSTO
+# CHECK-NEXT: [11]  - PdFPU0
+# CHECK-NEXT: [12]  - PdFPU1
+# CHECK-NEXT: [13]  - PdFPU2
+# CHECK-NEXT: [14]  - PdFPU3
+# CHECK-NEXT: [15]  - PdFPXBR
+# CHECK-NEXT: [16]  - PdMul
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]
+# CHECK-NEXT: 2.00   2.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]   Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movq	(%rax), %rsp
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movq	(%rcx), %rbp
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movq	(%rdx), %rsi
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movq	(%rbx), %rdi
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     012345678
+
+# CHECK:      [0,0]     DeeeeeER.   movq	(%rax), %rsp
+# CHECK-NEXT: [0,1]     DeeeeeER.   movq	(%rcx), %rbp
+# CHECK-NEXT: [0,2]     D=eeeeeER   movq	(%rdx), %rsi
+# CHECK-NEXT: [0,3]     D=eeeeeER   movq	(%rbx), %rdi
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       movq	(%rax), %rsp
+# CHECK-NEXT: 1.     1     1.0    1.0    0.0       movq	(%rcx), %rbp
+# CHECK-NEXT: 2.     1     2.0    2.0    0.0       movq	(%rdx), %rsi
+# CHECK-NEXT: 3.     1     2.0    2.0    0.0       movq	(%rbx), %rdi
+
+# CHECK:      [4] Code Region
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      400
+# CHECK-NEXT: Total Cycles:      207
+# CHECK-NEXT: Total uOps:        400
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    1.93
+# CHECK-NEXT: IPC:               1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      5     0.50    *                   movd	(%rax), %mm0
+# CHECK-NEXT:  1      5     0.50    *                   movd	(%rcx), %mm1
+# CHECK-NEXT:  1      5     0.50    *                   movd	(%rdx), %mm2
+# CHECK-NEXT:  1      5     0.50    *                   movd	(%rbx), %mm3
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1]   - PdBranch
+# CHECK-NEXT: [2]   - PdCount
+# CHECK-NEXT: [3]   - PdDiv
+# CHECK-NEXT: [4]   - PdEX0
+# CHECK-NEXT: [5]   - PdEX1
+# CHECK-NEXT: [6]   - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9]   - PdFPMMA
+# CHECK-NEXT: [10]  - PdFPSTO
+# CHECK-NEXT: [11]  - PdFPU0
+# CHECK-NEXT: [12]  - PdFPU1
+# CHECK-NEXT: [13]  - PdFPU2
+# CHECK-NEXT: [14]  - PdFPU3
+# CHECK-NEXT: [15]  - PdFPXBR
+# CHECK-NEXT: [16]  - PdMul
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]
+# CHECK-NEXT: 2.00   2.00    -      -      -      -      -      -      -      -     2.00   2.00    -      -     2.00   2.00    -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]   Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -     1.00    -      -      -     1.00    -      -      -      -     movd	(%rax), %mm0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -     1.00    -      -      -     1.00    -      -      -      -      -     movd	(%rcx), %mm1
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -     1.00    -      -      -      -     1.00    -      -      -      -     movd	(%rdx), %mm2
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -     1.00    -      -     1.00    -      -      -      -      -     movd	(%rbx), %mm3
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     012345678
+
+# CHECK:      [0,0]     DeeeeeER.   movd	(%rax), %mm0
+# CHECK-NEXT: [0,1]     DeeeeeER.   movd	(%rcx), %mm1
+# CHECK-NEXT: [0,2]     D=eeeeeER   movd	(%rdx), %mm2
+# CHECK-NEXT: [0,3]     D=eeeeeER   movd	(%rbx), %mm3
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       movd	(%rax), %mm0
+# CHECK-NEXT: 1.     1     1.0    1.0    0.0       movd	(%rcx), %mm1
+# CHECK-NEXT: 2.     1     2.0    2.0    0.0       movd	(%rdx), %mm2
+# CHECK-NEXT: 3.     1     2.0    2.0    0.0       movd	(%rbx), %mm3
+
+# CHECK:      [5] Code Region
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      400
+# CHECK-NEXT: Total Cycles:      207
+# CHECK-NEXT: Total uOps:        400
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    1.93
+# CHECK-NEXT: IPC:               1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      5     0.50    *                   movaps	(%rax), %xmm0
+# CHECK-NEXT:  1      5     0.50    *                   movaps	(%rcx), %xmm1
+# CHECK-NEXT:  1      5     0.50    *                   movaps	(%rdx), %xmm2
+# CHECK-NEXT:  1      5     0.50    *                   movaps	(%rbx), %xmm3
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1]   - PdBranch
+# CHECK-NEXT: [2]   - PdCount
+# CHECK-NEXT: [3]   - PdDiv
+# CHECK-NEXT: [4]   - PdEX0
+# CHECK-NEXT: [5]   - PdEX1
+# CHECK-NEXT: [6]   - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9]   - PdFPMMA
+# CHECK-NEXT: [10]  - PdFPSTO
+# CHECK-NEXT: [11]  - PdFPU0
+# CHECK-NEXT: [12]  - PdFPU1
+# CHECK-NEXT: [13]  - PdFPU2
+# CHECK-NEXT: [14]  - PdFPU3
+# CHECK-NEXT: [15]  - PdFPXBR
+# CHECK-NEXT: [16]  - PdMul
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]
+# CHECK-NEXT: 2.00   2.00    -      -      -      -      -      -     2.00   2.00    -      -      -      -     2.00   2.00    -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]   Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -     1.00    -      -      -      -      -     1.00    -      -      -      -     movaps	(%rax), %xmm0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -     1.00    -      -      -      -      -     1.00    -      -      -      -      -     movaps	(%rcx), %xmm1
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -     1.00    -      -      -      -      -      -     1.00    -      -      -      -     movaps	(%rdx), %xmm2
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -     1.00    -      -      -      -     1.00    -      -      -      -      -     movaps	(%rbx), %xmm3
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     012345678
+
+# CHECK:      [0,0]     DeeeeeER.   movaps	(%rax), %xmm0
+# CHECK-NEXT: [0,1]     DeeeeeER.   movaps	(%rcx), %xmm1
+# CHECK-NEXT: [0,2]     D=eeeeeER   movaps	(%rdx), %xmm2
+# CHECK-NEXT: [0,3]     D=eeeeeER   movaps	(%rbx), %xmm3
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       movaps	(%rax), %xmm0
+# CHECK-NEXT: 1.     1     1.0    1.0    0.0       movaps	(%rcx), %xmm1
+# CHECK-NEXT: 2.     1     2.0    2.0    0.0       movaps	(%rdx), %xmm2
+# CHECK-NEXT: 3.     1     2.0    2.0    0.0       movaps	(%rbx), %xmm3
+
+# CHECK:      [6] Code Region
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      400
+# CHECK-NEXT: Total Cycles:      207
+# CHECK-NEXT: Total uOps:        800
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    3.86
+# CHECK-NEXT: IPC:               1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  2      5     0.50    *                   vmovaps	(%rax), %ymm0
+# CHECK-NEXT:  2      5     0.50    *                   vmovaps	(%rcx), %ymm1
+# CHECK-NEXT:  2      5     0.50    *                   vmovaps	(%rdx), %ymm2
+# CHECK-NEXT:  2      5     0.50    *                   vmovaps	(%rbx), %ymm3
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1]   - PdBranch
+# CHECK-NEXT: [2]   - PdCount
+# CHECK-NEXT: [3]   - PdDiv
+# CHECK-NEXT: [4]   - PdEX0
+# CHECK-NEXT: [5]   - PdEX1
+# CHECK-NEXT: [6]   - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9]   - PdFPMMA
+# CHECK-NEXT: [10]  - PdFPSTO
+# CHECK-NEXT: [11]  - PdFPU0
+# CHECK-NEXT: [12]  - PdFPU1
+# CHECK-NEXT: [13]  - PdFPU2
+# CHECK-NEXT: [14]  - PdFPU3
+# CHECK-NEXT: [15]  - PdFPXBR
+# CHECK-NEXT: [16]  - PdMul
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]
+# CHECK-NEXT: 2.00   2.00    -      -      -      -      -      -     2.00   2.00    -      -      -      -     2.00   2.00    -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]   Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -     1.00    -      -      -      -      -     1.00    -      -      -      -     vmovaps	(%rax), %ymm0
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -     1.00    -      -      -      -      -     1.00    -      -      -      -      -     vmovaps	(%rcx), %ymm1
+# CHECK-NEXT: 1.00    -      -      -      -      -      -      -     1.00    -      -      -      -      -      -     1.00    -      -      -      -     vmovaps	(%rdx), %ymm2
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -     1.00    -      -      -      -     1.00    -      -      -      -      -     vmovaps	(%rbx), %ymm3
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     012345678
+
+# CHECK:      [0,0]     DeeeeeER.   vmovaps	(%rax), %ymm0
+# CHECK-NEXT: [0,1]     DeeeeeER.   vmovaps	(%rcx), %ymm1
+# CHECK-NEXT: [0,2]     .DeeeeeER   vmovaps	(%rdx), %ymm2
+# CHECK-NEXT: [0,3]     .DeeeeeER   vmovaps	(%rbx), %ymm3
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       vmovaps	(%rax), %ymm0
+# CHECK-NEXT: 1.     1     1.0    1.0    0.0       vmovaps	(%rcx), %ymm1
+# CHECK-NEXT: 2.     1     1.0    1.0    0.0       vmovaps	(%rdx), %ymm2
+# CHECK-NEXT: 3.     1     1.0    1.0    0.0       vmovaps	(%rbx), %ymm3

Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/store-throughput.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/store-throughput.s?rev=346404&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/store-throughput.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/store-throughput.s Thu Nov  8 06:48:56 2018
@@ -0,0 +1,605 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=100 -timeline -timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN
+movb %spl, (%rax)  # code region [0]: 8-bit GPR stores; each store has its own source register and its own base register
+movb %bpl, (%rcx)
+movb %sil, (%rdx)
+movb %dil, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movw %sp, (%rax)  # code region [1]: 16-bit GPR stores; each store has its own source register and its own base register
+movw %bp, (%rcx)
+movw %si, (%rdx)
+movw %di, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movl %esp, (%rax)  # code region [2]: 32-bit GPR stores; each store has its own source register and its own base register
+movl %ebp, (%rcx)
+movl %esi, (%rdx)
+movl %edi, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movq %rsp, (%rax)  # code region [3]: 64-bit GPR stores; each store has its own source register and its own base register
+movq %rbp, (%rcx)
+movq %rsi, (%rdx)
+movq %rdi, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movd %mm0, (%rax)  # code region [4]: movd stores from MMX registers %mm0-%mm3, each through its own base register
+movd %mm1, (%rcx)
+movd %mm2, (%rdx)
+movd %mm3, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movaps %xmm0, (%rax)  # code region [5]: movaps stores from %xmm0-%xmm3, each through its own base register
+movaps %xmm1, (%rcx)
+movaps %xmm2, (%rdx)
+movaps %xmm3, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+vmovaps %ymm0, (%rax)  # code region [6]: vmovaps stores from %ymm0-%ymm3, each through its own base register
+vmovaps %ymm1, (%rcx)
+vmovaps %ymm2, (%rdx)
+vmovaps %ymm3, (%rbx)
+# LLVM-MCA-END
+
+# CHECK:      [0] Code Region
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      400
+# CHECK-NEXT: Total Cycles:      403
+# CHECK-NEXT: Total uOps:        400
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    0.99
+# CHECK-NEXT: IPC:               0.99
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     0.50           *            movb	%spl, (%rax)
+# CHECK-NEXT:  1      1     0.50           *            movb	%bpl, (%rcx)
+# CHECK-NEXT:  1      1     0.50           *            movb	%sil, (%rdx)
+# CHECK-NEXT:  1      1     0.50           *            movb	%dil, (%rbx)
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1]   - PdBranch
+# CHECK-NEXT: [2]   - PdCount
+# CHECK-NEXT: [3]   - PdDiv
+# CHECK-NEXT: [4]   - PdEX0
+# CHECK-NEXT: [5]   - PdEX1
+# CHECK-NEXT: [6]   - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9]   - PdFPMMA
+# CHECK-NEXT: [10]  - PdFPSTO
+# CHECK-NEXT: [11]  - PdFPU0
+# CHECK-NEXT: [12]  - PdFPU1
+# CHECK-NEXT: [13]  - PdFPU2
+# CHECK-NEXT: [14]  - PdFPU3
+# CHECK-NEXT: [15]  - PdFPXBR
+# CHECK-NEXT: [16]  - PdMul
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]
+# CHECK-NEXT:  -     4.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]   Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movb	%spl, (%rax)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movb	%bpl, (%rcx)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movb	%sil, (%rdx)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movb	%dil, (%rbx)
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     0123456
+
+# CHECK:      [0,0]     DeER ..   movb	%spl, (%rax)
+# CHECK-NEXT: [0,1]     D=eER..   movb	%bpl, (%rcx)
+# CHECK-NEXT: [0,2]     D==eER.   movb	%sil, (%rdx)
+# CHECK-NEXT: [0,3]     D===eER   movb	%dil, (%rbx)
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       movb	%spl, (%rax)
+# CHECK-NEXT: 1.     1     2.0    0.0    0.0       movb	%bpl, (%rcx)
+# CHECK-NEXT: 2.     1     3.0    0.0    0.0       movb	%sil, (%rdx)
+# CHECK-NEXT: 3.     1     4.0    0.0    0.0       movb	%dil, (%rbx)
+
+# CHECK:      [1] Code Region
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      400
+# CHECK-NEXT: Total Cycles:      403
+# CHECK-NEXT: Total uOps:        400
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    0.99
+# CHECK-NEXT: IPC:               0.99
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     0.50           *            movw	%sp, (%rax)
+# CHECK-NEXT:  1      1     0.50           *            movw	%bp, (%rcx)
+# CHECK-NEXT:  1      1     0.50           *            movw	%si, (%rdx)
+# CHECK-NEXT:  1      1     0.50           *            movw	%di, (%rbx)
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1]   - PdBranch
+# CHECK-NEXT: [2]   - PdCount
+# CHECK-NEXT: [3]   - PdDiv
+# CHECK-NEXT: [4]   - PdEX0
+# CHECK-NEXT: [5]   - PdEX1
+# CHECK-NEXT: [6]   - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9]   - PdFPMMA
+# CHECK-NEXT: [10]  - PdFPSTO
+# CHECK-NEXT: [11]  - PdFPU0
+# CHECK-NEXT: [12]  - PdFPU1
+# CHECK-NEXT: [13]  - PdFPU2
+# CHECK-NEXT: [14]  - PdFPU3
+# CHECK-NEXT: [15]  - PdFPXBR
+# CHECK-NEXT: [16]  - PdMul
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]
+# CHECK-NEXT:  -     4.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]   Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movw	%sp, (%rax)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movw	%bp, (%rcx)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movw	%si, (%rdx)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movw	%di, (%rbx)
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     0123456
+
+# CHECK:      [0,0]     DeER ..   movw	%sp, (%rax)
+# CHECK-NEXT: [0,1]     D=eER..   movw	%bp, (%rcx)
+# CHECK-NEXT: [0,2]     D==eER.   movw	%si, (%rdx)
+# CHECK-NEXT: [0,3]     D===eER   movw	%di, (%rbx)
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       movw	%sp, (%rax)
+# CHECK-NEXT: 1.     1     2.0    0.0    0.0       movw	%bp, (%rcx)
+# CHECK-NEXT: 2.     1     3.0    0.0    0.0       movw	%si, (%rdx)
+# CHECK-NEXT: 3.     1     4.0    0.0    0.0       movw	%di, (%rbx)
+
+# CHECK:      [2] Code Region
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      400
+# CHECK-NEXT: Total Cycles:      403
+# CHECK-NEXT: Total uOps:        400
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    0.99
+# CHECK-NEXT: IPC:               0.99
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     0.50           *            movl	%esp, (%rax)
+# CHECK-NEXT:  1      1     0.50           *            movl	%ebp, (%rcx)
+# CHECK-NEXT:  1      1     0.50           *            movl	%esi, (%rdx)
+# CHECK-NEXT:  1      1     0.50           *            movl	%edi, (%rbx)
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1]   - PdBranch
+# CHECK-NEXT: [2]   - PdCount
+# CHECK-NEXT: [3]   - PdDiv
+# CHECK-NEXT: [4]   - PdEX0
+# CHECK-NEXT: [5]   - PdEX1
+# CHECK-NEXT: [6]   - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9]   - PdFPMMA
+# CHECK-NEXT: [10]  - PdFPSTO
+# CHECK-NEXT: [11]  - PdFPU0
+# CHECK-NEXT: [12]  - PdFPU1
+# CHECK-NEXT: [13]  - PdFPU2
+# CHECK-NEXT: [14]  - PdFPU3
+# CHECK-NEXT: [15]  - PdFPXBR
+# CHECK-NEXT: [16]  - PdMul
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]
+# CHECK-NEXT:  -     4.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]   Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movl	%esp, (%rax)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movl	%ebp, (%rcx)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movl	%esi, (%rdx)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movl	%edi, (%rbx)
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     0123456
+
+# CHECK:      [0,0]     DeER ..   movl	%esp, (%rax)
+# CHECK-NEXT: [0,1]     D=eER..   movl	%ebp, (%rcx)
+# CHECK-NEXT: [0,2]     D==eER.   movl	%esi, (%rdx)
+# CHECK-NEXT: [0,3]     D===eER   movl	%edi, (%rbx)
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       movl	%esp, (%rax)
+# CHECK-NEXT: 1.     1     2.0    0.0    0.0       movl	%ebp, (%rcx)
+# CHECK-NEXT: 2.     1     3.0    0.0    0.0       movl	%esi, (%rdx)
+# CHECK-NEXT: 3.     1     4.0    0.0    0.0       movl	%edi, (%rbx)
+
+# CHECK:      [3] Code Region
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      400
+# CHECK-NEXT: Total Cycles:      403
+# CHECK-NEXT: Total uOps:        400
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    0.99
+# CHECK-NEXT: IPC:               0.99
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     0.50           *            movq	%rsp, (%rax)
+# CHECK-NEXT:  1      1     0.50           *            movq	%rbp, (%rcx)
+# CHECK-NEXT:  1      1     0.50           *            movq	%rsi, (%rdx)
+# CHECK-NEXT:  1      1     0.50           *            movq	%rdi, (%rbx)
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1]   - PdBranch
+# CHECK-NEXT: [2]   - PdCount
+# CHECK-NEXT: [3]   - PdDiv
+# CHECK-NEXT: [4]   - PdEX0
+# CHECK-NEXT: [5]   - PdEX1
+# CHECK-NEXT: [6]   - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9]   - PdFPMMA
+# CHECK-NEXT: [10]  - PdFPSTO
+# CHECK-NEXT: [11]  - PdFPU0
+# CHECK-NEXT: [12]  - PdFPU1
+# CHECK-NEXT: [13]  - PdFPU2
+# CHECK-NEXT: [14]  - PdFPU3
+# CHECK-NEXT: [15]  - PdFPXBR
+# CHECK-NEXT: [16]  - PdMul
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]
+# CHECK-NEXT:  -     4.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]   Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movq	%rsp, (%rax)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movq	%rbp, (%rcx)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movq	%rsi, (%rdx)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     movq	%rdi, (%rbx)
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     0123456
+
+# CHECK:      [0,0]     DeER ..   movq	%rsp, (%rax)
+# CHECK-NEXT: [0,1]     D=eER..   movq	%rbp, (%rcx)
+# CHECK-NEXT: [0,2]     D==eER.   movq	%rsi, (%rdx)
+# CHECK-NEXT: [0,3]     D===eER   movq	%rdi, (%rbx)
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       movq	%rsp, (%rax)
+# CHECK-NEXT: 1.     1     2.0    0.0    0.0       movq	%rbp, (%rcx)
+# CHECK-NEXT: 2.     1     3.0    0.0    0.0       movq	%rsi, (%rdx)
+# CHECK-NEXT: 3.     1     4.0    0.0    0.0       movq	%rdi, (%rbx)
+
+# CHECK:      [4] Code Region
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      400
+# CHECK-NEXT: Total Cycles:      803
+# CHECK-NEXT: Total uOps:        400
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    0.50
+# CHECK-NEXT: IPC:               0.50
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      2     1.00           *      U     movd	%mm0, (%rax)
+# CHECK-NEXT:  1      2     1.00           *      U     movd	%mm1, (%rcx)
+# CHECK-NEXT:  1      2     1.00           *      U     movd	%mm2, (%rdx)
+# CHECK-NEXT:  1      2     1.00           *      U     movd	%mm3, (%rbx)
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1]   - PdBranch
+# CHECK-NEXT: [2]   - PdCount
+# CHECK-NEXT: [3]   - PdDiv
+# CHECK-NEXT: [4]   - PdEX0
+# CHECK-NEXT: [5]   - PdEX1
+# CHECK-NEXT: [6]   - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9]   - PdFPMMA
+# CHECK-NEXT: [10]  - PdFPSTO
+# CHECK-NEXT: [11]  - PdFPU0
+# CHECK-NEXT: [12]  - PdFPU1
+# CHECK-NEXT: [13]  - PdFPU2
+# CHECK-NEXT: [14]  - PdFPU3
+# CHECK-NEXT: [15]  - PdFPXBR
+# CHECK-NEXT: [16]  - PdMul
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]
+# CHECK-NEXT:  -     4.00    -      -      -      -      -      -      -      -      -      -      -     4.00    -     4.00    -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]   Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -     1.00    -     1.00    -      -      -      -     movd	%mm0, (%rax)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -     1.00    -     1.00    -      -      -      -     movd	%mm1, (%rcx)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -     1.00    -     1.00    -      -      -      -     movd	%mm2, (%rdx)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -     1.00    -     1.00    -      -      -      -     movd	%mm3, (%rbx)
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     0
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeeER.    .   movd	%mm0, (%rax)
+# CHECK-NEXT: [0,1]     D==eeER   .   movd	%mm1, (%rcx)
+# CHECK-NEXT: [0,2]     D====eeER .   movd	%mm2, (%rdx)
+# CHECK-NEXT: [0,3]     D======eeER   movd	%mm3, (%rbx)
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       movd	%mm0, (%rax)
+# CHECK-NEXT: 1.     1     3.0    0.0    0.0       movd	%mm1, (%rcx)
+# CHECK-NEXT: 2.     1     5.0    0.0    0.0       movd	%mm2, (%rdx)
+# CHECK-NEXT: 3.     1     7.0    0.0    0.0       movd	%mm3, (%rbx)
+
+# CHECK:      [5] Code Region
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      400
+# CHECK-NEXT: Total Cycles:      403
+# CHECK-NEXT: Total uOps:        400
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    0.99
+# CHECK-NEXT: IPC:               0.99
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     1.00           *            movaps	%xmm0, (%rax)
+# CHECK-NEXT:  1      1     1.00           *            movaps	%xmm1, (%rcx)
+# CHECK-NEXT:  1      1     1.00           *            movaps	%xmm2, (%rdx)
+# CHECK-NEXT:  1      1     1.00           *            movaps	%xmm3, (%rbx)
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1]   - PdBranch
+# CHECK-NEXT: [2]   - PdCount
+# CHECK-NEXT: [3]   - PdDiv
+# CHECK-NEXT: [4]   - PdEX0
+# CHECK-NEXT: [5]   - PdEX1
+# CHECK-NEXT: [6]   - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9]   - PdFPMMA
+# CHECK-NEXT: [10]  - PdFPSTO
+# CHECK-NEXT: [11]  - PdFPU0
+# CHECK-NEXT: [12]  - PdFPU1
+# CHECK-NEXT: [13]  - PdFPU2
+# CHECK-NEXT: [14]  - PdFPU3
+# CHECK-NEXT: [15]  - PdFPXBR
+# CHECK-NEXT: [16]  - PdMul
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]
+# CHECK-NEXT:  -     4.00    -      -      -      -      -      -      -      -      -      -      -     4.00    -     4.00    -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]   Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -     1.00    -     1.00    -      -      -      -     movaps	%xmm0, (%rax)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -     1.00    -     1.00    -      -      -      -     movaps	%xmm1, (%rcx)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -     1.00    -     1.00    -      -      -      -     movaps	%xmm2, (%rdx)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -     1.00    -     1.00    -      -      -      -     movaps	%xmm3, (%rbx)
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     0123456
+
+# CHECK:      [0,0]     DeER ..   movaps	%xmm0, (%rax)
+# CHECK-NEXT: [0,1]     D=eER..   movaps	%xmm1, (%rcx)
+# CHECK-NEXT: [0,2]     D==eER.   movaps	%xmm2, (%rdx)
+# CHECK-NEXT: [0,3]     D===eER   movaps	%xmm3, (%rbx)
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       movaps	%xmm0, (%rax)
+# CHECK-NEXT: 1.     1     2.0    0.0    0.0       movaps	%xmm1, (%rcx)
+# CHECK-NEXT: 2.     1     3.0    0.0    0.0       movaps	%xmm2, (%rdx)
+# CHECK-NEXT: 3.     1     4.0    0.0    0.0       movaps	%xmm3, (%rbx)
+
+# CHECK:      [6] Code Region
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      400
+# CHECK-NEXT: Total Cycles:      403
+# CHECK-NEXT: Total uOps:        1600
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    3.97
+# CHECK-NEXT: IPC:               0.99
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  4      1     1.00           *            vmovaps	%ymm0, (%rax)
+# CHECK-NEXT:  4      1     1.00           *            vmovaps	%ymm1, (%rcx)
+# CHECK-NEXT:  4      1     1.00           *            vmovaps	%ymm2, (%rdx)
+# CHECK-NEXT:  4      1     1.00           *            vmovaps	%ymm3, (%rbx)
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1]   - PdBranch
+# CHECK-NEXT: [2]   - PdCount
+# CHECK-NEXT: [3]   - PdDiv
+# CHECK-NEXT: [4]   - PdEX0
+# CHECK-NEXT: [5]   - PdEX1
+# CHECK-NEXT: [6]   - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9]   - PdFPMMA
+# CHECK-NEXT: [10]  - PdFPSTO
+# CHECK-NEXT: [11]  - PdFPU0
+# CHECK-NEXT: [12]  - PdFPU1
+# CHECK-NEXT: [13]  - PdFPU2
+# CHECK-NEXT: [14]  - PdFPU3
+# CHECK-NEXT: [15]  - PdFPXBR
+# CHECK-NEXT: [16]  - PdMul
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]
+# CHECK-NEXT:  -     4.00    -      -      -      -      -      -      -      -      -      -      -     4.00    -     4.00    -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1]    [2]    [3]    [4]    [5]    [6]    [7.0]  [7.1]  [8.0]  [8.1]  [9]    [10]   [11]   [12]   [13]   [14]   [15]   [16]   Instructions:
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -     1.00    -     1.00    -      -      -      -     vmovaps	%ymm0, (%rax)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -     1.00    -     1.00    -      -      -      -     vmovaps	%ymm1, (%rcx)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -     1.00    -     1.00    -      -      -      -     vmovaps	%ymm2, (%rdx)
+# CHECK-NEXT:  -     1.00    -      -      -      -      -      -      -      -      -      -      -     1.00    -     1.00    -      -      -      -     vmovaps	%ymm3, (%rbx)
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     0123456
+
+# CHECK:      [0,0]     DeER ..   vmovaps	%ymm0, (%rax)
+# CHECK-NEXT: [0,1]     .DeER..   vmovaps	%ymm1, (%rcx)
+# CHECK-NEXT: [0,2]     . DeER.   vmovaps	%ymm2, (%rdx)
+# CHECK-NEXT: [0,3]     .  DeER   vmovaps	%ymm3, (%rbx)
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       vmovaps	%ymm0, (%rax)
+# CHECK-NEXT: 1.     1     1.0    0.0    0.0       vmovaps	%ymm1, (%rcx)
+# CHECK-NEXT: 2.     1     1.0    0.0    0.0       vmovaps	%ymm2, (%rdx)
+# CHECK-NEXT: 3.     1     1.0    0.0    0.0       vmovaps	%ymm3, (%rbx)




More information about the llvm-commits mailing list