[llvm] r346404 - [NFC][BdVer2] Tests for load and store throughput (PR39465)
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 8 06:48:56 PST 2018
Author: lebedevri
Date: Thu Nov 8 06:48:56 2018
New Revision: 346404
URL: http://llvm.org/viewvc/llvm-project?rev=346404&view=rev
Log:
[NFC][BdVer2] Tests for load and store throughput (PR39465)
During review it was noted that, while it appears that
the Piledriver can do two [consecutive] loads per cycle,
it can only do one store per cycle. It was suggested
that the sched model models this incorrectly,
but it was decided to fix this afterwards.
These tests show that the two consecutive loads per cycle are
modelled correctly, and that the one store per cycle is not
modelled incorrectly either. Unless I'm missing the point.
https://bugs.llvm.org/show_bug.cgi?id=39465
Added:
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-throughput.s
llvm/trunk/test/tools/llvm-mca/X86/BdVer2/store-throughput.s
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-throughput.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-throughput.s?rev=346404&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-throughput.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-throughput.s Thu Nov 8 06:48:56 2018
@@ -0,0 +1,604 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=100 -timeline -timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN
+movb (%rax), %spl
+movb (%rcx), %bpl
+movb (%rdx), %sil
+movb (%rbx), %dil
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movw (%rax), %sp
+movw (%rcx), %bp
+movw (%rdx), %si
+movw (%rbx), %di
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movl (%rax), %esp
+movl (%rcx), %ebp
+movl (%rdx), %esi
+movl (%rbx), %edi
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movq (%rax), %rsp
+movq (%rcx), %rbp
+movq (%rdx), %rsi
+movq (%rbx), %rdi
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movd (%rax), %mm0
+movd (%rcx), %mm1
+movd (%rdx), %mm2
+movd (%rbx), %mm3
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movaps (%rax), %xmm0
+movaps (%rcx), %xmm1
+movaps (%rdx), %xmm2
+movaps (%rbx), %xmm3
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+vmovaps (%rax), %ymm0
+vmovaps (%rcx), %ymm1
+vmovaps (%rdx), %ymm2
+vmovaps (%rbx), %ymm3
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 207
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.93
+# CHECK-NEXT: IPC: 1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * movb (%rax), %spl
+# CHECK-NEXT: 1 5 0.50 * movb (%rcx), %bpl
+# CHECK-NEXT: 1 5 0.50 * movb (%rdx), %sil
+# CHECK-NEXT: 1 5 0.50 * movb (%rbx), %dil
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: 2.00 2.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movb (%rax), %spl
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movb (%rcx), %bpl
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movb (%rdx), %sil
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movb (%rbx), %dil
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeeER. movb (%rax), %spl
+# CHECK-NEXT: [0,1] DeeeeeER. movb (%rcx), %bpl
+# CHECK-NEXT: [0,2] D=eeeeeER movb (%rdx), %sil
+# CHECK-NEXT: [0,3] D=eeeeeER movb (%rbx), %dil
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb (%rax), %spl
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movb (%rcx), %bpl
+# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movb (%rdx), %sil
+# CHECK-NEXT: 3. 1 2.0 2.0 0.0 movb (%rbx), %dil
+
+# CHECK: [1] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 207
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.93
+# CHECK-NEXT: IPC: 1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * movw (%rax), %sp
+# CHECK-NEXT: 1 5 0.50 * movw (%rcx), %bp
+# CHECK-NEXT: 1 5 0.50 * movw (%rdx), %si
+# CHECK-NEXT: 1 5 0.50 * movw (%rbx), %di
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: 2.00 2.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movw (%rax), %sp
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movw (%rcx), %bp
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movw (%rdx), %si
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movw (%rbx), %di
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeeER. movw (%rax), %sp
+# CHECK-NEXT: [0,1] DeeeeeER. movw (%rcx), %bp
+# CHECK-NEXT: [0,2] D=eeeeeER movw (%rdx), %si
+# CHECK-NEXT: [0,3] D=eeeeeER movw (%rbx), %di
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw (%rax), %sp
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movw (%rcx), %bp
+# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movw (%rdx), %si
+# CHECK-NEXT: 3. 1 2.0 2.0 0.0 movw (%rbx), %di
+
+# CHECK: [2] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 207
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.93
+# CHECK-NEXT: IPC: 1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * movl (%rax), %esp
+# CHECK-NEXT: 1 5 0.50 * movl (%rcx), %ebp
+# CHECK-NEXT: 1 5 0.50 * movl (%rdx), %esi
+# CHECK-NEXT: 1 5 0.50 * movl (%rbx), %edi
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: 2.00 2.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movl (%rax), %esp
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movl (%rcx), %ebp
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movl (%rdx), %esi
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movl (%rbx), %edi
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeeER. movl (%rax), %esp
+# CHECK-NEXT: [0,1] DeeeeeER. movl (%rcx), %ebp
+# CHECK-NEXT: [0,2] D=eeeeeER movl (%rdx), %esi
+# CHECK-NEXT: [0,3] D=eeeeeER movl (%rbx), %edi
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl (%rax), %esp
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movl (%rcx), %ebp
+# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movl (%rdx), %esi
+# CHECK-NEXT: 3. 1 2.0 2.0 0.0 movl (%rbx), %edi
+
+# CHECK: [3] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 207
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.93
+# CHECK-NEXT: IPC: 1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * movq (%rax), %rsp
+# CHECK-NEXT: 1 5 0.50 * movq (%rcx), %rbp
+# CHECK-NEXT: 1 5 0.50 * movq (%rdx), %rsi
+# CHECK-NEXT: 1 5 0.50 * movq (%rbx), %rdi
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: 2.00 2.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movq (%rax), %rsp
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movq (%rcx), %rbp
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - movq (%rdx), %rsi
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movq (%rbx), %rdi
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeeER. movq (%rax), %rsp
+# CHECK-NEXT: [0,1] DeeeeeER. movq (%rcx), %rbp
+# CHECK-NEXT: [0,2] D=eeeeeER movq (%rdx), %rsi
+# CHECK-NEXT: [0,3] D=eeeeeER movq (%rbx), %rdi
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq (%rax), %rsp
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movq (%rcx), %rbp
+# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movq (%rdx), %rsi
+# CHECK-NEXT: 3. 1 2.0 2.0 0.0 movq (%rbx), %rdi
+
+# CHECK: [4] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 207
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.93
+# CHECK-NEXT: IPC: 1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * movd (%rax), %mm0
+# CHECK-NEXT: 1 5 0.50 * movd (%rcx), %mm1
+# CHECK-NEXT: 1 5 0.50 * movd (%rdx), %mm2
+# CHECK-NEXT: 1 5 0.50 * movd (%rbx), %mm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: 2.00 2.00 - - - - - - - - 2.00 2.00 - - 2.00 2.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - 1.00 - - - 1.00 - - - - movd (%rax), %mm0
+# CHECK-NEXT: 1.00 - - - - - - - - - 1.00 - - - 1.00 - - - - - movd (%rcx), %mm1
+# CHECK-NEXT: 1.00 - - - - - - - - - 1.00 - - - - 1.00 - - - - movd (%rdx), %mm2
+# CHECK-NEXT: - 1.00 - - - - - - - - - 1.00 - - 1.00 - - - - - movd (%rbx), %mm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeeER. movd (%rax), %mm0
+# CHECK-NEXT: [0,1] DeeeeeER. movd (%rcx), %mm1
+# CHECK-NEXT: [0,2] D=eeeeeER movd (%rdx), %mm2
+# CHECK-NEXT: [0,3] D=eeeeeER movd (%rbx), %mm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd (%rax), %mm0
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movd (%rcx), %mm1
+# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movd (%rdx), %mm2
+# CHECK-NEXT: 3. 1 2.0 2.0 0.0 movd (%rbx), %mm3
+
+# CHECK: [5] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 207
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.93
+# CHECK-NEXT: IPC: 1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 5 0.50 * movaps (%rax), %xmm0
+# CHECK-NEXT: 1 5 0.50 * movaps (%rcx), %xmm1
+# CHECK-NEXT: 1 5 0.50 * movaps (%rdx), %xmm2
+# CHECK-NEXT: 1 5 0.50 * movaps (%rbx), %xmm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: 2.00 2.00 - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - 1.00 - - - - - 1.00 - - - - movaps (%rax), %xmm0
+# CHECK-NEXT: 1.00 - - - - - - - 1.00 - - - - - 1.00 - - - - - movaps (%rcx), %xmm1
+# CHECK-NEXT: 1.00 - - - - - - - 1.00 - - - - - - 1.00 - - - - movaps (%rdx), %xmm2
+# CHECK-NEXT: - 1.00 - - - - - - - 1.00 - - - - 1.00 - - - - - movaps (%rbx), %xmm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeeER. movaps (%rax), %xmm0
+# CHECK-NEXT: [0,1] DeeeeeER. movaps (%rcx), %xmm1
+# CHECK-NEXT: [0,2] D=eeeeeER movaps (%rdx), %xmm2
+# CHECK-NEXT: [0,3] D=eeeeeER movaps (%rbx), %xmm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps (%rax), %xmm0
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 movaps (%rcx), %xmm1
+# CHECK-NEXT: 2. 1 2.0 2.0 0.0 movaps (%rdx), %xmm2
+# CHECK-NEXT: 3. 1 2.0 2.0 0.0 movaps (%rbx), %xmm3
+
+# CHECK: [6] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 207
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 3.86
+# CHECK-NEXT: IPC: 1.93
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 5 0.50 * vmovaps (%rax), %ymm0
+# CHECK-NEXT: 2 5 0.50 * vmovaps (%rcx), %ymm1
+# CHECK-NEXT: 2 5 0.50 * vmovaps (%rdx), %ymm2
+# CHECK-NEXT: 2 5 0.50 * vmovaps (%rbx), %ymm3
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: 2.00 2.00 - - - - - - 2.00 2.00 - - - - 2.00 2.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - 1.00 - - - - - 1.00 - - - - vmovaps (%rax), %ymm0
+# CHECK-NEXT: 1.00 - - - - - - - 1.00 - - - - - 1.00 - - - - - vmovaps (%rcx), %ymm1
+# CHECK-NEXT: 1.00 - - - - - - - 1.00 - - - - - - 1.00 - - - - vmovaps (%rdx), %ymm2
+# CHECK-NEXT: - 1.00 - - - - - - - 1.00 - - - - 1.00 - - - - - vmovaps (%rbx), %ymm3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeeER. vmovaps (%rax), %ymm0
+# CHECK-NEXT: [0,1] DeeeeeER. vmovaps (%rcx), %ymm1
+# CHECK-NEXT: [0,2] .DeeeeeER vmovaps (%rdx), %ymm2
+# CHECK-NEXT: [0,3] .DeeeeeER vmovaps (%rbx), %ymm3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rax), %ymm0
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 vmovaps (%rcx), %ymm1
+# CHECK-NEXT: 2. 1 1.0 1.0 0.0 vmovaps (%rdx), %ymm2
+# CHECK-NEXT: 3. 1 1.0 1.0 0.0 vmovaps (%rbx), %ymm3
Added: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/store-throughput.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BdVer2/store-throughput.s?rev=346404&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/store-throughput.s (added)
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/store-throughput.s Thu Nov 8 06:48:56 2018
@@ -0,0 +1,605 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=100 -timeline -timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN
+movb %spl, (%rax)
+movb %bpl, (%rcx)
+movb %sil, (%rdx)
+movb %dil, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movw %sp, (%rax)
+movw %bp, (%rcx)
+movw %si, (%rdx)
+movw %di, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movl %esp, (%rax)
+movl %ebp, (%rcx)
+movl %esi, (%rdx)
+movl %edi, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movq %rsp, (%rax)
+movq %rbp, (%rcx)
+movq %rsi, (%rdx)
+movq %rdi, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movd %mm0, (%rax)
+movd %mm1, (%rcx)
+movd %mm2, (%rdx)
+movd %mm3, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+movaps %xmm0, (%rax)
+movaps %xmm1, (%rcx)
+movaps %xmm2, (%rdx)
+movaps %xmm3, (%rbx)
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+vmovaps %ymm0, (%rax)
+vmovaps %ymm1, (%rcx)
+vmovaps %ymm2, (%rdx)
+vmovaps %ymm3, (%rbx)
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.99
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 * movb %spl, (%rax)
+# CHECK-NEXT: 1 1 0.50 * movb %bpl, (%rcx)
+# CHECK-NEXT: 1 1 0.50 * movb %sil, (%rdx)
+# CHECK-NEXT: 1 1 0.50 * movb %dil, (%rbx)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: - 4.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movb %spl, (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movb %bpl, (%rcx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movb %sil, (%rdx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movb %dil, (%rbx)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeER .. movb %spl, (%rax)
+# CHECK-NEXT: [0,1] D=eER.. movb %bpl, (%rcx)
+# CHECK-NEXT: [0,2] D==eER. movb %sil, (%rdx)
+# CHECK-NEXT: [0,3] D===eER movb %dil, (%rbx)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movb %spl, (%rax)
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movb %bpl, (%rcx)
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movb %sil, (%rdx)
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movb %dil, (%rbx)
+
+# CHECK: [1] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.99
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 * movw %sp, (%rax)
+# CHECK-NEXT: 1 1 0.50 * movw %bp, (%rcx)
+# CHECK-NEXT: 1 1 0.50 * movw %si, (%rdx)
+# CHECK-NEXT: 1 1 0.50 * movw %di, (%rbx)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: - 4.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movw %sp, (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movw %bp, (%rcx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movw %si, (%rdx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movw %di, (%rbx)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeER .. movw %sp, (%rax)
+# CHECK-NEXT: [0,1] D=eER.. movw %bp, (%rcx)
+# CHECK-NEXT: [0,2] D==eER. movw %si, (%rdx)
+# CHECK-NEXT: [0,3] D===eER movw %di, (%rbx)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movw %sp, (%rax)
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movw %bp, (%rcx)
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movw %si, (%rdx)
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movw %di, (%rbx)
+
+# CHECK: [2] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.99
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 * movl %esp, (%rax)
+# CHECK-NEXT: 1 1 0.50 * movl %ebp, (%rcx)
+# CHECK-NEXT: 1 1 0.50 * movl %esi, (%rdx)
+# CHECK-NEXT: 1 1 0.50 * movl %edi, (%rbx)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: - 4.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movl %esp, (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movl %ebp, (%rcx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movl %esi, (%rdx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movl %edi, (%rbx)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeER .. movl %esp, (%rax)
+# CHECK-NEXT: [0,1] D=eER.. movl %ebp, (%rcx)
+# CHECK-NEXT: [0,2] D==eER. movl %esi, (%rdx)
+# CHECK-NEXT: [0,3] D===eER movl %edi, (%rbx)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movl %esp, (%rax)
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movl %ebp, (%rcx)
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movl %esi, (%rdx)
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movl %edi, (%rbx)
+
+# CHECK: [3] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.99
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 * movq %rsp, (%rax)
+# CHECK-NEXT: 1 1 0.50 * movq %rbp, (%rcx)
+# CHECK-NEXT: 1 1 0.50 * movq %rsi, (%rdx)
+# CHECK-NEXT: 1 1 0.50 * movq %rdi, (%rbx)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: - 4.00 - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movq %rsp, (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movq %rbp, (%rcx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movq %rsi, (%rdx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - - - - - - - movq %rdi, (%rbx)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeER .. movq %rsp, (%rax)
+# CHECK-NEXT: [0,1] D=eER.. movq %rbp, (%rcx)
+# CHECK-NEXT: [0,2] D==eER. movq %rsi, (%rdx)
+# CHECK-NEXT: [0,3] D===eER movq %rdi, (%rbx)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movq %rsp, (%rax)
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movq %rbp, (%rcx)
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movq %rsi, (%rdx)
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movq %rdi, (%rbx)
+
+# CHECK: [4] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 803
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 2 1.00 * U movd %mm0, (%rax)
+# CHECK-NEXT: 1 2 1.00 * U movd %mm1, (%rcx)
+# CHECK-NEXT: 1 2 1.00 * U movd %mm2, (%rdx)
+# CHECK-NEXT: 1 2 1.00 * U movd %mm3, (%rbx)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: - 4.00 - - - - - - - - - - - 4.00 - 4.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movd %mm0, (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movd %mm1, (%rcx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movd %mm2, (%rdx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movd %mm3, (%rbx)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . movd %mm0, (%rax)
+# CHECK-NEXT: [0,1] D==eeER . movd %mm1, (%rcx)
+# CHECK-NEXT: [0,2] D====eeER . movd %mm2, (%rdx)
+# CHECK-NEXT: [0,3] D======eeER movd %mm3, (%rbx)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movd %mm0, (%rax)
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 movd %mm1, (%rcx)
+# CHECK-NEXT: 2. 1 5.0 0.0 0.0 movd %mm2, (%rdx)
+# CHECK-NEXT: 3. 1 7.0 0.0 0.0 movd %mm3, (%rbx)
+
+# CHECK: [5] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.99
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 1.00 * movaps %xmm0, (%rax)
+# CHECK-NEXT: 1 1 1.00 * movaps %xmm1, (%rcx)
+# CHECK-NEXT: 1 1 1.00 * movaps %xmm2, (%rdx)
+# CHECK-NEXT: 1 1 1.00 * movaps %xmm3, (%rbx)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: - 4.00 - - - - - - - - - - - 4.00 - 4.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movaps %xmm0, (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movaps %xmm1, (%rcx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movaps %xmm2, (%rdx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - movaps %xmm3, (%rbx)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeER .. movaps %xmm0, (%rax)
+# CHECK-NEXT: [0,1] D=eER.. movaps %xmm1, (%rcx)
+# CHECK-NEXT: [0,2] D==eER. movaps %xmm2, (%rdx)
+# CHECK-NEXT: [0,3] D===eER movaps %xmm3, (%rbx)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 movaps %xmm0, (%rax)
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 movaps %xmm1, (%rcx)
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 movaps %xmm2, (%rdx)
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 movaps %xmm3, (%rbx)
+
+# CHECK: [6] Code Region
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 1600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 3.97
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 4 1 1.00 * vmovaps %ymm0, (%rax)
+# CHECK-NEXT: 4 1 1.00 * vmovaps %ymm1, (%rcx)
+# CHECK-NEXT: 4 1 1.00 * vmovaps %ymm2, (%rdx)
+# CHECK-NEXT: 4 1 1.00 * vmovaps %ymm3, (%rbx)
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - PdAGLU01
+# CHECK-NEXT: [0.1] - PdAGLU01
+# CHECK-NEXT: [1] - PdBranch
+# CHECK-NEXT: [2] - PdCount
+# CHECK-NEXT: [3] - PdDiv
+# CHECK-NEXT: [4] - PdEX0
+# CHECK-NEXT: [5] - PdEX1
+# CHECK-NEXT: [6] - PdFPCVT
+# CHECK-NEXT: [7.0] - PdFPFMA
+# CHECK-NEXT: [7.1] - PdFPFMA
+# CHECK-NEXT: [8.0] - PdFPMAL
+# CHECK-NEXT: [8.1] - PdFPMAL
+# CHECK-NEXT: [9] - PdFPMMA
+# CHECK-NEXT: [10] - PdFPSTO
+# CHECK-NEXT: [11] - PdFPU0
+# CHECK-NEXT: [12] - PdFPU1
+# CHECK-NEXT: [13] - PdFPU2
+# CHECK-NEXT: [14] - PdFPU3
+# CHECK-NEXT: [15] - PdFPXBR
+# CHECK-NEXT: [16] - PdMul
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16]
+# CHECK-NEXT: - 4.00 - - - - - - - - - - - 4.00 - 4.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - vmovaps %ymm0, (%rax)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - vmovaps %ymm1, (%rcx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - vmovaps %ymm2, (%rdx)
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - vmovaps %ymm3, (%rbx)
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeER .. vmovaps %ymm0, (%rax)
+# CHECK-NEXT: [0,1] .DeER.. vmovaps %ymm1, (%rcx)
+# CHECK-NEXT: [0,2] . DeER. vmovaps %ymm2, (%rdx)
+# CHECK-NEXT: [0,3] . DeER vmovaps %ymm3, (%rbx)
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps %ymm0, (%rax)
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vmovaps %ymm1, (%rcx)
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 vmovaps %ymm2, (%rdx)
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 vmovaps %ymm3, (%rbx)
More information about the llvm-commits
mailing list