[llvm] 2c1f37b - [test] precommit sched model for tsv110, NFC
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 12 06:44:51 PDT 2023
Author: zhongyunde 00443407
Date: 2023-10-12T21:42:50+08:00
New Revision: 2c1f37b3b95233c9e8eb9d51b50ac37640919eba
URL: https://github.com/llvm/llvm-project/commit/2c1f37b3b95233c9e8eb9d51b50ac37640919eba
DIFF: https://github.com/llvm/llvm-project/commit/2c1f37b3b95233c9e8eb9d51b50ac37640919eba.diff
LOG: [test] precommit sched model for tsv110, NFC
Added:
llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-writeback.s
Modified:
Removed:
################################################################################
diff --git a/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-writeback.s b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-writeback.s
new file mode 100644
index 000000000000000..8a7022aaca05138
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-writeback.s
@@ -0,0 +1,3959 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=tsv110 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN G01
+ld1 { v1.1d }, [x27], #8
+ld1 { v1.2d }, [x27], #16
+ld1 { v1.2s }, [x27], #8
+ld1 { v1.4h }, [x27], #8
+ld1 { v1.4s }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G02
+ld1 { v1.8b }, [x27], #8
+ld1 { v1.8h }, [x27], #16
+ld1 { v1.16b }, [x27], #16
+ld1 { v1.1d }, [x27], x28
+ld1 { v1.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G03
+ld1 { v1.2s }, [x27], x28
+ld1 { v1.4h }, [x27], x28
+ld1 { v1.4s }, [x27], x28
+ld1 { v1.8b }, [x27], x28
+ld1 { v1.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G04
+ld1 { v1.16b }, [x27], x28
+ld1 { v1.1d, v2.1d }, [x27], #16
+ld1 { v1.2d, v2.2d }, [x27], #32
+ld1 { v1.2s, v2.2s }, [x27], #16
+ld1 { v1.4h, v2.4h }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G05
+ld1 { v1.4s, v2.4s }, [x27], #32
+ld1 { v1.8b, v2.8b }, [x27], #16
+ld1 { v1.8h, v2.8h }, [x27], #32
+ld1 { v1.16b, v2.16b }, [x27], #32
+ld1 { v1.1d, v2.1d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G06
+ld1 { v1.2d, v2.2d }, [x27], x28
+ld1 { v1.2s, v2.2s }, [x27], x28
+ld1 { v1.4h, v2.4h }, [x27], x28
+ld1 { v1.4s, v2.4s }, [x27], x28
+ld1 { v1.8b, v2.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G07
+ld1 { v1.8h, v2.8h }, [x27], x28
+ld1 { v1.16b, v2.16b }, [x27], x28
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G08
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G09
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G10
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G11
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G12
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G13
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+ld1 { v1.b }[0], [x27], #1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G14
+ld1 { v1.b }[8], [x27], #1
+ld1 { v1.b }[0], [x27], x28
+ld1 { v1.b }[8], [x27], x28
+ld1 { v1.h }[0], [x27], #2
+ld1 { v1.h }[4], [x27], #2
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G15
+ld1 { v1.h }[0], [x27], x28
+ld1 { v1.h }[4], [x27], x28
+ld1 { v1.s }[0], [x27], #4
+ld1 { v1.s }[0], [x27], x28
+ld1 { v1.d }[0], [x27], #8
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G16
+ld1 { v1.d }[0], [x27], x28
+ld1r { v1.1d }, [x27], #8
+ld1r { v1.2d }, [x27], #8
+ld1r { v1.2s }, [x27], #4
+ld1r { v1.4h }, [x27], #2
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G17
+ld1r { v1.4s }, [x27], #4
+ld1r { v1.8b }, [x27], #1
+ld1r { v1.8h }, [x27], #2
+ld1r { v1.16b }, [x27], #1
+ld1r { v1.1d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G18
+ld1r { v1.2d }, [x27], x28
+ld1r { v1.2s }, [x27], x28
+ld1r { v1.4h }, [x27], x28
+ld1r { v1.4s }, [x27], x28
+ld1r { v1.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G19
+ld1r { v1.8h }, [x27], x28
+ld1r { v1.16b }, [x27], x28
+ld2 { v1.2d, v2.2d }, [x27], #32
+ld2 { v1.2s, v2.2s }, [x27], #16
+ld2 { v1.4h, v2.4h }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G20
+ld2 { v1.4s, v2.4s }, [x27], #32
+ld2 { v1.8b, v2.8b }, [x27], #16
+ld2 { v1.8h, v2.8h }, [x27], #32
+ld2 { v1.16b, v2.16b }, [x27], #32
+ld2 { v1.2d, v2.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G21
+ld2 { v1.2s, v2.2s }, [x27], x28
+ld2 { v1.4h, v2.4h }, [x27], x28
+ld2 { v1.4s, v2.4s }, [x27], x28
+ld2 { v1.8b, v2.8b }, [x27], x28
+ld2 { v1.8h, v2.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G22
+ld2 { v1.16b, v2.16b }, [x27], x28
+ld2 { v1.b, v2.b }[0], [x27], #2
+ld2 { v1.b, v2.b }[8], [x27], #2
+ld2 { v1.b, v2.b }[0], [x27], x28
+ld2 { v1.b, v2.b }[8], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G23
+ld2 { v1.h, v2.h }[0], [x27], #4
+ld2 { v1.h, v2.h }[4], [x27], #4
+ld2 { v1.h, v2.h }[0], [x27], x28
+ld2 { v1.h, v2.h }[4], [x27], x28
+ld2 { v1.s, v2.s }[0], [x27], #8
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G24
+ld2 { v1.s, v2.s }[0], [x27], x28
+ld2 { v1.d, v2.d }[0], [x27], #16
+ld2 { v1.d, v2.d }[0], [x27], x28
+ld2r { v1.1d, v2.1d }, [x27], #16
+ld2r { v1.2d, v2.2d }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G25
+ld2r { v1.2s, v2.2s }, [x27], #8
+ld2r { v1.4h, v2.4h }, [x27], #4
+ld2r { v1.4s, v2.4s }, [x27], #8
+ld2r { v1.8b, v2.8b }, [x27], #2
+ld2r { v1.8h, v2.8h }, [x27], #4
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G26
+ld2r { v1.16b, v2.16b }, [x27], #2
+ld2r { v1.1d, v2.1d }, [x27], x28
+ld2r { v1.2d, v2.2d }, [x27], x28
+ld2r { v1.2s, v2.2s }, [x27], x28
+ld2r { v1.4h, v2.4h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G27
+ld2r { v1.4s, v2.4s }, [x27], x28
+ld2r { v1.8b, v2.8b }, [x27], x28
+ld2r { v1.8h, v2.8h }, [x27], x28
+ld2r { v1.16b, v2.16b }, [x27], x28
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G28
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G29
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G30
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G31
+ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G32
+ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G33
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G34
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G35
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G36
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G37
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G38
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G39
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G40
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G41
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G42
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G43
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G44
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+ldp s1, s2, [x27], #248
+ldp d1, d2, [x27], #496
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G45
+ldp q1, q2, [x27], #992
+ldp s1, s2, [x27, #248]!
+ldp d1, d2, [x27, #496]!
+ldp q1, q2, [x27, #992]!
+ldp w1, w2, [x27], #248
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G46
+ldp x1, x2, [x27], #496
+ldp w1, w2, [x27, #248]!
+ldp x1, x2, [x27, #496]!
+ldpsw x1, x2, [x27], #248
+ldpsw x1, x2, [x27, #248]!
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G47
+ldr b1, [x27], #254
+ldr h1, [x27], #254
+ldr s1, [x27], #254
+ldr d1, [x27], #254
+ldr q1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G48
+ldr b1, [x27, #254]!
+ldr h1, [x27, #254]!
+ldr s1, [x27, #254]!
+ldr d1, [x27, #254]!
+ldr q1, [x27, #254]!
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G49
+ldr w1, [x27], #254
+ldr x1, [x27], #254
+ldr w1, [x27, #254]!
+ldr x1, [x27, #254]!
+ldrb w1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G50
+ldrb w1, [x27, #254]!
+ldrh w1, [x27], #254
+ldrh w1, [x27, #254]!
+ldrsb w1, [x27], #254
+ldrsb x1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G51
+ldrsb w1, [x27, #254]!
+ldrsb x1, [x27, #254]!
+ldrsh w1, [x27], #254
+ldrsh x1, [x27], #254
+ldrsh w1, [x27, #254]!
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G52
+ldrsh x1, [x27, #254]!
+ldrsw x1, [x27], #254
+ldrsw x1, [x27, #254]!
+st1 { v1.1d }, [x27], #8
+st1 { v1.2d }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G53
+st1 { v1.2s }, [x27], #8
+st1 { v1.4h }, [x27], #8
+st1 { v1.4s }, [x27], #16
+st1 { v1.8b }, [x27], #8
+st1 { v1.8h }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G54
+st1 { v1.16b }, [x27], #16
+st1 { v1.1d }, [x27], x28
+st1 { v1.2d }, [x27], x28
+st1 { v1.2s }, [x27], x28
+st1 { v1.4h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G55
+st1 { v1.4s }, [x27], x28
+st1 { v1.8b }, [x27], x28
+st1 { v1.8h }, [x27], x28
+st1 { v1.16b }, [x27], x28
+st1 { v1.1d, v2.1d }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G56
+st1 { v1.2d, v2.2d }, [x27], #32
+st1 { v1.2s, v2.2s }, [x27], #16
+st1 { v1.4h, v2.4h }, [x27], #16
+st1 { v1.4s, v2.4s }, [x27], #32
+st1 { v1.8b, v2.8b }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G57
+st1 { v1.8h, v2.8h }, [x27], #32
+st1 { v1.16b, v2.16b }, [x27], #32
+st1 { v1.1d, v2.1d }, [x27], x28
+st1 { v1.2d, v2.2d }, [x27], x28
+st1 { v1.2s, v2.2s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G58
+st1 { v1.4h, v2.4h }, [x27], x28
+st1 { v1.4s, v2.4s }, [x27], x28
+st1 { v1.8b, v2.8b }, [x27], x28
+st1 { v1.8h, v2.8h }, [x27], x28
+st1 { v1.16b, v2.16b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G59
+st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G60
+st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G61
+st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G62
+st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G63
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G64
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G65
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+st1 { v1.b }[0], [x27], #1
+st1 { v1.b }[8], [x27], #1
+st1 { v1.b }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G66
+st1 { v1.b }[8], [x27], x28
+st1 { v1.h }[0], [x27], #2
+st1 { v1.h }[4], [x27], #2
+st1 { v1.h }[0], [x27], x28
+st1 { v1.h }[4], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G67
+st1 { v1.s }[0], [x27], #4
+st1 { v1.s }[0], [x27], x28
+st1 { v1.d }[0], [x27], #8
+st1 { v1.d }[0], [x27], x28
+st2 { v1.2d, v2.2d }, [x27], #32
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G68
+st2 { v1.2s, v2.2s }, [x27], #16
+st2 { v1.4h, v2.4h }, [x27], #16
+st2 { v1.4s, v2.4s }, [x27], #32
+st2 { v1.8b, v2.8b }, [x27], #16
+st2 { v1.8h, v2.8h }, [x27], #32
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G69
+st2 { v1.16b, v2.16b }, [x27], #32
+st2 { v1.2d, v2.2d }, [x27], x28
+st2 { v1.2s, v2.2s }, [x27], x28
+st2 { v1.4h, v2.4h }, [x27], x28
+st2 { v1.4s, v2.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G70
+st2 { v1.8b, v2.8b }, [x27], x28
+st2 { v1.8h, v2.8h }, [x27], x28
+st2 { v1.16b, v2.16b }, [x27], x28
+st2 { v1.b, v2.b }[0], [x27], #2
+st2 { v1.b, v2.b }[8], [x27], #2
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G71
+st2 { v1.b, v2.b }[0], [x27], x28
+st2 { v1.b, v2.b }[8], [x27], x28
+st2 { v1.h, v2.h }[0], [x27], #4
+st2 { v1.h, v2.h }[4], [x27], #4
+st2 { v1.h, v2.h }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G72
+st2 { v1.h, v2.h }[4], [x27], x28
+st2 { v1.s, v2.s }[0], [x27], #8
+st2 { v1.s, v2.s }[0], [x27], x28
+st2 { v1.d, v2.d }[0], [x27], #16
+st2 { v1.d, v2.d }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G73
+st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G74
+st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G75
+st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G76
+st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G77
+st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G78
+st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G79
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G80
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G81
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G82
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G83
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G84
+stp s1, s2, [x27], #248
+stp d1, d2, [x27], #496
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G85
+stp q1, q2, [x27], #992
+stp s1, s2, [x27, #248]!
+stp d1, d2, [x27, #496]!
+stp q1, q2, [x27, #992]!
+stp w1, w2, [x27], #248
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G86
+stp x1, x2, [x27], #496
+stp w1, w2, [x27, #248]!
+stp x1, x2, [x27, #496]!
+str b1, [x27], #254
+str h1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G87
+str s1, [x27], #254
+str d1, [x27], #254
+str q1, [x27], #254
+str b1, [x27, #254]!
+str h1, [x27, #254]!
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G88
+str s1, [x27, #254]!
+str d1, [x27, #254]!
+str q1, [x27, #254]!
+str w1, [x27], #254
+str x1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G89
+str w1, [x27, #254]!
+str x1, [x27, #254]!
+strb w1, [x27], #254
+strb w1, [x27, #254]!
+strh w1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G90
+strh w1, [x27, #254]!
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G91
+ldr x1, [x27], #254
+ldr x2, [x1], #254
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - G01
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,3] .D==============eeeeeER . . ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,4] . D==================eeeeeER ld1 { v1.4s }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 4. 1 19.0 0.0 0.0 ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 1 10.2 0.2 0.0 <total>
+
+# CHECK: [1] Code Region - G02
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,3] .D==============eeeeeER . . ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,4] . D==================eeeeeER ld1 { v1.2d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 4. 1 19.0 0.0 0.0 ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 1 10.2 0.2 0.0 <total>
+
+# CHECK: [2] Code Region - G03
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,3] .D==============eeeeeER . . ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,4] . D==================eeeeeER ld1 { v1.8h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 19.0 0.0 0.0 ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 1 10.2 0.2 0.0 <total>
+
+# CHECK: [3] Code Region - G04
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,3] .D==============eeeeeER . . ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,4] . D==================eeeeeER ld1 { v1.4h, v2.4h }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 4. 1 19.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 1 10.2 0.2 0.0 <total>
+
+# CHECK: [4] Code Region - G05
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,3] .D==============eeeeeER . . ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,4] . D==================eeeeeER ld1 { v1.1d, v2.1d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 4. 1 19.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 1 10.2 0.2 0.0 <total>
+
+# CHECK: [5] Code Region - G06
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,3] .D==============eeeeeER . . ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,4] . D==================eeeeeER ld1 { v1.8b, v2.8b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 4. 1 19.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 1 10.2 0.2 0.0 <total>
+
+# CHECK: [6] Code Region - G07
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2803
+# CHECK-NEXT: Total uOps: 1600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.18
+# CHECK-NEXT: Block RThroughput: 5.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,2] .D=========eeeeeeER . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,3] . D==============eeeeeeER. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,4] . D===================eeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 4. 1 20.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 1 10.4 0.2 0.0 <total>
+
+# CHECK: [7] Code Region - G08
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,1] .D=====eeeeeeER. . . . . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,2] . D==========eeeeeeER . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,3] . D===============eeeeeeER . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,4] . D====================eeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 2. 1 11.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 4. 1 21.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 1 11.0 0.2 0.0 <total>
+
+# CHECK: [8] Code Region - G09
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,1] .D=====eeeeeeER. . . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,2] . D==========eeeeeeER . . . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . D===============eeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,4] . D====================eeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 2. 1 11.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 4. 1 21.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 1 11.0 0.2 0.0 <total>
+
+# CHECK: [9] Code Region - G10
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1800
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.60
+# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: Block RThroughput: 6.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] .D=====eeeeeeER. . . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,2] . D==========eeeeeeER . . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . D===============eeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,4] . D====================eeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 2. 1 11.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 4. 1 21.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 1 11.0 0.2 0.0 <total>
+
+# CHECK: [10] Code Region - G11
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,1] .D=====eeeeeeER. . . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,2] . D==========eeeeeeER . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,3] . D===============eeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,4] . D====================eeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 2. 1 11.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 4. 1 21.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 1 11.0 0.2 0.0 <total>
+
+# CHECK: [11] Code Region - G12
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,1] .D=====eeeeeeER. . . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,2] . D==========eeeeeeER . . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . D===============eeeeeeER . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,4] . D====================eeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 2. 1 11.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 4. 1 21.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 1 11.0 0.2 0.0 <total>
+
+# CHECK: [12] Code Region - G13
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3103
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.48
+# CHECK-NEXT: IPC: 0.16
+# CHECK-NEXT: Block RThroughput: 4.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,1] .D=====eeeeeeER. . . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,2] . D==========eeeeeeER . . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . D===============eeeeeeER . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,4] . D====================eeeeeeeER ld1 { v1.b }[0], [x27], #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 2. 1 11.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 16.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 4. 1 21.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 1 11.0 0.2 0.0 <total>
+
+# CHECK: [13] Code Region - G14
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 3.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,1] .D======eeeeeeeER . . . . . ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,2] . D============eeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] . D==================eeeeeeeER . . ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,4] . D========================eeeeeeeER ld1 { v1.h }[4], [x27], #2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [14] Code Region - G15
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 3.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,1] .D======eeeeeeeER . . . . . ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,2] . D============eeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,3] . D==================eeeeeeeER . . ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D========================eeeeeeeER ld1 { v1.d }[0], [x27], #8
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [15] Code Region - G16
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 3.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,1] .D======eeeeeeeER . . . . . ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,2] . D============eeeeeeeER . . . . ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: [0,3] . D==================eeeeeeeER . . ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: [0,4] . D========================eeeeeeeER ld1r { v1.4h }, [x27], #2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [16] Code Region - G17
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 3.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: [0,1] .D======eeeeeeeER . . . . . ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: [0,2] . D============eeeeeeeER . . . . ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: [0,3] . D==================eeeeeeeER . . ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: [0,4] . D========================eeeeeeeER ld1r { v1.1d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [17] Code Region - G18
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 3.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,1] .D======eeeeeeeER . . . . . ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,2] . D============eeeeeeeER . . . . ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . D==================eeeeeeeER . . ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,4] . D========================eeeeeeeER ld1r { v1.8b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [18] Code Region - G19
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total uOps: 1800
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.51
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 4.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,1] .D======eeeeeeeER . . . . . ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,2] . D============eeeeeeeER . . . . ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,3] . D==================eeeeeeeER . . ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,4] . D========================eeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [19] Code Region - G20
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] .D======eeeeeeeER . . . . . ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,2] . D============eeeeeeeER . . . . ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,3] . D==================eeeeeeeER . . ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,4] . D========================eeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [20] Code Region - G21
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,1] .D======eeeeeeeER . . . . . ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,2] . D============eeeeeeeER . . . . ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . D==================eeeeeeeER . . ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,4] . D========================eeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [21] Code Region - G22
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,1] .D======eeeeeeeER . . . . . ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,2] . D============eeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,3] . D==================eeeeeeeER . . ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D========================eeeeeeeER ld2 { v1.b, v2.b }[8], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [22] Code Region - G23
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,1] .D======eeeeeeeER . . . . . ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,2] . D============eeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D==================eeeeeeeER . . ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,4] . D========================eeeeeeeER ld2 { v1.s, v2.s }[0], [x27], #8
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [23] Code Region - G24
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] .D======eeeeeeeER . . . . . ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,2] . D============eeeeeeeER . . . . ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D==================eeeeeeeER . . ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,4] . D========================eeeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [24] Code Region - G25
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: [0,1] .D======eeeeeeeER . . . . . ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: [0,2] . D============eeeeeeeER . . . . ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: [0,3] . D==================eeeeeeeER . . ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: [0,4] . D========================eeeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [25] Code Region - G26
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3503
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234567
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: [0,1] .D======eeeeeeeER . . . . . ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,2] . D============eeeeeeeER . . . . ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,3] . D==================eeeeeeeER . . ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,4] . D========================eeeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [26] Code Region - G27
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3603
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.58
+# CHECK-NEXT: IPC: 0.14
+# CHECK-NEXT: Block RThroughput: 5.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . . . . . . ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,1] .D======eeeeeeeER . . . . . ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,2] . D============eeeeeeeER . . . . ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,3] . D==================eeeeeeeER . . ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,4] . D========================eeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [27] Code Region - G28
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,1] . D======eeeeeeeeER . . . . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,2] . D============eeeeeeeeER . . . . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,3] . .D==================eeeeeeeeER. . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,4] . . D========================eeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [28] Code Region - G29
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,1] . D======eeeeeeeeER . . . . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,2] . D============eeeeeeeeER . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . .D==================eeeeeeeeER. . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,4] . . D========================eeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [29] Code Region - G30
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . D======eeeeeeeeER . . . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,2] . D============eeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . .D==================eeeeeeeeER. . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,4] . . D========================eeeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [30] Code Region - G31
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] . D======eeeeeeeeER . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,2] . D============eeeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,3] . .D==================eeeeeeeeER. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,4] . . D========================eeeeeeeeER ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [31] Code Region - G32
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] . D======eeeeeeeeER . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,2] . D============eeeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] . .D==================eeeeeeeeER. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,4] . . D========================eeeeeeeeER ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [32] Code Region - G33
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] . D======eeeeeeeeER . . . . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: [0,2] . D============eeeeeeeeER . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: [0,3] . .D==================eeeeeeeeER. . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: [0,4] . . D========================eeeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [33] Code Region - G34
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: [0,1] . D======eeeeeeeeER . . . . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: [0,2] . D============eeeeeeeeER . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: [0,3] . .D==================eeeeeeeeER. . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,4] . . D========================eeeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [34] Code Region - G35
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] . D======eeeeeeeeER . . . . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,2] . D============eeeeeeeeER . . . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . .D==================eeeeeeeeER. . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,4] . . D========================eeeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [35] Code Region - G36
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4803
+# CHECK-NEXT: Total uOps: 4100
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.85
+# CHECK-NEXT: IPC: 0.10
+# CHECK-NEXT: Block RThroughput: 10.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] . D======eeeeeeeeeeER . . . . . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,2] . D=============eeeeeeeeeeER . . . . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,3] . . D====================eeeeeeeeeeER . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,4] . . .D===========================eeeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 2. 1 14.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 3. 1 21.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 4. 1 28.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 1 14.2 0.2 0.0 <total>
+
+# CHECK: [36] Code Region - G37
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 5003
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.90
+# CHECK-NEXT: IPC: 0.10
+# CHECK-NEXT: Block RThroughput: 11.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeeER . . . . . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,1] . D=======eeeeeeeeeeER . . . . . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,2] . .D==============eeeeeeeeeeER . . . . . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,3] . . D=====================eeeeeeeeeeER . . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,4] . . . D============================eeeeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 3. 1 22.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 4. 1 29.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 1 15.0 0.2 0.0 <total>
+
+# CHECK: [37] Code Region - G38
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 5003
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.90
+# CHECK-NEXT: IPC: 0.10
+# CHECK-NEXT: Block RThroughput: 11.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeeeER . . . . . . . . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,1] . D=======eeeeeeeeeeER . . . . . . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,2] . .D==============eeeeeeeeeeER . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . . D=====================eeeeeeeeeeER . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,4] . . . D============================eeeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 22.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 4. 1 29.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 1 15.0 0.2 0.0 <total>
+
+# CHECK: [38] Code Region - G39
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,1] . D======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,2] . D============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,3] . .D==================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,4] . . D========================eeeeeeeeER ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [39] Code Region - G40
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,1] . D======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,2] . D============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,3] . .D==================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,4] . . D========================eeeeeeeeER ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [40] Code Region - G41
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,1] . D======eeeeeeeeER . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,2] . D============eeeeeeeeER . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,3] . .D==================eeeeeeeeER. . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: [0,4] . . D========================eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [41] Code Region - G42
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: [0,1] . D======eeeeeeeeER . . . . . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: [0,2] . D============eeeeeeeeER . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: [0,3] . .D==================eeeeeeeeER. . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: [0,4] . . D========================eeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [42] Code Region - G43
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,1] . D======eeeeeeeeER . . . . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,2] . D============eeeeeeeeER . . . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . .D==================eeeeeeeeER. . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,4] . . D========================eeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 1 13.0 0.2 0.0 <total>
+
+# CHECK: [43] Code Region - G44
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3403
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.71
+# CHECK-NEXT: IPC: 0.15
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . . . .. ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,1] . D======eeeeeeeeER . . . .. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,2] . D============eeeeeeeeER . .. ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . .D==================eeeeeER .. ldp s1, s2, [x27], #248
+# CHECK-NEXT: [0,4] . . D======================eeeeeER ldp d1, d2, [x27], #496
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ldp s1, s2, [x27], #248
+# CHECK-NEXT: 4. 1 23.0 0.0 0.0 ldp d1, d2, [x27], #496
+# CHECK-NEXT: 1 12.6 0.2 0.0 <total>
+
+# CHECK: [44] Code Region - G45
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2403
+# CHECK-NEXT: Total uOps: 1600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeER . . . .. ldp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] .D====eeeeeER . . .. ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,2] . D========eeeeeER . .. ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,3] . D============eeeeeER .. ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,4] . D================eeeeER ldp w1, w2, [x27], #248
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: 3. 1 13.0 0.0 0.0 ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ldp w1, w2, [x27], #248
+# CHECK-NEXT: 1 9.0 0.2 0.0 <total>
+
+# CHECK: [45] Code Region - G46
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2003
+# CHECK-NEXT: Total uOps: 1800
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.90
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 4.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeER . . . . ldp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] .D===eeeeER . . . ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,2] . D======eeeeER. . . ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,3] . D=========eeeeER . . ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: [0,4] . D============eeeeER ldpsw x1, x2, [x27, #248]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: 3. 1 10.0 0.0 0.0 ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: 4. 1 13.0 0.0 0.0 ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: 1 7.0 0.2 0.0 <total>
+
+# CHECK: [46] Code Region - G47
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . ldr b1, [x27], #254
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . ldr h1, [x27], #254
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . ldr s1, [x27], #254
+# CHECK-NEXT: [0,3] .D==============eeeeeER . . ldr d1, [x27], #254
+# CHECK-NEXT: [0,4] . D==================eeeeeER ldr q1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 ldr h1, [x27], #254
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 ldr s1, [x27], #254
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 ldr d1, [x27], #254
+# CHECK-NEXT: 4. 1 19.0 0.0 0.0 ldr q1, [x27], #254
+# CHECK-NEXT: 1 10.2 0.2 0.0 <total>
+
+# CHECK: [47] Code Region - G48
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . ldr b1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . ldr h1, [x27, #254]!
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . ldr s1, [x27, #254]!
+# CHECK-NEXT: [0,3] .D==============eeeeeER . . ldr d1, [x27, #254]!
+# CHECK-NEXT: [0,4] . D==================eeeeeER ldr q1, [x27, #254]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27, #254]!
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 ldr h1, [x27, #254]!
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 ldr s1, [x27, #254]!
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 ldr d1, [x27, #254]!
+# CHECK-NEXT: 4. 1 19.0 0.0 0.0 ldr q1, [x27, #254]!
+# CHECK-NEXT: 1 10.2 0.2 0.0 <total>
+
+# CHECK: [48] Code Region - G49
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeER . . . . ldr w1, [x27], #254
+# CHECK-NEXT: [0,1] D====eeeeER . . . ldr x1, [x27], #254
+# CHECK-NEXT: [0,2] .D=======eeeeER. . . ldr w1, [x27, #254]!
+# CHECK-NEXT: [0,3] .D===========eeeeER . . ldr x1, [x27, #254]!
+# CHECK-NEXT: [0,4] . D==============eeeeER ldrb w1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr w1, [x27], #254
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ldr w1, [x27, #254]!
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 ldr x1, [x27, #254]!
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ldrb w1, [x27], #254
+# CHECK-NEXT: 1 8.2 0.2 0.0 <total>
+
+# CHECK: [49] Code Region - G50
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeER . . . . ldrb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D====eeeeER . . . ldrh w1, [x27], #254
+# CHECK-NEXT: [0,2] .D=======eeeeER. . . ldrh w1, [x27, #254]!
+# CHECK-NEXT: [0,3] .D===========eeeeER . . ldrsb w1, [x27], #254
+# CHECK-NEXT: [0,4] . D==============eeeeER ldrsb x1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 ldrh w1, [x27], #254
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ldrh w1, [x27, #254]!
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 ldrsb w1, [x27], #254
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ldrsb x1, [x27], #254
+# CHECK-NEXT: 1 8.2 0.2 0.0 <total>
+
+# CHECK: [50] Code Region - G51
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeER . . . . ldrsb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D====eeeeER . . . ldrsb x1, [x27, #254]!
+# CHECK-NEXT: [0,2] .D=======eeeeER. . . ldrsh w1, [x27], #254
+# CHECK-NEXT: [0,3] .D===========eeeeER . . ldrsh x1, [x27], #254
+# CHECK-NEXT: [0,4] . D==============eeeeER ldrsh w1, [x27, #254]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 ldrsb x1, [x27, #254]!
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ldrsh w1, [x27], #254
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 ldrsh x1, [x27], #254
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ldrsh w1, [x27, #254]!
+# CHECK-NEXT: 1 8.2 0.2 0.0 <total>
+
+# CHECK: [51] Code Region - G52
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1803
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.55
+# CHECK-NEXT: IPC: 0.28
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0
+
+# CHECK: [0,0] DeeeeER . . . ldrsh x1, [x27, #254]!
+# CHECK-NEXT: [0,1] D====eeeeER . . ldrsw x1, [x27], #254
+# CHECK-NEXT: [0,2] .D=======eeeeER. . ldrsw x1, [x27, #254]!
+# CHECK-NEXT: [0,3] .D===========eeeER . st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,4] . D=============eeeER st1 { v1.2d }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsh x1, [x27, #254]!
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 ldrsw x1, [x27], #254
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ldrsw x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 4. 1 14.0 0.0 0.0 st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 1 8.0 0.2 0.0 <total>
+
+# CHECK: [52] Code Region - G53
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234567
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . . st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,1] D===eeeER . . . st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,2] .D=====eeeER . . st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,3] .D========eeeER. . st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,4] . D==========eeeER st1 { v1.8h }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 1 6.2 0.2 0.0 <total>
+
+# CHECK: [53] Code Region - G54
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234567
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . . st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,1] D===eeeER . . . st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,2] .D=====eeeER . . st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,3] .D========eeeER. . st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,4] . D==========eeeER st1 { v1.4h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 1 6.2 0.2 0.0 <total>
+
+# CHECK: [54] Code Region - G55
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1603
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . . st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D===eeeER . . . st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,2] .D=====eeeER . . st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,3] .D========eeeER. . st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,4] . D==========eeeeER st1 { v1.1d, v2.1d }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 1 6.2 0.2 0.0 <total>
+
+# CHECK: [55] Code Region - G56
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeER . . . . st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,1] D====eeeeER . . . st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,2] .D=======eeeeER. . . st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,3] .D===========eeeeER . . st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,4] . D==============eeeeER st1 { v1.8b, v2.8b }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 1 8.2 0.2 0.0 <total>
+
+# CHECK: [56] Code Region - G57
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeER . . . . st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,1] D====eeeeER . . . st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,2] .D=======eeeeER. . . st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,3] .D===========eeeeER . . st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,4] . D==============eeeeER st1 { v1.2s, v2.2s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 1 8.2 0.2 0.0 <total>
+
+# CHECK: [57] Code Region - G58
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeER . . . . st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,1] D====eeeeER . . . st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,2] .D=======eeeeER. . . st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,3] .D===========eeeeER . . st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,4] . D==============eeeeER st1 { v1.16b, v2.16b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 1 8.2 0.2 0.0 <total>
+
+# CHECK: [58] Code Region - G59
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,3] .D==============eeeeeER . . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,4] . D==================eeeeeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 4. 1 19.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 1 10.2 0.2 0.0 <total>
+
+# CHECK: [59] Code Region - G60
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,3] .D==============eeeeeER . . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,4] . D==================eeeeeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 4. 1 19.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 1 10.2 0.2 0.0 <total>
+
+# CHECK: [60] Code Region - G61
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,3] .D==============eeeeeER . . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,4] . D==================eeeeeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 19.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 1 10.2 0.2 0.0 <total>
+
+# CHECK: [61] Code Region - G62
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2903
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.34
+# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . .. st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eeeeeeER . . . .. st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,2] .D==========eeeeeeER. . .. st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,3] .D================eeeeeeER .. st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,4] . D=====================eeeeeeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 2. 1 11.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 4. 1 22.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 1 11.4 0.2 0.0 <total>
+
+# CHECK: [62] Code Region - G63
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,2] .D===========eeeeeeER . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,3] .D=================eeeeeeER . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,4] . D======================eeeeeeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 2. 1 12.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 3. 1 18.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 4. 1 23.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 1 12.2 0.2 0.0 <total>
+
+# CHECK: [63] Code Region - G64
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,2] .D===========eeeeeeER . . . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,3] .D=================eeeeeeER . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,4] . D======================eeeeeeER st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 2. 1 12.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 18.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 4. 1 23.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1 12.2 0.2 0.0 <total>
+
+# CHECK: [64] Code Region - G65
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2103
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.48
+# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeER . . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D======eeeeeeER. . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,2] .D===========eeeER . . st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,3] .D==============eeeER . st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,4] . D================eeeER st1 { v1.b }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 2. 1 12.0 0.0 0.0 st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 1 10.4 0.2 0.0 <total>
+
+# CHECK: [65] Code Region - G66
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.67
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234567
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . . st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] D===eeeER . . . st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,2] .D=====eeeER . . st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,3] .D========eeeER. . st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D==========eeeER st1 { v1.h }[4], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 1 6.2 0.2 0.0 <total>
+
+# CHECK: [66] Code Region - G67
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1603
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.62
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeER . . . st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,1] D===eeeER . . . st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,2] .D=====eeeER . . st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,3] .D========eeeER. . st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D==========eeeeER st2 { v1.2d, v2.2d }, [x27], #32
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 1. 1 4.0 0.0 0.0 st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 2. 1 6.0 0.0 0.0 st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 1 6.2 0.2 0.0 <total>
+
+# CHECK: [67] Code Region - G68
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,1] D====eeeeER . . . st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,2] .D=======eeeeER. . . st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,3] .D===========eeeeER . . st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,4] . D==============eeeeER st2 { v1.8h, v2.8h }, [x27], #32
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 1 8.2 0.2 0.0 <total>
+
+# CHECK: [68] Code Region - G69
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,1] D====eeeeER . . . st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,2] .D=======eeeeER. . . st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,3] .D===========eeeeER . . st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,4] . D==============eeeeER st2 { v1.4s, v2.4s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 1 8.2 0.2 0.0 <total>
+
+# CHECK: [69] Code Region - G70
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,1] D====eeeeER . . . st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,2] .D=======eeeeER. . . st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,3] .D===========eeeeER . . st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,4] . D==============eeeeER st2 { v1.b, v2.b }[8], [x27], #2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 1 8.2 0.2 0.0 <total>
+
+# CHECK: [70] Code Region - G71
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] D====eeeeER . . . st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,2] .D=======eeeeER. . . st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,3] .D===========eeeeER . . st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,4] . D==============eeeeER st2 { v1.h, v2.h }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 1 8.2 0.2 0.0 <total>
+
+# CHECK: [71] Code Region - G72
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeER . . . . st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] D====eeeeER . . . st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,2] .D=======eeeeER. . . st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D===========eeeeER . . st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,4] . D==============eeeeER st2 { v1.d, v2.d }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 12.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 1 8.2 0.2 0.0 <total>
+
+# CHECK: [72] Code Region - G73
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 300
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234567
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,1] D=====eeeeeER . . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,2] .D=========eeeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 1 5.7 0.3 0.0 <total>
+
+# CHECK: [73] Code Region - G74
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,3] .D==============eeeeeER . . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,4] . D==================eeeeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 4. 1 19.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 1 10.2 0.2 0.0 <total>
+
+# CHECK: [74] Code Region - G75
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,3] .D==============eeeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,4] . D==================eeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 19.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 1 10.2 0.2 0.0 <total>
+
+# CHECK: [75] Code Region - G76
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,3] .D==============eeeeeER . . st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D==================eeeeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 4. 1 19.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 1 10.2 0.2 0.0 <total>
+
+# CHECK: [76] Code Region - G77
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.40
+# CHECK-NEXT: IPC: 0.20
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234567
+
+# CHECK: [0,0] DeeeeeER . . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D==============eeeeeER . . st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,4] . D==================eeeeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 4. 1 19.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 1 10.2 0.2 0.0 <total>
+
+# CHECK: [77] Code Region - G78
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3103
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.32
+# CHECK-NEXT: IPC: 0.16
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,2] .D=========eeeeeER . . . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D==============eeeeeeeeER . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,4] . D=====================eeeeeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 2. 1 10.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 3. 1 15.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 4. 1 22.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 1 10.8 0.2 0.0 <total>
+
+# CHECK: [78] Code Region - G79
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.25
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 2. 1 16.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 3. 1 24.0 0.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 4. 1 31.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 1 16.2 0.2 0.0 <total>
+
+# CHECK: [79] Code Region - G80
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.25
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 2. 1 16.0 0.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 24.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 4. 1 31.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1 16.2 0.2 0.0 <total>
+
+# CHECK: [80] Code Region - G81
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3403
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.29
+# CHECK-NEXT: IPC: 0.15
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . . . .. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . .. st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,2] .D===============eeeeeeER. . .. st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,3] .D=====================eeeeeeER .. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,4] . D==========================eeeeeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 2. 1 16.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 3. 1 22.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 4. 1 27.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 1 15.0 0.2 0.0 <total>
+
+# CHECK: [81] Code Region - G82
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3003
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,2] .D===========eeeeeeER . . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,3] .D=================eeeeeeER . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D======================eeeeeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 2. 1 12.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 3. 1 18.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 4. 1 23.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 1 12.2 0.2 0.0 <total>
+
+# CHECK: [82] Code Region - G83
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 2403
+# CHECK-NEXT: Total uOps: 800
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.17
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeeER . . . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,1] D======eeeeeeER. . .. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,2] .D===========eeeeeeER .. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,3] .D=================eeeeeeER st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 1. 1 7.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 2. 1 12.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 3. 1 18.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 1 9.5 0.3 0.0 <total>
+
+# CHECK: [83] Code Region - G84
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 403
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.49
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456
+
+# CHECK: [0,0] DeeER.. stp s1, s2, [x27], #248
+# CHECK-NEXT: [0,1] .D=eeER stp d1, d2, [x27], #496
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp s1, s2, [x27], #248
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 stp d1, d2, [x27], #496
+# CHECK-NEXT: 1 1.5 0.5 0.0 <total>
+
+# CHECK: [84] Code Region - G85
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 903
+# CHECK-NEXT: Total uOps: 1400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.55
+# CHECK-NEXT: IPC: 0.55
+# CHECK-NEXT: Block RThroughput: 9.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. .. stp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] .D=eeER .. stp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,2] . D==eeER .. stp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,3] . D===eeER. stp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,4] . D====eER stp w1, w2, [x27], #248
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 stp s1, s2, [x27, #248]!
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stp d1, d2, [x27, #496]!
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 stp q1, q2, [x27, #992]!
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 stp w1, w2, [x27], #248
+# CHECK-NEXT: 1 3.0 0.2 0.0 <total>
+
+# CHECK: [85] Code Region - G86
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeER . . stp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] D=eER. . stp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,2] .D=eER . stp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,3] .D==eER. str b1, [x27], #254
+# CHECK-NEXT: [0,4] . D==eER str h1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 stp w1, w2, [x27, #248]!
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 stp x1, x2, [x27, #496]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 str b1, [x27], #254
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 str h1, [x27], #254
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [86] Code Region - G87
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeER . . str s1, [x27], #254
+# CHECK-NEXT: [0,1] D=eER. . str d1, [x27], #254
+# CHECK-NEXT: [0,2] .D=eER . str q1, [x27], #254
+# CHECK-NEXT: [0,3] .D==eER. str b1, [x27, #254]!
+# CHECK-NEXT: [0,4] . D==eER str h1, [x27, #254]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 str d1, [x27], #254
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str q1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 str b1, [x27, #254]!
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 str h1, [x27, #254]!
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [87] Code Region - G88
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeER . . str s1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eER. . str d1, [x27, #254]!
+# CHECK-NEXT: [0,2] .D=eER . str q1, [x27, #254]!
+# CHECK-NEXT: [0,3] .D==eER. str w1, [x27], #254
+# CHECK-NEXT: [0,4] . D==eER str x1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 str d1, [x27, #254]!
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str q1, [x27, #254]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 str w1, [x27], #254
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 str x1, [x27], #254
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [88] Code Region - G89
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 503
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.99
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeER . . str w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eER. . str x1, [x27, #254]!
+# CHECK-NEXT: [0,2] .D=eER . strb w1, [x27], #254
+# CHECK-NEXT: [0,3] .D==eER. strb w1, [x27, #254]!
+# CHECK-NEXT: [0,4] . D==eER strh w1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 str x1, [x27, #254]!
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 strb w1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 strb w1, [x27, #254]!
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 strh w1, [x27], #254
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [89] Code Region - G90
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 100
+# CHECK-NEXT: Total Cycles: 103
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 1.94
+# CHECK-NEXT: IPC: 0.97
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123
+
+# CHECK: [0,0] DeER strh w1, [x27, #254]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 strh w1, [x27, #254]!
+
+# CHECK: [90] Code Region - G91
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 404
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 4
+# CHECK-NEXT: uOps Per Cycle: 0.99
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeeeER. ldr x1, [x27], #254
+# CHECK-NEXT: [0,1] D=eeeeER ldr x2, [x1], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldr x2, [x1], #254
+# CHECK-NEXT: 1 1.5 0.5 0.0 <total>
More information about the llvm-commits
mailing list